Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
8028 hidnplayr 1
#ifndef MINIMP3_H
2
#define MINIMP3_H
3
/*
4
    https://github.com/lieff/minimp3
5
    To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
6
    This software is distributed without any warranty.
7
    See <http://creativecommons.org/publicdomain/zero/1.0/>.
8
*/
9
#include <stdint.h>
10
 
11
#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)
12
 
13
/*
 * Per-frame information record; a pointer to it is the last parameter of
 * mp3dec_decode_frame().
 *
 * NOTE(review): the field meanings are not established in this part of the
 * file. Judging by the names they presumably hold the frame size and offset
 * in bytes, the channel count, the sample rate in Hz, the MPEG layer and the
 * bitrate in kbit/s -- confirm against the decoder implementation.
 */
typedef struct
{
    int frame_bytes, frame_offset, channels, hz, layer, bitrate_kbps;
} mp3dec_frame_info_t;
17
 
18
/*
 * Decoder state object, passed to mp3dec_init() and mp3dec_decode_frame().
 *
 * mdct_overlap / qmf_state - float work buffers kept inside the state
 *                            (presumably carried from one frame to the next
 *                            -- confirm against the decoder implementation).
 * reserv, free_format_bytes - integer bookkeeping fields.
 * header                    - 4 bytes, the same size as HDR_SIZE.
 * reserv_buf                - 511 bytes, the same value as
 *                             MAX_BITRESERVOIR_BYTES.
 */
typedef struct
{
    float mdct_overlap[2][9*32], qmf_state[15*2*32];
    int reserv, free_format_bytes;
    unsigned char header[4], reserv_buf[511];
} mp3dec_t;
24
 
25
#ifdef __cplusplus
26
extern "C" {
27
#endif /* __cplusplus */
28
 
29
void mp3dec_init(mp3dec_t *dec);
30
#ifndef MINIMP3_FLOAT_OUTPUT
31
typedef int16_t mp3d_sample_t;
32
#else /* MINIMP3_FLOAT_OUTPUT */
33
typedef float mp3d_sample_t;
34
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples);
35
#endif /* MINIMP3_FLOAT_OUTPUT */
36
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info);
37
 
38
#ifdef __cplusplus
39
}
40
#endif /* __cplusplus */
41
 
42
#endif /* MINIMP3_H */
43
#if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD)
44
#define _MINIMP3_IMPLEMENTATION_GUARD
45
 
46
#include 
47
#include 
48
 
49
/* Largest free-format frame size, in bytes, used by this decoder. */
#define MAX_FREE_FORMAT_FRAME_SIZE  2304    /* more than ISO spec's */
#ifndef MAX_FRAME_SYNC_MATCHES
#define MAX_FRAME_SYNC_MATCHES      10
#endif /* MAX_FRAME_SYNC_MATCHES */

#define MAX_L3_FRAME_PAYLOAD_BYTES  MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */

#define MAX_BITRESERVOIR_BYTES      511
#define SHORT_BLOCK_TYPE            2
#define STOP_BLOCK_TYPE             3
#define MODE_MONO                   3
#define MODE_JOINT_STEREO           1
#define HDR_SIZE                    4

/*
 * Field accessors for a 4-byte frame header; h is a pointer to (or array of)
 * the header bytes. The argument is parenthesised in every expansion so that
 * pointer expressions such as (buf + offset) can be passed safely.
 */
#define HDR_IS_MONO(h)              ((((h)[3]) & 0xC0) == 0xC0)
#define HDR_IS_MS_STEREO(h)         ((((h)[3]) & 0xE0) == 0x60)
#define HDR_IS_FREE_FORMAT(h)       ((((h)[2]) & 0xF0) == 0)
#define HDR_IS_CRC(h)               (!(((h)[1]) & 1))
#define HDR_TEST_PADDING(h)         (((h)[2]) & 0x2)
#define HDR_TEST_MPEG1(h)           (((h)[1]) & 0x8)
#define HDR_TEST_NOT_MPEG25(h)      (((h)[1]) & 0x10)
#define HDR_TEST_I_STEREO(h)        (((h)[3]) & 0x10)
#define HDR_TEST_MS_STEREO(h)       (((h)[3]) & 0x20)
#define HDR_GET_STEREO_MODE(h)      ((((h)[3]) >> 6) & 3)
#define HDR_GET_STEREO_MODE_EXT(h)  ((((h)[3]) >> 4) & 3)
#define HDR_GET_LAYER(h)            ((((h)[1]) >> 1) & 3)
#define HDR_GET_BITRATE(h)          (((h)[2]) >> 4)
#define HDR_GET_SAMPLE_RATE(h)      ((((h)[2]) >> 2) & 3)
#define HDR_GET_MY_SAMPLE_RATE(h)   (HDR_GET_SAMPLE_RATE(h) + ((((h)[1] >> 3) & 1) + (((h)[1] >> 4) & 1))*3)
#define HDR_IS_FRAME_576(h)         (((h)[1] & 14) == 2)
#define HDR_IS_LAYER_1(h)           (((h)[1] & 6) == 6)

/* Parenthesised so the negative literal cannot bind to a neighbouring operator. */
#define BITS_DEQUANTIZER_OUT        (-1)
#define MAX_SCF                     (255 + BITS_DEQUANTIZER_OUT*4 - 210)
#define MAX_SCFI                    ((MAX_SCF + 3) & ~3)

/* NOTE: both macros evaluate one of their arguments twice; avoid side effects. */
#define MINIMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
#define MINIMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
86
 
87
#if !defined(MINIMP3_NO_SIMD)
88
 
89
#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__))
90
/* x64 always have SSE2, arm64 always have neon, no need for generic code */
91
#define MINIMP3_ONLY_SIMD
92
#endif /* SIMD checks... */
93
 
94
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
95
#if defined(_MSC_VER)
96
#include 
97
#endif /* defined(_MSC_VER) */
98
#include 
99
#define HAVE_SSE 1
100
#define HAVE_SIMD 1
101
#define VSTORE _mm_storeu_ps
102
#define VLD _mm_loadu_ps
103
#define VSET _mm_set1_ps
104
#define VADD _mm_add_ps
105
#define VSUB _mm_sub_ps
106
#define VMUL _mm_mul_ps
107
#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))
108
#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))
109
#define VMUL_S(x, s)  _mm_mul_ps(x, _mm_set1_ps(s))
110
#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))
111
typedef __m128 f4;
112
#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
113
#define minimp3_cpuid __cpuid
114
#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
115
/*
 * Executes the x86 CPUID instruction for leaf InfoType and stores the
 * resulting EAX, EBX, ECX and EDX values in CPUInfo[0..3].
 *
 * In position-independent builds EBX is not named as an asm output; the
 * sequences below preserve the caller's EBX/RBX and hand the CPUID EBX
 * result back through a general register operand instead (presumably
 * because EBX is reserved as the PIC base register on 32-bit x86).
 */
static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
{
#if defined(__PIC__)
    __asm__ __volatile__(
#if defined(__x86_64__)
        "push %%rbx\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
        "pop  %%rbx\n"
#else /* defined(__x86_64__) */
        "xchgl %%ebx, %1\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
#endif /* defined(__x86_64__) */
        : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#else /* defined(__PIC__) */
    __asm__ __volatile__(
        "cpuid"
        : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#endif /* defined(__PIC__)*/
}
138
#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
139
/*
 * Reports whether the SIMD code path may be used on this x86 CPU.
 *
 * Returns non-zero when SSE2 is available (CPUID leaf 1, EDX bit 26) and 0
 * otherwise. With MINIMP3_ONLY_SIMD defined the answer is always 1 and no
 * probing happens. The CPUID result is cached in a function-local static.
 *
 * With MINIMP3_TEST defined, the function starts returning 0 once it has
 * been called more than 101 times.
 */
static int have_simd(void)
{
#ifdef MINIMP3_ONLY_SIMD
    return 1;
#else /* MINIMP3_ONLY_SIMD */
    /* 0 means "not probed yet"; otherwise the cached answer plus one. */
    static int g_have_simd;
    int CPUInfo[4];
#ifdef MINIMP3_TEST
    static int g_counter;
    if (g_counter++ > 100)
        return 0;
#endif /* MINIMP3_TEST */
    if (!g_have_simd)
    {
        minimp3_cpuid(CPUInfo, 0);
        g_have_simd = 1;
        if (CPUInfo[0] > 0) /* leaf 1 is only queried when the CPU reports it */
        {
            minimp3_cpuid(CPUInfo, 1);
            g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */
        }
    }
    return g_have_simd - 1;
#endif /* MINIMP3_ONLY_SIMD */
}
164
#elif defined(__ARM_NEON) || defined(__aarch64__)
165
#include 
166
#define HAVE_SSE 0
167
#define HAVE_SIMD 1
168
#define VSTORE vst1q_f32
169
#define VLD vld1q_f32
170
#define VSET vmovq_n_f32
171
#define VADD vaddq_f32
172
#define VSUB vsubq_f32
173
#define VMUL vmulq_f32
174
#define VMAC(a, x, y) vmlaq_f32(a, x, y)
175
#define VMSB(a, x, y) vmlsq_f32(a, x, y)
176
#define VMUL_S(x, s)  vmulq_f32(x, vmovq_n_f32(s))
177
#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
178
typedef float32x4_t f4;
179
/*
 * NEON variant of the SIMD availability check: always reports 1, because no
 * run-time detection is implemented for this branch.
 */
static int have_simd(void)
{   /* TODO: detect neon for !MINIMP3_ONLY_SIMD */
    return 1;
}
183
#else /* SIMD checks... */
184
#define HAVE_SSE 0
185
#define HAVE_SIMD 0
186
#ifdef MINIMP3_ONLY_SIMD
187
#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled
188
#endif /* MINIMP3_ONLY_SIMD */
189
#endif /* SIMD checks... */
190
#else /* !defined(MINIMP3_NO_SIMD) */
191
#define HAVE_SIMD 0
192
#endif /* !defined(MINIMP3_NO_SIMD) */
193
 
194
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__)
195
#define HAVE_ARMV6 1
196
static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a)
197
{
198
    int32_t x = 0;
199
    __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
200
    return x;
201
}
202
#endif
203
 
204
/*
 * Bit reader state.
 *
 * buf   - start of the byte buffer being read
 * pos   - current read position, in bits from the start of buf
 * limit - total number of readable bits (bs_init() stores bytes*8)
 */
typedef struct
{
    const uint8_t *buf;
    int pos, limit;
} bs_t;
209
 
210
/*
 * Layer I/II per-frame scale information.
 *
 * scf          - dequantised scale factors, three per bitalloc entry
 * total_bands  - number of coded subbands
 * stereo_bands - number of subbands that carry a separate second channel
 * bitalloc     - allocation code per (subband, channel), interleaved as
 *                [2*band + channel]
 * scfcod       - scale factor coding selector per bitalloc entry
 */
typedef struct
{
    float scf[3*64];
    uint8_t total_bands, stereo_bands, bitalloc[64], scfcod[64];
} L12_scale_info;
215
 
216
/*
 * One run of consecutive Layer I/II subbands that share a bit-allocation
 * code table.
 *
 * tab_offset     - offset of the run's code table inside g_bitalloc_code_tab
 * code_tab_width - number of bits read per allocation code
 * band_count     - number of subbands covered by the run
 */
typedef struct
{
    uint8_t tab_offset, code_tab_width, band_count;
} L12_subband_alloc_t;
220
 
221
/*
 * Layer III side information for one granule of one channel, as filled in by
 * L3_read_side_info().
 *
 * sfbtab                   - scale factor band width table selected for the
 *                            granule's block type and sample rate
 * n_long_sfb / n_short_sfb - number of long / short scale factor bands
 * scfsi                    - 4-bit scale factor selection information
 * The remaining members hold the bitstream fields of the same name.
 */
typedef struct
{
    const uint8_t *sfbtab;
    uint16_t part_23_length, big_values, scalefac_compress;
    uint8_t global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb;
    uint8_t table_select[3], region_count[3], subblock_gain[3];
    uint8_t preflag, scalefac_scale, count1_table, scfsi;
} L3_gr_info_t;
229
 
230
typedef struct
231
{
232
    bs_t bs;
233
    uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES];
234
    L3_gr_info_t gr_info[4];
235
    float grbuf[2][576], scf[40], syn[18 + 15][2*32];
236
    uint8_t ist_pos[2][39];
237
} mp3dec_scratch_t;
238
 
239
/*
 * Prepares a bit reader over a byte buffer.
 *
 * bs    - reader state to fill in
 * data  - first byte of the bitstream
 * bytes - number of readable bytes at data
 *
 * The read position starts at bit 0; the limit is stored in bits.
 */
static void bs_init(bs_t *bs, const uint8_t *data, int bytes)
{
    bs->buf   = data;
    bs->pos   = 0;
    bs->limit = bytes*8;
}
245
 
246
/*
 * Reads the next n bits from the stream, most significant bit first, and
 * advances the read position by n.
 *
 * Returns the bits as an unsigned value. If the read would go past
 * bs->limit, 0 is returned without touching the buffer, but bs->pos has
 * already been advanced, so an overrun remains visible afterwards as
 * bs->pos > bs->limit.
 */
static uint32_t get_bits(bs_t *bs, int n)
{
    uint32_t next, cache = 0, s = bs->pos & 7;  /* s: bit offset inside the first byte */
    int shl = n + s;
    const uint8_t *p = bs->buf + (bs->pos >> 3);
    if ((bs->pos += n) > bs->limit)
        return 0;
    next = *p++ & (255 >> s);                   /* drop the bits already consumed */
    while ((shl -= 8) > 0)
    {
        cache |= next << shl;
        next = *p++;
    }
    /* shl <= 0 here: only the top (8 + shl) bits of the last byte belong to the result */
    return cache | (next >> -shl);
}
261
 
262
/*
 * Checks whether the 4 bytes at h look like a frame header.
 *
 * Returns non-zero when h[0] is 0xFF, h[1] either has its top four bits set
 * or equals 0xE2/0xE3, the layer field is not 0, the bitrate index is not 15
 * and the sample rate index is not 3.
 */
static int hdr_valid(const uint8_t *h)
{
    return h[0] == 0xff &&
        ((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) &&
        (HDR_GET_LAYER(h) != 0) &&
        (HDR_GET_BITRATE(h) != 15) &&
        (HDR_GET_SAMPLE_RATE(h) != 3);
}
270
 
271
/*
 * Tests whether header h2 is valid and describes the same stream format as
 * header h1.
 *
 * The headers match when byte 1 is equal apart from its lowest bit, the
 * sample rate bits of byte 2 (mask 0x0C) are equal, and both headers agree
 * on being free-format or not.
 */
static int hdr_compare(const uint8_t *h1, const uint8_t *h2)
{
    return hdr_valid(h2) &&
        ((h1[1] ^ h2[1]) & 0xFE) == 0 &&
        ((h1[2] ^ h2[2]) & 0x0C) == 0 &&
        !(HDR_IS_FREE_FORMAT(h1) ^ HDR_IS_FREE_FORMAT(h2));
}
278
 
279
/*
 * Returns the bitrate in kbit/s encoded in header h, or 0 for bitrate
 * index 0 (free format).
 *
 * The table stores half the bitrate so that every entry fits in a byte. It
 * is indexed by [MPEG-1 flag][layer field - 1][bitrate index]; the header
 * must already satisfy hdr_valid(), which excludes layer field 0 and
 * bitrate index 15.
 */
static unsigned hdr_bitrate_kbps(const uint8_t *h)
{
    static const uint8_t halfrate[2][3][15] = {
        { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },
        { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },
    };
    return 2*halfrate[!!HDR_TEST_MPEG1(h)][HDR_GET_LAYER(h) - 1][HDR_GET_BITRATE(h)];
}
287
 
288
/*
 * Returns the sample rate in Hz encoded in header h.
 *
 * The base rate comes from the 2-bit sample rate index and is halved once
 * when the MPEG-1 flag is clear and once more when the "not MPEG-2.5" flag
 * is clear. The index must not be 3 (rejected by hdr_valid()).
 */
static unsigned hdr_sample_rate_hz(const uint8_t *h)
{
    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
    return g_hz[HDR_GET_SAMPLE_RATE(h)] >> (int)!HDR_TEST_MPEG1(h) >> (int)!HDR_TEST_NOT_MPEG25(h);
}
293
 
294
/*
 * Returns the number of samples per channel in the frame described by h:
 * 384 for Layer I, 576 when HDR_IS_FRAME_576() holds, otherwise 1152.
 */
static unsigned hdr_frame_samples(const uint8_t *h)
{
    return HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)HDR_IS_FRAME_576(h));
}
298
 
299
/*
 * Returns the frame size in bytes (without padding) implied by header h.
 *
 * The size is samples * kbps * 125 / hz, rounded down to a multiple of 4 for
 * Layer I. When that computes to 0 (bitrate index 0, i.e. free format) the
 * caller-supplied free_format_size is returned instead.
 */
static int hdr_frame_bytes(const uint8_t *h, int free_format_size)
{
    int frame_bytes = hdr_frame_samples(h)*hdr_bitrate_kbps(h)*125/hdr_sample_rate_hz(h);
    if (HDR_IS_LAYER_1(h))
    {
        frame_bytes &= ~3; /* slot align */
    }
    return frame_bytes ? frame_bytes : free_format_size;
}
308
 
309
/*
 * Returns the number of padding bytes for the frame described by h:
 * 0 when the padding bit is clear, otherwise 4 for Layer I and 1 for the
 * other layers.
 */
static int hdr_padding(const uint8_t *h)
{
    return HDR_TEST_PADDING(h) ? (HDR_IS_LAYER_1(h) ? 4 : 1) : 0;
}
313
 
314
#ifndef MINIMP3_ONLY_MP3
315
/*
 * Selects the Layer I/II bit-allocation layout for a frame.
 *
 * hdr - 4-byte frame header
 * sci - receives total_bands and stereo_bands
 *
 * Returns a pointer to the first entry of a static array of subband runs
 * (see L12_subband_alloc_t); the runs together cover at least total_bands
 * subbands.
 *
 * stereo_bands is 0 for mono, (mode extension * 4 + 4) for joint stereo and
 * 32 otherwise, clamped to total_bands.
 */
static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci)
{
    const L12_subband_alloc_t *alloc;
    int mode = HDR_GET_STEREO_MODE(hdr);
    int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;

    if (HDR_IS_LAYER_1(hdr))
    {
        static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };
        alloc = g_alloc_L1;
        nbands = 32;
    } else if (!HDR_TEST_MPEG1(hdr))
    {
        static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
        alloc = g_alloc_L2M2;
        nbands = 30;
    } else
    {
        static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
        int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
        /* per-channel bitrate: halved unless the stream is mono */
        unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO);
        if (!kbps) /* free-format */
        {
            kbps = 192;
        }

        alloc = g_alloc_L2M1;
        nbands = 27;
        if (kbps < 56)
        {
            static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
            alloc = g_alloc_L2M1_lowrate;
            nbands = sample_rate_idx == 2 ? 12 : 8;
        } else if (kbps >= 96 && sample_rate_idx != 1)
        {
            nbands = 30;
        }
    }

    sci->total_bands = (uint8_t)nbands;
    sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands);

    return alloc;
}
359
 
360
/*
 * Reads and dequantises Layer I/II scale factors.
 *
 * bs     - bit reader
 * pba    - allocation code for each of the `bands` entries
 * scfcod - scale factor coding selector for each entry
 * bands  - number of entries to process
 * scf    - output; exactly three floats are written per entry
 *
 * For an entry with allocation 0 all three outputs are 0. Otherwise a 3-bit
 * mask derived from scfcod decides at which of the three positions a new
 * 6-bit scale factor index is read; positions without a new index repeat the
 * previous value.
 */
static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf)
{
    static const float g_deq_L12[18*3] = {
#define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
        DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)
    };
    int i, m;
    for (i = 0; i < bands; i++)
    {
        float s = 0;
        int ba = *pba++;
        /* the first position (bit value 4) is always read when the band is allocated */
        int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;
        for (m = 4; m; m >>= 1)
        {
            if (mask & m)
            {
                int b = get_bits(bs, 6);
                /* table row chosen by ba, column by b % 3, scaled by a power of two from b / 3 */
                s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3);
            }
            *scf++ = s;
        }
    }
}
383
 
384
/*
 * Reads the Layer I/II bit allocation, scale factor selectors and scale
 * factors of one frame into sci.
 *
 * hdr - 4-byte frame header
 * bs  - bit reader positioned at the bit allocation data
 * sci - output
 *
 * Entries of bitalloc/scfcod are interleaved as [2*band + channel]. Bands at
 * or above stereo_bands read one allocation code which is used for both
 * channels while the scale factors are read; afterwards the second channel's
 * allocation for those bands is cleared.
 */
static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci)
{
    static const uint8_t g_bitalloc_code_tab[] = {
        0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
        0,17,18, 3,19,4,5,16,
        0,17,18,16,
        0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
        0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
        0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
    };
    const L12_subband_alloc_t *subband_alloc = L12_subband_alloc_table(hdr, sci);

    int i, k = 0, ba_bits = 0;
    const uint8_t *ba_code_tab = g_bitalloc_code_tab;

    for (i = 0; i < sci->total_bands; i++)
    {
        uint8_t ba;
        if (i == k)
        {
            /* entering the next run of subbands: switch code width and table */
            k += subband_alloc->band_count;
            ba_bits = subband_alloc->code_tab_width;
            ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset;
            subband_alloc++;
        }
        ba = ba_code_tab[get_bits(bs, ba_bits)];
        sci->bitalloc[2*i] = ba;
        if (i < sci->stereo_bands)
        {
            ba = ba_code_tab[get_bits(bs, ba_bits)];
        }
        /* mono streams (stereo_bands == 0) leave the second channel unallocated */
        sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0;
    }

    for (i = 0; i < 2*sci->total_bands; i++)
    {
        /* Layer I has no selector in the stream and uses 2; unallocated entries get 6 */
        sci->scfcod[i] = sci->bitalloc[i] ? HDR_IS_LAYER_1(hdr) ? 2 : get_bits(bs, 2) : 6;
    }

    L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);

    for (i = sci->stereo_bands; i < sci->total_bands; i++)
    {
        sci->bitalloc[2*i + 1] = 0;
    }
}
431
 
432
/*
 * Reads four groups of quantised Layer I/II samples into grbuf.
 *
 * grbuf      - output; channel 0 of band b starts at grbuf[18*b], channel 1
 *              at grbuf[576 + 18*b]
 * bs         - bit reader
 * sci        - allocation information for the frame
 * group_size - samples per band and channel in one group
 *
 * Returns group_size*4, the number of samples written per band and channel.
 * Values are stored as unscaled integers converted to float. Entries with
 * allocation 0 are not written.
 */
static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size)
{
    int i, j, k, choff = 576;
    for (j = 0; j < 4; j++)
    {
        float *dst = grbuf + group_size*j;
        for (i = 0; i < 2*sci->total_bands; i++)
        {
            int ba = sci->bitalloc[i];
            if (ba != 0)
            {
                if (ba < 17)
                {
                    /* plain coding: ba bits per sample, re-centred around zero */
                    int half = (1 << (ba - 1)) - 1;
                    for (k = 0; k < group_size; k++)
                    {
                        dst[k] = (float)((int)get_bits(bs, ba) - half);
                    }
                } else
                {
                    /* grouped coding: one code word holds group_size base-mod digits */
                    unsigned mod = (2 << (ba - 17)) + 1;    /* 3, 5, 9 */
                    unsigned code = get_bits(bs, mod + 2 - (mod >> 3));  /* 5, 7, 10 */
                    for (k = 0; k < group_size; k++, code /= mod)
                    {
                        dst[k] = (float)((int)(code % mod - mod/2));
                    }
                }
            }
            /* alternate between the two channels: +576, then back and 18 further */
            dst += choff;
            choff = 18 - choff;
        }
    }
    return group_size*4;
}
466
 
467
/*
 * Applies scale factors to 12 samples of every band in both channels.
 *
 * sci - band counts for the frame
 * scf - scale factors; scf[0] scales channel 0 and scf[3] channel 1 of the
 *       current band, advancing by 6 per band
 * dst - sample buffer; channel 1 lives 576 floats after channel 0
 *
 * Before scaling, the bands from stereo_bands up to total_bands are copied
 * from channel 0 to channel 1.
 */
static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst)
{
    int i, k;
    memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
    for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)
    {
        for (k = 0; k < 12; k++)
        {
            dst[k + 0]   *= scf[0];
            dst[k + 576] *= scf[3];
        }
    }
}
480
#endif /* MINIMP3_ONLY_MP3 */
481
 
482
/*
 * Parses the Layer III side information of one frame.
 *
 * bs  - bit reader positioned at the start of the side information
 * gr  - output array; one entry is filled per granule/channel combination
 *       (1 or 2 for non-MPEG-1 streams, 2 or 4 for MPEG-1)
 * hdr - 4-byte frame header
 *
 * Returns main_data_begin on success, or -1 when the data is inconsistent:
 * big_values above 288, a window-switching granule with block type 0, or
 * part_23 lengths that exceed the bits available in this frame plus
 * main_data_begin bytes.
 */
static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)
{
    static const uint8_t g_scf_long[8][23] = {
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
        { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
        { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
        { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
        { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
    };
    static const uint8_t g_scf_short[8][40] = {
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
        { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
        { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
    };
    static const uint8_t g_scf_mixed[8][40] = {
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
        { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
        { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
        { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
    };

    unsigned tables, scfsi = 0;
    int main_data_begin, part_23_sum = 0;
    /* row of the band tables for this sample rate */
    int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
    int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;

    if (HDR_TEST_MPEG1(hdr))
    {
        gr_count *= 2;
        main_data_begin = get_bits(bs, 9);
        scfsi = get_bits(bs, 7 + gr_count);
    } else
    {
        /* the low gr_count bits of this read are discarded */
        main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;
    }

    do
    {
        if (HDR_IS_MONO(hdr))
        {
            scfsi <<= 4;
        }
        gr->part_23_length = (uint16_t)get_bits(bs, 12);
        part_23_sum += gr->part_23_length;
        gr->big_values = (uint16_t)get_bits(bs,  9);
        if (gr->big_values > 288)
        {
            return -1;
        }
        gr->global_gain = (uint8_t)get_bits(bs, 8);
        gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);
        /* defaults for long blocks; overridden below for short/mixed blocks */
        gr->sfbtab = g_scf_long[sr_idx];
        gr->n_long_sfb  = 22;
        gr->n_short_sfb = 0;
        if (get_bits(bs, 1))
        {
            /* window switching: explicit block type, two table selectors */
            gr->block_type = (uint8_t)get_bits(bs, 2);
            if (!gr->block_type)
            {
                return -1;
            }
            gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);
            gr->region_count[0] = 7;
            gr->region_count[1] = 255;
            if (gr->block_type == SHORT_BLOCK_TYPE)
            {
                scfsi &= 0x0F0F;
                if (!gr->mixed_block_flag)
                {
                    gr->region_count[0] = 8;
                    gr->sfbtab = g_scf_short[sr_idx];
                    gr->n_long_sfb = 0;
                    gr->n_short_sfb = 39;
                } else
                {
                    gr->sfbtab = g_scf_mixed[sr_idx];
                    gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;
                    gr->n_short_sfb = 30;
                }
            }
            tables = get_bits(bs, 10);
            tables <<= 5; /* align with the three-selector layout; the third selector becomes 0 */
            gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);
            gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);
            gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);
        } else
        {
            gr->block_type = 0;
            gr->mixed_block_flag = 0;
            tables = get_bits(bs, 15);
            gr->region_count[0] = (uint8_t)get_bits(bs, 4);
            gr->region_count[1] = (uint8_t)get_bits(bs, 3);
            gr->region_count[2] = 255;
        }
        gr->table_select[0] = (uint8_t)(tables >> 10);
        gr->table_select[1] = (uint8_t)((tables >> 5) & 31);
        gr->table_select[2] = (uint8_t)((tables) & 31);
        /* non-MPEG-1 streams carry no preflag bit; it is derived from scalefac_compress */
        gr->preflag = HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr->scalefac_compress >= 500);
        gr->scalefac_scale = (uint8_t)get_bits(bs, 1);
        gr->count1_table = (uint8_t)get_bits(bs, 1);
        gr->scfsi = (uint8_t)((scfsi >> 12) & 15);
        scfsi <<= 4;
        gr++;
    } while(--gr_count);

    if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)
    {
        return -1;
    }

    return main_data_begin;
}
606
 
607
/*
 * Reads the Layer III scale factors of one granule/channel.
 *
 * scf       - output scale factor bytes; three zero bytes are written after
 *             the last band
 * ist_pos   - per-band bytes that are kept between calls: they are the copy
 *             source when a partition is flagged as shared, and are
 *             rewritten whenever a partition is read
 * scf_size  - bit width of one scale factor in each of up to 4 partitions
 * scf_count - number of bands in each partition; a 0 entry ends the list
 * bitbuf    - bit reader
 * scfsi     - selection bits; bit 3 applies to the first partition and the
 *             value is shifted left by one after each partition. A negative
 *             value additionally makes the all-ones code of a partition be
 *             stored in ist_pos as (uint8_t)-1.
 */
static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)
{
    int i, k;
    for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2)
    {
        int cnt = scf_count[i];
        if (scfsi & 8)
        {
            /* partition is shared: reuse the values stored in ist_pos */
            memcpy(scf, ist_pos, cnt);
        } else
        {
            int bits = scf_size[i];
            if (!bits)
            {
                memset(scf, 0, cnt);
                memset(ist_pos, 0, cnt);
            } else
            {
                int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
                for (k = 0; k < cnt; k++)
                {
                    int s = get_bits(bitbuf, bits);
                    ist_pos[k] = (s == max_scf ? -1 : s);
                    scf[k] = s;
                }
            }
        }
        ist_pos += cnt;
        scf += cnt;
    }
    scf[0] = scf[1] = scf[2] = 0;
}
639
 
640
/*
 * Returns y * 2^(-exp_q2/4), i.e. exp_q2 is an exponent in quarter steps.
 *
 * The table holds 2^(-30 - k/4) for k = 0..3; combined with the integer
 * factor (2^30 >> e/4) one pass scales by 2^(-e/4). Exponents above 120 are
 * handled in several passes of at most 120 each.
 *
 * Assumes exp_q2 >= 0: a negative value would lead to a negative shift count.
 */
static float L3_ldexp_q2(float y, int exp_q2)
{
    static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };
    int e;
    do
    {
        e = MINIMP3_MIN(30*4, exp_q2);
        y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2));
    } while ((exp_q2 -= e) > 0);
    return y;
}
651
 
652
/*
 * Decodes the scale factors of one Layer III granule/channel and converts
 * them into per-band float gains.
 *
 * hdr     - 4-byte frame header
 * ist_pos - per-band bytes passed through to L3_read_scalefactors()
 * bs      - bit reader positioned at the scale factor data
 * gr      - side information of the granule/channel
 * scf     - output; one gain per band (n_long_sfb + n_short_sfb entries)
 * ch      - channel index; only used to pick the intensity-stereo layout of
 *           the second channel in non-MPEG-1 streams
 */
static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)
{
    static const uint8_t g_scf_partitions[3][28] = {
        { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
        { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
        { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
    };
    /* row 0: long blocks, row 1: mixed blocks, row 2: short blocks */
    const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];
    uint8_t scf_size[4], iscf[40];
    int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi;
    float gain;

    if (HDR_TEST_MPEG1(hdr))
    {
        /* MPEG-1: scalefac_compress selects two bit widths via a lookup */
        static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
        int part = g_scfc_decode[gr->scalefac_compress];
        scf_size[1] = scf_size[0] = (uint8_t)(part >> 2);
        scf_size[3] = scf_size[2] = (uint8_t)(part & 3);
    } else
    {
        /* otherwise: scalefac_compress is a mixed-radix number of four bit widths */
        static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
        int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;
        sfc = gr->scalefac_compress >> ist;
        for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4)
        {
            for (modprod = 1, i = 3; i >= 0; i--)
            {
                scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]);
                modprod *= g_mod[k + i];
            }
        }
        scf_partition += k;
        scfsi = -16; /* negative: no partition is shared and the all-ones code is marked in ist_pos */
    }
    L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);

    if (gr->n_short_sfb)
    {
        int sh = 3 - scf_shift;
        for (i = 0; i < gr->n_short_sfb; i += 3)
        {
            iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh;
            iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh;
            iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh;
        }
    } else if (gr->preflag)
    {
        static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
        for (i = 0; i < 10; i++)
        {
            iscf[11 + i] += g_preamp[i];
        }
    }

    gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
    /* start from 2^(MAX_SCFI/4) so the exponent handed to L3_ldexp_q2() is not negative */
    gain = L3_ldexp_q2(1 << (MAX_SCFI/4),  MAX_SCFI - gain_exp);
    for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++)
    {
        scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);
    }
}
713
 
714
/*
 * Lookup table of sign(x)*|x|^(4/3).
 *
 * Entries 0..15 hold the negated values for magnitudes 0..15; entries
 * 16..144 hold the values for x = 0..128, so g_pow43[16 + x] is x^(4/3).
 */
static const float g_pow43[129 + 16] = {
    0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,
    0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f
};
718
 
719
static float L3_pow_43(int x)
720
{
721
    float frac;
722
    int sign, mult = 256;
723
 
724
    if (x < 129)
725
    {
726
        return g_pow43[16 + x];
727
    }
728
 
729
    if (x < 1024)
730
    {
731
        mult = 16;
732
        x <<= 3;
733
    }
734
 
735
    sign = 2*x & 64;
736
    frac = (float)((x & 63) - sign) / ((x & ~63) + sign);
737
    return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult;
738
}
739
 
740
/*
 * Decodes the Huffman-coded spectral values of one granule/channel into dst,
 * already dequantized and multiplied by the per-band scale factors in scf.
 *
 *   dst            - output samples; written sequentially
 *   bs             - bitstream; only buf/pos are read, pos is set to
 *                    layer3gr_limit on return
 *   gr_info        - supplies big_values, table_select[], region_count[],
 *                    sfbtab and count1_table
 *   scf            - one gain per scale-factor band, consumed in step with sfbtab
 *   layer3gr_limit - bit position at which this granule's data ends
 *
 * The function keeps a private 32-bit, MSB-aligned bit cache (bs_cache) that is
 * refilled byte-wise from bs_next_ptr; bs_sh tracks where the next byte goes.
 * Two phases: the "big values" pairs (per-region codebook, optional linbits
 * escape), then the count1 quadruples whose values are only 0 or +-one.
 */
static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)
740
{
741
    /* All pair codebooks packed back to back; tabindex[] gives each table's start offset. */
    static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
742
        785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
743
        -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
744
        -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
745
        -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
746
        -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
747
        -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
748
        -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
749
        -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
750
        -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
751
        -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
752
        -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
753
        -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
754
        -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
755
        -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
756
        -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
757
    /* Codebooks for the count1 (quadruple) region; selected by gr_info->count1_table. */
    static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 };
758
    static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };
759
    /* Start offset of each pair codebook inside tabs[], indexed by table_select. */
    static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
760
    /* Number of extra "linbits" read for an escape value (15), indexed by table_select. */
    static const uint8_t g_linbits[] =  { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };
761
 
762
/* PEEK_BITS: top n bits of the cache (n must be 1..32; the argument is not parenthesized). */
#define PEEK_BITS(n)  (bs_cache >> (32 - n))
763
/* FLUSH_BITS: drop n bits from the cache and account for them in bs_sh. */
#define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); }
764
/* CHECK_BITS: refill the cache one byte at a time while there is room (bs_sh >= 0). */
#define CHECK_BITS    while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
765
/* BSPOS: bit position (relative to bs->buf) of the first unconsumed bit. */
#define BSPOS         ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)
766
 
767
    float one = 0.0f;
768
    int ireg = 0, big_val_cnt = gr_info->big_values;
769
    const uint8_t *sfb = gr_info->sfbtab;
770
    const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;
771
    /* Prime the cache with 4 bytes, pre-shifted so the current bit sits in the MSB. */
    uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);
772
    int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;
773
    bs_next_ptr += 4;
774
 
775
    /* Phase 1: big-value pairs, one region (codebook) at a time. */
    while (big_val_cnt > 0)
776
    {
777
        int tab_num = gr_info->table_select[ireg];
778
        int sfb_cnt = gr_info->region_count[ireg++];
779
        const int16_t *codebook = tabs + tabindex[tab_num];
780
        int linbits = g_linbits[tab_num];
781
        /* The two branches differ only in the escape handling; keeping linbits == 0
           separate avoids PEEK_BITS(0), which would shift by 32. */
        if (linbits)
782
        {
783
            do
784
            {
785
                /* sfb[] holds band widths in samples; each code yields a pair. */
                np = *sfb++ / 2;
786
                pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
787
                one = *scf++;
788
                do
789
                {
790
                    int j, w = 5;
791
                    int leaf = codebook[PEEK_BITS(w)];
792
                    /* Negative entry: jump to a sub-table. Low 3 bits give the number of
                       bits to peek next, the rest (negated) is the sub-table offset. */
                    while (leaf < 0)
793
                    {
794
                        FLUSH_BITS(w);
795
                        w = leaf & 7;
796
                        leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
797
                    }
798
                    /* Non-negative entry: bits 8+ = bits to consume, two low nibbles = the pair. */
                    FLUSH_BITS(leaf >> 8);
799
 
800
                    for (j = 0; j < 2; j++, dst++, leaf >>= 4)
801
                    {
802
                        int lsb = leaf & 0x0F;
803
                        if (lsb == 15)
804
                        {
805
                            /* Escape: add linbits extra bits; the sign bit follows them. */
                            lsb += PEEK_BITS(linbits);
806
                            FLUSH_BITS(linbits);
807
                            CHECK_BITS;
808
                            *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1);
809
                        } else
810
                        {
811
                            /* Top cache bit is the sign: when set, index the negated half of g_pow43. */
                            *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
812
                        }
813
                        /* A sign bit is present in the stream only for non-zero values. */
                        FLUSH_BITS(lsb ? 1 : 0);
814
                    }
815
                    CHECK_BITS;
816
                } while (--pairs_to_decode);
817
            } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
818
        } else
819
        {
820
            do
821
            {
822
                np = *sfb++ / 2;
823
                pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
824
                one = *scf++;
825
                do
826
                {
827
                    int j, w = 5;
828
                    int leaf = codebook[PEEK_BITS(w)];
829
                    while (leaf < 0)
830
                    {
831
                        FLUSH_BITS(w);
832
                        w = leaf & 7;
833
                        leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
834
                    }
835
                    FLUSH_BITS(leaf >> 8);
836
 
837
                    for (j = 0; j < 2; j++, dst++, leaf >>= 4)
838
                    {
839
                        int lsb = leaf & 0x0F;
840
                        *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
841
                        FLUSH_BITS(lsb ? 1 : 0);
842
                    }
843
                    CHECK_BITS;
844
                } while (--pairs_to_decode);
845
            } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
846
        }
847
    }
848
 
849
    /* Phase 2: count1 quadruples. big_val_cnt is <= 0 here and -big_val_cnt is the
       number of pairs left in the band where phase 1 stopped; the +1 compensates
       for the pre-decrement in RELOAD_SCALEFACTOR. */
    for (np = 1 - big_val_cnt;; dst += 4)
850
    {
851
        const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;
852
        int leaf = codebook_count1[PEEK_BITS(4)];
853
        /* Bit 3 clear: not a final entry, do a second lookup using further bits. */
        if (!(leaf & 8))
854
        {
855
            leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
856
        }
857
        FLUSH_BITS(leaf & 7);
858
        /* Stop once the code just read runs past the end of this granule's data. */
        if (BSPOS > layer3gr_limit)
859
        {
860
            break;
861
        }
862
/* Move to the next band when the current one is used up; a zero width ends the loop. */
#define RELOAD_SCALEFACTOR  if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
863
/* If value s of the quadruple is non-zero, store +-one according to the next (sign) bit. */
#define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) }
864
        RELOAD_SCALEFACTOR;
865
        DEQ_COUNT1(0);
866
        DEQ_COUNT1(1);
867
        RELOAD_SCALEFACTOR;
868
        DEQ_COUNT1(2);
869
        DEQ_COUNT1(3);
870
        CHECK_BITS;
871
    }
872
 
873
    /* Always leave the caller's bitstream exactly at the granule boundary. */
    bs->pos = layer3gr_limit;
874
}
876
 
877
/*
 * In-place mid/side to left/right conversion of n samples.
 * The second channel is located 576 floats after `left`.
 * After the call: left[i] = a + b and right[i] = a - b, where a and b are the
 * previous contents of left[i] and right[i].
 */
static void L3_midside_stereo(float *left, int n)
{
    float *right = left + 576;
    int i = 0;
#if HAVE_SIMD
    if (have_simd())
    {
        /* four samples per step; the scalar loop below handles the tail */
        while (i < n - 3)
        {
            f4 l4 = VLD(left + i);
            f4 r4 = VLD(right + i);
            VSTORE(left + i, VADD(l4, r4));
            VSTORE(right + i, VSUB(l4, r4));
            i += 4;
        }
    }
#endif /* HAVE_SIMD */
    while (i < n)
    {
        const float mid = left[i];
        const float side = right[i];
        left[i] = mid + side;
        right[i] = mid - side;
        i++;
    }
}
898
 
899
/*
 * Applies intensity-stereo gains to one band of n samples.
 * Each sample v in left[] is split into left = v*kl and right = v*kr, where
 * the right channel is located 576 floats after `left`.
 */
static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)
{
    float *right = left + 576;
    int i = 0;

    while (i < n)
    {
        const float v = left[i];
        right[i] = v*kr;
        left[i] = v*kl;
        i++;
    }
}
908
 
909
/*
 * Finds, for each of the three band slots (band index modulo 3), the highest
 * band that contains a non-zero sample.
 *
 *   right    - samples laid out band after band
 *   sfb      - width (in samples) of each band
 *   nbands   - number of bands to scan
 *   max_band - out: highest non-empty band per slot, or -1 if none
 *
 * Samples are examined in pairs, so right[k + 1] is read for every even k
 * below the band width.
 */
static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])
{
    int band;

    max_band[0] = -1;
    max_band[1] = -1;
    max_band[2] = -1;

    for (band = 0; band < nbands; right += sfb[band], band++)
    {
        int k = 0;

        /* skip over leading all-zero pairs */
        while (k < sfb[band] && right[k] == 0 && right[k + 1] == 0)
        {
            k += 2;
        }
        if (k < sfb[band])
        {
            max_band[band % 3] = band;
        }
    }
}
928
 
929
/*
 * Walks the zero-terminated band-width list `sfb` and applies joint-stereo
 * processing band by band, in place.
 *
 * A band is treated as intensity stereo when its index lies above
 * max_band[index % 3] and its position code ist_pos[] is below the limit
 * (7 when HDR_TEST_MPEG1(hdr) is true, 64 otherwise).  All other bands get
 * mid/side processing if HDR_TEST_MS_STEREO(hdr) is true and are left
 * untouched if it is not.
 *
 * mpeg2_sh is the extra shift applied to the exponent passed to
 * L3_ldexp_q2 in the non-MPEG1 case.
 */
static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)
{
    /* (left, right) gain pairs for position codes 0..6 */
    static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };
    const int mpeg1 = HDR_TEST_MPEG1(hdr) ? 1 : 0;
    const int ms_on = HDR_TEST_MS_STEREO(hdr) ? 1 : 0;
    const unsigned max_pos = mpeg1 ? 7 : 64;
    unsigned band;

    for (band = 0; sfb[band]; left += sfb[band], band++)
    {
        const unsigned ipos = ist_pos[band];
        float kl, kr, s;

        if (!((int)band > max_band[band % 3] && ipos < max_pos))
        {
            /* not an intensity band */
            if (ms_on)
            {
                L3_midside_stereo(left, sfb[band]);
            }
            continue;
        }

        /* with M/S also enabled the intensity gains are scaled by sqrt(2) */
        s = ms_on ? 1.41421356f : 1;
        if (mpeg1)
        {
            kl = g_pan[2*ipos];
            kr = g_pan[2*ipos + 1];
        } else if (ipos & 1)
        {
            /* odd position: the computed gain goes to the left channel */
            kl = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
            kr = 1;
        } else
        {
            kl = 1;
            kr = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
        }
        L3_intensity_stereo_band(left, sfb[band], kl*s, kr*s);
    }
}
962
 
963
static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)
964
{
965
    int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
966
    int i, max_blocks = gr->n_short_sfb ? 3 : 1;
967
 
968
    L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
969
    if (gr->n_long_sfb)
970
    {
971
        max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);
972
    }
973
    for (i = 0; i < max_blocks; i++)
974
    {
975
        int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
976
        int itop = n_sfb - max_blocks + i;
977
        int prev = itop - max_blocks;
978
        ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev];
979
    }
980
    L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
981
}
982
 
983
/*
 * Interleaves short-block data in place.
 *
 * `sfb` is a zero-terminated list of band widths, read with a stride of three
 * entries.  For each band of width w the input holds three consecutive runs of
 * w samples; the output holds them interleaved sample by sample
 * (run0[0], run1[0], run2[0], run0[1], ...).  The result is assembled in
 * `scratch` and copied back over `grbuf`.
 */
static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)
{
    const float *in = grbuf;
    float *out = scratch;
    int width;

    while ((width = *sfb) != 0)
    {
        int k;
        for (k = 0; k < width; k++)
        {
            out[0] = in[k];
            out[1] = in[k + width];
            out[2] = in[k + 2*width];
            out += 3;
        }
        in += 3*width;
        sfb += 3;
    }
    memcpy(grbuf, scratch, (out - scratch)*sizeof(float));
}
999
 
1000
/*
 * Applies the 8-point alias-reduction butterflies across `nbands` boundaries
 * between consecutive 18-sample subbands, in place.
 * For boundary b the samples grbuf[18*b + 17 - i] and grbuf[18*b + 18 + i]
 * (i = 0..7) are rotated with the coefficient pair (g_aa[0][i], g_aa[1][i]).
 */
static void L3_antialias(float *grbuf, int nbands)
{
    static const float g_aa[2][8] = {
        {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
        {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
    };

    while (nbands > 0)
    {
        int i = 0;
#if HAVE_SIMD
        if (have_simd())
        {
            while (i < 8)
            {
                f4 vu = VLD(grbuf + 18 + i);
                f4 vd = VLD(grbuf + 14 - i);
                f4 vc0 = VLD(g_aa[0] + i);
                f4 vc1 = VLD(g_aa[1] + i);
                vd = VREV(vd);
                VSTORE(grbuf + 18 + i, VSUB(VMUL(vu, vc0), VMUL(vd, vc1)));
                vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0));
                VSTORE(grbuf + 14 - i, VREV(vd));
                i += 4;
            }
        }
#endif /* HAVE_SIMD */
#ifndef MINIMP3_ONLY_SIMD
        while (i < 8)
        {
            float *up = grbuf + 18 + i;     /* sample above the boundary */
            float *dn = grbuf + 17 - i;     /* mirrored sample below it */
            const float c0 = g_aa[0][i];
            const float c1 = g_aa[1][i];
            const float u = *up;
            const float d = *dn;
            *up = u*c0 - d*c1;
            *dn = u*c1 + d*c0;
            i++;
        }
#endif /* MINIMP3_ONLY_SIMD */
        nbands--;
        grbuf += 18;
    }
}
1034
 
1035
/*
 * In-place 9-point transform used by the 36-point IMDCT.
 * All nine inputs are read first; the even-indexed and odd-indexed inputs are
 * processed as two independent halves and combined by a final butterfly.
 */
static void L3_dct3_9(float *y)
{
    const float y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4];
    const float y5 = y[5], y6 = y[6], y7 = y[7], y8 = y[8];

    /* even half */
    const float base = y0 + y6*0.5f;
    const float diff = y0 - y6;
    const float ea = (y4 + y2)*0.93969262f;
    const float eb = (y8 + y2)*0.76604444f;
    const float ec = (y4 - y8)*0.17364818f;
    const float mix = y4 + (y8 - y2);

    const float e2 = diff - mix*0.5f;
    const float e8 = base - eb + ec;
    const float e0 = base - ea + eb;
    const float e4 = base + ea - ec;

    /* odd half */
    const float m3 = y3*0.86602540f;
    const float oa = (y5 + y1)*0.98480775f;
    const float ob = (y5 - y7)*0.34202014f;
    const float oc = (y1 + y7)*0.64278761f;
    const float o1 = (y1 - y5 - y7)*0.86602540f;

    const float o5 = oa - m3 - oc;
    const float o7 = ob - m3 - oa;
    const float o3 = ob + m3 - oc;

    /* combine */
    y[0] = e4 - o7;
    y[1] = e2 + o1;
    y[2] = e0 - o3;
    y[3] = e8 + o5;
    y[4] = mix + diff;
    y[5] = e8 - o5;
    y[6] = e0 + o3;
    y[7] = e2 - o1;
    y[8] = e4 + o7;
}
1074
 
1075
/*
 * 36-point IMDCT with windowing and overlap-add for `nbands` consecutive
 * subbands, in place.
 *
 *   grbuf   - 18 coefficients per band on input, 18 output samples per band
 *   overlap - 9 saved values per band; read for the overlap-add and replaced
 *             with the values for the next granule
 *   window  - 18 window coefficients (two groups of 9)
 */
static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
{
    static const float g_twid9[18] = {
        0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
    };
    int band;

    for (band = 0; band < nbands; band++)
    {
        float co[9], si[9];
        int i;

        /* fold the 18 inputs into two 9-point sequences */
        co[0] = -grbuf[0];
        si[0] = grbuf[17];
        for (i = 0; i < 4; i++)
        {
            const float *q = grbuf + 4*i;
            si[8 - 2*i] =   q[1] - q[2];
            co[1 + 2*i] =   q[1] + q[2];
            si[7 - 2*i] =   q[4] - q[3];
            co[2 + 2*i] = -(q[3] + q[4]);
        }
        L3_dct3_9(co);
        L3_dct3_9(si);

        /* flip the sign of the odd-indexed si[] terms */
        for (i = 1; i < 8; i += 2)
        {
            si[i] = -si[i];
        }

        i = 0;

#if HAVE_SIMD
        if (have_simd())
        {
            while (i < 8)
            {
                f4 vovl = VLD(overlap + i);
                f4 vc = VLD(co + i);
                f4 vs = VLD(si + i);
                f4 vr0 = VLD(g_twid9 + i);
                f4 vr1 = VLD(g_twid9 + 9 + i);
                f4 vw0 = VLD(window + i);
                f4 vw1 = VLD(window + 9 + i);
                f4 vsum = VADD(VMUL(vc, vr1), VMUL(vs, vr0));
                VSTORE(overlap + i, VSUB(VMUL(vc, vr0), VMUL(vs, vr1)));
                VSTORE(grbuf + i, VSUB(VMUL(vovl, vw0), VMUL(vsum, vw1)));
                vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0));
                VSTORE(grbuf + 14 - i, VREV(vsum));
                i += 4;
            }
        }
#endif /* HAVE_SIMD */
        /* twiddle, window and overlap-add (also the tail of the SIMD path) */
        while (i < 9)
        {
            const float tw0 = g_twid9[0 + i];
            const float tw1 = g_twid9[9 + i];
            const float w0 = window[0 + i];
            const float w1 = window[9 + i];
            const float ovl = overlap[i];
            const float sum = co[i]*tw1 + si[i]*tw0;
            overlap[i]    = co[i]*tw0 - si[i]*tw1;
            grbuf[i]      = ovl*w0 - sum*w1;
            grbuf[17 - i] = ovl*w1 + sum*w0;
            i++;
        }

        grbuf += 18;
        overlap += 9;
    }
}
1131
 
1132
/*
 * 3-point transform: writes three outputs to dst from the inputs x0, x1, x2.
 *   dst[0] = (x0 - x2/2) + x1*0.8660254
 *   dst[1] =  x0 + x2
 *   dst[2] = (x0 - x2/2) - x1*0.8660254
 */
static void L3_idct3(float x0, float x1, float x2, float *dst)
{
    const float odd = x1*0.86602540f;
    const float even = x0 - x2*0.5f;

    dst[0] = even + odd;
    dst[1] = x0 + x2;
    dst[2] = even - odd;
}
1140
 
1141
/*
 * 12-point IMDCT for one short window.
 *
 *   x       - input; coefficients are taken at stride 3 (x[0], x[3], ..., x[15])
 *   dst     - receives 6 output samples
 *   overlap - 3 saved values; read for the overlap-add and then replaced
 */
static void L3_imdct12(float *x, float *dst, float *overlap)
{
    static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
    float co[3], si[3];
    int k = 0;

    L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
    L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
    si[1] = -si[1];

    while (k < 3)
    {
        const float tw0 = g_twid3[0 + k];
        const float tw1 = g_twid3[3 + k];
        /* the same table, read in reverse, serves as the window */
        const float w0 = g_twid3[2 - k];
        const float w1 = g_twid3[5 - k];
        const float ovl = overlap[k];
        const float sum = co[k]*tw1 + si[k]*tw0;
        overlap[k] = co[k]*tw0 - si[k]*tw1;
        dst[k]     = ovl*w0 - sum*w1;
        dst[5 - k] = ovl*w1 + sum*w0;
        k++;
    }
}
1160
 
1161
/*
 * Short-block IMDCT for `nbands` subbands, in place.
 * Per band: the first 6 outputs are the saved overlap, the next 12 come from
 * the first two short windows, and the third window's output is stored back
 * into overlap[0..5].  overlap[6..8] carries state between the three windows.
 */
static void L3_imdct_short(float *grbuf, float *overlap, int nbands)
{
    while (nbands > 0)
    {
        float in[18];
        float *carry = overlap + 6;

        /* work from a copy: grbuf is overwritten while its old content is still needed */
        memcpy(in, grbuf, sizeof(in));
        memcpy(grbuf, overlap, 6*sizeof(float));
        L3_imdct12(in, grbuf + 6, carry);
        L3_imdct12(in + 1, grbuf + 12, carry);
        L3_imdct12(in + 2, overlap, carry);

        nbands--;
        overlap += 9;
        grbuf += 18;
    }
}
1173
 
1174
/*
 * Negates every odd-indexed sample of every odd-numbered subband in a
 * 32 x 18 granule buffer (576 floats), in place.
 */
static void L3_change_sign(float *grbuf)
{
    int band;

    for (band = 1; band < 32; band += 2)
    {
        float *p = grbuf + 18*band;
        int i = 1;
        while (i < 18)
        {
            p[i] = -p[i];
            i += 2;
        }
    }
}
1181
 
1182
static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
1183
{
1184
    static const float g_mdct_window[2][18] = {
1185
        { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
1186
        { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
1187
    };
1188
    if (n_long_bands)
1189
    {
1190
        L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
1191
        grbuf += 18*n_long_bands;
1192
        overlap += 9*n_long_bands;
1193
    }
1194
    if (block_type == SHORT_BLOCK_TYPE)
1195
        L3_imdct_short(grbuf, overlap, 32 - n_long_bands);
1196
    else
1197
        L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);
1198
}
1199
 
1200
/*
 * Saves the unread tail of the main-data buffer into the decoder's bit
 * reservoir (h->reserv_buf) and records its length in h->reserv.
 * Reading resumes at the next whole byte after s->bs.pos; at most
 * MAX_BITRESERVOIR_BYTES are kept, dropping the oldest bytes if more remain.
 */
static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)
{
    int first = (s->bs.pos + 7)/8u;             /* first unread byte, rounded up */
    int count = s->bs.limit/8u - first;         /* bytes left after it */

    if (count > MAX_BITRESERVOIR_BYTES)
    {
        /* keep only the newest MAX_BITRESERVOIR_BYTES bytes */
        first += count - MAX_BITRESERVOIR_BYTES;
        count = MAX_BITRESERVOIR_BYTES;
    }
    if (count > 0)
    {
        memmove(h->reserv_buf, s->maindata + first, count);
    }
    h->reserv = count;
}
1215
 
1216
/*
 * Builds the main-data buffer for the current frame: the last
 * `main_data_begin` bytes of the reservoir (or all of it, if it holds fewer)
 * followed by the rest of this frame's bytes from `bs`, and points s->bs at it.
 *
 * Returns non-zero when the reservoir held at least main_data_begin bytes.
 */
static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)
{
    const int frame_bytes = (bs->limit - bs->pos)/8;
    const int bytes_have = MINIMP3_MIN(h->reserv, main_data_begin);
    /* offset of the first reservoir byte to reuse */
    const int skip = MINIMP3_MAX(0, h->reserv - main_data_begin);

    memcpy(s->maindata, h->reserv_buf + skip, bytes_have);
    memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
    bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);

    return h->reserv >= main_data_begin;
}
1225
 
1226
/*
 * Decodes one granule for `nch` channels into s->grbuf.
 *
 * Steps: per channel, scale factors and Huffman data; then joint-stereo
 * processing on the channel pair if the header requests it; then, per
 * channel, short-block reordering (when short bands are present), alias
 * reduction, IMDCT and the final sign change.
 *
 * gr_info points at the granule info of channel 0; channel ch uses gr_info[ch].
 */
static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)
{
    int ch;

    for (ch = 0; ch < nch; ch++)
    {
        L3_gr_info_t *gr = gr_info + ch;
        /* bit position where this channel's part2+3 data ends */
        int layer3gr_limit = s->bs.pos + gr->part_23_length;
        L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr, s->scf, ch);
        L3_huffman(s->grbuf[ch], &s->bs, gr, s->scf, layer3gr_limit);
    }

    if (HDR_TEST_I_STEREO(h->header))
    {
        L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
    } else if (HDR_IS_MS_STEREO(h->header))
    {
        L3_midside_stereo(s->grbuf[0], 576);
    }

    for (ch = 0; ch < nch; ch++)
    {
        const L3_gr_info_t *gr = gr_info + ch;
        float *samples = s->grbuf[ch];
        /* mixed blocks keep 2 long subbands, doubled when the sample-rate code is 2 */
        int n_long_bands = (gr->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2);
        int aa_bands;

        if (gr->n_short_sfb)
        {
            aa_bands = n_long_bands - 1;
            L3_reorder(samples + n_long_bands*18, s->syn[0], gr->sfbtab + gr->n_long_sfb);
        } else
        {
            aa_bands = 31;
        }

        L3_antialias(samples, aa_bands);
        L3_imdct_gr(samples, h->mdct_overlap[ch], gr->block_type, n_long_bands);
        L3_change_sign(samples);
    }
}
1261
 
1262
static void mp3d_DCT_II(float *grbuf, int n)
1263
{
1264
    static const float g_sec[24] = {
1265
        10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
1266
    };
1267
    int i, k = 0;
1268
#if HAVE_SIMD
1269
    if (have_simd()) for (; k < n; k += 4)
1270
    {
1271
        f4 t[4][8], *x;
1272
        float *y = grbuf + k;
1273
 
1274
        for (x = t[0], i = 0; i < 8; i++, x++)
1275
        {
1276
            f4 x0 = VLD(&y[i*18]);
1277
            f4 x1 = VLD(&y[(15 - i)*18]);
1278
            f4 x2 = VLD(&y[(16 + i)*18]);
1279
            f4 x3 = VLD(&y[(31 - i)*18]);
1280
            f4 t0 = VADD(x0, x3);
1281
            f4 t1 = VADD(x1, x2);
1282
            f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);
1283
            f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);
1284
            x[0] = VADD(t0, t1);
1285
            x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);
1286
            x[16] = VADD(t3, t2);
1287
            x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);
1288
        }
1289
        for (x = t[0], i = 0; i < 4; i++, x += 8)
1290
        {
1291
            f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
1292
            xt = VSUB(x0, x7); x0 = VADD(x0, x7);
1293
            x7 = VSUB(x1, x6); x1 = VADD(x1, x6);
1294
            x6 = VSUB(x2, x5); x2 = VADD(x2, x5);
1295
            x5 = VSUB(x3, x4); x3 = VADD(x3, x4);
1296
            x4 = VSUB(x0, x3); x0 = VADD(x0, x3);
1297
            x3 = VSUB(x1, x2); x1 = VADD(x1, x2);
1298
            x[0] = VADD(x0, x1);
1299
            x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
1300
            x5 = VADD(x5, x6);
1301
            x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
1302
            x7 = VADD(x7, xt);
1303
            x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
1304
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
1305
            x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
1306
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
1307
            x0 = VSUB(xt, x6); xt = VADD(xt, x6);
1308
            x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
1309
            x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
1310
            x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
1311
            x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
1312
            x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
1313
            x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
1314
        }
1315
 
1316
        if (k > n - 3)
1317
        {
1318
#if HAVE_SSE
1319
#define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
1320
#else /* HAVE_SSE */
1321
#define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18],  vget_low_f32(v))
1322
#endif /* HAVE_SSE */
1323
            for (i = 0; i < 7; i++, y += 4*18)
1324
            {
1325
                f4 s = VADD(t[3][i], t[3][i + 1]);
1326
                VSAVE2(0, t[0][i]);
1327
                VSAVE2(1, VADD(t[2][i], s));
1328
                VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
1329
                VSAVE2(3, VADD(t[2][1 + i], s));
1330
            }
1331
            VSAVE2(0, t[0][7]);
1332
            VSAVE2(1, VADD(t[2][7], t[3][7]));
1333
            VSAVE2(2, t[1][7]);
1334
            VSAVE2(3, t[3][7]);
1335
        } else
1336
        {
1337
#define VSAVE4(i, v) VSTORE(&y[i*18], v)
1338
            for (i = 0; i < 7; i++, y += 4*18)
1339
            {
1340
                f4 s = VADD(t[3][i], t[3][i + 1]);
1341
                VSAVE4(0, t[0][i]);
1342
                VSAVE4(1, VADD(t[2][i], s));
1343
                VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
1344
                VSAVE4(3, VADD(t[2][1 + i], s));
1345
            }
1346
            VSAVE4(0, t[0][7]);
1347
            VSAVE4(1, VADD(t[2][7], t[3][7]));
1348
            VSAVE4(2, t[1][7]);
1349
            VSAVE4(3, t[3][7]);
1350
        }
1351
    } else
1352
#endif /* HAVE_SIMD */
1353
#ifdef MINIMP3_ONLY_SIMD
1354
    {}
1355
#else /* MINIMP3_ONLY_SIMD */
1356
    for (; k < n; k++)
1357
    {
1358
        float t[4][8], *x, *y = grbuf + k;
1359
 
1360
        for (x = t[0], i = 0; i < 8; i++, x++)
1361
        {
1362
            float x0 = y[i*18];
1363
            float x1 = y[(15 - i)*18];
1364
            float x2 = y[(16 + i)*18];
1365
            float x3 = y[(31 - i)*18];
1366
            float t0 = x0 + x3;
1367
            float t1 = x1 + x2;
1368
            float t2 = (x1 - x2)*g_sec[3*i + 0];
1369
            float t3 = (x0 - x3)*g_sec[3*i + 1];
1370
            x[0] = t0 + t1;
1371
            x[8] = (t0 - t1)*g_sec[3*i + 2];
1372
            x[16] = t3 + t2;
1373
            x[24] = (t3 - t2)*g_sec[3*i + 2];
1374
        }
1375
        for (x = t[0], i = 0; i < 4; i++, x += 8)
1376
        {
1377
            float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
1378
            xt = x0 - x7; x0 += x7;
1379
            x7 = x1 - x6; x1 += x6;
1380
            x6 = x2 - x5; x2 += x5;
1381
            x5 = x3 - x4; x3 += x4;
1382
            x4 = x0 - x3; x0 += x3;
1383
            x3 = x1 - x2; x1 += x2;
1384
            x[0] = x0 + x1;
1385
            x[4] = (x0 - x1)*0.70710677f;
1386
            x5 =  x5 + x6;
1387
            x6 = (x6 + x7)*0.70710677f;
1388
            x7 =  x7 + xt;
1389
            x3 = (x3 + x4)*0.70710677f;
1390
            x5 -= x7*0.198912367f;  /* rotate by PI/8 */
1391
            x7 += x5*0.382683432f;
1392
            x5 -= x7*0.198912367f;
1393
            x0 = xt - x6; xt += x6;
1394
            x[1] = (xt + x7)*0.50979561f;
1395
            x[2] = (x4 + x3)*0.54119611f;
1396
            x[3] = (x0 - x5)*0.60134488f;
1397
            x[5] = (x0 + x5)*0.89997619f;
1398
            x[6] = (x4 - x3)*1.30656302f;
1399
            x[7] = (xt - x7)*2.56291556f;
1400
 
1401
        }
1402
        for (i = 0; i < 7; i++, y += 4*18)
1403
        {
1404
            y[0*18] = t[0][i];
1405
            y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
1406
            y[2*18] = t[1][i] + t[1][i + 1];
1407
            y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
1408
        }
1409
        y[0*18] = t[0][7];
1410
        y[1*18] = t[2][7] + t[3][7];
1411
        y[2*18] = t[1][7];
1412
        y[3*18] = t[3][7];
1413
    }
1414
#endif /* MINIMP3_ONLY_SIMD */
1415
}
1416
 
1417
#ifndef MINIMP3_FLOAT_OUTPUT
1418
/*
 * Convert one synthesized sample (already expressed in 16-bit full-scale
 * units) to int16_t: saturate at the int16 limits and round to nearest.
 *
 * The float-to-integer cast truncates toward zero, so after the +0.5 bias a
 * negative value still needs one extra step down.  That step must be decided
 * from the sign of the biased *float*: e.g. -1.2 becomes -0.7, which truncates
 * to 0, and a test on the truncated integer would skip the step and make the
 * value -1 unreachable.  Testing the float also matches the NEON path of
 * mp3d_synth(), which compares the biased vector against zero.
 */
static int16_t mp3d_scale_pcm(float sample)
{
    float biased = sample + .5f;
#if HAVE_ARMV6
    int32_t s32 = (int32_t)biased;
    s32 -= (biased < 0);
    int16_t s = (int16_t)minimp3_clip_int16_arm(s32);
#else
    /* Saturate first so the cast below never sees an out-of-range value. */
    if (sample >=  32766.5) return (int16_t) 32767;
    if (sample <= -32767.5) return (int16_t)-32768;
    int16_t s = (int16_t)biased;
    s -= (biased < 0);   /* cast truncated toward zero; step negatives down */
#endif
    return s;
}
1432
#else /* MINIMP3_FLOAT_OUTPUT */
1433
/* Float-output build: scale a synthesized sample by 1/32768. */
static float mp3d_scale_pcm(float sample)
{
    const float inv_full_scale = 1.f/32768.f;
    return sample*inv_full_scale;
}
1437
#endif /* MINIMP3_FLOAT_OUTPUT */
1438
 
1439
/*
 * Compute two output samples as fixed-coefficient weighted sums over the
 * buffer z (taps are 64 floats apart).  The first sum is stored to pcm[0];
 * the second, taken two floats further into z, is stored to pcm[16*nch].
 * The order of the additions is kept fixed so the float result is unchanged.
 */
static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
{
    const float *z2 = z + 2;   /* base of the taps for the second sample */
    float acc;

    acc  = (z[14*64] - z[ 0*64]) * 29.0f;
    acc += (z[ 1*64] + z[13*64]) * 213.0f;
    acc += (z[12*64] - z[ 2*64]) * 459.0f;
    acc += (z[ 3*64] + z[11*64]) * 2037.0f;
    acc += (z[10*64] - z[ 4*64]) * 5153.0f;
    acc += (z[ 5*64] + z[ 9*64]) * 6574.0f;
    acc += (z[ 8*64] - z[ 6*64]) * 37489.0f;
    acc +=  z[ 7*64]             * 75038.0f;
    pcm[0] = mp3d_scale_pcm(acc);

    acc  = z2[14*64] * 104.0f;
    acc += z2[12*64] * 1567.0f;
    acc += z2[10*64] * 9727.0f;
    acc += z2[ 8*64] * 64019.0f;
    acc += z2[ 6*64] * -9975.0f;
    acc += z2[ 4*64] * -45.0f;
    acc += z2[ 2*64] * 146.0f;
    acc += z2[ 0*64] * -5.0f;
    pcm[16*nch] = mp3d_scale_pcm(acc);
}
1463
 
1464
/*
 * Windowing stage of the synthesis filter: produces 64 output samples per
 * channel (indices 0..63, stored nch apart) from two consecutive entries of
 * each of the 32 rows of the input buffer (row stride 18).
 *
 *   xl   - left (or only) channel input; xl[18*b] and xl[1 + 18*b] are read
 *          for b = 0..31
 *   dstl - left channel output; sample n goes to dstl[n*nch]
 *   nch  - channel count; the right-channel pointers are derived from it
 *   lins - work buffer; 128 new floats are stored around lins + 15*64 and
 *          earlier entries back to lins[0] are read as history
 */
static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
1465
{
1466
    int i;
1467
    /* With nch == 1 both offsets are 0, so xr == xl and dstr == dstl. */
    float *xr = xl + 576*(nch - 1);
1468
    mp3d_sample_t *dstr = dstl + (nch - 1);
1469
 
1470
    /*
     * 15 rows of 16 window coefficients.  Each iteration of the main loop
     * consumes exactly one row, in order, through `*w++` (8 macro invocations,
     * two coefficients each).
     */
    static const float g_win[] = {
1471
        -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
1472
        -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
1473
        -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
1474
        -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
1475
        -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
1476
        -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
1477
        -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
1478
        -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
1479
        -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
1480
        -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
1481
        -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
1482
        -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
1483
        -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
1484
        -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
1485
        -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
1486
    };
1487
    /* New data is stored relative to zlin; history lies below it in lins. */
    float *zlin = lins + 15*64;
1488
    const float *w = g_win;
1489
 
1490
    /*
     * Each group of four floats is laid out as
     * { left entry, right entry, left next entry, right next entry }.
     * Rows 16 and 0 go to group 15, the "+1" entries of the same rows to
     * group 31.
     */
    zlin[4*15]     = xl[18*16];
1491
    zlin[4*15 + 1] = xr[18*16];
1492
    zlin[4*15 + 2] = xl[0];
1493
    zlin[4*15 + 3] = xr[0];
1494
 
1495
    zlin[4*31]     = xl[1 + 18*16];
1496
    zlin[4*31 + 1] = xr[1 + 18*16];
1497
    zlin[4*31 + 2] = xl[1];
1498
    zlin[4*31 + 3] = xr[1];
1499
 
1500
    /*
     * Output samples 0, 16, 32 and 48 of each channel come from
     * mp3d_synth_pair (it writes pcm[0] and pcm[16*nch]).  The right channel
     * is written first, so when dstr == dstl the left result is the one kept.
     */
    mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
1501
    mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
1502
    mp3d_synth_pair(dstl, nch, lins + 4*15);
1503
    mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);
1504
 
1505
#if HAVE_SIMD
1506
    /* Vector path, taken only when the runtime check have_simd() succeeds. */
    if (have_simd()) for (i = 14; i >= 0; i--)
1507
    {
1508
/* VLOAD: fetch the next two window coefficients and two 4-float groups from zlin. */
#define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);
1509
/* V0 initialises the accumulators a and b; V1 and V2 add to them and differ only in the sign of the term added to a. */
#define V0(k) { VLOAD(k) b =         VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a =         VSUB(VMUL(vz, w0), VMUL(vy, w1));  }
1510
#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }
1511
#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }
1512
        f4 a, b;
1513
        /* Store rows 31-i and 1+i of the input into zlin for this iteration. */
        zlin[4*i]     = xl[18*(31 - i)];
1514
        zlin[4*i + 1] = xr[18*(31 - i)];
1515
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
1516
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
1517
        zlin[4*i + 64] = xl[1 + 18*(1 + i)];
1518
        zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
1519
        zlin[4*i - 64 + 2] = xl[18*(1 + i)];
1520
        zlin[4*i - 64 + 3] = xr[18*(1 + i)];
1521
 
1522
        V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)
1523
 
1524
        /*
         * Lanes of a and b follow the zlin group layout: 0 = left, 1 = right,
         * 2 = left (second block of 32), 3 = right (second block of 32).
         * a supplies samples 15-i and 47-i, b supplies 17+i and 49+i.
         */
        {
1525
#ifndef MINIMP3_FLOAT_OUTPUT
1526
#if HAVE_SSE
1527
            /* Clamp to the int16 range in float, convert, then pack a into 16-bit elements 0..3 and b into 4..7. */
            static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
1528
            static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
1529
            __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
1530
                                           _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
1531
            dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);
1532
            dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5);
1533
            dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 0);
1534
            dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4);
1535
            dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3);
1536
            dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7);
1537
            dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2);
1538
            dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6);
1539
#else /* HAVE_SSE */
1540
            /*
             * Bias by 0.5, convert to int32, then add the comparison mask
             * (all ones, i.e. -1, in lanes where the biased value is
             * negative) and narrow to int16 with saturation.
             */
            int16x4_t pcma, pcmb;
1541
            a = VADD(a, VSET(0.5f));
1542
            b = VADD(b, VSET(0.5f));
1543
            pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
1544
            pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
1545
            vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
1546
            vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
1547
            vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);
1548
            vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);
1549
            vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);
1550
            vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);
1551
            vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
1552
            vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
1553
#endif /* HAVE_SSE */
1554
 
1555
#else /* MINIMP3_FLOAT_OUTPUT */
1556
 
1557
            /* Float output: scale by 1/32768 and store each lane without clamping. */
            static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
1558
            a = VMUL(a, g_scale);
1559
            b = VMUL(b, g_scale);
1560
#if HAVE_SSE
1561
            _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
1562
            _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
1563
            _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
1564
            _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
1565
            _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
1566
            _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
1567
            _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
1568
            _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
1569
#else /* HAVE_SSE */
1570
            vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
1571
            vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
1572
            vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
1573
            vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
1574
            vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
1575
            vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
1576
            vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
1577
            vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
1578
#endif /* HAVE_SSE */
1579
#endif /* MINIMP3_FLOAT_OUTPUT */
1580
        }
1581
    } else
1582
#endif /* HAVE_SIMD */
1583
#ifdef MINIMP3_ONLY_SIMD
1584
    {}
1585
#else /* MINIMP3_ONLY_SIMD */
1586
    /* Scalar path: same data flow as the vector path, with a[] and b[] as 4-element accumulators. */
    for (i = 14; i >= 0; i--)
1587
    {
1588
/* LOAD: fetch the next two window coefficients and point vz/vy at two 4-float groups of zlin. */
#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
1589
/* S0 initialises a[] and b[]; S1 and S2 accumulate and differ only in the sign of the term added to a[]. */
#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j]  = vz[j]*w1 + vy[j]*w0, a[j]  = vz[j]*w0 - vy[j]*w1; }
1590
#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
1591
#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
1592
        float a[4], b[4];
1593
 
1594
        zlin[4*i]     = xl[18*(31 - i)];
1595
        zlin[4*i + 1] = xr[18*(31 - i)];
1596
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
1597
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
1598
        /* 4*(i + 16) == 4*i + 64 and 4*(i - 16) == 4*i - 64: same slots as in the vector path. */
        zlin[4*(i + 16)]   = xl[1 + 18*(1 + i)];
1599
        zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
1600
        zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
1601
        zlin[4*(i - 16) + 3] = xr[18*(1 + i)];
1602
 
1603
        S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)
1604
 
1605
        /* Right-channel stores precede the left ones, so the left value is kept when dstr == dstl. */
        dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);
1606
        dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);
1607
        dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);
1608
        dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);
1609
        dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);
1610
        dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);
1611
        dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
1612
        dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
1613
    }
1614
#endif /* MINIMP3_ONLY_SIMD */
1615
}
1616
 
1617
/*
 * Synthesis for one granule: run mp3d_DCT_II on each channel's 576-float
 * block of grbuf, call mp3d_synth once per pair of bands, and carry the last
 * 15*64 floats of the work buffer over into qmf_state for the next call.
 *
 *   qmf_state - 15*64 floats of state carried between calls (read, then updated)
 *   grbuf     - input, 576 floats per channel
 *   nbands    - number of bands to process (stepped in pairs)
 *   nch       - channel count
 *   pcm       - output samples
 *   lins      - work buffer used by mp3d_synth
 */
static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
{
    const size_t state_bytes = sizeof(float)*15*64;
    float *carry = lins + nbands*64;   /* tail of lins that becomes the new state */
    int idx;

    for (idx = 0; idx < nch; idx++)
    {
        mp3d_DCT_II(grbuf + 576*idx, nbands);
    }

    /* Seed the work buffer with the state saved by the previous call. */
    memcpy(lins, qmf_state, state_bytes);

    for (idx = 0; idx < nbands; idx += 2)
    {
        mp3d_synth(grbuf + idx, pcm + 32*nch*idx, nch, lins + idx*64);
    }

#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL
    if (nch == 1)
    {
        /* Mono: only even-indexed entries are saved; odd ones keep their old value. */
        for (idx = 0; idx < 15*64; idx += 2)
        {
            qmf_state[idx] = carry[idx];
        }
        return;
    }
#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */
    memcpy(qmf_state, carry, state_bytes);
}
1644
 
1645
static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)
1646
{
1647
    int i, nmatch;
1648
    for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++)
1649
    {
1650
        i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i);
1651
        if (i + HDR_SIZE > mp3_bytes)
1652
            return nmatch > 0;
1653
        if (!hdr_compare(hdr, hdr + i))
1654
            return 0;
1655
    }
1656
    return 1;
1657
}
1658
 
1659
static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
1660
{
1661
    int i, k;
1662
    for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++)
1663
    {
1664
        if (hdr_valid(mp3))
1665
        {
1666
            int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);
1667
            int frame_and_padding = frame_bytes + hdr_padding(mp3);
1668
 
1669
            for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++)
1670
            {
1671
                if (hdr_compare(mp3, mp3 + k))
1672
                {
1673
                    int fb = k - hdr_padding(mp3);
1674
                    int nextfb = fb + hdr_padding(mp3 + k);
1675
                    if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb))
1676
                        continue;
1677
                    frame_and_padding = k;
1678
                    frame_bytes = fb;
1679
                    *free_format_bytes = fb;
1680
                }
1681
            }
1682
            if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&
1683
                mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||
1684
                (!i && frame_and_padding == mp3_bytes))
1685
            {
1686
                *ptr_frame_bytes = frame_and_padding;
1687
                return i;
1688
            }
1689
            *free_format_bytes = 0;
1690
        }
1691
    }
1692
    *ptr_frame_bytes = 0;
1693
    return mp3_bytes;
1694
}
1695
 
1696
/*
 * Reset the decoder by clearing the first byte of the remembered frame
 * header.  mp3dec_decode_frame() only takes its fast path when that byte is
 * 0xff; otherwise it zeroes the whole structure and searches for a frame.
 */
void mp3dec_init(mp3dec_t *dec)
{
    unsigned char *saved_header = dec->header;
    saved_header[0] = 0;
}
1700
 
1701
/*
 * Locate and decode one frame from mp3[0..mp3_bytes).
 *
 *   dec  - decoder state, updated in place
 *   mp3  - input bytes
 *   pcm  - output samples; if NULL the frame is only located and described
 *   info - filled with frame_bytes (bytes consumed, including any skipped
 *          prefix), frame_offset, channels, hz, layer and bitrate_kbps
 *
 * Returns the number of samples reported by hdr_frame_samples() for the
 * frame, or 0 when no frame was found (info->frame_bytes then holds the
 * value returned by the frame search), when the bitstream reader ran past
 * its limit (the decoder is reset with mp3dec_init), or when
 * L3_restore_reservoir() reported failure.
 */
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
{
    int offset = 0, frame_size = 0, success = 1, gr;
    const uint8_t *hdr;
    bs_t bs_frame[1];
    mp3dec_scratch_t scratch;

    /* Fast path: the buffer starts with a header matching the previous frame's. */
    if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))
    {
        int expected = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);

        /* Keep it if it fills the buffer exactly or is followed by a matching header. */
        if (expected == mp3_bytes ||
            (expected + HDR_SIZE <= mp3_bytes && hdr_compare(mp3, mp3 + expected)))
        {
            frame_size = expected;
        }
    }

    /* Slow path: clear all decoder state and search for a frame. */
    if (!frame_size)
    {
        memset(dec, 0, sizeof(mp3dec_t));
        offset = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
        if (!frame_size || offset + frame_size > mp3_bytes)
        {
            info->frame_bytes = offset;
            return 0;
        }
    }

    hdr = mp3 + offset;
    memcpy(dec->header, hdr, HDR_SIZE);

    info->frame_bytes  = offset + frame_size;
    info->frame_offset = offset;
    info->channels     = HDR_IS_MONO(hdr) ? 1 : 2;
    info->hz           = hdr_sample_rate_hz(hdr);
    info->layer        = 4 - HDR_GET_LAYER(hdr);
    info->bitrate_kbps = hdr_bitrate_kbps(hdr);

    if (!pcm)
    {
        return hdr_frame_samples(hdr);
    }

    bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);
    if (HDR_IS_CRC(hdr))
    {
        get_bits(bs_frame, 16);   /* the 16 bits are read and discarded */
    }

    if (info->layer == 3)
    {
        int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);

        if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
        {
            mp3dec_init(dec);
            return 0;
        }

        success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
        if (success)
        {
            for (gr = 0; gr < (HDR_TEST_MPEG1(hdr) ? 2 : 1); gr++, pcm += 576*info->channels)
            {
                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
                L3_decode(dec, &scratch, scratch.gr_info + gr*info->channels, info->channels);
                mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);
            }
        }
        /* The reservoir is saved whether or not this frame could be decoded. */
        L3_save_reservoir(dec, &scratch);
    } else
    {
#ifdef MINIMP3_ONLY_MP3
        return 0;
#else /* MINIMP3_ONLY_MP3 */
        L12_scale_info sci[1];
        int filled = 0;   /* running sum of L12_dequantize_granule results */

        L12_read_scale_info(hdr, bs_frame, sci);

        memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
        for (gr = 0; gr < 3; gr++)
        {
            filled += L12_dequantize_granule(scratch.grbuf[0] + filled, bs_frame, sci, info->layer | 1);
            if (filled == 12)
            {
                /* Synthesize and emit 384 samples per channel, then start over. */
                filled = 0;
                L12_apply_scf_384(sci, sci->scf + gr, scratch.grbuf[0]);
                mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);
                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
                pcm += 384*info->channels;
            }
            if (bs_frame->pos > bs_frame->limit)
            {
                mp3dec_init(dec);
                return 0;
            }
        }
#endif /* MINIMP3_ONLY_MP3 */
    }

    return success*hdr_frame_samples(dec->header);
}
1795
 
1796
#ifdef MINIMP3_FLOAT_OUTPUT
1797
/*
 * Convert num_samples floats to 16-bit PCM: each input is multiplied by
 * 32768, saturated to the int16 range and rounded to nearest.
 *
 *   in          - input samples
 *   out         - receives num_samples int16_t values
 *   num_samples - number of samples to convert
 *
 * Groups of eight samples go through the vector path when it is compiled in
 * and have_simd() reports support at run time (the same check mp3d_synth()
 * performs); the remainder is handled by the scalar loop.
 */
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)
{
    int i = 0;
#if HAVE_SIMD
    if (have_simd())
    {
        int aligned_count = num_samples & ~7;
        for(; i < aligned_count; i += 8)
        {
            static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
            f4 a = VMUL(VLD(&in[i  ]), g_scale);
            f4 b = VMUL(VLD(&in[i+4]), g_scale);
#if HAVE_SSE
            /* Clamp in float, convert, then pack a into elements 0..3 and b into 4..7. */
            static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
            static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
            __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
                                           _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
            out[i  ] = _mm_extract_epi16(pcm8, 0);
            out[i+1] = _mm_extract_epi16(pcm8, 1);
            out[i+2] = _mm_extract_epi16(pcm8, 2);
            out[i+3] = _mm_extract_epi16(pcm8, 3);
            out[i+4] = _mm_extract_epi16(pcm8, 4);
            out[i+5] = _mm_extract_epi16(pcm8, 5);
            out[i+6] = _mm_extract_epi16(pcm8, 6);
            out[i+7] = _mm_extract_epi16(pcm8, 7);
#else /* HAVE_SSE */
            /* Bias by 0.5, convert, add -1 where the biased value is negative, narrow with saturation. */
            int16x4_t pcma, pcmb;
            a = VADD(a, VSET(0.5f));
            b = VADD(b, VSET(0.5f));
            pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
            pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
            vst1_lane_s16(out+i  , pcma, 0);
            vst1_lane_s16(out+i+1, pcma, 1);
            vst1_lane_s16(out+i+2, pcma, 2);
            vst1_lane_s16(out+i+3, pcma, 3);
            vst1_lane_s16(out+i+4, pcmb, 0);
            vst1_lane_s16(out+i+5, pcmb, 1);
            vst1_lane_s16(out+i+6, pcmb, 2);
            vst1_lane_s16(out+i+7, pcmb, 3);
#endif /* HAVE_SSE */
        }
    }
#endif /* HAVE_SIMD */
    for(; i < num_samples; i++)
    {
        float sample = in[i] * 32768.0f;
        if (sample >=  32766.5)
            out[i] = (int16_t) 32767;
        else if (sample <= -32767.5)
            out[i] = (int16_t)-32768;
        else
        {
            /*
             * The cast truncates toward zero, so negatives need one step
             * down.  Decide that from the biased float: a value such as -1.2
             * truncates to 0 after the bias, and testing the integer would
             * leave it at 0 instead of -1.
             */
            float biased = sample + .5f;
            int16_t s = (int16_t)biased;
            s -= (biased < 0);
            out[i] = s;
        }
    }
}
1852
#endif /* MINIMP3_FLOAT_OUTPUT */
1853
#endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */