Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * ATRAC3 compatible decoder |
||
3 | * Copyright (c) 2006-2008 Maxim Poliakovski |
||
4 | * Copyright (c) 2006-2008 Benjamin Larsson |
||
5 | * |
||
6 | * This file is part of FFmpeg. |
||
7 | * |
||
8 | * FFmpeg is free software; you can redistribute it and/or |
||
9 | * modify it under the terms of the GNU Lesser General Public |
||
10 | * License as published by the Free Software Foundation; either |
||
11 | * version 2.1 of the License, or (at your option) any later version. |
||
12 | * |
||
13 | * FFmpeg is distributed in the hope that it will be useful, |
||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
16 | * Lesser General Public License for more details. |
||
17 | * |
||
18 | * You should have received a copy of the GNU Lesser General Public |
||
19 | * License along with FFmpeg; if not, write to the Free Software |
||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
21 | */ |
||
22 | |||
23 | /** |
||
24 | * @file |
||
25 | * ATRAC3 compatible decoder. |
||
26 | * This decoder handles Sony's ATRAC3 data. |
||
27 | * |
||
28 | * Container formats used to store ATRAC3 data: |
||
29 | * RealMedia (.rm), RIFF WAV (.wav, .at3), Sony OpenMG (.oma, .aa3). |
||
30 | * |
||
31 | * To use this decoder, a calling application must supply the extradata |
||
32 | * bytes provided in the containers above. |
||
33 | */ |
||
34 | |||
35 | #include |
||
36 | #include |
||
37 | #include |
||
38 | |||
39 | #include "libavutil/attributes.h" |
||
40 | #include "libavutil/float_dsp.h" |
||
41 | #include "libavutil/libm.h" |
||
42 | #include "avcodec.h" |
||
43 | #include "bytestream.h" |
||
44 | #include "fft.h" |
||
45 | #include "fmtconvert.h" |
||
46 | #include "get_bits.h" |
||
47 | #include "internal.h" |
||
48 | |||
49 | #include "atrac.h" |
||
50 | #include "atrac3data.h" |
||
51 | |||
52 | #define JOINT_STEREO 0x12 |
||
53 | #define STEREO 0x2 |
||
54 | |||
55 | #define SAMPLES_PER_FRAME 1024 |
||
56 | #define MDCT_SIZE 512 |
||
57 | |||
58 | typedef struct GainBlock { |
||
59 | AtracGainInfo g_block[4]; |
||
60 | } GainBlock; |
||
61 | |||
/**
 * A single decoded tonal component.
 */
typedef struct TonalComponent {
    int   pos;        /* index of the first affected spectral line */
    int   num_coefs;  /* number of valid entries in coef[] */
    float coef[8];    /* dequantized values added onto the spectrum */
} TonalComponent;
||
67 | |||
68 | typedef struct ChannelUnit { |
||
69 | int bands_coded; |
||
70 | int num_components; |
||
71 | float prev_frame[SAMPLES_PER_FRAME]; |
||
72 | int gc_blk_switch; |
||
73 | TonalComponent components[64]; |
||
74 | GainBlock gain_block[2]; |
||
75 | |||
76 | DECLARE_ALIGNED(32, float, spectrum)[SAMPLES_PER_FRAME]; |
||
77 | DECLARE_ALIGNED(32, float, imdct_buf)[SAMPLES_PER_FRAME]; |
||
78 | |||
79 | float delay_buf1[46]; /// |
||
80 | float delay_buf2[46]; |
||
81 | float delay_buf3[46]; |
||
82 | } ChannelUnit; |
||
83 | |||
84 | typedef struct ATRAC3Context { |
||
85 | GetBitContext gb; |
||
86 | //@{ |
||
87 | /** stream data */ |
||
88 | int coding_mode; |
||
89 | |||
90 | ChannelUnit *units; |
||
91 | //@} |
||
92 | //@{ |
||
93 | /** joint-stereo related variables */ |
||
94 | int matrix_coeff_index_prev[4]; |
||
95 | int matrix_coeff_index_now[4]; |
||
96 | int matrix_coeff_index_next[4]; |
||
97 | int weighting_delay[6]; |
||
98 | //@} |
||
99 | //@{ |
||
100 | /** data buffers */ |
||
101 | uint8_t *decoded_bytes_buffer; |
||
102 | float temp_buf[1070]; |
||
103 | //@} |
||
104 | //@{ |
||
105 | /** extradata */ |
||
106 | int scrambled_stream; |
||
107 | //@} |
||
108 | |||
109 | AtracGCContext gainc_ctx; |
||
110 | FFTContext mdct_ctx; |
||
111 | FmtConvertContext fmt_conv; |
||
112 | AVFloatDSPContext fdsp; |
||
113 | } ATRAC3Context; |
||
114 | |||
115 | static DECLARE_ALIGNED(32, float, mdct_window)[MDCT_SIZE]; |
||
116 | static VLC_TYPE atrac3_vlc_table[4096][2]; |
||
117 | static VLC spectral_coeff_tab[7]; |
||
118 | |||
119 | /** |
||
120 | * Regular 512 points IMDCT without overlapping, with the exception of the |
||
121 | * swapping of odd bands caused by the reverse spectra of the QMF. |
||
122 | * |
||
123 | * @param odd_band 1 if the band is an odd band |
||
124 | */ |
||
125 | static void imlt(ATRAC3Context *q, float *input, float *output, int odd_band) |
||
126 | { |
||
127 | int i; |
||
128 | |||
129 | if (odd_band) { |
||
130 | /** |
||
131 | * Reverse the odd bands before IMDCT, this is an effect of the QMF |
||
132 | * transform or it gives better compression to do it this way. |
||
133 | * FIXME: It should be possible to handle this in imdct_calc |
||
134 | * for that to happen a modification of the prerotation step of |
||
135 | * all SIMD code and C code is needed. |
||
136 | * Or fix the functions before so they generate a pre reversed spectrum. |
||
137 | */ |
||
138 | for (i = 0; i < 128; i++) |
||
139 | FFSWAP(float, input[i], input[255 - i]); |
||
140 | } |
||
141 | |||
142 | q->mdct_ctx.imdct_calc(&q->mdct_ctx, output, input); |
||
143 | |||
144 | /* Perform windowing on the output. */ |
||
145 | q->fdsp.vector_fmul(output, output, mdct_window, MDCT_SIZE); |
||
146 | } |
||
147 | |||
148 | /* |
||
149 | * indata descrambling, only used for data coming from the rm container |
||
150 | */ |
||
151 | static int decode_bytes(const uint8_t *input, uint8_t *out, int bytes) |
||
152 | { |
||
153 | int i, off; |
||
154 | uint32_t c; |
||
155 | const uint32_t *buf; |
||
156 | uint32_t *output = (uint32_t *)out; |
||
157 | |||
158 | off = (intptr_t)input & 3; |
||
159 | buf = (const uint32_t *)(input - off); |
||
160 | if (off) |
||
161 | c = av_be2ne32((0x537F6103U >> (off * 8)) | (0x537F6103U << (32 - (off * 8)))); |
||
162 | else |
||
163 | c = av_be2ne32(0x537F6103U); |
||
164 | bytes += 3 + off; |
||
165 | for (i = 0; i < bytes / 4; i++) |
||
166 | output[i] = c ^ buf[i]; |
||
167 | |||
168 | if (off) |
||
169 | avpriv_request_sample(NULL, "Offset of %d", off); |
||
170 | |||
171 | return off; |
||
172 | } |
||
173 | |||
174 | static av_cold void init_imdct_window(void) |
||
175 | { |
||
176 | int i, j; |
||
177 | |||
178 | /* generate the mdct window, for details see |
||
179 | * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */ |
||
180 | for (i = 0, j = 255; i < 128; i++, j--) { |
||
181 | float wi = sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0; |
||
182 | float wj = sin(((j + 0.5) / 256.0 - 0.5) * M_PI) + 1.0; |
||
183 | float w = 0.5 * (wi * wi + wj * wj); |
||
184 | mdct_window[i] = mdct_window[511 - i] = wi / w; |
||
185 | mdct_window[j] = mdct_window[511 - j] = wj / w; |
||
186 | } |
||
187 | } |
||
188 | |||
189 | static av_cold int atrac3_decode_close(AVCodecContext *avctx) |
||
190 | { |
||
191 | ATRAC3Context *q = avctx->priv_data; |
||
192 | |||
193 | av_free(q->units); |
||
194 | av_free(q->decoded_bytes_buffer); |
||
195 | |||
196 | ff_mdct_end(&q->mdct_ctx); |
||
197 | |||
198 | return 0; |
||
199 | } |
||
200 | |||
201 | /** |
||
202 | * Mantissa decoding |
||
203 | * |
||
204 | * @param selector which table the output values are coded with |
||
205 | * @param coding_flag constant length coding or variable length coding |
||
206 | * @param mantissas mantissa output table |
||
207 | * @param num_codes number of values to get |
||
208 | */ |
||
209 | static void read_quant_spectral_coeffs(GetBitContext *gb, int selector, |
||
210 | int coding_flag, int *mantissas, |
||
211 | int num_codes) |
||
212 | { |
||
213 | int i, code, huff_symb; |
||
214 | |||
215 | if (selector == 1) |
||
216 | num_codes /= 2; |
||
217 | |||
218 | if (coding_flag != 0) { |
||
219 | /* constant length coding (CLC) */ |
||
220 | int num_bits = clc_length_tab[selector]; |
||
221 | |||
222 | if (selector > 1) { |
||
223 | for (i = 0; i < num_codes; i++) { |
||
224 | if (num_bits) |
||
225 | code = get_sbits(gb, num_bits); |
||
226 | else |
||
227 | code = 0; |
||
228 | mantissas[i] = code; |
||
229 | } |
||
230 | } else { |
||
231 | for (i = 0; i < num_codes; i++) { |
||
232 | if (num_bits) |
||
233 | code = get_bits(gb, num_bits); // num_bits is always 4 in this case |
||
234 | else |
||
235 | code = 0; |
||
236 | mantissas[i * 2 ] = mantissa_clc_tab[code >> 2]; |
||
237 | mantissas[i * 2 + 1] = mantissa_clc_tab[code & 3]; |
||
238 | } |
||
239 | } |
||
240 | } else { |
||
241 | /* variable length coding (VLC) */ |
||
242 | if (selector != 1) { |
||
243 | for (i = 0; i < num_codes; i++) { |
||
244 | huff_symb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, |
||
245 | spectral_coeff_tab[selector-1].bits, 3); |
||
246 | huff_symb += 1; |
||
247 | code = huff_symb >> 1; |
||
248 | if (huff_symb & 1) |
||
249 | code = -code; |
||
250 | mantissas[i] = code; |
||
251 | } |
||
252 | } else { |
||
253 | for (i = 0; i < num_codes; i++) { |
||
254 | huff_symb = get_vlc2(gb, spectral_coeff_tab[selector - 1].table, |
||
255 | spectral_coeff_tab[selector - 1].bits, 3); |
||
256 | mantissas[i * 2 ] = mantissa_vlc_tab[huff_symb * 2 ]; |
||
257 | mantissas[i * 2 + 1] = mantissa_vlc_tab[huff_symb * 2 + 1]; |
||
258 | } |
||
259 | } |
||
260 | } |
||
261 | } |
||
262 | |||
263 | /** |
||
264 | * Restore the quantized band spectrum coefficients |
||
265 | * |
||
266 | * @return subband count, fix for broken specification/files |
||
267 | */ |
||
268 | static int decode_spectrum(GetBitContext *gb, float *output) |
||
269 | { |
||
270 | int num_subbands, coding_mode, i, j, first, last, subband_size; |
||
271 | int subband_vlc_index[32], sf_index[32]; |
||
272 | int mantissas[128]; |
||
273 | float scale_factor; |
||
274 | |||
275 | num_subbands = get_bits(gb, 5); // number of coded subbands |
||
276 | coding_mode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC |
||
277 | |||
278 | /* get the VLC selector table for the subbands, 0 means not coded */ |
||
279 | for (i = 0; i <= num_subbands; i++) |
||
280 | subband_vlc_index[i] = get_bits(gb, 3); |
||
281 | |||
282 | /* read the scale factor indexes from the stream */ |
||
283 | for (i = 0; i <= num_subbands; i++) { |
||
284 | if (subband_vlc_index[i] != 0) |
||
285 | sf_index[i] = get_bits(gb, 6); |
||
286 | } |
||
287 | |||
288 | for (i = 0; i <= num_subbands; i++) { |
||
289 | first = subband_tab[i ]; |
||
290 | last = subband_tab[i + 1]; |
||
291 | |||
292 | subband_size = last - first; |
||
293 | |||
294 | if (subband_vlc_index[i] != 0) { |
||
295 | /* decode spectral coefficients for this subband */ |
||
296 | /* TODO: This can be done faster is several blocks share the |
||
297 | * same VLC selector (subband_vlc_index) */ |
||
298 | read_quant_spectral_coeffs(gb, subband_vlc_index[i], coding_mode, |
||
299 | mantissas, subband_size); |
||
300 | |||
301 | /* decode the scale factor for this subband */ |
||
302 | scale_factor = ff_atrac_sf_table[sf_index[i]] * |
||
303 | inv_max_quant[subband_vlc_index[i]]; |
||
304 | |||
305 | /* inverse quantize the coefficients */ |
||
306 | for (j = 0; first < last; first++, j++) |
||
307 | output[first] = mantissas[j] * scale_factor; |
||
308 | } else { |
||
309 | /* this subband was not coded, so zero the entire subband */ |
||
310 | memset(output + first, 0, subband_size * sizeof(*output)); |
||
311 | } |
||
312 | } |
||
313 | |||
314 | /* clear the subbands that were not coded */ |
||
315 | first = subband_tab[i]; |
||
316 | memset(output + first, 0, (SAMPLES_PER_FRAME - first) * sizeof(*output)); |
||
317 | return num_subbands; |
||
318 | } |
||
319 | |||
320 | /** |
||
321 | * Restore the quantized tonal components |
||
322 | * |
||
323 | * @param components tonal components |
||
324 | * @param num_bands number of coded bands |
||
325 | */ |
||
326 | static int decode_tonal_components(GetBitContext *gb, |
||
327 | TonalComponent *components, int num_bands) |
||
328 | { |
||
329 | int i, b, c, m; |
||
330 | int nb_components, coding_mode_selector, coding_mode; |
||
331 | int band_flags[4], mantissa[8]; |
||
332 | int component_count = 0; |
||
333 | |||
334 | nb_components = get_bits(gb, 5); |
||
335 | |||
336 | /* no tonal components */ |
||
337 | if (nb_components == 0) |
||
338 | return 0; |
||
339 | |||
340 | coding_mode_selector = get_bits(gb, 2); |
||
341 | if (coding_mode_selector == 2) |
||
342 | return AVERROR_INVALIDDATA; |
||
343 | |||
344 | coding_mode = coding_mode_selector & 1; |
||
345 | |||
346 | for (i = 0; i < nb_components; i++) { |
||
347 | int coded_values_per_component, quant_step_index; |
||
348 | |||
349 | for (b = 0; b <= num_bands; b++) |
||
350 | band_flags[b] = get_bits1(gb); |
||
351 | |||
352 | coded_values_per_component = get_bits(gb, 3); |
||
353 | |||
354 | quant_step_index = get_bits(gb, 3); |
||
355 | if (quant_step_index <= 1) |
||
356 | return AVERROR_INVALIDDATA; |
||
357 | |||
358 | if (coding_mode_selector == 3) |
||
359 | coding_mode = get_bits1(gb); |
||
360 | |||
361 | for (b = 0; b < (num_bands + 1) * 4; b++) { |
||
362 | int coded_components; |
||
363 | |||
364 | if (band_flags[b >> 2] == 0) |
||
365 | continue; |
||
366 | |||
367 | coded_components = get_bits(gb, 3); |
||
368 | |||
369 | for (c = 0; c < coded_components; c++) { |
||
370 | TonalComponent *cmp = &components[component_count]; |
||
371 | int sf_index, coded_values, max_coded_values; |
||
372 | float scale_factor; |
||
373 | |||
374 | sf_index = get_bits(gb, 6); |
||
375 | if (component_count >= 64) |
||
376 | return AVERROR_INVALIDDATA; |
||
377 | |||
378 | cmp->pos = b * 64 + get_bits(gb, 6); |
||
379 | |||
380 | max_coded_values = SAMPLES_PER_FRAME - cmp->pos; |
||
381 | coded_values = coded_values_per_component + 1; |
||
382 | coded_values = FFMIN(max_coded_values, coded_values); |
||
383 | |||
384 | scale_factor = ff_atrac_sf_table[sf_index] * |
||
385 | inv_max_quant[quant_step_index]; |
||
386 | |||
387 | read_quant_spectral_coeffs(gb, quant_step_index, coding_mode, |
||
388 | mantissa, coded_values); |
||
389 | |||
390 | cmp->num_coefs = coded_values; |
||
391 | |||
392 | /* inverse quant */ |
||
393 | for (m = 0; m < coded_values; m++) |
||
394 | cmp->coef[m] = mantissa[m] * scale_factor; |
||
395 | |||
396 | component_count++; |
||
397 | } |
||
398 | } |
||
399 | } |
||
400 | |||
401 | return component_count; |
||
402 | } |
||
403 | |||
404 | /** |
||
405 | * Decode gain parameters for the coded bands |
||
406 | * |
||
407 | * @param block the gainblock for the current band |
||
408 | * @param num_bands amount of coded bands |
||
409 | */ |
||
410 | static int decode_gain_control(GetBitContext *gb, GainBlock *block, |
||
411 | int num_bands) |
||
412 | { |
||
413 | int b, j; |
||
414 | int *level, *loc; |
||
415 | |||
416 | AtracGainInfo *gain = block->g_block; |
||
417 | |||
418 | for (b = 0; b <= num_bands; b++) { |
||
419 | gain[b].num_points = get_bits(gb, 3); |
||
420 | level = gain[b].lev_code; |
||
421 | loc = gain[b].loc_code; |
||
422 | |||
423 | for (j = 0; j < gain[b].num_points; j++) { |
||
424 | level[j] = get_bits(gb, 4); |
||
425 | loc[j] = get_bits(gb, 5); |
||
426 | if (j && loc[j] <= loc[j - 1]) |
||
427 | return AVERROR_INVALIDDATA; |
||
428 | } |
||
429 | } |
||
430 | |||
431 | /* Clear the unused blocks. */ |
||
432 | for (; b < 4 ; b++) |
||
433 | gain[b].num_points = 0; |
||
434 | |||
435 | return 0; |
||
436 | } |
||
437 | |||
438 | /** |
||
439 | * Combine the tonal band spectrum and regular band spectrum |
||
440 | * |
||
441 | * @param spectrum output spectrum buffer |
||
442 | * @param num_components number of tonal components |
||
443 | * @param components tonal components for this band |
||
444 | * @return position of the last tonal coefficient |
||
445 | */ |
||
446 | static int add_tonal_components(float *spectrum, int num_components, |
||
447 | TonalComponent *components) |
||
448 | { |
||
449 | int i, j, last_pos = -1; |
||
450 | float *input, *output; |
||
451 | |||
452 | for (i = 0; i < num_components; i++) { |
||
453 | last_pos = FFMAX(components[i].pos + components[i].num_coefs, last_pos); |
||
454 | input = components[i].coef; |
||
455 | output = &spectrum[components[i].pos]; |
||
456 | |||
457 | for (j = 0; j < components[i].num_coefs; j++) |
||
458 | output[j] += input[j]; |
||
459 | } |
||
460 | |||
461 | return last_pos; |
||
462 | } |
||
463 | |||
464 | #define INTERPOLATE(old, new, nsample) \ |
||
465 | ((old) + (nsample) * 0.125 * ((new) - (old))) |
||
466 | |||
467 | static void reverse_matrixing(float *su1, float *su2, int *prev_code, |
||
468 | int *curr_code) |
||
469 | { |
||
470 | int i, nsample, band; |
||
471 | float mc1_l, mc1_r, mc2_l, mc2_r; |
||
472 | |||
473 | for (i = 0, band = 0; band < 4 * 256; band += 256, i++) { |
||
474 | int s1 = prev_code[i]; |
||
475 | int s2 = curr_code[i]; |
||
476 | nsample = band; |
||
477 | |||
478 | if (s1 != s2) { |
||
479 | /* Selector value changed, interpolation needed. */ |
||
480 | mc1_l = matrix_coeffs[s1 * 2 ]; |
||
481 | mc1_r = matrix_coeffs[s1 * 2 + 1]; |
||
482 | mc2_l = matrix_coeffs[s2 * 2 ]; |
||
483 | mc2_r = matrix_coeffs[s2 * 2 + 1]; |
||
484 | |||
485 | /* Interpolation is done over the first eight samples. */ |
||
486 | for (; nsample < band + 8; nsample++) { |
||
487 | float c1 = su1[nsample]; |
||
488 | float c2 = su2[nsample]; |
||
489 | c2 = c1 * INTERPOLATE(mc1_l, mc2_l, nsample - band) + |
||
490 | c2 * INTERPOLATE(mc1_r, mc2_r, nsample - band); |
||
491 | su1[nsample] = c2; |
||
492 | su2[nsample] = c1 * 2.0 - c2; |
||
493 | } |
||
494 | } |
||
495 | |||
496 | /* Apply the matrix without interpolation. */ |
||
497 | switch (s2) { |
||
498 | case 0: /* M/S decoding */ |
||
499 | for (; nsample < band + 256; nsample++) { |
||
500 | float c1 = su1[nsample]; |
||
501 | float c2 = su2[nsample]; |
||
502 | su1[nsample] = c2 * 2.0; |
||
503 | su2[nsample] = (c1 - c2) * 2.0; |
||
504 | } |
||
505 | break; |
||
506 | case 1: |
||
507 | for (; nsample < band + 256; nsample++) { |
||
508 | float c1 = su1[nsample]; |
||
509 | float c2 = su2[nsample]; |
||
510 | su1[nsample] = (c1 + c2) * 2.0; |
||
511 | su2[nsample] = c2 * -2.0; |
||
512 | } |
||
513 | break; |
||
514 | case 2: |
||
515 | case 3: |
||
516 | for (; nsample < band + 256; nsample++) { |
||
517 | float c1 = su1[nsample]; |
||
518 | float c2 = su2[nsample]; |
||
519 | su1[nsample] = c1 + c2; |
||
520 | su2[nsample] = c1 - c2; |
||
521 | } |
||
522 | break; |
||
523 | default: |
||
524 | av_assert1(0); |
||
525 | } |
||
526 | } |
||
527 | } |
||
528 | |||
529 | static void get_channel_weights(int index, int flag, float ch[2]) |
||
530 | { |
||
531 | if (index == 7) { |
||
532 | ch[0] = 1.0; |
||
533 | ch[1] = 1.0; |
||
534 | } else { |
||
535 | ch[0] = (index & 7) / 7.0; |
||
536 | ch[1] = sqrt(2 - ch[0] * ch[0]); |
||
537 | if (flag) |
||
538 | FFSWAP(float, ch[0], ch[1]); |
||
539 | } |
||
540 | } |
||
541 | |||
/**
 * Apply the joint-stereo channel weights to bands 1..3 in place.
 *
 * Nothing is done when both weighting indexes (p3[1] and p3[3]) equal 7.
 * Otherwise the first eight samples of each band are scaled with
 * interpolated weights and the remaining samples with the w[1] pair.
 *
 * @param su1 samples of the first sound unit (4 * 256 values)
 * @param su2 samples of the second sound unit (4 * 256 values)
 * @param p3  weighting parameters: p3[0]/p3[1] and p3[2]/p3[3] are the
 *            (flag, index) pairs handed to get_channel_weights()
 */
static void channel_weighting(float *su1, float *su2, int *p3)
{
    /* w[x][y] y=0 is left y=1 is right */
    float w[2][2];
    int band, pos;

    if (p3[1] == 7 && p3[3] == 7)
        return;

    get_channel_weights(p3[1], p3[0], w[0]);
    get_channel_weights(p3[3], p3[2], w[1]);

    for (band = 256; band < 4 * 256; band += 256) {
        const int ramp_end = band + 8;
        const int band_end = band + 256;

        for (pos = band; pos < ramp_end; pos++) {
            su1[pos] *= INTERPOLATE(w[0][0], w[0][1], pos - band);
            su2[pos] *= INTERPOLATE(w[1][0], w[1][1], pos - band);
        }
        for (; pos < band_end; pos++) {
            su1[pos] *= w[1][0];
            su2[pos] *= w[1][1];
        }
    }
}
||
564 | |||
565 | /** |
||
566 | * Decode a Sound Unit |
||
567 | * |
||
568 | * @param snd the channel unit to be used |
||
569 | * @param output the decoded samples before IQMF in float representation |
||
570 | * @param channel_num channel number |
||
571 | * @param coding_mode the coding mode (JOINT_STEREO or regular stereo/mono) |
||
572 | */ |
||
573 | static int decode_channel_sound_unit(ATRAC3Context *q, GetBitContext *gb, |
||
574 | ChannelUnit *snd, float *output, |
||
575 | int channel_num, int coding_mode) |
||
576 | { |
||
577 | int band, ret, num_subbands, last_tonal, num_bands; |
||
578 | GainBlock *gain1 = &snd->gain_block[ snd->gc_blk_switch]; |
||
579 | GainBlock *gain2 = &snd->gain_block[1 - snd->gc_blk_switch]; |
||
580 | |||
581 | if (coding_mode == JOINT_STEREO && channel_num == 1) { |
||
582 | if (get_bits(gb, 2) != 3) { |
||
583 | av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n"); |
||
584 | return AVERROR_INVALIDDATA; |
||
585 | } |
||
586 | } else { |
||
587 | if (get_bits(gb, 6) != 0x28) { |
||
588 | av_log(NULL,AV_LOG_ERROR,"Sound Unit id != 0x28.\n"); |
||
589 | return AVERROR_INVALIDDATA; |
||
590 | } |
||
591 | } |
||
592 | |||
593 | /* number of coded QMF bands */ |
||
594 | snd->bands_coded = get_bits(gb, 2); |
||
595 | |||
596 | ret = decode_gain_control(gb, gain2, snd->bands_coded); |
||
597 | if (ret) |
||
598 | return ret; |
||
599 | |||
600 | snd->num_components = decode_tonal_components(gb, snd->components, |
||
601 | snd->bands_coded); |
||
602 | if (snd->num_components < 0) |
||
603 | return snd->num_components; |
||
604 | |||
605 | num_subbands = decode_spectrum(gb, snd->spectrum); |
||
606 | |||
607 | /* Merge the decoded spectrum and tonal components. */ |
||
608 | last_tonal = add_tonal_components(snd->spectrum, snd->num_components, |
||
609 | snd->components); |
||
610 | |||
611 | |||
612 | /* calculate number of used MLT/QMF bands according to the amount of coded |
||
613 | spectral lines */ |
||
614 | num_bands = (subband_tab[num_subbands] - 1) >> 8; |
||
615 | if (last_tonal >= 0) |
||
616 | num_bands = FFMAX((last_tonal + 256) >> 8, num_bands); |
||
617 | |||
618 | |||
619 | /* Reconstruct time domain samples. */ |
||
620 | for (band = 0; band < 4; band++) { |
||
621 | /* Perform the IMDCT step without overlapping. */ |
||
622 | if (band <= num_bands) |
||
623 | imlt(q, &snd->spectrum[band * 256], snd->imdct_buf, band & 1); |
||
624 | else |
||
625 | memset(snd->imdct_buf, 0, 512 * sizeof(*snd->imdct_buf)); |
||
626 | |||
627 | /* gain compensation and overlapping */ |
||
628 | ff_atrac_gain_compensation(&q->gainc_ctx, snd->imdct_buf, |
||
629 | &snd->prev_frame[band * 256], |
||
630 | &gain1->g_block[band], &gain2->g_block[band], |
||
631 | 256, &output[band * 256]); |
||
632 | } |
||
633 | |||
634 | /* Swap the gain control buffers for the next frame. */ |
||
635 | snd->gc_blk_switch ^= 1; |
||
636 | |||
637 | return 0; |
||
638 | } |
||
639 | |||
640 | static int decode_frame(AVCodecContext *avctx, const uint8_t *databuf, |
||
641 | float **out_samples) |
||
642 | { |
||
643 | ATRAC3Context *q = avctx->priv_data; |
||
644 | int ret, i; |
||
645 | uint8_t *ptr1; |
||
646 | |||
647 | if (q->coding_mode == JOINT_STEREO) { |
||
648 | /* channel coupling mode */ |
||
649 | /* decode Sound Unit 1 */ |
||
650 | init_get_bits(&q->gb, databuf, avctx->block_align * 8); |
||
651 | |||
652 | ret = decode_channel_sound_unit(q, &q->gb, q->units, out_samples[0], 0, |
||
653 | JOINT_STEREO); |
||
654 | if (ret != 0) |
||
655 | return ret; |
||
656 | |||
657 | /* Framedata of the su2 in the joint-stereo mode is encoded in |
||
658 | * reverse byte order so we need to swap it first. */ |
||
659 | if (databuf == q->decoded_bytes_buffer) { |
||
660 | uint8_t *ptr2 = q->decoded_bytes_buffer + avctx->block_align - 1; |
||
661 | ptr1 = q->decoded_bytes_buffer; |
||
662 | for (i = 0; i < avctx->block_align / 2; i++, ptr1++, ptr2--) |
||
663 | FFSWAP(uint8_t, *ptr1, *ptr2); |
||
664 | } else { |
||
665 | const uint8_t *ptr2 = databuf + avctx->block_align - 1; |
||
666 | for (i = 0; i < avctx->block_align; i++) |
||
667 | q->decoded_bytes_buffer[i] = *ptr2--; |
||
668 | } |
||
669 | |||
670 | /* Skip the sync codes (0xF8). */ |
||
671 | ptr1 = q->decoded_bytes_buffer; |
||
672 | for (i = 4; *ptr1 == 0xF8; i++, ptr1++) { |
||
673 | if (i >= avctx->block_align) |
||
674 | return AVERROR_INVALIDDATA; |
||
675 | } |
||
676 | |||
677 | |||
678 | /* set the bitstream reader at the start of the second Sound Unit*/ |
||
679 | init_get_bits8(&q->gb, ptr1, q->decoded_bytes_buffer + avctx->block_align - ptr1); |
||
680 | |||
681 | /* Fill the Weighting coeffs delay buffer */ |
||
682 | memmove(q->weighting_delay, &q->weighting_delay[2], |
||
683 | 4 * sizeof(*q->weighting_delay)); |
||
684 | q->weighting_delay[4] = get_bits1(&q->gb); |
||
685 | q->weighting_delay[5] = get_bits(&q->gb, 3); |
||
686 | |||
687 | for (i = 0; i < 4; i++) { |
||
688 | q->matrix_coeff_index_prev[i] = q->matrix_coeff_index_now[i]; |
||
689 | q->matrix_coeff_index_now[i] = q->matrix_coeff_index_next[i]; |
||
690 | q->matrix_coeff_index_next[i] = get_bits(&q->gb, 2); |
||
691 | } |
||
692 | |||
693 | /* Decode Sound Unit 2. */ |
||
694 | ret = decode_channel_sound_unit(q, &q->gb, &q->units[1], |
||
695 | out_samples[1], 1, JOINT_STEREO); |
||
696 | if (ret != 0) |
||
697 | return ret; |
||
698 | |||
699 | /* Reconstruct the channel coefficients. */ |
||
700 | reverse_matrixing(out_samples[0], out_samples[1], |
||
701 | q->matrix_coeff_index_prev, |
||
702 | q->matrix_coeff_index_now); |
||
703 | |||
704 | channel_weighting(out_samples[0], out_samples[1], q->weighting_delay); |
||
705 | } else { |
||
706 | /* normal stereo mode or mono */ |
||
707 | /* Decode the channel sound units. */ |
||
708 | for (i = 0; i < avctx->channels; i++) { |
||
709 | /* Set the bitstream reader at the start of a channel sound unit. */ |
||
710 | init_get_bits(&q->gb, |
||
711 | databuf + i * avctx->block_align / avctx->channels, |
||
712 | avctx->block_align * 8 / avctx->channels); |
||
713 | |||
714 | ret = decode_channel_sound_unit(q, &q->gb, &q->units[i], |
||
715 | out_samples[i], i, q->coding_mode); |
||
716 | if (ret != 0) |
||
717 | return ret; |
||
718 | } |
||
719 | } |
||
720 | |||
721 | /* Apply the iQMF synthesis filter. */ |
||
722 | for (i = 0; i < avctx->channels; i++) { |
||
723 | float *p1 = out_samples[i]; |
||
724 | float *p2 = p1 + 256; |
||
725 | float *p3 = p2 + 256; |
||
726 | float *p4 = p3 + 256; |
||
727 | ff_atrac_iqmf(p1, p2, 256, p1, q->units[i].delay_buf1, q->temp_buf); |
||
728 | ff_atrac_iqmf(p4, p3, 256, p3, q->units[i].delay_buf2, q->temp_buf); |
||
729 | ff_atrac_iqmf(p1, p3, 512, p1, q->units[i].delay_buf3, q->temp_buf); |
||
730 | } |
||
731 | |||
732 | return 0; |
||
733 | } |
||
734 | |||
735 | static int atrac3_decode_frame(AVCodecContext *avctx, void *data, |
||
736 | int *got_frame_ptr, AVPacket *avpkt) |
||
737 | { |
||
738 | AVFrame *frame = data; |
||
739 | const uint8_t *buf = avpkt->data; |
||
740 | int buf_size = avpkt->size; |
||
741 | ATRAC3Context *q = avctx->priv_data; |
||
742 | int ret; |
||
743 | const uint8_t *databuf; |
||
744 | |||
745 | if (buf_size < avctx->block_align) { |
||
746 | av_log(avctx, AV_LOG_ERROR, |
||
747 | "Frame too small (%d bytes). Truncated file?\n", buf_size); |
||
748 | return AVERROR_INVALIDDATA; |
||
749 | } |
||
750 | |||
751 | /* get output buffer */ |
||
752 | frame->nb_samples = SAMPLES_PER_FRAME; |
||
753 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
||
754 | return ret; |
||
755 | |||
756 | /* Check if we need to descramble and what buffer to pass on. */ |
||
757 | if (q->scrambled_stream) { |
||
758 | decode_bytes(buf, q->decoded_bytes_buffer, avctx->block_align); |
||
759 | databuf = q->decoded_bytes_buffer; |
||
760 | } else { |
||
761 | databuf = buf; |
||
762 | } |
||
763 | |||
764 | ret = decode_frame(avctx, databuf, (float **)frame->extended_data); |
||
765 | if (ret) { |
||
766 | av_log(NULL, AV_LOG_ERROR, "Frame decoding error!\n"); |
||
767 | return ret; |
||
768 | } |
||
769 | |||
770 | *got_frame_ptr = 1; |
||
771 | |||
772 | return avctx->block_align; |
||
773 | } |
||
774 | |||
775 | static av_cold void atrac3_init_static_data(void) |
||
776 | { |
||
777 | int i; |
||
778 | |||
779 | init_imdct_window(); |
||
780 | ff_atrac_generate_tables(); |
||
781 | |||
782 | /* Initialize the VLC tables. */ |
||
783 | for (i = 0; i < 7; i++) { |
||
784 | spectral_coeff_tab[i].table = &atrac3_vlc_table[atrac3_vlc_offs[i]]; |
||
785 | spectral_coeff_tab[i].table_allocated = atrac3_vlc_offs[i + 1] - |
||
786 | atrac3_vlc_offs[i ]; |
||
787 | init_vlc(&spectral_coeff_tab[i], 9, huff_tab_sizes[i], |
||
788 | huff_bits[i], 1, 1, |
||
789 | huff_codes[i], 1, 1, INIT_VLC_USE_NEW_STATIC); |
||
790 | } |
||
791 | } |
||
792 | |||
793 | static av_cold int atrac3_decode_init(AVCodecContext *avctx) |
||
794 | { |
||
795 | static int static_init_done; |
||
796 | int i, ret; |
||
797 | int version, delay, samples_per_frame, frame_factor; |
||
798 | const uint8_t *edata_ptr = avctx->extradata; |
||
799 | ATRAC3Context *q = avctx->priv_data; |
||
800 | |||
801 | if (avctx->channels <= 0 || avctx->channels > 2) { |
||
802 | av_log(avctx, AV_LOG_ERROR, "Channel configuration error!\n"); |
||
803 | return AVERROR(EINVAL); |
||
804 | } |
||
805 | |||
806 | if (!static_init_done) |
||
807 | atrac3_init_static_data(); |
||
808 | static_init_done = 1; |
||
809 | |||
810 | /* Take care of the codec-specific extradata. */ |
||
811 | if (avctx->extradata_size == 14) { |
||
812 | /* Parse the extradata, WAV format */ |
||
813 | av_log(avctx, AV_LOG_DEBUG, "[0-1] %d\n", |
||
814 | bytestream_get_le16(&edata_ptr)); // Unknown value always 1 |
||
815 | edata_ptr += 4; // samples per channel |
||
816 | q->coding_mode = bytestream_get_le16(&edata_ptr); |
||
817 | av_log(avctx, AV_LOG_DEBUG,"[8-9] %d\n", |
||
818 | bytestream_get_le16(&edata_ptr)); //Dupe of coding mode |
||
819 | frame_factor = bytestream_get_le16(&edata_ptr); // Unknown always 1 |
||
820 | av_log(avctx, AV_LOG_DEBUG,"[12-13] %d\n", |
||
821 | bytestream_get_le16(&edata_ptr)); // Unknown always 0 |
||
822 | |||
823 | /* setup */ |
||
824 | samples_per_frame = SAMPLES_PER_FRAME * avctx->channels; |
||
825 | version = 4; |
||
826 | delay = 0x88E; |
||
827 | q->coding_mode = q->coding_mode ? JOINT_STEREO : STEREO; |
||
828 | q->scrambled_stream = 0; |
||
829 | |||
830 | if (avctx->block_align != 96 * avctx->channels * frame_factor && |
||
831 | avctx->block_align != 152 * avctx->channels * frame_factor && |
||
832 | avctx->block_align != 192 * avctx->channels * frame_factor) { |
||
833 | av_log(avctx, AV_LOG_ERROR, "Unknown frame/channel/frame_factor " |
||
834 | "configuration %d/%d/%d\n", avctx->block_align, |
||
835 | avctx->channels, frame_factor); |
||
836 | return AVERROR_INVALIDDATA; |
||
837 | } |
||
838 | } else if (avctx->extradata_size == 12 || avctx->extradata_size == 10) { |
||
839 | /* Parse the extradata, RM format. */ |
||
840 | version = bytestream_get_be32(&edata_ptr); |
||
841 | samples_per_frame = bytestream_get_be16(&edata_ptr); |
||
842 | delay = bytestream_get_be16(&edata_ptr); |
||
843 | q->coding_mode = bytestream_get_be16(&edata_ptr); |
||
844 | q->scrambled_stream = 1; |
||
845 | |||
846 | } else { |
||
847 | av_log(NULL, AV_LOG_ERROR, "Unknown extradata size %d.\n", |
||
848 | avctx->extradata_size); |
||
849 | return AVERROR(EINVAL); |
||
850 | } |
||
851 | |||
852 | /* Check the extradata */ |
||
853 | |||
854 | if (version != 4) { |
||
855 | av_log(avctx, AV_LOG_ERROR, "Version %d != 4.\n", version); |
||
856 | return AVERROR_INVALIDDATA; |
||
857 | } |
||
858 | |||
859 | if (samples_per_frame != SAMPLES_PER_FRAME && |
||
860 | samples_per_frame != SAMPLES_PER_FRAME * 2) { |
||
861 | av_log(avctx, AV_LOG_ERROR, "Unknown amount of samples per frame %d.\n", |
||
862 | samples_per_frame); |
||
863 | return AVERROR_INVALIDDATA; |
||
864 | } |
||
865 | |||
866 | if (delay != 0x88E) { |
||
867 | av_log(avctx, AV_LOG_ERROR, "Unknown amount of delay %x != 0x88E.\n", |
||
868 | delay); |
||
869 | return AVERROR_INVALIDDATA; |
||
870 | } |
||
871 | |||
872 | if (q->coding_mode == STEREO) |
||
873 | av_log(avctx, AV_LOG_DEBUG, "Normal stereo detected.\n"); |
||
874 | else if (q->coding_mode == JOINT_STEREO) { |
||
875 | if (avctx->channels != 2) { |
||
876 | av_log(avctx, AV_LOG_ERROR, "Invalid coding mode\n"); |
||
877 | return AVERROR_INVALIDDATA; |
||
878 | } |
||
879 | av_log(avctx, AV_LOG_DEBUG, "Joint stereo detected.\n"); |
||
880 | } else { |
||
881 | av_log(avctx, AV_LOG_ERROR, "Unknown channel coding mode %x!\n", |
||
882 | q->coding_mode); |
||
883 | return AVERROR_INVALIDDATA; |
||
884 | } |
||
885 | |||
886 | if (avctx->block_align >= UINT_MAX / 2) |
||
887 | return AVERROR(EINVAL); |
||
888 | |||
889 | q->decoded_bytes_buffer = av_mallocz(FFALIGN(avctx->block_align, 4) + |
||
890 | FF_INPUT_BUFFER_PADDING_SIZE); |
||
891 | if (q->decoded_bytes_buffer == NULL) |
||
892 | return AVERROR(ENOMEM); |
||
893 | |||
894 | avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; |
||
895 | |||
896 | /* initialize the MDCT transform */ |
||
897 | if ((ret = ff_mdct_init(&q->mdct_ctx, 9, 1, 1.0 / 32768)) < 0) { |
||
898 | av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n"); |
||
899 | av_freep(&q->decoded_bytes_buffer); |
||
900 | return ret; |
||
901 | } |
||
902 | |||
903 | /* init the joint-stereo decoding data */ |
||
904 | q->weighting_delay[0] = 0; |
||
905 | q->weighting_delay[1] = 7; |
||
906 | q->weighting_delay[2] = 0; |
||
907 | q->weighting_delay[3] = 7; |
||
908 | q->weighting_delay[4] = 0; |
||
909 | q->weighting_delay[5] = 7; |
||
910 | |||
911 | for (i = 0; i < 4; i++) { |
||
912 | q->matrix_coeff_index_prev[i] = 3; |
||
913 | q->matrix_coeff_index_now[i] = 3; |
||
914 | q->matrix_coeff_index_next[i] = 3; |
||
915 | } |
||
916 | |||
917 | ff_atrac_init_gain_compensation(&q->gainc_ctx, 4, 3); |
||
918 | avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
||
919 | ff_fmt_convert_init(&q->fmt_conv, avctx); |
||
920 | |||
921 | q->units = av_mallocz(sizeof(*q->units) * avctx->channels); |
||
922 | if (!q->units) { |
||
923 | atrac3_decode_close(avctx); |
||
924 | return AVERROR(ENOMEM); |
||
925 | } |
||
926 | |||
927 | return 0; |
||
928 | } |
||
929 | |||
930 | AVCodec ff_atrac3_decoder = { |
||
931 | .name = "atrac3", |
||
932 | .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 (Adaptive TRansform Acoustic Coding 3)"), |
||
933 | .type = AVMEDIA_TYPE_AUDIO, |
||
934 | .id = AV_CODEC_ID_ATRAC3, |
||
935 | .priv_data_size = sizeof(ATRAC3Context), |
||
936 | .init = atrac3_decode_init, |
||
937 | .close = atrac3_decode_close, |
||
938 | .decode = atrac3_decode_frame, |
||
939 | .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1, |
||
940 | .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, |
||
941 | AV_SAMPLE_FMT_NONE }, |
||
942 | };>>=>>>>>>>>>=>>>>>>>>>>>>>>=>>=>>>>=>=>>>=>=>=>>>>>>>><>> |