Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * Simple free lossless/lossy audio codec |
||
3 | * Copyright (c) 2004 Alex Beregszaszi |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | #include "avcodec.h" |
||
22 | #include "get_bits.h" |
||
23 | #include "golomb.h" |
||
24 | #include "internal.h" |
||
25 | |||
26 | /** |
||
27 | * @file |
||
28 | * Simple free lossless/lossy audio codec |
||
29 | * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk) |
||
30 | * Written and designed by Alex Beregszaszi |
||
31 | * |
||
32 | * TODO: |
||
33 | * - CABAC put/get_symbol |
||
34 | * - independent quantizer for channels |
||
35 | * - >2 channels support |
||
36 | * - more decorrelation types |
||
37 | * - more tap_quant tests |
||
38 | * - selectable intlist writers/readers (bonk-style, golomb, cabac) |
||
39 | */ |
||
40 | |||
/** Largest channel count handled by the encoders and the decoder. */
#define MAX_CHANNELS 2

/**
 * Stereo decorrelation modes stored in SonicContext.decorrelation.
 * Any other value (the encoder writes 3 for mono) leaves the samples as is.
 */
enum {
    MID_SIDE   = 0,
    LEFT_SIDE  = 1,
    RIGHT_SIDE = 2,
};
||
46 | |||
47 | typedef struct SonicContext { |
||
48 | int lossless, decorrelation; |
||
49 | |||
50 | int num_taps, downsampling; |
||
51 | double quantization; |
||
52 | |||
53 | int channels, samplerate, block_align, frame_size; |
||
54 | |||
55 | int *tap_quant; |
||
56 | int *int_samples; |
||
57 | int *coded_samples[MAX_CHANNELS]; |
||
58 | |||
59 | // for encoding |
||
60 | int *tail; |
||
61 | int tail_size; |
||
62 | int *window; |
||
63 | int window_size; |
||
64 | |||
65 | // for decoding |
||
66 | int *predictor_k; |
||
67 | int *predictor_state[MAX_CHANNELS]; |
||
68 | } SonicContext; |
||
69 | |||
/* Predictor coefficients are fixed point with LATTICE_SHIFT fractional bits. */
#define LATTICE_SHIFT   10
#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)

/* Lossy streams scale samples up by SAMPLE_SHIFT bits before prediction. */
#define SAMPLE_SHIFT    4
#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)

/* Constants of the lossy encoder's per-frame quantizer estimate. */
#define BASE_QUANT      0.6
#define RATE_VARIATION  3.0
||
77 | |||
/**
 * Shift a right by b bits after adding half of the divisor, i.e. divide
 * by 2^b with rounding.
 *
 * @param a value to scale down
 * @param b shift amount; the expression requires b >= 1
 */
static inline int shift(int a,int b)
{
    const int half = 1 << (b - 1);

    return (a + half) >> b;
}
||
82 | |||
/**
 * Shift a right by b bits and add one to the result when a is negative.
 *
 * @param a value to scale down
 * @param b shift amount
 */
static inline int shift_down(int a,int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted += 1;

    return shifted;
}
||
87 | |||
88 | #if 1 |
||
89 | static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) |
||
90 | { |
||
91 | int i; |
||
92 | |||
93 | for (i = 0; i < entries; i++) |
||
94 | set_se_golomb(pb, buf[i]); |
||
95 | |||
96 | return 1; |
||
97 | } |
||
98 | |||
99 | static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) |
||
100 | { |
||
101 | int i; |
||
102 | |||
103 | for (i = 0; i < entries; i++) |
||
104 | buf[i] = get_se_golomb(gb); |
||
105 | |||
106 | return 1; |
||
107 | } |
||
108 | |||
109 | #else |
||
110 | |||
111 | #define ADAPT_LEVEL 8 |
||
112 | |||
/**
 * Count how many bits are needed to represent x (0 for x == 0).
 */
static int bits_to_store(uint64_t x)
{
    int width;

    for (width = 0; x != 0; x >>= 1)
        width++;

    return width;
}
||
124 | |||
125 | static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max) |
||
126 | { |
||
127 | int i, bits; |
||
128 | |||
129 | if (!max) |
||
130 | return; |
||
131 | |||
132 | bits = bits_to_store(max); |
||
133 | |||
134 | for (i = 0; i < bits-1; i++) |
||
135 | put_bits(pb, 1, value & (1 << i)); |
||
136 | |||
137 | if ( (value | (1 << (bits-1))) <= max) |
||
138 | put_bits(pb, 1, value & (1 << (bits-1))); |
||
139 | } |
||
140 | |||
141 | static unsigned int read_uint_max(GetBitContext *gb, int max) |
||
142 | { |
||
143 | int i, bits, value = 0; |
||
144 | |||
145 | if (!max) |
||
146 | return 0; |
||
147 | |||
148 | bits = bits_to_store(max); |
||
149 | |||
150 | for (i = 0; i < bits-1; i++) |
||
151 | if (get_bits1(gb)) |
||
152 | value += 1 << i; |
||
153 | |||
154 | if ( (value | (1<<(bits-1))) <= max) |
||
155 | if (get_bits1(gb)) |
||
156 | value += 1 << (bits-1); |
||
157 | |||
158 | return value; |
||
159 | } |
||
160 | |||
161 | static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part) |
||
162 | { |
||
163 | int i, j, x = 0, low_bits = 0, max = 0; |
||
164 | int step = 256, pos = 0, dominant = 0, any = 0; |
||
165 | int *copy, *bits; |
||
166 | |||
167 | copy = av_calloc(entries, sizeof(*copy)); |
||
168 | if (!copy) |
||
169 | return AVERROR(ENOMEM); |
||
170 | |||
171 | if (base_2_part) |
||
172 | { |
||
173 | int energy = 0; |
||
174 | |||
175 | for (i = 0; i < entries; i++) |
||
176 | energy += abs(buf[i]); |
||
177 | |||
178 | low_bits = bits_to_store(energy / (entries * 2)); |
||
179 | if (low_bits > 15) |
||
180 | low_bits = 15; |
||
181 | |||
182 | put_bits(pb, 4, low_bits); |
||
183 | } |
||
184 | |||
185 | for (i = 0; i < entries; i++) |
||
186 | { |
||
187 | put_bits(pb, low_bits, abs(buf[i])); |
||
188 | copy[i] = abs(buf[i]) >> low_bits; |
||
189 | if (copy[i] > max) |
||
190 | max = abs(copy[i]); |
||
191 | } |
||
192 | |||
193 | bits = av_calloc(entries*max, sizeof(*bits)); |
||
194 | if (!bits) |
||
195 | { |
||
196 | // av_free(copy); |
||
197 | return AVERROR(ENOMEM); |
||
198 | } |
||
199 | |||
200 | for (i = 0; i <= max; i++) |
||
201 | { |
||
202 | for (j = 0; j < entries; j++) |
||
203 | if (copy[j] >= i) |
||
204 | bits[x++] = copy[j] > i; |
||
205 | } |
||
206 | |||
207 | // store bitstream |
||
208 | while (pos < x) |
||
209 | { |
||
210 | int steplet = step >> 8; |
||
211 | |||
212 | if (pos + steplet > x) |
||
213 | steplet = x - pos; |
||
214 | |||
215 | for (i = 0; i < steplet; i++) |
||
216 | if (bits[i+pos] != dominant) |
||
217 | any = 1; |
||
218 | |||
219 | put_bits(pb, 1, any); |
||
220 | |||
221 | if (!any) |
||
222 | { |
||
223 | pos += steplet; |
||
224 | step += step / ADAPT_LEVEL; |
||
225 | } |
||
226 | else |
||
227 | { |
||
228 | int interloper = 0; |
||
229 | |||
230 | while (((pos + interloper) < x) && (bits[pos + interloper] == dominant)) |
||
231 | interloper++; |
||
232 | |||
233 | // note change |
||
234 | write_uint_max(pb, interloper, (step >> 8) - 1); |
||
235 | |||
236 | pos += interloper + 1; |
||
237 | step -= step / ADAPT_LEVEL; |
||
238 | } |
||
239 | |||
240 | if (step < 256) |
||
241 | { |
||
242 | step = 65536 / step; |
||
243 | dominant = !dominant; |
||
244 | } |
||
245 | } |
||
246 | |||
247 | // store signs |
||
248 | for (i = 0; i < entries; i++) |
||
249 | if (buf[i]) |
||
250 | put_bits(pb, 1, buf[i] < 0); |
||
251 | |||
252 | // av_free(bits); |
||
253 | // av_free(copy); |
||
254 | |||
255 | return 0; |
||
256 | } |
||
257 | |||
258 | static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part) |
||
259 | { |
||
260 | int i, low_bits = 0, x = 0; |
||
261 | int n_zeros = 0, step = 256, dominant = 0; |
||
262 | int pos = 0, level = 0; |
||
263 | int *bits = av_calloc(entries, sizeof(*bits)); |
||
264 | |||
265 | if (!bits) |
||
266 | return AVERROR(ENOMEM); |
||
267 | |||
268 | if (base_2_part) |
||
269 | { |
||
270 | low_bits = get_bits(gb, 4); |
||
271 | |||
272 | if (low_bits) |
||
273 | for (i = 0; i < entries; i++) |
||
274 | buf[i] = get_bits(gb, low_bits); |
||
275 | } |
||
276 | |||
277 | // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits); |
||
278 | |||
279 | while (n_zeros < entries) |
||
280 | { |
||
281 | int steplet = step >> 8; |
||
282 | |||
283 | if (!get_bits1(gb)) |
||
284 | { |
||
285 | for (i = 0; i < steplet; i++) |
||
286 | bits[x++] = dominant; |
||
287 | |||
288 | if (!dominant) |
||
289 | n_zeros += steplet; |
||
290 | |||
291 | step += step / ADAPT_LEVEL; |
||
292 | } |
||
293 | else |
||
294 | { |
||
295 | int actual_run = read_uint_max(gb, steplet-1); |
||
296 | |||
297 | // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run); |
||
298 | |||
299 | for (i = 0; i < actual_run; i++) |
||
300 | bits[x++] = dominant; |
||
301 | |||
302 | bits[x++] = !dominant; |
||
303 | |||
304 | if (!dominant) |
||
305 | n_zeros += actual_run; |
||
306 | else |
||
307 | n_zeros++; |
||
308 | |||
309 | step -= step / ADAPT_LEVEL; |
||
310 | } |
||
311 | |||
312 | if (step < 256) |
||
313 | { |
||
314 | step = 65536 / step; |
||
315 | dominant = !dominant; |
||
316 | } |
||
317 | } |
||
318 | |||
319 | // reconstruct unsigned values |
||
320 | n_zeros = 0; |
||
321 | for (i = 0; n_zeros < entries; i++) |
||
322 | { |
||
323 | while(1) |
||
324 | { |
||
325 | if (pos >= entries) |
||
326 | { |
||
327 | pos = 0; |
||
328 | level += 1 << low_bits; |
||
329 | } |
||
330 | |||
331 | if (buf[pos] >= level) |
||
332 | break; |
||
333 | |||
334 | pos++; |
||
335 | } |
||
336 | |||
337 | if (bits[i]) |
||
338 | buf[pos] += 1 << low_bits; |
||
339 | else |
||
340 | n_zeros++; |
||
341 | |||
342 | pos++; |
||
343 | } |
||
344 | // av_free(bits); |
||
345 | |||
346 | // read signs |
||
347 | for (i = 0; i < entries; i++) |
||
348 | if (buf[i] && get_bits1(gb)) |
||
349 | buf[i] = -buf[i]; |
||
350 | |||
351 | // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos); |
||
352 | |||
353 | return 0; |
||
354 | } |
||
355 | #endif |
||
356 | |||
357 | static void predictor_init_state(int *k, int *state, int order) |
||
358 | { |
||
359 | int i; |
||
360 | |||
361 | for (i = order-2; i >= 0; i--) |
||
362 | { |
||
363 | int j, p, x = state[i]; |
||
364 | |||
365 | for (j = 0, p = i+1; p < order; j++,p++) |
||
366 | { |
||
367 | int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT); |
||
368 | state[p] += shift_down(k[j]*x, LATTICE_SHIFT); |
||
369 | x = tmp; |
||
370 | } |
||
371 | } |
||
372 | } |
||
373 | |||
374 | static int predictor_calc_error(int *k, int *state, int order, int error) |
||
375 | { |
||
376 | int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT); |
||
377 | |||
378 | #if 1 |
||
379 | int *k_ptr = &(k[order-2]), |
||
380 | *state_ptr = &(state[order-2]); |
||
381 | for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--) |
||
382 | { |
||
383 | int k_value = *k_ptr, state_value = *state_ptr; |
||
384 | x -= shift_down(k_value * state_value, LATTICE_SHIFT); |
||
385 | state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT); |
||
386 | } |
||
387 | #else |
||
388 | for (i = order-2; i >= 0; i--) |
||
389 | { |
||
390 | x -= shift_down(k[i] * state[i], LATTICE_SHIFT); |
||
391 | state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT); |
||
392 | } |
||
393 | #endif |
||
394 | |||
395 | // don't drift too far, to avoid overflows |
||
396 | if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16); |
||
397 | if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16); |
||
398 | |||
399 | state[0] = x; |
||
400 | |||
401 | return x; |
||
402 | } |
||
403 | |||
404 | #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER |
||
405 | // Heavily modified Levinson-Durbin algorithm which |
||
406 | // copes better with quantization, and calculates the |
||
407 | // actual whitened result as it goes. |
||
408 | |||
409 | static void modified_levinson_durbin(int *window, int window_entries, |
||
410 | int *out, int out_entries, int channels, int *tap_quant) |
||
411 | { |
||
412 | int i; |
||
413 | int *state = av_calloc(window_entries, sizeof(*state)); |
||
414 | |||
415 | memcpy(state, window, 4* window_entries); |
||
416 | |||
417 | for (i = 0; i < out_entries; i++) |
||
418 | { |
||
419 | int step = (i+1)*channels, k, j; |
||
420 | double xx = 0.0, xy = 0.0; |
||
421 | #if 1 |
||
422 | int *x_ptr = &(window[step]); |
||
423 | int *state_ptr = &(state[0]); |
||
424 | j = window_entries - step; |
||
425 | for (;j>0;j--,x_ptr++,state_ptr++) |
||
426 | { |
||
427 | double x_value = *x_ptr; |
||
428 | double state_value = *state_ptr; |
||
429 | xx += state_value*state_value; |
||
430 | xy += x_value*state_value; |
||
431 | } |
||
432 | #else |
||
433 | for (j = 0; j <= (window_entries - step); j++); |
||
434 | { |
||
435 | double stepval = window[step+j]; |
||
436 | double stateval = window[j]; |
||
437 | // xx += (double)window[j]*(double)window[j]; |
||
438 | // xy += (double)window[step+j]*(double)window[j]; |
||
439 | xx += stateval*stateval; |
||
440 | xy += stepval*stateval; |
||
441 | } |
||
442 | #endif |
||
443 | if (xx == 0.0) |
||
444 | k = 0; |
||
445 | else |
||
446 | k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5)); |
||
447 | |||
448 | if (k > (LATTICE_FACTOR/tap_quant[i])) |
||
449 | k = LATTICE_FACTOR/tap_quant[i]; |
||
450 | if (-k > (LATTICE_FACTOR/tap_quant[i])) |
||
451 | k = -(LATTICE_FACTOR/tap_quant[i]); |
||
452 | |||
453 | out[i] = k; |
||
454 | k *= tap_quant[i]; |
||
455 | |||
456 | #if 1 |
||
457 | x_ptr = &(window[step]); |
||
458 | state_ptr = &(state[0]); |
||
459 | j = window_entries - step; |
||
460 | for (;j>0;j--,x_ptr++,state_ptr++) |
||
461 | { |
||
462 | int x_value = *x_ptr; |
||
463 | int state_value = *state_ptr; |
||
464 | *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT); |
||
465 | *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT); |
||
466 | } |
||
467 | #else |
||
468 | for (j=0; j <= (window_entries - step); j++) |
||
469 | { |
||
470 | int stepval = window[step+j]; |
||
471 | int stateval=state[j]; |
||
472 | window[step+j] += shift_down(k * stateval, LATTICE_SHIFT); |
||
473 | state[j] += shift_down(k * stepval, LATTICE_SHIFT); |
||
474 | } |
||
475 | #endif |
||
476 | } |
||
477 | |||
478 | av_free(state); |
||
479 | } |
||
480 | |||
481 | static inline int code_samplerate(int samplerate) |
||
482 | { |
||
483 | switch (samplerate) |
||
484 | { |
||
485 | case 44100: return 0; |
||
486 | case 22050: return 1; |
||
487 | case 11025: return 2; |
||
488 | case 96000: return 3; |
||
489 | case 48000: return 4; |
||
490 | case 32000: return 5; |
||
491 | case 24000: return 6; |
||
492 | case 16000: return 7; |
||
493 | case 8000: return 8; |
||
494 | } |
||
495 | return AVERROR(EINVAL); |
||
496 | } |
||
497 | |||
498 | static av_cold int sonic_encode_init(AVCodecContext *avctx) |
||
499 | { |
||
500 | SonicContext *s = avctx->priv_data; |
||
501 | PutBitContext pb; |
||
502 | int i, version = 0; |
||
503 | |||
504 | if (avctx->channels > MAX_CHANNELS) |
||
505 | { |
||
506 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); |
||
507 | return AVERROR(EINVAL); /* only stereo or mono for now */ |
||
508 | } |
||
509 | |||
510 | if (avctx->channels == 2) |
||
511 | s->decorrelation = MID_SIDE; |
||
512 | else |
||
513 | s->decorrelation = 3; |
||
514 | |||
515 | if (avctx->codec->id == AV_CODEC_ID_SONIC_LS) |
||
516 | { |
||
517 | s->lossless = 1; |
||
518 | s->num_taps = 32; |
||
519 | s->downsampling = 1; |
||
520 | s->quantization = 0.0; |
||
521 | } |
||
522 | else |
||
523 | { |
||
524 | s->num_taps = 128; |
||
525 | s->downsampling = 2; |
||
526 | s->quantization = 1.0; |
||
527 | } |
||
528 | |||
529 | // max tap 2048 |
||
530 | if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) { |
||
531 | av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n"); |
||
532 | return AVERROR_INVALIDDATA; |
||
533 | } |
||
534 | |||
535 | // generate taps |
||
536 | s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); |
||
537 | for (i = 0; i < s->num_taps; i++) |
||
538 | s->tap_quant[i] = ff_sqrt(i+1); |
||
539 | |||
540 | s->channels = avctx->channels; |
||
541 | s->samplerate = avctx->sample_rate; |
||
542 | |||
543 | s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); |
||
544 | s->frame_size = s->channels*s->block_align*s->downsampling; |
||
545 | |||
546 | s->tail_size = s->num_taps*s->channels; |
||
547 | s->tail = av_calloc(s->tail_size, sizeof(*s->tail)); |
||
548 | if (!s->tail) |
||
549 | return AVERROR(ENOMEM); |
||
550 | |||
551 | s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) ); |
||
552 | if (!s->predictor_k) |
||
553 | return AVERROR(ENOMEM); |
||
554 | |||
555 | for (i = 0; i < s->channels; i++) |
||
556 | { |
||
557 | s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples)); |
||
558 | if (!s->coded_samples[i]) |
||
559 | return AVERROR(ENOMEM); |
||
560 | } |
||
561 | |||
562 | s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); |
||
563 | |||
564 | s->window_size = ((2*s->tail_size)+s->frame_size); |
||
565 | s->window = av_calloc(s->window_size, sizeof(*s->window)); |
||
566 | if (!s->window) |
||
567 | return AVERROR(ENOMEM); |
||
568 | |||
569 | avctx->extradata = av_mallocz(16); |
||
570 | if (!avctx->extradata) |
||
571 | return AVERROR(ENOMEM); |
||
572 | init_put_bits(&pb, avctx->extradata, 16*8); |
||
573 | |||
574 | put_bits(&pb, 2, version); // version |
||
575 | if (version == 1) |
||
576 | { |
||
577 | put_bits(&pb, 2, s->channels); |
||
578 | put_bits(&pb, 4, code_samplerate(s->samplerate)); |
||
579 | } |
||
580 | put_bits(&pb, 1, s->lossless); |
||
581 | if (!s->lossless) |
||
582 | put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision |
||
583 | put_bits(&pb, 2, s->decorrelation); |
||
584 | put_bits(&pb, 2, s->downsampling); |
||
585 | put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024 |
||
586 | put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table |
||
587 | |||
588 | flush_put_bits(&pb); |
||
589 | avctx->extradata_size = put_bits_count(&pb)/8; |
||
590 | |||
591 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", |
||
592 | version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); |
||
593 | |||
594 | avctx->frame_size = s->block_align*s->downsampling; |
||
595 | |||
596 | return 0; |
||
597 | } |
||
598 | |||
599 | static av_cold int sonic_encode_close(AVCodecContext *avctx) |
||
600 | { |
||
601 | SonicContext *s = avctx->priv_data; |
||
602 | int i; |
||
603 | |||
604 | for (i = 0; i < s->channels; i++) |
||
605 | av_freep(&s->coded_samples[i]); |
||
606 | |||
607 | av_freep(&s->predictor_k); |
||
608 | av_freep(&s->tail); |
||
609 | av_freep(&s->tap_quant); |
||
610 | av_freep(&s->window); |
||
611 | av_freep(&s->int_samples); |
||
612 | |||
613 | return 0; |
||
614 | } |
||
615 | |||
616 | static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, |
||
617 | const AVFrame *frame, int *got_packet_ptr) |
||
618 | { |
||
619 | SonicContext *s = avctx->priv_data; |
||
620 | PutBitContext pb; |
||
621 | int i, j, ch, quant = 0, x = 0; |
||
622 | int ret; |
||
623 | const short *samples = (const int16_t*)frame->data[0]; |
||
624 | |||
625 | if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0) |
||
626 | return ret; |
||
627 | |||
628 | init_put_bits(&pb, avpkt->data, avpkt->size); |
||
629 | |||
630 | // short -> internal |
||
631 | for (i = 0; i < s->frame_size; i++) |
||
632 | s->int_samples[i] = samples[i]; |
||
633 | |||
634 | if (!s->lossless) |
||
635 | for (i = 0; i < s->frame_size; i++) |
||
636 | s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT; |
||
637 | |||
638 | switch(s->decorrelation) |
||
639 | { |
||
640 | case MID_SIDE: |
||
641 | for (i = 0; i < s->frame_size; i += s->channels) |
||
642 | { |
||
643 | s->int_samples[i] += s->int_samples[i+1]; |
||
644 | s->int_samples[i+1] -= shift(s->int_samples[i], 1); |
||
645 | } |
||
646 | break; |
||
647 | case LEFT_SIDE: |
||
648 | for (i = 0; i < s->frame_size; i += s->channels) |
||
649 | s->int_samples[i+1] -= s->int_samples[i]; |
||
650 | break; |
||
651 | case RIGHT_SIDE: |
||
652 | for (i = 0; i < s->frame_size; i += s->channels) |
||
653 | s->int_samples[i] -= s->int_samples[i+1]; |
||
654 | break; |
||
655 | } |
||
656 | |||
657 | memset(s->window, 0, 4* s->window_size); |
||
658 | |||
659 | for (i = 0; i < s->tail_size; i++) |
||
660 | s->window[x++] = s->tail[i]; |
||
661 | |||
662 | for (i = 0; i < s->frame_size; i++) |
||
663 | s->window[x++] = s->int_samples[i]; |
||
664 | |||
665 | for (i = 0; i < s->tail_size; i++) |
||
666 | s->window[x++] = 0; |
||
667 | |||
668 | for (i = 0; i < s->tail_size; i++) |
||
669 | s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i]; |
||
670 | |||
671 | // generate taps |
||
672 | modified_levinson_durbin(s->window, s->window_size, |
||
673 | s->predictor_k, s->num_taps, s->channels, s->tap_quant); |
||
674 | if ((ret = intlist_write(&pb, s->predictor_k, s->num_taps, 0)) < 0) |
||
675 | return ret; |
||
676 | |||
677 | for (ch = 0; ch < s->channels; ch++) |
||
678 | { |
||
679 | x = s->tail_size+ch; |
||
680 | for (i = 0; i < s->block_align; i++) |
||
681 | { |
||
682 | int sum = 0; |
||
683 | for (j = 0; j < s->downsampling; j++, x += s->channels) |
||
684 | sum += s->window[x]; |
||
685 | s->coded_samples[ch][i] = sum; |
||
686 | } |
||
687 | } |
||
688 | |||
689 | // simple rate control code |
||
690 | if (!s->lossless) |
||
691 | { |
||
692 | double energy1 = 0.0, energy2 = 0.0; |
||
693 | for (ch = 0; ch < s->channels; ch++) |
||
694 | { |
||
695 | for (i = 0; i < s->block_align; i++) |
||
696 | { |
||
697 | double sample = s->coded_samples[ch][i]; |
||
698 | energy2 += sample*sample; |
||
699 | energy1 += fabs(sample); |
||
700 | } |
||
701 | } |
||
702 | |||
703 | energy2 = sqrt(energy2/(s->channels*s->block_align)); |
||
704 | energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align); |
||
705 | |||
706 | // increase bitrate when samples are like a gaussian distribution |
||
707 | // reduce bitrate when samples are like a two-tailed exponential distribution |
||
708 | |||
709 | if (energy2 > energy1) |
||
710 | energy2 += (energy2-energy1)*RATE_VARIATION; |
||
711 | |||
712 | quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR); |
||
713 | // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2); |
||
714 | |||
715 | quant = av_clip(quant, 1, 65534); |
||
716 | |||
717 | set_ue_golomb(&pb, quant); |
||
718 | |||
719 | quant *= SAMPLE_FACTOR; |
||
720 | } |
||
721 | |||
722 | // write out coded samples |
||
723 | for (ch = 0; ch < s->channels; ch++) |
||
724 | { |
||
725 | if (!s->lossless) |
||
726 | for (i = 0; i < s->block_align; i++) |
||
727 | s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant); |
||
728 | |||
729 | if ((ret = intlist_write(&pb, s->coded_samples[ch], s->block_align, 1)) < 0) |
||
730 | return ret; |
||
731 | } |
||
732 | |||
733 | // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8); |
||
734 | |||
735 | flush_put_bits(&pb); |
||
736 | avpkt->size = (put_bits_count(&pb)+7)/8; |
||
737 | *got_packet_ptr = 1; |
||
738 | return 0; |
||
739 | } |
||
740 | #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */ |
||
741 | |||
742 | #if CONFIG_SONIC_DECODER |
||
743 | static const int samplerate_table[] = |
||
744 | { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 }; |
||
745 | |||
746 | static av_cold int sonic_decode_init(AVCodecContext *avctx) |
||
747 | { |
||
748 | SonicContext *s = avctx->priv_data; |
||
749 | GetBitContext gb; |
||
750 | int i, version; |
||
751 | |||
752 | s->channels = avctx->channels; |
||
753 | s->samplerate = avctx->sample_rate; |
||
754 | |||
755 | if (!avctx->extradata) |
||
756 | { |
||
757 | av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n"); |
||
758 | return AVERROR_INVALIDDATA; |
||
759 | } |
||
760 | |||
761 | init_get_bits8(&gb, avctx->extradata, avctx->extradata_size); |
||
762 | |||
763 | version = get_bits(&gb, 2); |
||
764 | if (version > 1) |
||
765 | { |
||
766 | av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n"); |
||
767 | return AVERROR_INVALIDDATA; |
||
768 | } |
||
769 | |||
770 | if (version == 1) |
||
771 | { |
||
772 | s->channels = get_bits(&gb, 2); |
||
773 | s->samplerate = samplerate_table[get_bits(&gb, 4)]; |
||
774 | av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n", |
||
775 | s->channels, s->samplerate); |
||
776 | } |
||
777 | |||
778 | if (s->channels > MAX_CHANNELS) |
||
779 | { |
||
780 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); |
||
781 | return AVERROR_INVALIDDATA; |
||
782 | } |
||
783 | |||
784 | s->lossless = get_bits1(&gb); |
||
785 | if (!s->lossless) |
||
786 | skip_bits(&gb, 3); // XXX FIXME |
||
787 | s->decorrelation = get_bits(&gb, 2); |
||
788 | if (s->decorrelation != 3 && s->channels != 2) { |
||
789 | av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation); |
||
790 | return AVERROR_INVALIDDATA; |
||
791 | } |
||
792 | |||
793 | s->downsampling = get_bits(&gb, 2); |
||
794 | if (!s->downsampling) { |
||
795 | av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n"); |
||
796 | return AVERROR_INVALIDDATA; |
||
797 | } |
||
798 | |||
799 | s->num_taps = (get_bits(&gb, 5)+1)<<5; |
||
800 | if (get_bits1(&gb)) // XXX FIXME |
||
801 | av_log(avctx, AV_LOG_INFO, "Custom quant table\n"); |
||
802 | |||
803 | s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); |
||
804 | s->frame_size = s->channels*s->block_align*s->downsampling; |
||
805 | // avctx->frame_size = s->block_align; |
||
806 | |||
807 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", |
||
808 | version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); |
||
809 | |||
810 | // generate taps |
||
811 | s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); |
||
812 | for (i = 0; i < s->num_taps; i++) |
||
813 | s->tap_quant[i] = ff_sqrt(i+1); |
||
814 | |||
815 | s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k)); |
||
816 | |||
817 | for (i = 0; i < s->channels; i++) |
||
818 | { |
||
819 | s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state)); |
||
820 | if (!s->predictor_state[i]) |
||
821 | return AVERROR(ENOMEM); |
||
822 | } |
||
823 | |||
824 | for (i = 0; i < s->channels; i++) |
||
825 | { |
||
826 | s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples)); |
||
827 | if (!s->coded_samples[i]) |
||
828 | return AVERROR(ENOMEM); |
||
829 | } |
||
830 | s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); |
||
831 | |||
832 | avctx->sample_fmt = AV_SAMPLE_FMT_S16; |
||
833 | return 0; |
||
834 | } |
||
835 | |||
836 | static av_cold int sonic_decode_close(AVCodecContext *avctx) |
||
837 | { |
||
838 | SonicContext *s = avctx->priv_data; |
||
839 | int i; |
||
840 | |||
841 | av_freep(&s->int_samples); |
||
842 | av_freep(&s->tap_quant); |
||
843 | av_freep(&s->predictor_k); |
||
844 | |||
845 | for (i = 0; i < s->channels; i++) |
||
846 | { |
||
847 | av_freep(&s->predictor_state[i]); |
||
848 | av_freep(&s->coded_samples[i]); |
||
849 | } |
||
850 | |||
851 | return 0; |
||
852 | } |
||
853 | |||
854 | static int sonic_decode_frame(AVCodecContext *avctx, |
||
855 | void *data, int *got_frame_ptr, |
||
856 | AVPacket *avpkt) |
||
857 | { |
||
858 | const uint8_t *buf = avpkt->data; |
||
859 | int buf_size = avpkt->size; |
||
860 | SonicContext *s = avctx->priv_data; |
||
861 | GetBitContext gb; |
||
862 | int i, quant, ch, j, ret; |
||
863 | int16_t *samples; |
||
864 | AVFrame *frame = data; |
||
865 | |||
866 | if (buf_size == 0) return 0; |
||
867 | |||
868 | frame->nb_samples = s->frame_size / avctx->channels; |
||
869 | if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) |
||
870 | return ret; |
||
871 | samples = (int16_t *)frame->data[0]; |
||
872 | |||
873 | // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size); |
||
874 | |||
875 | init_get_bits8(&gb, buf, buf_size); |
||
876 | |||
877 | intlist_read(&gb, s->predictor_k, s->num_taps, 0); |
||
878 | |||
879 | // dequantize |
||
880 | for (i = 0; i < s->num_taps; i++) |
||
881 | s->predictor_k[i] *= s->tap_quant[i]; |
||
882 | |||
883 | if (s->lossless) |
||
884 | quant = 1; |
||
885 | else |
||
886 | quant = get_ue_golomb(&gb) * SAMPLE_FACTOR; |
||
887 | |||
888 | // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant); |
||
889 | |||
890 | for (ch = 0; ch < s->channels; ch++) |
||
891 | { |
||
892 | int x = ch; |
||
893 | |||
894 | predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps); |
||
895 | |||
896 | intlist_read(&gb, s->coded_samples[ch], s->block_align, 1); |
||
897 | |||
898 | for (i = 0; i < s->block_align; i++) |
||
899 | { |
||
900 | for (j = 0; j < s->downsampling - 1; j++) |
||
901 | { |
||
902 | s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0); |
||
903 | x += s->channels; |
||
904 | } |
||
905 | |||
906 | s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant); |
||
907 | x += s->channels; |
||
908 | } |
||
909 | |||
910 | for (i = 0; i < s->num_taps; i++) |
||
911 | s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels]; |
||
912 | } |
||
913 | |||
914 | switch(s->decorrelation) |
||
915 | { |
||
916 | case MID_SIDE: |
||
917 | for (i = 0; i < s->frame_size; i += s->channels) |
||
918 | { |
||
919 | s->int_samples[i+1] += shift(s->int_samples[i], 1); |
||
920 | s->int_samples[i] -= s->int_samples[i+1]; |
||
921 | } |
||
922 | break; |
||
923 | case LEFT_SIDE: |
||
924 | for (i = 0; i < s->frame_size; i += s->channels) |
||
925 | s->int_samples[i+1] += s->int_samples[i]; |
||
926 | break; |
||
927 | case RIGHT_SIDE: |
||
928 | for (i = 0; i < s->frame_size; i += s->channels) |
||
929 | s->int_samples[i] += s->int_samples[i+1]; |
||
930 | break; |
||
931 | } |
||
932 | |||
933 | if (!s->lossless) |
||
934 | for (i = 0; i < s->frame_size; i++) |
||
935 | s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT); |
||
936 | |||
937 | // internal -> short |
||
938 | for (i = 0; i < s->frame_size; i++) |
||
939 | samples[i] = av_clip_int16(s->int_samples[i]); |
||
940 | |||
941 | align_get_bits(&gb); |
||
942 | |||
943 | *got_frame_ptr = 1; |
||
944 | |||
945 | return (get_bits_count(&gb)+7)/8; |
||
946 | } |
||
947 | |||
948 | AVCodec ff_sonic_decoder = { |
||
949 | .name = "sonic", |
||
950 | .long_name = NULL_IF_CONFIG_SMALL("Sonic"), |
||
951 | .type = AVMEDIA_TYPE_AUDIO, |
||
952 | .id = AV_CODEC_ID_SONIC, |
||
953 | .priv_data_size = sizeof(SonicContext), |
||
954 | .init = sonic_decode_init, |
||
955 | .close = sonic_decode_close, |
||
956 | .decode = sonic_decode_frame, |
||
957 | .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL, |
||
958 | }; |
||
959 | #endif /* CONFIG_SONIC_DECODER */ |
||
960 | |||
961 | #if CONFIG_SONIC_ENCODER |
||
962 | AVCodec ff_sonic_encoder = { |
||
963 | .name = "sonic", |
||
964 | .long_name = NULL_IF_CONFIG_SMALL("Sonic"), |
||
965 | .type = AVMEDIA_TYPE_AUDIO, |
||
966 | .id = AV_CODEC_ID_SONIC, |
||
967 | .priv_data_size = sizeof(SonicContext), |
||
968 | .init = sonic_encode_init, |
||
969 | .encode2 = sonic_encode_frame, |
||
970 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, |
||
971 | .capabilities = CODEC_CAP_EXPERIMENTAL, |
||
972 | .close = sonic_encode_close, |
||
973 | }; |
||
974 | #endif |
||
975 | |||
976 | #if CONFIG_SONIC_LS_ENCODER |
||
977 | AVCodec ff_sonic_ls_encoder = { |
||
978 | .name = "sonicls", |
||
979 | .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"), |
||
980 | .type = AVMEDIA_TYPE_AUDIO, |
||
981 | .id = AV_CODEC_ID_SONIC_LS, |
||
982 | .priv_data_size = sizeof(SonicContext), |
||
983 | .init = sonic_encode_init, |
||
984 | .encode2 = sonic_encode_frame, |
||
985 | .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, |
||
986 | .capabilities = CODEC_CAP_EXPERIMENTAL, |
||
987 | .close = sonic_encode_close, |
||
988 | }; |
||
989 | #endif /* CONFIG_SONIC_LS_ENCODER */ |