Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6147 | serge | 1 | /* |
2 | * WMA compatible encoder |
||
3 | * Copyright (c) 2007 Michael Niedermayer |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | #include "libavutil/attributes.h" |
||
23 | |||
24 | #include "avcodec.h" |
||
25 | #include "internal.h" |
||
26 | #include "wma.h" |
||
27 | #include "libavutil/avassert.h" |
||
28 | |||
29 | |||
30 | static av_cold int encode_init(AVCodecContext *avctx) |
||
31 | { |
||
32 | WMACodecContext *s = avctx->priv_data; |
||
33 | int i, flags1, flags2, block_align; |
||
34 | uint8_t *extradata; |
||
35 | |||
36 | s->avctx = avctx; |
||
37 | |||
38 | if (avctx->channels > MAX_CHANNELS) { |
||
39 | av_log(avctx, AV_LOG_ERROR, |
||
40 | "too many channels: got %i, need %i or fewer\n", |
||
41 | avctx->channels, MAX_CHANNELS); |
||
42 | return AVERROR(EINVAL); |
||
43 | } |
||
44 | |||
45 | if (avctx->sample_rate > 48000) { |
||
46 | av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n", |
||
47 | avctx->sample_rate); |
||
48 | return AVERROR(EINVAL); |
||
49 | } |
||
50 | |||
51 | if (avctx->bit_rate < 24 * 1000) { |
||
52 | av_log(avctx, AV_LOG_ERROR, |
||
53 | "bitrate too low: got %i, need 24000 or higher\n", |
||
54 | avctx->bit_rate); |
||
55 | return AVERROR(EINVAL); |
||
56 | } |
||
57 | |||
58 | /* extract flag infos */ |
||
59 | flags1 = 0; |
||
60 | flags2 = 1; |
||
61 | if (avctx->codec->id == AV_CODEC_ID_WMAV1) { |
||
62 | extradata = av_malloc(4); |
||
63 | if (!extradata) |
||
64 | return AVERROR(ENOMEM); |
||
65 | avctx->extradata_size = 4; |
||
66 | AV_WL16(extradata, flags1); |
||
67 | AV_WL16(extradata + 2, flags2); |
||
68 | } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) { |
||
69 | extradata = av_mallocz(10); |
||
70 | if (!extradata) |
||
71 | return AVERROR(ENOMEM); |
||
72 | avctx->extradata_size = 10; |
||
73 | AV_WL32(extradata, flags1); |
||
74 | AV_WL16(extradata + 4, flags2); |
||
75 | } else { |
||
76 | av_assert0(0); |
||
77 | } |
||
78 | avctx->extradata = extradata; |
||
79 | s->use_exp_vlc = flags2 & 0x0001; |
||
80 | s->use_bit_reservoir = flags2 & 0x0002; |
||
81 | s->use_variable_block_len = flags2 & 0x0004; |
||
82 | if (avctx->channels == 2) |
||
83 | s->ms_stereo = 1; |
||
84 | |||
85 | ff_wma_init(avctx, flags2); |
||
86 | |||
87 | /* init MDCT */ |
||
88 | for (i = 0; i < s->nb_block_sizes; i++) |
||
89 | ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0); |
||
90 | |||
91 | block_align = avctx->bit_rate * (int64_t) s->frame_len / |
||
92 | (avctx->sample_rate * 8); |
||
93 | block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE); |
||
94 | avctx->block_align = block_align; |
||
95 | avctx->frame_size = avctx->initial_padding = s->frame_len; |
||
96 | |||
97 | return 0; |
||
98 | } |
||
99 | |||
100 | static void apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame) |
||
101 | { |
||
102 | WMACodecContext *s = avctx->priv_data; |
||
103 | float **audio = (float **) frame->extended_data; |
||
104 | int len = frame->nb_samples; |
||
105 | int window_index = s->frame_len_bits - s->block_len_bits; |
||
106 | FFTContext *mdct = &s->mdct_ctx[window_index]; |
||
107 | int ch; |
||
108 | const float *win = s->windows[window_index]; |
||
109 | int window_len = 1 << s->block_len_bits; |
||
110 | float n = 2.0 * 32768.0 / window_len; |
||
111 | |||
112 | for (ch = 0; ch < avctx->channels; ch++) { |
||
113 | memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output)); |
||
114 | s->fdsp->vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len); |
||
115 | s->fdsp->vector_fmul_reverse(&s->output[window_len], s->frame_out[ch], |
||
116 | win, len); |
||
117 | s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len); |
||
118 | mdct->mdct_calc(mdct, s->coefs[ch], s->output); |
||
119 | } |
||
120 | } |
||
121 | |||
122 | // FIXME use for decoding too |
||
123 | static void init_exp(WMACodecContext *s, int ch, const int *exp_param) |
||
124 | { |
||
125 | int n; |
||
126 | const uint16_t *ptr; |
||
127 | float v, *q, max_scale, *q_end; |
||
128 | |||
129 | ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
||
130 | q = s->exponents[ch]; |
||
131 | q_end = q + s->block_len; |
||
132 | max_scale = 0; |
||
133 | while (q < q_end) { |
||
134 | /* XXX: use a table */ |
||
135 | v = pow(10, *exp_param++ *(1.0 / 16.0)); |
||
136 | max_scale = FFMAX(max_scale, v); |
||
137 | n = *ptr++; |
||
138 | do { |
||
139 | *q++ = v; |
||
140 | } while (--n); |
||
141 | } |
||
142 | s->max_exponent[ch] = max_scale; |
||
143 | } |
||
144 | |||
145 | static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param) |
||
146 | { |
||
147 | int last_exp; |
||
148 | const uint16_t *ptr; |
||
149 | float *q, *q_end; |
||
150 | |||
151 | ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
||
152 | q = s->exponents[ch]; |
||
153 | q_end = q + s->block_len; |
||
154 | if (s->version == 1) { |
||
155 | last_exp = *exp_param++; |
||
156 | av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32); |
||
157 | put_bits(&s->pb, 5, last_exp - 10); |
||
158 | q += *ptr++; |
||
159 | } else |
||
160 | last_exp = 36; |
||
161 | while (q < q_end) { |
||
162 | int exp = *exp_param++; |
||
163 | int code = exp - last_exp + 60; |
||
164 | av_assert1(code >= 0 && code < 120); |
||
165 | put_bits(&s->pb, ff_aac_scalefactor_bits[code], |
||
166 | ff_aac_scalefactor_code[code]); |
||
167 | /* XXX: use a table */ |
||
168 | q += *ptr++; |
||
169 | last_exp = exp; |
||
170 | } |
||
171 | } |
||
172 | |||
173 | static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], |
||
174 | int total_gain) |
||
175 | { |
||
176 | int v, bsize, ch, coef_nb_bits, parse_exponents; |
||
177 | float mdct_norm; |
||
178 | int nb_coefs[MAX_CHANNELS]; |
||
179 | static const int fixed_exp[25] = { |
||
180 | 20, 20, 20, 20, 20, |
||
181 | 20, 20, 20, 20, 20, |
||
182 | 20, 20, 20, 20, 20, |
||
183 | 20, 20, 20, 20, 20, |
||
184 | 20, 20, 20, 20, 20 |
||
185 | }; |
||
186 | |||
187 | // FIXME remove duplication relative to decoder |
||
188 | if (s->use_variable_block_len) { |
||
189 | av_assert0(0); // FIXME not implemented |
||
190 | } else { |
||
191 | /* fixed block len */ |
||
192 | s->next_block_len_bits = s->frame_len_bits; |
||
193 | s->prev_block_len_bits = s->frame_len_bits; |
||
194 | s->block_len_bits = s->frame_len_bits; |
||
195 | } |
||
196 | |||
197 | s->block_len = 1 << s->block_len_bits; |
||
198 | // av_assert0((s->block_pos + s->block_len) <= s->frame_len); |
||
199 | bsize = s->frame_len_bits - s->block_len_bits; |
||
200 | |||
201 | // FIXME factor |
||
202 | v = s->coefs_end[bsize] - s->coefs_start; |
||
203 | for (ch = 0; ch < s->avctx->channels; ch++) |
||
204 | nb_coefs[ch] = v; |
||
205 | { |
||
206 | int n4 = s->block_len / 2; |
||
207 | mdct_norm = 1.0 / (float) n4; |
||
208 | if (s->version == 1) |
||
209 | mdct_norm *= sqrt(n4); |
||
210 | } |
||
211 | |||
212 | if (s->avctx->channels == 2) |
||
213 | put_bits(&s->pb, 1, !!s->ms_stereo); |
||
214 | |||
215 | for (ch = 0; ch < s->avctx->channels; ch++) { |
||
216 | // FIXME only set channel_coded when needed, instead of always |
||
217 | s->channel_coded[ch] = 1; |
||
218 | if (s->channel_coded[ch]) |
||
219 | init_exp(s, ch, fixed_exp); |
||
220 | } |
||
221 | |||
222 | for (ch = 0; ch < s->avctx->channels; ch++) { |
||
223 | if (s->channel_coded[ch]) { |
||
224 | WMACoef *coefs1; |
||
225 | float *coefs, *exponents, mult; |
||
226 | int i, n; |
||
227 | |||
228 | coefs1 = s->coefs1[ch]; |
||
229 | exponents = s->exponents[ch]; |
||
230 | mult = pow(10, total_gain * 0.05) / s->max_exponent[ch]; |
||
231 | mult *= mdct_norm; |
||
232 | coefs = src_coefs[ch]; |
||
233 | if (s->use_noise_coding && 0) { |
||
234 | av_assert0(0); // FIXME not implemented |
||
235 | } else { |
||
236 | coefs += s->coefs_start; |
||
237 | n = nb_coefs[ch]; |
||
238 | for (i = 0; i < n; i++) { |
||
239 | double t = *coefs++ / (exponents[i] * mult); |
||
240 | if (t < -32768 || t > 32767) |
||
241 | return -1; |
||
242 | |||
243 | coefs1[i] = lrint(t); |
||
244 | } |
||
245 | } |
||
246 | } |
||
247 | } |
||
248 | |||
249 | v = 0; |
||
250 | for (ch = 0; ch < s->avctx->channels; ch++) { |
||
251 | int a = s->channel_coded[ch]; |
||
252 | put_bits(&s->pb, 1, a); |
||
253 | v |= a; |
||
254 | } |
||
255 | |||
256 | if (!v) |
||
257 | return 1; |
||
258 | |||
259 | for (v = total_gain - 1; v >= 127; v -= 127) |
||
260 | put_bits(&s->pb, 7, 127); |
||
261 | put_bits(&s->pb, 7, v); |
||
262 | |||
263 | coef_nb_bits = ff_wma_total_gain_to_bits(total_gain); |
||
264 | |||
265 | if (s->use_noise_coding) { |
||
266 | for (ch = 0; ch < s->avctx->channels; ch++) { |
||
267 | if (s->channel_coded[ch]) { |
||
268 | int i, n; |
||
269 | n = s->exponent_high_sizes[bsize]; |
||
270 | for (i = 0; i < n; i++) { |
||
271 | put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0); |
||
272 | if (0) |
||
273 | nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; |
||
274 | } |
||
275 | } |
||
276 | } |
||
277 | } |
||
278 | |||
279 | parse_exponents = 1; |
||
280 | if (s->block_len_bits != s->frame_len_bits) |
||
281 | put_bits(&s->pb, 1, parse_exponents); |
||
282 | |||
283 | if (parse_exponents) { |
||
284 | for (ch = 0; ch < s->avctx->channels; ch++) { |
||
285 | if (s->channel_coded[ch]) { |
||
286 | if (s->use_exp_vlc) { |
||
287 | encode_exp_vlc(s, ch, fixed_exp); |
||
288 | } else { |
||
289 | av_assert0(0); // FIXME not implemented |
||
290 | // encode_exp_lsp(s, ch); |
||
291 | } |
||
292 | } |
||
293 | } |
||
294 | } else |
||
295 | av_assert0(0); // FIXME not implemented |
||
296 | |||
297 | for (ch = 0; ch < s->avctx->channels; ch++) { |
||
298 | if (s->channel_coded[ch]) { |
||
299 | int run, tindex; |
||
300 | WMACoef *ptr, *eptr; |
||
301 | tindex = (ch == 1 && s->ms_stereo); |
||
302 | ptr = &s->coefs1[ch][0]; |
||
303 | eptr = ptr + nb_coefs[ch]; |
||
304 | |||
305 | run = 0; |
||
306 | for (; ptr < eptr; ptr++) { |
||
307 | if (*ptr) { |
||
308 | int level = *ptr; |
||
309 | int abs_level = FFABS(level); |
||
310 | int code = 0; |
||
311 | if (abs_level <= s->coef_vlcs[tindex]->max_level) |
||
312 | if (run < s->coef_vlcs[tindex]->levels[abs_level - 1]) |
||
313 | code = run + s->int_table[tindex][abs_level - 1]; |
||
314 | |||
315 | av_assert2(code < s->coef_vlcs[tindex]->n); |
||
316 | put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code], |
||
317 | s->coef_vlcs[tindex]->huffcodes[code]); |
||
318 | |||
319 | if (code == 0) { |
||
320 | if (1 << coef_nb_bits <= abs_level) |
||
321 | return -1; |
||
322 | |||
323 | put_bits(&s->pb, coef_nb_bits, abs_level); |
||
324 | put_bits(&s->pb, s->frame_len_bits, run); |
||
325 | } |
||
326 | // FIXME the sign is flipped somewhere |
||
327 | put_bits(&s->pb, 1, level < 0); |
||
328 | run = 0; |
||
329 | } else |
||
330 | run++; |
||
331 | } |
||
332 | if (run) |
||
333 | put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1], |
||
334 | s->coef_vlcs[tindex]->huffcodes[1]); |
||
335 | } |
||
336 | if (s->version == 1 && s->avctx->channels >= 2) |
||
337 | avpriv_align_put_bits(&s->pb); |
||
338 | } |
||
339 | return 0; |
||
340 | } |
||
341 | |||
342 | static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], |
||
343 | uint8_t *buf, int buf_size, int total_gain) |
||
344 | { |
||
345 | init_put_bits(&s->pb, buf, buf_size); |
||
346 | |||
347 | if (s->use_bit_reservoir) |
||
348 | av_assert0(0); // FIXME not implemented |
||
349 | else if (encode_block(s, src_coefs, total_gain) < 0) |
||
350 | return INT_MAX; |
||
351 | |||
352 | avpriv_align_put_bits(&s->pb); |
||
353 | |||
354 | return put_bits_count(&s->pb) / 8 - s->avctx->block_align; |
||
355 | } |
||
356 | |||
357 | static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt, |
||
358 | const AVFrame *frame, int *got_packet_ptr) |
||
359 | { |
||
360 | WMACodecContext *s = avctx->priv_data; |
||
361 | int i, total_gain, ret, error; |
||
362 | |||
363 | s->block_len_bits = s->frame_len_bits; // required by non variable block len |
||
364 | s->block_len = 1 << s->block_len_bits; |
||
365 | |||
366 | apply_window_and_mdct(avctx, frame); |
||
367 | |||
368 | if (s->ms_stereo) { |
||
369 | float a, b; |
||
370 | int i; |
||
371 | |||
372 | for (i = 0; i < s->block_len; i++) { |
||
373 | a = s->coefs[0][i] * 0.5; |
||
374 | b = s->coefs[1][i] * 0.5; |
||
375 | s->coefs[0][i] = a + b; |
||
376 | s->coefs[1][i] = a - b; |
||
377 | } |
||
378 | } |
||
379 | |||
380 | if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE, 0)) < 0) |
||
381 | return ret; |
||
382 | |||
383 | total_gain = 128; |
||
384 | for (i = 64; i; i >>= 1) { |
||
385 | error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, |
||
386 | total_gain - i); |
||
387 | if (error <= 0) |
||
388 | total_gain -= i; |
||
389 | } |
||
390 | |||
391 | while(total_gain <= 128 && error > 0) |
||
392 | error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++); |
||
393 | if (error > 0) { |
||
394 | av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n"); |
||
395 | avpkt->size = 0; |
||
396 | return AVERROR(EINVAL); |
||
397 | } |
||
398 | av_assert0((put_bits_count(&s->pb) & 7) == 0); |
||
399 | i= avctx->block_align - (put_bits_count(&s->pb)+7)/8; |
||
400 | av_assert0(i>=0); |
||
401 | while(i--) |
||
402 | put_bits(&s->pb, 8, 'N'); |
||
403 | |||
404 | flush_put_bits(&s->pb); |
||
405 | av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align); |
||
406 | |||
407 | if (frame->pts != AV_NOPTS_VALUE) |
||
408 | avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding); |
||
409 | |||
410 | avpkt->size = avctx->block_align; |
||
411 | *got_packet_ptr = 1; |
||
412 | return 0; |
||
413 | } |
||
414 | |||
#if CONFIG_WMAV1_ENCODER
/* Codec descriptor for the WMA version 1 encoder; accepts planar float input. */
AVCodec ff_wmav1_encoder = {
    .name = "wmav1",
    .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
    .type = AVMEDIA_TYPE_AUDIO,
    .id = AV_CODEC_ID_WMAV1,
    .priv_data_size = sizeof(WMACodecContext),
    .init = encode_init,
    .encode2 = encode_superframe,
    .close = ff_wma_end,
    .sample_fmts = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    },
};
#endif
||
#if CONFIG_WMAV2_ENCODER
/* Codec descriptor for the WMA version 2 encoder; accepts planar float input. */
AVCodec ff_wmav2_encoder = {
    .name = "wmav2",
    .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
    .type = AVMEDIA_TYPE_AUDIO,
    .id = AV_CODEC_ID_WMAV2,
    .priv_data_size = sizeof(WMACodecContext),
    .init = encode_init,
    .encode2 = encode_superframe,
    .close = ff_wma_end,
    .sample_fmts = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_NONE
    },
};
#endif