WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavcodec/libspeexenc.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* Copyright (C) 2009 Justin Ruggles
		3	* Copyright (c) 2009 Xuggle Incorporated
		4	*
		5	* This file is part of FFmpeg.
		6	*
		7	* FFmpeg is free software; you can redistribute it and/or
		8	* modify it under the terms of the GNU Lesser General Public
		9	* License as published by the Free Software Foundation; either
		10	* version 2.1 of the License, or (at your option) any later version.
		11	*
		12	* FFmpeg is distributed in the hope that it will be useful,
		13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		15	* Lesser General Public License for more details.
		16	*
		17	* You should have received a copy of the GNU Lesser General Public
		18	* License along with FFmpeg; if not, write to the Free Software
		19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		20	*/
		21
		22	/**
		23	* @file
		24	* libspeex Speex audio encoder
		25	*
		26	* Usage Guide
		27	* This explains the values that need to be set prior to initialization in
		28	* order to control various encoding parameters.
		29	*
		30	* Channels
		31	* Speex only supports mono or stereo, so avctx->channels must be set to
		32	* 1 or 2.
		33	*
		34	* Sample Rate / Encoding Mode
		35	* Speex has 3 modes, each of which uses a specific sample rate.
		36	* narrowband : 8 kHz
		37	* wideband : 16 kHz
		38	* ultra-wideband : 32 kHz
		39	* avctx->sample_rate must be set to one of these 3 values. This will be
		40	* used to set the encoding mode.
		41	*
		42	* Rate Control
		43	* VBR mode is turned on by setting CODEC_FLAG_QSCALE in avctx->flags.
		44	* avctx->global_quality is used to set the encoding quality.
		45	* For CBR mode, avctx->bit_rate can be used to set the constant bitrate.
		46	* Alternatively, the 'cbr_quality' option can be set from 0 to 10 to set
		47	* a constant bitrate based on quality.
		48	* For ABR mode, set avctx->bit_rate and set the 'abr' option to 1.
		49	* Approx. Bitrate Range:
		50	* narrowband : 2400 - 25600 bps
		51	* wideband : 4000 - 43200 bps
		52	* ultra-wideband : 4400 - 45200 bps
		53	*
		54	* Complexity
		55	* Encoding complexity is controlled by setting avctx->compression_level.
		56	* The valid range is 0 to 10. A higher setting gives generally better
		57	* quality at the expense of encoding speed. This does not affect the
		58	* bit rate.
		59	*
		60	* Frames-per-Packet
		61	* The encoder defaults to using 1 frame-per-packet. However, it is
		62	* sometimes desirable to use multiple frames-per-packet to reduce the
		63	* amount of container overhead. This can be done by setting the
		64	* 'frames_per_packet' option to a value from 1 to 8.
		65	*
		66	*
		67	* Optional features
		68	* Speex encoder supports several optional features, which can be useful
		69	* for some conditions.
		70	*
		71	* Voice Activity Detection
		72	* When enabled, voice activity detection detects whether the audio
		73	* being encoded is speech or silence/background noise. VAD is always
		74	* implicitly activated when encoding in VBR, so the option is only useful
		75	* in non-VBR operation. In this case, Speex detects non-speech periods and
		76	* encodes them with just enough bits to reproduce the background noise.
		77	*
		78	* Discontinuous Transmission (DTX)
		79	* DTX is an addition to VAD/VBR operation that allows transmission to
		80	* stop completely when the background noise is stationary.
		81	* In file-based operation only 5 bits are used for such frames.
		82	*/
		83
		84	#include <speex/speex.h>
		85	#include <speex/speex_header.h>
		86	#include <speex/speex_stereo.h>
		87
		88	#include "libavutil/channel_layout.h"
		89	#include "libavutil/common.h"
		90	#include "libavutil/opt.h"
		91	#include "avcodec.h"
		92	#include "internal.h"
		93	#include "audio_frame_queue.h"
		94
		95	/* TODO: Think about converting abr, vad, dtx and such flags to a bit field */
		96	typedef struct {
		97	AVClass *class; ///< AVClass for private options
		98	SpeexBits bits; ///< libspeex bitwriter context
		99	SpeexHeader header; ///< libspeex header struct
		100	void *enc_state; ///< libspeex encoder state
		101	int frames_per_packet; ///< number of frames to encode in each packet
		102	float vbr_quality; ///< VBR quality 0.0 to 10.0
		103	int cbr_quality; ///< CBR quality 0 to 10
		104	int abr; ///< flag to enable ABR
		105	int vad; ///< flag to enable VAD
		106	int dtx; ///< flag to enable DTX
		107	int pkt_frame_count; ///< frame count for the current packet
		108	AudioFrameQueue afq; ///< frame queue
		109	} LibSpeexEncContext;
		110
		111	static av_cold void print_enc_params(AVCodecContext *avctx,
		112	LibSpeexEncContext *s)
		113	{
		114	const char *mode_str = "unknown";
		115
		116	av_log(avctx, AV_LOG_DEBUG, "channels: %d\n", avctx->channels);
		117	switch (s->header.mode) {
		118	case SPEEX_MODEID_NB: mode_str = "narrowband"; break;
		119	case SPEEX_MODEID_WB: mode_str = "wideband"; break;
		120	case SPEEX_MODEID_UWB: mode_str = "ultra-wideband"; break;
		121	}
		122	av_log(avctx, AV_LOG_DEBUG, "mode: %s\n", mode_str);
		123	if (s->header.vbr) {
		124	av_log(avctx, AV_LOG_DEBUG, "rate control: VBR\n");
		125	av_log(avctx, AV_LOG_DEBUG, " quality: %f\n", s->vbr_quality);
		126	} else if (s->abr) {
		127	av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
		128	av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
		129	} else {
		130	av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
		131	av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
		132	}
		133	av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
		134	avctx->compression_level);
		135	av_log(avctx, AV_LOG_DEBUG, "frame size: %d samples\n",
		136	avctx->frame_size);
		137	av_log(avctx, AV_LOG_DEBUG, "frames per packet: %d\n",
		138	s->frames_per_packet);
		139	av_log(avctx, AV_LOG_DEBUG, "packet size: %d\n",
		140	avctx->frame_size * s->frames_per_packet);
		141	av_log(avctx, AV_LOG_DEBUG, "voice activity detection: %d\n", s->vad);
		142	av_log(avctx, AV_LOG_DEBUG, "discontinuous transmission: %d\n", s->dtx);
		143	}
		144
		145	static av_cold int encode_init(AVCodecContext *avctx)
		146	{
		147	LibSpeexEncContext *s = avctx->priv_data;
		148	const SpeexMode *mode;
		149	uint8_t *header_data;
		150	int header_size;
		151	int32_t complexity;
		152
		153	/* channels */
		154	if (avctx->channels < 1 \|\| avctx->channels > 2) {
		155	av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
		156	"mono are supported\n", avctx->channels);
		157	return AVERROR(EINVAL);
		158	}
		159
		160	/* sample rate and encoding mode */
		161	switch (avctx->sample_rate) {
		162	case 8000: mode = &speex_nb_mode; break;
		163	case 16000: mode = &speex_wb_mode; break;
		164	case 32000: mode = &speex_uwb_mode; break;
		165	default:
		166	av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
		167	"Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
		168	return AVERROR(EINVAL);
		169	}
		170
		171	/* initialize libspeex */
		172	s->enc_state = speex_encoder_init(mode);
		173	if (!s->enc_state) {
		174	av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
		175	return -1;
		176	}
		177	speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
		178
		179	/* rate control method and parameters */
		180	if (avctx->flags & CODEC_FLAG_QSCALE) {
		181	/* VBR */
		182	s->header.vbr = 1;
		183	s->vad = 1; /* VAD is always implicitly activated for VBR */
		184	speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
		185	s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
		186	0.0f, 10.0f);
		187	speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
		188	} else {
		189	s->header.bitrate = avctx->bit_rate;
		190	if (avctx->bit_rate > 0) {
		191	/* CBR or ABR by bitrate */
		192	if (s->abr) {
		193	speex_encoder_ctl(s->enc_state, SPEEX_SET_ABR,
		194	&s->header.bitrate);
		195	speex_encoder_ctl(s->enc_state, SPEEX_GET_ABR,
		196	&s->header.bitrate);
		197	} else {
		198	speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE,
		199	&s->header.bitrate);
		200	speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
		201	&s->header.bitrate);
		202	}
		203	} else {
		204	/* CBR by quality */
		205	speex_encoder_ctl(s->enc_state, SPEEX_SET_QUALITY,
		206	&s->cbr_quality);
		207	speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
		208	&s->header.bitrate);
		209	}
		210	/* stereo side information adds about 800 bps to the base bitrate */
		211	/* TODO: this should be calculated exactly */
		212	avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);
		213	}
		214
		215	/* VAD is activated with VBR or can be turned on by itself */
		216	if (s->vad)
		217	speex_encoder_ctl(s->enc_state, SPEEX_SET_VAD, &s->vad);
		218
		219	/* Activiting Discontinuous Transmission */
		220	if (s->dtx) {
		221	speex_encoder_ctl(s->enc_state, SPEEX_SET_DTX, &s->dtx);
		222	if (!(s->abr \|\| s->vad \|\| s->header.vbr))
		223	av_log(avctx, AV_LOG_WARNING, "DTX is not much of use without ABR, VAD or VBR\n");
		224	}
		225
		226	/* set encoding complexity */
		227	if (avctx->compression_level > FF_COMPRESSION_DEFAULT) {
		228	complexity = av_clip(avctx->compression_level, 0, 10);
		229	speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
		230	}
		231	speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);
		232	avctx->compression_level = complexity;
		233
		234	/* set packet size */
		235	avctx->frame_size = s->header.frame_size;
		236	s->header.frames_per_packet = s->frames_per_packet;
		237
		238	/* set encoding delay */
		239	speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
		240	ff_af_queue_init(avctx, &s->afq);
		241
		242	/* create header packet bytes from header struct */
		243	/* note: libspeex allocates the memory for header_data, which is freed
		244	below with speex_header_free() */
		245	header_data = speex_header_to_packet(&s->header, &header_size);
		246
		247	/* allocate extradata and coded_frame */
		248	avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
		249	if (!avctx->extradata) {
		250	speex_header_free(header_data);
		251	speex_encoder_destroy(s->enc_state);
		252	av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
		253	return AVERROR(ENOMEM);
		254	}
		255
		256	/* copy header packet to extradata */
		257	memcpy(avctx->extradata, header_data, header_size);
		258	avctx->extradata_size = header_size;
		259	speex_header_free(header_data);
		260
		261	/* init libspeex bitwriter */
		262	speex_bits_init(&s->bits);
		263
		264	print_enc_params(avctx, s);
		265	return 0;
		266	}
		267
		268	static int encode_frame(AVCodecContext avctx, AVPacket avpkt,
		269	const AVFrame frame, int got_packet_ptr)
		270	{
		271	LibSpeexEncContext *s = avctx->priv_data;
		272	int16_t samples = frame ? (int16_t )frame->data[0] : NULL;
		273	int ret;
		274
		275	if (samples) {
		276	/* encode Speex frame */
		277	if (avctx->channels == 2)
		278	speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
		279	speex_encode_int(s->enc_state, samples, &s->bits);
		280	s->pkt_frame_count++;
		281	if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
		282	return ret;
		283	} else {
		284	/* handle end-of-stream */
		285	if (!s->pkt_frame_count)
		286	return 0;
		287	/* add extra terminator codes for unused frames in last packet */
		288	while (s->pkt_frame_count < s->frames_per_packet) {
		289	speex_bits_pack(&s->bits, 15, 5);
		290	s->pkt_frame_count++;
		291	}
		292	}
		293
		294	/* write output if all frames for the packet have been encoded */
		295	if (s->pkt_frame_count == s->frames_per_packet) {
		296	s->pkt_frame_count = 0;
		297	if ((ret = ff_alloc_packet2(avctx, avpkt, speex_bits_nbytes(&s->bits))) < 0)
		298	return ret;
		299	ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
		300	speex_bits_reset(&s->bits);
		301
		302	/* Get the next frame pts/duration */
		303	ff_af_queue_remove(&s->afq, s->frames_per_packet * avctx->frame_size,
		304	&avpkt->pts, &avpkt->duration);
		305
		306	avpkt->size = ret;
		307	*got_packet_ptr = 1;
		308	return 0;
		309	}
		310	return 0;
		311	}
		312
		313	static av_cold int encode_close(AVCodecContext *avctx)
		314	{
		315	LibSpeexEncContext *s = avctx->priv_data;
		316
		317	speex_bits_destroy(&s->bits);
		318	speex_encoder_destroy(s->enc_state);
		319
		320	ff_af_queue_close(&s->afq);
		321	av_freep(&avctx->extradata);
		322
		323	return 0;
		324	}
		325
		326	#define OFFSET(x) offsetof(LibSpeexEncContext, x)
		327	#define AE AV_OPT_FLAG_AUDIO_PARAM \| AV_OPT_FLAG_ENCODING_PARAM
		328	static const AVOption options[] = {
		329	{ "abr", "Use average bit rate", OFFSET(abr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
		330	{ "cbr_quality", "Set quality value (0 to 10) for CBR", OFFSET(cbr_quality), AV_OPT_TYPE_INT, { .i64 = 8 }, 0, 10, AE },
		331	{ "frames_per_packet", "Number of frames to encode in each packet", OFFSET(frames_per_packet), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 8, AE },
		332	{ "vad", "Voice Activity Detection", OFFSET(vad), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
		333	{ "dtx", "Discontinuous Transmission", OFFSET(dtx), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
		334	{ NULL },
		335	};
		336
		337	static const AVClass speex_class = {
		338	.class_name = "libspeex",
		339	.item_name = av_default_item_name,
		340	.option = options,
		341	.version = LIBAVUTIL_VERSION_INT,
		342	};
		343
		344	static const AVCodecDefault defaults[] = {
		345	{ "b", "0" },
		346	{ "compression_level", "3" },
		347	{ NULL },
		348	};
		349
		350	AVCodec ff_libspeex_encoder = {
		351	.name = "libspeex",
		352	.long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
		353	.type = AVMEDIA_TYPE_AUDIO,
		354	.id = AV_CODEC_ID_SPEEX,
		355	.priv_data_size = sizeof(LibSpeexEncContext),
		356	.init = encode_init,
		357	.encode2 = encode_frame,
		358	.close = encode_close,
		359	.capabilities = CODEC_CAP_DELAY,
		360	.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
		361	AV_SAMPLE_FMT_NONE },
		362	.channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
		363	AV_CH_LAYOUT_STEREO,
		364
		365	.supported_samplerates = (const int[]){ 8000, 16000, 32000, 0 },
		366	.priv_class = &speex_class,
		367	.defaults = defaults,
		368	};

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavcodec/libspeexenc.c – Rev 4349