WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavfilter/af_atempo.c

Rev	Author	Line No.	Line
6148	serge	1	/*
		2	* Copyright (c) 2012 Pavel Koshevoy
		3	*
		4	* This file is part of FFmpeg.
		5	*
		6	* FFmpeg is free software; you can redistribute it and/or
		7	* modify it under the terms of the GNU Lesser General Public
		8	* License as published by the Free Software Foundation; either
		9	* version 2.1 of the License, or (at your option) any later version.
		10	*
		11	* FFmpeg is distributed in the hope that it will be useful,
		12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		14	* Lesser General Public License for more details.
		15	*
		16	* You should have received a copy of the GNU Lesser General Public
		17	* License along with FFmpeg; if not, write to the Free Software
		18	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		19	*/
		20
		21	/**
		22	* @file
		23	* tempo scaling audio filter -- an implementation of WSOLA algorithm
		24	*
		25	* Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h
		26	* from Apprentice Video player by Pavel Koshevoy.
		27	* https://sourceforge.net/projects/apprenticevideo/
		28	*
		29	* An explanation of SOLA algorithm is available at
		30	* http://www.surina.net/article/time-and-pitch-scaling.html
		31	*
		32	* WSOLA is very similar to SOLA, only one major difference exists between
		33	* these algorithms. SOLA shifts audio fragments along the output stream,
		34	* where as WSOLA shifts audio fragments along the input stream.
		35	*
		36	* The advantage of WSOLA algorithm is that the overlap region size is
		37	* always the same, therefore the blending function is constant and
		38	* can be precomputed.
		39	*/
		40
		41	#include
		42	#include "libavcodec/avfft.h"
		43	#include "libavutil/avassert.h"
		44	#include "libavutil/avstring.h"
		45	#include "libavutil/channel_layout.h"
		46	#include "libavutil/eval.h"
		47	#include "libavutil/opt.h"
		48	#include "libavutil/samplefmt.h"
		49	#include "avfilter.h"
		50	#include "audio.h"
		51	#include "internal.h"
		52
		53	/**
		54	* A fragment of audio waveform
		55	*/
		56	typedef struct {
		57	// index of the first sample of this fragment in the overall waveform;
		58	// 0: input sample position
		59	// 1: output sample position
		60	int64_t position[2];
		61
		62	// original packed multi-channel samples:
		63	uint8_t *data;
		64
		65	// number of samples in this fragment:
		66	int nsamples;
		67
		68	// rDFT transform of the down-mixed mono fragment, used for
		69	// fast waveform alignment via correlation in frequency domain:
		70	FFTSample *xdat;
		71	} AudioFragment;
		72
		73	/**
		74	* Filter state machine states
		75	*/
		76	typedef enum {
		77	YAE_LOAD_FRAGMENT,
		78	YAE_ADJUST_POSITION,
		79	YAE_RELOAD_FRAGMENT,
		80	YAE_OUTPUT_OVERLAP_ADD,
		81	YAE_FLUSH_OUTPUT,
		82	} FilterState;
		83
		84	/**
		85	* Filter state machine
		86	*/
		87	typedef struct {
		88	const AVClass *class;
		89
		90	// ring-buffer of input samples, necessary because some times
		91	// input fragment position may be adjusted backwards:
		92	uint8_t *buffer;
		93
		94	// ring-buffer maximum capacity, expressed in sample rate time base:
		95	int ring;
		96
		97	// ring-buffer house keeping:
		98	int size;
		99	int head;
		100	int tail;
		101
		102	// 0: input sample position corresponding to the ring buffer tail
		103	// 1: output sample position
		104	int64_t position[2];
		105
		106	// sample format:
		107	enum AVSampleFormat format;
		108
		109	// number of channels:
		110	int channels;
		111
		112	// row of bytes to skip from one sample to next, across multple channels;
		113	// stride = (number-of-channels * bits-per-sample-per-channel) / 8
		114	int stride;
		115
		116	// fragment window size, power-of-two integer:
		117	int window;
		118
		119	// Hann window coefficients, for feathering
		120	// (blending) the overlapping fragment region:
		121	float *hann;
		122
		123	// tempo scaling factor:
		124	double tempo;
		125
		126	// a snapshot of previous fragment input and output position values
		127	// captured when the tempo scale factor was set most recently:
		128	int64_t origin[2];
		129
		130	// current/previous fragment ring-buffer:
		131	AudioFragment frag[2];
		132
		133	// current fragment index:
		134	uint64_t nfrag;
		135
		136	// current state:
		137	FilterState state;
		138
		139	// for fast correlation calculation in frequency domain:
		140	RDFTContext *real_to_complex;
		141	RDFTContext *complex_to_real;
		142	FFTSample *correlation;
		143
		144	// for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
		145	AVFrame *dst_buffer;
		146	uint8_t *dst;
		147	uint8_t *dst_end;
		148	uint64_t nsamples_in;
		149	uint64_t nsamples_out;
		150	} ATempoContext;
		151
		152	#define OFFSET(x) offsetof(ATempoContext, x)
		153
		154	static const AVOption atempo_options[] = {
		155	{ "tempo", "set tempo scale factor",
		156	OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0.5, 2.0,
		157	AV_OPT_FLAG_AUDIO_PARAM \| AV_OPT_FLAG_FILTERING_PARAM },
		158	{ NULL }
		159	};
		160
		161	AVFILTER_DEFINE_CLASS(atempo);
		162
		163	inline static AudioFragment yae_curr_frag(ATempoContext atempo)
		164	{
		165	return &atempo->frag[atempo->nfrag % 2];
		166	}
		167
		168	inline static AudioFragment yae_prev_frag(ATempoContext atempo)
		169	{
		170	return &atempo->frag[(atempo->nfrag + 1) % 2];
		171	}
		172
		173	/**
		174	* Reset filter to initial state, do not deallocate existing local buffers.
		175	*/
		176	static void yae_clear(ATempoContext *atempo)
		177	{
		178	atempo->size = 0;
		179	atempo->head = 0;
		180	atempo->tail = 0;
		181
		182	atempo->nfrag = 0;
		183	atempo->state = YAE_LOAD_FRAGMENT;
		184
		185	atempo->position[0] = 0;
		186	atempo->position[1] = 0;
		187
		188	atempo->origin[0] = 0;
		189	atempo->origin[1] = 0;
		190
		191	atempo->frag[0].position[0] = 0;
		192	atempo->frag[0].position[1] = 0;
		193	atempo->frag[0].nsamples = 0;
		194
		195	atempo->frag[1].position[0] = 0;
		196	atempo->frag[1].position[1] = 0;
		197	atempo->frag[1].nsamples = 0;
		198
		199	// shift left position of 1st fragment by half a window
		200	// so that no re-normalization would be required for
		201	// the left half of the 1st fragment:
		202	atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2);
		203	atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2);
		204
		205	av_frame_free(&atempo->dst_buffer);
		206	atempo->dst = NULL;
		207	atempo->dst_end = NULL;
		208
		209	atempo->nsamples_in = 0;
		210	atempo->nsamples_out = 0;
		211	}
		212
		213	/**
		214	* Reset filter to initial state and deallocate all buffers.
		215	*/
		216	static void yae_release_buffers(ATempoContext *atempo)
		217	{
		218	yae_clear(atempo);
		219
		220	av_freep(&atempo->frag[0].data);
		221	av_freep(&atempo->frag[1].data);
		222	av_freep(&atempo->frag[0].xdat);
		223	av_freep(&atempo->frag[1].xdat);
		224
		225	av_freep(&atempo->buffer);
		226	av_freep(&atempo->hann);
		227	av_freep(&atempo->correlation);
		228
		229	av_rdft_end(atempo->real_to_complex);
		230	atempo->real_to_complex = NULL;
		231
		232	av_rdft_end(atempo->complex_to_real);
		233	atempo->complex_to_real = NULL;
		234	}
		235
		236	/* av_realloc is not aligned enough; fortunately, the data does not need to
		237	* be preserved */
		238	#define RE_MALLOC_OR_FAIL(field, field_size) \
		239	do { \
		240	av_freep(&field); \
		241	field = av_malloc(field_size); \
		242	if (!field) { \
		243	yae_release_buffers(atempo); \
		244	return AVERROR(ENOMEM); \
		245	} \
		246	} while (0)
		247
		248	/**
		249	* Prepare filter for processing audio data of given format,
		250	* sample rate and number of channels.
		251	*/
		252	static int yae_reset(ATempoContext *atempo,
		253	enum AVSampleFormat format,
		254	int sample_rate,
		255	int channels)
		256	{
		257	const int sample_size = av_get_bytes_per_sample(format);
		258	uint32_t nlevels = 0;
		259	uint32_t pot;
		260	int i;
		261
		262	atempo->format = format;
		263	atempo->channels = channels;
		264	atempo->stride = sample_size * channels;
		265
		266	// pick a segment window size:
		267	atempo->window = sample_rate / 24;
		268
		269	// adjust window size to be a power-of-two integer:
		270	nlevels = av_log2(atempo->window);
		271	pot = 1 << nlevels;
		272	av_assert0(pot <= atempo->window);
		273
		274	if (pot < atempo->window) {
		275	atempo->window = pot * 2;
		276	nlevels++;
		277	}
		278
		279	// initialize audio fragment buffers:
		280	RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
		281	RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
		282	RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
		283	RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));
		284
		285	// initialize rDFT contexts:
		286	av_rdft_end(atempo->real_to_complex);
		287	atempo->real_to_complex = NULL;
		288
		289	av_rdft_end(atempo->complex_to_real);
		290	atempo->complex_to_real = NULL;
		291
		292	atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
		293	if (!atempo->real_to_complex) {
		294	yae_release_buffers(atempo);
		295	return AVERROR(ENOMEM);
		296	}
		297
		298	atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
		299	if (!atempo->complex_to_real) {
		300	yae_release_buffers(atempo);
		301	return AVERROR(ENOMEM);
		302	}
		303
		304	RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));
		305
		306	atempo->ring = atempo->window * 3;
		307	RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
		308
		309	// initialize the Hann window function:
		310	RE_MALLOC_OR_FAIL(atempo->hann, atempo->window * sizeof(float));
		311
		312	for (i = 0; i < atempo->window; i++) {
		313	double t = (double)i / (double)(atempo->window - 1);
		314	double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
		315	atempo->hann[i] = (float)h;
		316	}
		317
		318	yae_clear(atempo);
		319	return 0;
		320	}
		321
		322	static int yae_set_tempo(AVFilterContext ctx, const char arg_tempo)
		323	{
		324	const AudioFragment *prev;
		325	ATempoContext *atempo = ctx->priv;
		326	char *tail = NULL;
		327	double tempo = av_strtod(arg_tempo, &tail);
		328
		329	if (tail && *tail) {
		330	av_log(ctx, AV_LOG_ERROR, "Invalid tempo value '%s'\n", arg_tempo);
		331	return AVERROR(EINVAL);
		332	}
		333
		334	if (tempo < 0.5 \|\| tempo > 2.0) {
		335	av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [0.5, 2.0] range\n",
		336	tempo);
		337	return AVERROR(EINVAL);
		338	}
		339
		340	prev = yae_prev_frag(atempo);
		341	atempo->origin[0] = prev->position[0] + atempo->window / 2;
		342	atempo->origin[1] = prev->position[1] + atempo->window / 2;
		343	atempo->tempo = tempo;
		344	return 0;
		345	}
		346
		347	/**
		348	* A helper macro for initializing complex data buffer with scalar data
		349	* of a given type.
		350	*/
		351	#define yae_init_xdat(scalar_type, scalar_max) \
		352	do { \
		353	const uint8_t *src_end = src + \
		354	frag->nsamples * atempo->channels * sizeof(scalar_type); \
		355	\
		356	FFTSample *xdat = frag->xdat; \
		357	scalar_type tmp; \
		358	\
		359	if (atempo->channels == 1) { \
		360	for (; src < src_end; xdat++) { \
		361	tmp = (const scalar_type )src; \
		362	src += sizeof(scalar_type); \
		363	\
		364	*xdat = (FFTSample)tmp; \
		365	} \
		366	} else { \
		367	FFTSample s, max, ti, si; \
		368	int i; \
		369	\
		370	for (; src < src_end; xdat++) { \
		371	tmp = (const scalar_type )src; \
		372	src += sizeof(scalar_type); \
		373	\
		374	max = (FFTSample)tmp; \
		375	s = FFMIN((FFTSample)scalar_max, \
		376	(FFTSample)fabsf(max)); \
		377	\
		378	for (i = 1; i < atempo->channels; i++) { \
		379	tmp = (const scalar_type )src; \
		380	src += sizeof(scalar_type); \
		381	\
		382	ti = (FFTSample)tmp; \
		383	si = FFMIN((FFTSample)scalar_max, \
		384	(FFTSample)fabsf(ti)); \
		385	\
		386	if (s < si) { \
		387	s = si; \
		388	max = ti; \
		389	} \
		390	} \
		391	\
		392	*xdat = max; \
		393	} \
		394	} \
		395	} while (0)
		396
		397	/**
		398	* Initialize complex data buffer of a given audio fragment
		399	* with down-mixed mono data of appropriate scalar type.
		400	*/
		401	static void yae_downmix(ATempoContext atempo, AudioFragment frag)
		402	{
		403	// shortcuts:
		404	const uint8_t *src = frag->data;
		405
		406	// init complex data buffer used for FFT and Correlation:
		407	memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
		408
		409	if (atempo->format == AV_SAMPLE_FMT_U8) {
		410	yae_init_xdat(uint8_t, 127);
		411	} else if (atempo->format == AV_SAMPLE_FMT_S16) {
		412	yae_init_xdat(int16_t, 32767);
		413	} else if (atempo->format == AV_SAMPLE_FMT_S32) {
		414	yae_init_xdat(int, 2147483647);
		415	} else if (atempo->format == AV_SAMPLE_FMT_FLT) {
		416	yae_init_xdat(float, 1);
		417	} else if (atempo->format == AV_SAMPLE_FMT_DBL) {
		418	yae_init_xdat(double, 1);
		419	}
		420	}
		421
		422	/**
		423	* Populate the internal data buffer on as-needed basis.
		424	*
		425	* @return
		426	* 0 if requested data was already available or was successfully loaded,
		427	* AVERROR(EAGAIN) if more input data is required.
		428	*/
		429	static int yae_load_data(ATempoContext *atempo,
		430	const uint8_t **src_ref,
		431	const uint8_t *src_end,
		432	int64_t stop_here)
		433	{
		434	// shortcut:
		435	const uint8_t src = src_ref;
		436	const int read_size = stop_here - atempo->position[0];
		437
		438	if (stop_here <= atempo->position[0]) {
		439	return 0;
		440	}
		441
		442	// samples are not expected to be skipped:
		443	av_assert0(read_size <= atempo->ring);
		444
		445	while (atempo->position[0] < stop_here && src < src_end) {
		446	int src_samples = (src_end - src) / atempo->stride;
		447
		448	// load data piece-wise, in order to avoid complicating the logic:
		449	int nsamples = FFMIN(read_size, src_samples);
		450	int na;
		451	int nb;
		452
		453	nsamples = FFMIN(nsamples, atempo->ring);
		454	na = FFMIN(nsamples, atempo->ring - atempo->tail);
		455	nb = FFMIN(nsamples - na, atempo->ring);
		456
		457	if (na) {
		458	uint8_t a = atempo->buffer + atempo->tail atempo->stride;
		459	memcpy(a, src, na * atempo->stride);
		460
		461	src += na * atempo->stride;
		462	atempo->position[0] += na;
		463
		464	atempo->size = FFMIN(atempo->size + na, atempo->ring);
		465	atempo->tail = (atempo->tail + na) % atempo->ring;
		466	atempo->head =
		467	atempo->size < atempo->ring ?
		468	atempo->tail - atempo->size :
		469	atempo->tail;
		470	}
		471
		472	if (nb) {
		473	uint8_t *b = atempo->buffer;
		474	memcpy(b, src, nb * atempo->stride);
		475
		476	src += nb * atempo->stride;
		477	atempo->position[0] += nb;
		478
		479	atempo->size = FFMIN(atempo->size + nb, atempo->ring);
		480	atempo->tail = (atempo->tail + nb) % atempo->ring;
		481	atempo->head =
		482	atempo->size < atempo->ring ?
		483	atempo->tail - atempo->size :
		484	atempo->tail;
		485	}
		486	}
		487
		488	// pass back the updated source buffer pointer:
		489	*src_ref = src;
		490
		491	// sanity check:
		492	av_assert0(atempo->position[0] <= stop_here);
		493
		494	return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
		495	}
		496
		497	/**
		498	* Populate current audio fragment data buffer.
		499	*
		500	* @return
		501	* 0 when the fragment is ready,
		502	* AVERROR(EAGAIN) if more input data is required.
		503	*/
		504	static int yae_load_frag(ATempoContext *atempo,
		505	const uint8_t **src_ref,
		506	const uint8_t *src_end)
		507	{
		508	// shortcuts:
		509	AudioFragment *frag = yae_curr_frag(atempo);
		510	uint8_t *dst;
		511	int64_t missing, start, zeros;
		512	uint32_t nsamples;
		513	const uint8_t a, b;
		514	int i0, i1, n0, n1, na, nb;
		515
		516	int64_t stop_here = frag->position[0] + atempo->window;
		517	if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
		518	return AVERROR(EAGAIN);
		519	}
		520
		521	// calculate the number of samples we don't have:
		522	missing =
		523	stop_here > atempo->position[0] ?
		524	stop_here - atempo->position[0] : 0;
		525
		526	nsamples =
		527	missing < (int64_t)atempo->window ?
		528	(uint32_t)(atempo->window - missing) : 0;
		529
		530	// setup the output buffer:
		531	frag->nsamples = nsamples;
		532	dst = frag->data;
		533
		534	start = atempo->position[0] - atempo->size;
		535	zeros = 0;
		536
		537	if (frag->position[0] < start) {
		538	// what we don't have we substitute with zeros:
		539	zeros = FFMIN(start - frag->position[0], (int64_t)nsamples);
		540	av_assert0(zeros != nsamples);
		541
		542	memset(dst, 0, zeros * atempo->stride);
		543	dst += zeros * atempo->stride;
		544	}
		545
		546	if (zeros == nsamples) {
		547	return 0;
		548	}
		549
		550	// get the remaining data from the ring buffer:
		551	na = (atempo->head < atempo->tail ?
		552	atempo->tail - atempo->head :
		553	atempo->ring - atempo->head);
		554
		555	nb = atempo->head < atempo->tail ? 0 : atempo->tail;
		556
		557	// sanity check:
		558	av_assert0(nsamples <= zeros + na + nb);
		559
		560	a = atempo->buffer + atempo->head * atempo->stride;
		561	b = atempo->buffer;
		562
		563	i0 = frag->position[0] + zeros - start;
		564	i1 = i0 < na ? 0 : i0 - na;
		565
		566	n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
		567	n1 = nsamples - zeros - n0;
		568
		569	if (n0) {
		570	memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
		571	dst += n0 * atempo->stride;
		572	}
		573
		574	if (n1) {
		575	memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
		576	}
		577
		578	return 0;
		579	}
		580
		581	/**
		582	* Prepare for loading next audio fragment.
		583	*/
		584	static void yae_advance_to_next_frag(ATempoContext *atempo)
		585	{
		586	const double fragment_step = atempo->tempo * (double)(atempo->window / 2);
		587
		588	const AudioFragment *prev;
		589	AudioFragment *frag;
		590
		591	atempo->nfrag++;
		592	prev = yae_prev_frag(atempo);
		593	frag = yae_curr_frag(atempo);
		594
		595	frag->position[0] = prev->position[0] + (int64_t)fragment_step;
		596	frag->position[1] = prev->position[1] + atempo->window / 2;
		597	frag->nsamples = 0;
		598	}
		599
		600	/**
		601	* Calculate cross-correlation via rDFT.
		602	*
		603	* Multiply two vectors of complex numbers (result of real_to_complex rDFT)
		604	* and transform back via complex_to_real rDFT.
		605	*/
		606	static void yae_xcorr_via_rdft(FFTSample *xcorr,
		607	RDFTContext *complex_to_real,
		608	const FFTComplex *xa,
		609	const FFTComplex *xb,
		610	const int window)
		611	{
		612	FFTComplex xc = (FFTComplex )xcorr;
		613	int i;
		614
		615	// NOTE: first element requires special care -- Given Y = rDFT(X),
		616	// Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
		617	// stores Re(Y[N/2]) in place of Im(Y[0]).
		618
		619	xc->re = xa->re * xb->re;
		620	xc->im = xa->im * xb->im;
		621	xa++;
		622	xb++;
		623	xc++;
		624
		625	for (i = 1; i < window; i++, xa++, xb++, xc++) {
		626	xc->re = (xa->re * xb->re + xa->im * xb->im);
		627	xc->im = (xa->im * xb->re - xa->re * xb->im);
		628	}
		629
		630	// apply inverse rDFT:
		631	av_rdft_calc(complex_to_real, xcorr);
		632	}
		633
		634	/**
		635	* Calculate alignment offset for given fragment
		636	* relative to the previous fragment.
		637	*
		638	* @return alignment offset of current fragment relative to previous.
		639	*/
		640	static int yae_align(AudioFragment *frag,
		641	const AudioFragment *prev,
		642	const int window,
		643	const int delta_max,
		644	const int drift,
		645	FFTSample *correlation,
		646	RDFTContext *complex_to_real)
		647	{
		648	int best_offset = -drift;
		649	FFTSample best_metric = -FLT_MAX;
		650	FFTSample *xcorr;
		651
		652	int i0;
		653	int i1;
		654	int i;
		655
		656	yae_xcorr_via_rdft(correlation,
		657	complex_to_real,
		658	(const FFTComplex *)prev->xdat,
		659	(const FFTComplex *)frag->xdat,
		660	window);
		661
		662	// identify search window boundaries:
		663	i0 = FFMAX(window / 2 - delta_max - drift, 0);
		664	i0 = FFMIN(i0, window);
		665
		666	i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
		667	i1 = FFMAX(i1, 0);
		668
		669	// identify cross-correlation peaks within search window:
		670	xcorr = correlation + i0;
		671
		672	for (i = i0; i < i1; i++, xcorr++) {
		673	FFTSample metric = *xcorr;
		674
		675	// normalize:
		676	FFTSample drifti = (FFTSample)(drift + i);
		677	metric = drifti (FFTSample)(i - i0) * (FFTSample)(i1 - i);
		678
		679	if (metric > best_metric) {
		680	best_metric = metric;
		681	best_offset = i - window / 2;
		682	}
		683	}
		684
		685	return best_offset;
		686	}
		687
		688	/**
		689	* Adjust current fragment position for better alignment
		690	* with previous fragment.
		691	*
		692	* @return alignment correction.
		693	*/
		694	static int yae_adjust_position(ATempoContext *atempo)
		695	{
		696	const AudioFragment *prev = yae_prev_frag(atempo);
		697	AudioFragment *frag = yae_curr_frag(atempo);
		698
		699	const double prev_output_position =
		700	(double)(prev->position[1] - atempo->origin[1] + atempo->window / 2);
		701
		702	const double ideal_output_position =
		703	(double)(prev->position[0] - atempo->origin[0] + atempo->window / 2) /
		704	atempo->tempo;
		705
		706	const int drift = (int)(prev_output_position - ideal_output_position);
		707
		708	const int delta_max = atempo->window / 2;
		709	const int correction = yae_align(frag,
		710	prev,
		711	atempo->window,
		712	delta_max,
		713	drift,
		714	atempo->correlation,
		715	atempo->complex_to_real);
		716
		717	if (correction) {
		718	// adjust fragment position:
		719	frag->position[0] -= correction;
		720
		721	// clear so that the fragment can be reloaded:
		722	frag->nsamples = 0;
		723	}
		724
		725	return correction;
		726	}
		727
		728	/**
		729	* A helper macro for blending the overlap region of previous
		730	* and current audio fragment.
		731	*/
		732	#define yae_blend(scalar_type) \
		733	do { \
		734	const scalar_type aaa = (const scalar_type )a; \
		735	const scalar_type bbb = (const scalar_type )b; \
		736	\
		737	scalar_type out = (scalar_type )dst; \
		738	scalar_type out_end = (scalar_type )dst_end; \
		739	int64_t i; \
		740	\
		741	for (i = 0; i < overlap && out < out_end; \
		742	i++, atempo->position[1]++, wa++, wb++) { \
		743	float w0 = *wa; \
		744	float w1 = *wb; \
		745	int j; \
		746	\
		747	for (j = 0; j < atempo->channels; \
		748	j++, aaa++, bbb++, out++) { \
		749	float t0 = (float)*aaa; \
		750	float t1 = (float)*bbb; \
		751	\
		752	*out = \
		753	frag->position[0] + i < 0 ? \
		754	*aaa : \
		755	(scalar_type)(t0 * w0 + t1 * w1); \
		756	} \
		757	} \
		758	dst = (uint8_t *)out; \
		759	} while (0)
		760
		761	/**
		762	* Blend the overlap region of previous and current audio fragment
		763	* and output the results to the given destination buffer.
		764	*
		765	* @return
		766	* 0 if the overlap region was completely stored in the dst buffer,
		767	* AVERROR(EAGAIN) if more destination buffer space is required.
		768	*/
		769	static int yae_overlap_add(ATempoContext *atempo,
		770	uint8_t **dst_ref,
		771	uint8_t *dst_end)
		772	{
		773	// shortcuts:
		774	const AudioFragment *prev = yae_prev_frag(atempo);
		775	const AudioFragment *frag = yae_curr_frag(atempo);
		776
		777	const int64_t start_here = FFMAX(atempo->position[1],
		778	frag->position[1]);
		779
		780	const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
		781	frag->position[1] + frag->nsamples);
		782
		783	const int64_t overlap = stop_here - start_here;
		784
		785	const int64_t ia = start_here - prev->position[1];
		786	const int64_t ib = start_here - frag->position[1];
		787
		788	const float *wa = atempo->hann + ia;
		789	const float *wb = atempo->hann + ib;
		790
		791	const uint8_t a = prev->data + ia atempo->stride;
		792	const uint8_t b = frag->data + ib atempo->stride;
		793
		794	uint8_t dst = dst_ref;
		795
		796	av_assert0(start_here <= stop_here &&
		797	frag->position[1] <= start_here &&
		798	overlap <= frag->nsamples);
		799
		800	if (atempo->format == AV_SAMPLE_FMT_U8) {
		801	yae_blend(uint8_t);
		802	} else if (atempo->format == AV_SAMPLE_FMT_S16) {
		803	yae_blend(int16_t);
		804	} else if (atempo->format == AV_SAMPLE_FMT_S32) {
		805	yae_blend(int);
		806	} else if (atempo->format == AV_SAMPLE_FMT_FLT) {
		807	yae_blend(float);
		808	} else if (atempo->format == AV_SAMPLE_FMT_DBL) {
		809	yae_blend(double);
		810	}
		811
		812	// pass-back the updated destination buffer pointer:
		813	*dst_ref = dst;
		814
		815	return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
		816	}
		817
		818	/**
		819	* Feed as much data to the filter as it is able to consume
		820	* and receive as much processed data in the destination buffer
		821	* as it is able to produce or store.
		822	*/
		823	static void
		824	yae_apply(ATempoContext *atempo,
		825	const uint8_t **src_ref,
		826	const uint8_t *src_end,
		827	uint8_t **dst_ref,
		828	uint8_t *dst_end)
		829	{
		830	while (1) {
		831	if (atempo->state == YAE_LOAD_FRAGMENT) {
		832	// load additional data for the current fragment:
		833	if (yae_load_frag(atempo, src_ref, src_end) != 0) {
		834	break;
		835	}
		836
		837	// down-mix to mono:
		838	yae_downmix(atempo, yae_curr_frag(atempo));
		839
		840	// apply rDFT:
		841	av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
		842
		843	// must load the second fragment before alignment can start:
		844	if (!atempo->nfrag) {
		845	yae_advance_to_next_frag(atempo);
		846	continue;
		847	}
		848
		849	atempo->state = YAE_ADJUST_POSITION;
		850	}
		851
		852	if (atempo->state == YAE_ADJUST_POSITION) {
		853	// adjust position for better alignment:
		854	if (yae_adjust_position(atempo)) {
		855	// reload the fragment at the corrected position, so that the
		856	// Hann window blending would not require normalization:
		857	atempo->state = YAE_RELOAD_FRAGMENT;
		858	} else {
		859	atempo->state = YAE_OUTPUT_OVERLAP_ADD;
		860	}
		861	}
		862
		863	if (atempo->state == YAE_RELOAD_FRAGMENT) {
		864	// load additional data if necessary due to position adjustment:
		865	if (yae_load_frag(atempo, src_ref, src_end) != 0) {
		866	break;
		867	}
		868
		869	// down-mix to mono:
		870	yae_downmix(atempo, yae_curr_frag(atempo));
		871
		872	// apply rDFT:
		873	av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
		874
		875	atempo->state = YAE_OUTPUT_OVERLAP_ADD;
		876	}
		877
		878	if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
		879	// overlap-add and output the result:
		880	if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
		881	break;
		882	}
		883
		884	// advance to the next fragment, repeat:
		885	yae_advance_to_next_frag(atempo);
		886	atempo->state = YAE_LOAD_FRAGMENT;
		887	}
		888	}
		889	}
		890
		891	/**
		892	* Flush any buffered data from the filter.
		893	*
		894	* @return
		895	* 0 if all data was completely stored in the dst buffer,
		896	* AVERROR(EAGAIN) if more destination buffer space is required.
		897	*/
		898	static int yae_flush(ATempoContext *atempo,
		899	uint8_t **dst_ref,
		900	uint8_t *dst_end)
		901	{
		902	AudioFragment *frag = yae_curr_frag(atempo);
		903	int64_t overlap_end;
		904	int64_t start_here;
		905	int64_t stop_here;
		906	int64_t offset;
		907
		908	const uint8_t *src;
		909	uint8_t *dst;
		910
		911	int src_size;
		912	int dst_size;
		913	int nbytes;
		914
		915	atempo->state = YAE_FLUSH_OUTPUT;
		916
		917	if (atempo->position[0] == frag->position[0] + frag->nsamples &&
		918	atempo->position[1] == frag->position[1] + frag->nsamples) {
		919	// the current fragment is already flushed:
		920	return 0;
		921	}
		922
		923	if (frag->position[0] + frag->nsamples < atempo->position[0]) {
		924	// finish loading the current (possibly partial) fragment:
		925	yae_load_frag(atempo, NULL, NULL);
		926
		927	if (atempo->nfrag) {
		928	// down-mix to mono:
		929	yae_downmix(atempo, frag);
		930
		931	// apply rDFT:
		932	av_rdft_calc(atempo->real_to_complex, frag->xdat);
		933
		934	// align current fragment to previous fragment:
		935	if (yae_adjust_position(atempo)) {
		936	// reload the current fragment due to adjusted position:
		937	yae_load_frag(atempo, NULL, NULL);
		938	}
		939	}
		940	}
		941
		942	// flush the overlap region:
		943	overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
		944	frag->nsamples);
		945
		946	while (atempo->position[1] < overlap_end) {
		947	if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
		948	return AVERROR(EAGAIN);
		949	}
		950	}
		951
		952	// flush the remaininder of the current fragment:
		953	start_here = FFMAX(atempo->position[1], overlap_end);
		954	stop_here = frag->position[1] + frag->nsamples;
		955	offset = start_here - frag->position[1];
		956	av_assert0(start_here <= stop_here && frag->position[1] <= start_here);
		957
		958	src = frag->data + offset * atempo->stride;
		959	dst = (uint8_t )dst_ref;
		960
		961	src_size = (int)(stop_here - start_here) * atempo->stride;
		962	dst_size = dst_end - dst;
		963	nbytes = FFMIN(src_size, dst_size);
		964
		965	memcpy(dst, src, nbytes);
		966	dst += nbytes;
		967
		968	atempo->position[1] += (nbytes / atempo->stride);
		969
		970	// pass-back the updated destination buffer pointer:
		971	dst_ref = (uint8_t )dst;
		972
		973	return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
		974	}
		975
		976	static av_cold int init(AVFilterContext *ctx)
		977	{
		978	ATempoContext *atempo = ctx->priv;
		979	atempo->format = AV_SAMPLE_FMT_NONE;
		980	atempo->state = YAE_LOAD_FRAGMENT;
		981	return 0;
		982	}
		983
		984	static av_cold void uninit(AVFilterContext *ctx)
		985	{
		986	ATempoContext *atempo = ctx->priv;
		987	yae_release_buffers(atempo);
		988	}
		989
		990	static int query_formats(AVFilterContext *ctx)
		991	{
		992	AVFilterChannelLayouts *layouts = NULL;
		993	AVFilterFormats *formats = NULL;
		994
		995	// WSOLA necessitates an internal sliding window ring buffer
		996	// for incoming audio stream.
		997	//
		998	// Planar sample formats are too cumbersome to store in a ring buffer,
		999	// therefore planar sample formats are not supported.
		1000	//
		1001	static const enum AVSampleFormat sample_fmts[] = {
		1002	AV_SAMPLE_FMT_U8,
		1003	AV_SAMPLE_FMT_S16,
		1004	AV_SAMPLE_FMT_S32,
		1005	AV_SAMPLE_FMT_FLT,
		1006	AV_SAMPLE_FMT_DBL,
		1007	AV_SAMPLE_FMT_NONE
		1008	};
		1009
		1010	layouts = ff_all_channel_layouts();
		1011	if (!layouts) {
		1012	return AVERROR(ENOMEM);
		1013	}
		1014	ff_set_common_channel_layouts(ctx, layouts);
		1015
		1016	formats = ff_make_format_list(sample_fmts);
		1017	if (!formats) {
		1018	return AVERROR(ENOMEM);
		1019	}
		1020	ff_set_common_formats(ctx, formats);
		1021
		1022	formats = ff_all_samplerates();
		1023	if (!formats) {
		1024	return AVERROR(ENOMEM);
		1025	}
		1026	ff_set_common_samplerates(ctx, formats);
		1027
		1028	return 0;
		1029	}
		1030
		1031	static int config_props(AVFilterLink *inlink)
		1032	{
		1033	AVFilterContext *ctx = inlink->dst;
		1034	ATempoContext *atempo = ctx->priv;
		1035
		1036	enum AVSampleFormat format = inlink->format;
		1037	int sample_rate = (int)inlink->sample_rate;
		1038	int channels = av_get_channel_layout_nb_channels(inlink->channel_layout);
		1039
		1040	ctx->outputs[0]->flags \|= FF_LINK_FLAG_REQUEST_LOOP;
		1041
		1042	return yae_reset(atempo, format, sample_rate, channels);
		1043	}
		1044
		1045	static int push_samples(ATempoContext *atempo,
		1046	AVFilterLink *outlink,
		1047	int n_out)
		1048	{
		1049	int ret;
		1050
		1051	atempo->dst_buffer->sample_rate = outlink->sample_rate;
		1052	atempo->dst_buffer->nb_samples = n_out;
		1053
		1054	// adjust the PTS:
		1055	atempo->dst_buffer->pts =
		1056	av_rescale_q(atempo->nsamples_out,
		1057	(AVRational){ 1, outlink->sample_rate },
		1058	outlink->time_base);
		1059
		1060	ret = ff_filter_frame(outlink, atempo->dst_buffer);
		1061	if (ret < 0)
		1062	return ret;
		1063	atempo->dst_buffer = NULL;
		1064	atempo->dst = NULL;
		1065	atempo->dst_end = NULL;
		1066
		1067	atempo->nsamples_out += n_out;
		1068	return 0;
		1069	}
		1070
		1071	static int filter_frame(AVFilterLink inlink, AVFrame src_buffer)
		1072	{
		1073	AVFilterContext *ctx = inlink->dst;
		1074	ATempoContext *atempo = ctx->priv;
		1075	AVFilterLink *outlink = ctx->outputs[0];
		1076
		1077	int ret = 0;
		1078	int n_in = src_buffer->nb_samples;
		1079	int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);
		1080
		1081	const uint8_t *src = src_buffer->data[0];
		1082	const uint8_t src_end = src + n_in atempo->stride;
		1083
		1084	while (src < src_end) {
		1085	if (!atempo->dst_buffer) {
		1086	atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out);
		1087	if (!atempo->dst_buffer)
		1088	return AVERROR(ENOMEM);
		1089	av_frame_copy_props(atempo->dst_buffer, src_buffer);
		1090
		1091	atempo->dst = atempo->dst_buffer->data[0];
		1092	atempo->dst_end = atempo->dst + n_out * atempo->stride;
		1093	}
		1094
		1095	yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);
		1096
		1097	if (atempo->dst == atempo->dst_end) {
		1098	int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) /
		1099	atempo->stride);
		1100	ret = push_samples(atempo, outlink, n_samples);
		1101	if (ret < 0)
		1102	goto end;
		1103	}
		1104	}
		1105
		1106	atempo->nsamples_in += n_in;
		1107	end:
		1108	av_frame_free(&src_buffer);
		1109	return ret;
		1110	}
		1111
		1112	static int request_frame(AVFilterLink *outlink)
		1113	{
		1114	AVFilterContext *ctx = outlink->src;
		1115	ATempoContext *atempo = ctx->priv;
		1116	int ret;
		1117
		1118	ret = ff_request_frame(ctx->inputs[0]);
		1119
		1120	if (ret == AVERROR_EOF) {
		1121	// flush the filter:
		1122	int n_max = atempo->ring;
		1123	int n_out;
		1124	int err = AVERROR(EAGAIN);
		1125
		1126	while (err == AVERROR(EAGAIN)) {
		1127	if (!atempo->dst_buffer) {
		1128	atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max);
		1129	if (!atempo->dst_buffer)
		1130	return AVERROR(ENOMEM);
		1131
		1132	atempo->dst = atempo->dst_buffer->data[0];
		1133	atempo->dst_end = atempo->dst + n_max * atempo->stride;
		1134	}
		1135
		1136	err = yae_flush(atempo, &atempo->dst, atempo->dst_end);
		1137
		1138	n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
		1139	atempo->stride);
		1140
		1141	if (n_out) {
		1142	ret = push_samples(atempo, outlink, n_out);
		1143	}
		1144	}
		1145
		1146	av_frame_free(&atempo->dst_buffer);
		1147	atempo->dst = NULL;
		1148	atempo->dst_end = NULL;
		1149
		1150	return AVERROR_EOF;
		1151	}
		1152
		1153	return ret;
		1154	}
		1155
		1156	static int process_command(AVFilterContext *ctx,
		1157	const char *cmd,
		1158	const char *arg,
		1159	char *res,
		1160	int res_len,
		1161	int flags)
		1162	{
		1163	return !strcmp(cmd, "tempo") ? yae_set_tempo(ctx, arg) : AVERROR(ENOSYS);
		1164	}
		1165
		1166	static const AVFilterPad atempo_inputs[] = {
		1167	{
		1168	.name = "default",
		1169	.type = AVMEDIA_TYPE_AUDIO,
		1170	.filter_frame = filter_frame,
		1171	.config_props = config_props,
		1172	},
		1173	{ NULL }
		1174	};
		1175
		1176	static const AVFilterPad atempo_outputs[] = {
		1177	{
		1178	.name = "default",
		1179	.request_frame = request_frame,
		1180	.type = AVMEDIA_TYPE_AUDIO,
		1181	},
		1182	{ NULL }
		1183	};
		1184
		1185	AVFilter avfilter_af_atempo = {
		1186	.name = "atempo",
		1187	.description = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
		1188	.init = init,
		1189	.uninit = uninit,
		1190	.query_formats = query_formats,
		1191	.process_command = process_command,
		1192	.priv_size = sizeof(ATempoContext),
		1193	.priv_class = &atempo_class,
		1194	.inputs = atempo_inputs,
		1195	.outputs = atempo_outputs,
		1196	};

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavfilter/af_atempo.c – Rev 6148