WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavcodec/mips/aacdec_mips.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* Copyright (c) 2012
		3	* MIPS Technologies, Inc., California.
		4	*
		5	* Redistribution and use in source and binary forms, with or without
		6	* modification, are permitted provided that the following conditions
		7	* are met:
		8	* 1. Redistributions of source code must retain the above copyright
		9	* notice, this list of conditions and the following disclaimer.
		10	* 2. Redistributions in binary form must reproduce the above copyright
		11	* notice, this list of conditions and the following disclaimer in the
		12	* documentation and/or other materials provided with the distribution.
		13	* 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
		14	* contributors may be used to endorse or promote products derived from
		15	* this software without specific prior written permission.
		16	*
		17	* THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
		18	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
		19	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
		20	* ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
		21	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
		22	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
		23	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
		24	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
		25	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
		26	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
		27	* SUCH DAMAGE.
		28	*
		29	* Authors: Darko Laus (darko@mips.com)
		30	* Djordje Pesut (djordje@mips.com)
		31	* Mirjana Vulin (mvulin@mips.com)
		32	*
		33	* This file is part of FFmpeg.
		34	*
		35	* FFmpeg is free software; you can redistribute it and/or
		36	* modify it under the terms of the GNU Lesser General Public
		37	* License as published by the Free Software Foundation; either
		38	* version 2.1 of the License, or (at your option) any later version.
		39	*
		40	* FFmpeg is distributed in the hope that it will be useful,
		41	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		42	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		43	* Lesser General Public License for more details.
		44	*
		45	* You should have received a copy of the GNU Lesser General Public
		46	* License along with FFmpeg; if not, write to the Free Software
		47	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		48	*/
		49
		50	/**
		51	* @file
		52	* Reference: libavcodec/aacdec.c
		53	*/
		54
		55	#include "libavcodec/aac.h"
		56	#include "aacdec_mips.h"
		57	#include "libavcodec/aactab.h"
		58	#include "libavcodec/sinewin.h"
		59
		60	#if HAVE_INLINE_ASM
		61	static av_always_inline int lcg_random(unsigned previous_val)
		62	{
		63	union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
		64	return v.s;
		65	}
		66
		67	static void imdct_and_windowing_mips(AACContext ac, SingleChannelElement sce)
		68	{
		69	IndividualChannelStream *ics = &sce->ics;
		70	float *in = sce->coeffs;
		71	float *out = sce->ret;
		72	float *saved = sce->saved;
		73	const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
		74	const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
		75	const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
		76	float *buf = ac->buf_mdct;
		77	int i;
		78
		79	if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
		80	for (i = 0; i < 1024; i += 128)
		81	ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
		82	} else
		83	ac->mdct.imdct_half(&ac->mdct, buf, in);
		84
		85	/* window overlapping
		86	* NOTE: To simplify the overlapping code, all 'meaningless' short to long
		87	* and long to short transitions are considered to be short to short
		88	* transitions. This leaves just two cases (long to long and short to short)
		89	* with a little special sauce for EIGHT_SHORT_SEQUENCE.
		90	*/
		91	if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE \|\| ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
		92	(ics->window_sequence[0] == ONLY_LONG_SEQUENCE \|\| ics->window_sequence[0] == LONG_START_SEQUENCE)) {
		93	ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512);
		94	} else {
		95	{
		96	float *buf1 = saved;
		97	float *buf2 = out;
		98	int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
		99	int loop_end;
		100
		101	/* loop unrolled 8 times */
		102	__asm__ volatile (
		103	".set push \n\t"
		104	".set noreorder \n\t"
		105	"addiu %[loop_end], %[src], 1792 \n\t"
		106	"1: \n\t"
		107	"lw %[temp0], 0(%[src]) \n\t"
		108	"lw %[temp1], 4(%[src]) \n\t"
		109	"lw %[temp2], 8(%[src]) \n\t"
		110	"lw %[temp3], 12(%[src]) \n\t"
		111	"lw %[temp4], 16(%[src]) \n\t"
		112	"lw %[temp5], 20(%[src]) \n\t"
		113	"lw %[temp6], 24(%[src]) \n\t"
		114	"lw %[temp7], 28(%[src]) \n\t"
		115	"addiu %[src], %[src], 32 \n\t"
		116	"sw %[temp0], 0(%[dst]) \n\t"
		117	"sw %[temp1], 4(%[dst]) \n\t"
		118	"sw %[temp2], 8(%[dst]) \n\t"
		119	"sw %[temp3], 12(%[dst]) \n\t"
		120	"sw %[temp4], 16(%[dst]) \n\t"
		121	"sw %[temp5], 20(%[dst]) \n\t"
		122	"sw %[temp6], 24(%[dst]) \n\t"
		123	"sw %[temp7], 28(%[dst]) \n\t"
		124	"bne %[src], %[loop_end], 1b \n\t"
		125	" addiu %[dst], %[dst], 32 \n\t"
		126	".set pop \n\t"
		127
		128	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		129	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		130	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		131	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		132	[loop_end]"=&r"(loop_end), [src]"+r"(buf1),
		133	[dst]"+r"(buf2)
		134	:
		135	: "memory"
		136	);
		137	}
		138
		139	if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
		140	{
		141	float wi;
		142	float wj;
		143	int i;
		144	float temp0, temp1, temp2, temp3;
		145	float dst0 = out + 448 + 0128;
		146	float *dst1 = dst0 + 64 + 63;
		147	float *dst2 = saved + 63;
		148	float win0 = (float)swindow;
		149	float *win1 = win0 + 64 + 63;
		150	float win0_prev = (float)swindow_prev;
		151	float *win1_prev = win0_prev + 64 + 63;
		152	float *src0_prev = saved + 448;
		153	float src1_prev = buf + 0128 + 63;
		154	float src0 = buf + 0128 + 64;
		155	float src1 = buf + 1128 + 63;
		156
		157	for(i = 0; i < 64; i++)
		158	{
		159	temp0 = src0_prev[0];
		160	temp1 = src1_prev[0];
		161	wi = *win0_prev;
		162	wj = *win1_prev;
		163	temp2 = src0[0];
		164	temp3 = src1[0];
		165	dst0[0] = temp0 * wj - temp1 * wi;
		166	dst1[0] = temp0 * wi + temp1 * wj;
		167
		168	wi = *win0;
		169	wj = *win1;
		170
		171	temp0 = src0[128];
		172	temp1 = src1[128];
		173	dst0[128] = temp2 * wj - temp3 * wi;
		174	dst1[128] = temp2 * wi + temp3 * wj;
		175
		176	temp2 = src0[256];
		177	temp3 = src1[256];
		178	dst0[256] = temp0 * wj - temp1 * wi;
		179	dst1[256] = temp0 * wi + temp1 * wj;
		180	dst0[384] = temp2 * wj - temp3 * wi;
		181	dst1[384] = temp2 * wi + temp3 * wj;
		182
		183	temp0 = src0[384];
		184	temp1 = src1[384];
		185	dst0[512] = temp0 * wj - temp1 * wi;
		186	dst2[0] = temp0 * wi + temp1 * wj;
		187
		188	src0++;
		189	src1--;
		190	src0_prev++;
		191	src1_prev--;
		192	win0++;
		193	win1--;
		194	win0_prev++;
		195	win1_prev--;
		196	dst0++;
		197	dst1--;
		198	dst2--;
		199	}
		200	}
		201	} else {
		202	ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
		203	{
		204	float *buf1 = buf + 64;
		205	float *buf2 = out + 576;
		206	int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
		207	int loop_end;
		208
		209	/* loop unrolled 8 times */
		210	__asm__ volatile (
		211	".set push \n\t"
		212	".set noreorder \n\t"
		213	"addiu %[loop_end], %[src], 1792 \n\t"
		214	"1: \n\t"
		215	"lw %[temp0], 0(%[src]) \n\t"
		216	"lw %[temp1], 4(%[src]) \n\t"
		217	"lw %[temp2], 8(%[src]) \n\t"
		218	"lw %[temp3], 12(%[src]) \n\t"
		219	"lw %[temp4], 16(%[src]) \n\t"
		220	"lw %[temp5], 20(%[src]) \n\t"
		221	"lw %[temp6], 24(%[src]) \n\t"
		222	"lw %[temp7], 28(%[src]) \n\t"
		223	"addiu %[src], %[src], 32 \n\t"
		224	"sw %[temp0], 0(%[dst]) \n\t"
		225	"sw %[temp1], 4(%[dst]) \n\t"
		226	"sw %[temp2], 8(%[dst]) \n\t"
		227	"sw %[temp3], 12(%[dst]) \n\t"
		228	"sw %[temp4], 16(%[dst]) \n\t"
		229	"sw %[temp5], 20(%[dst]) \n\t"
		230	"sw %[temp6], 24(%[dst]) \n\t"
		231	"sw %[temp7], 28(%[dst]) \n\t"
		232	"bne %[src], %[loop_end], 1b \n\t"
		233	" addiu %[dst], %[dst], 32 \n\t"
		234	".set pop \n\t"
		235
		236	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		237	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		238	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		239	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		240	[loop_end]"=&r"(loop_end), [src]"+r"(buf1),
		241	[dst]"+r"(buf2)
		242	:
		243	: "memory"
		244	);
		245	}
		246	}
		247	}
		248
		249	// buffer update
		250	if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
		251	ac->fdsp.vector_fmul_window(saved + 64, buf + 4128 + 64, buf + 5128, swindow, 64);
		252	ac->fdsp.vector_fmul_window(saved + 192, buf + 5128 + 64, buf + 6128, swindow, 64);
		253	ac->fdsp.vector_fmul_window(saved + 320, buf + 6128 + 64, buf + 7128, swindow, 64);
		254	{
		255	float buf1 = buf + 7128 + 64;
		256	float *buf2 = saved + 448;
		257	int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
		258	int loop_end;
		259
		260	/* loop unrolled 8 times */
		261	__asm__ volatile (
		262	".set push \n\t"
		263	".set noreorder \n\t"
		264	"addiu %[loop_end], %[src], 256 \n\t"
		265	"1: \n\t"
		266	"lw %[temp0], 0(%[src]) \n\t"
		267	"lw %[temp1], 4(%[src]) \n\t"
		268	"lw %[temp2], 8(%[src]) \n\t"
		269	"lw %[temp3], 12(%[src]) \n\t"
		270	"lw %[temp4], 16(%[src]) \n\t"
		271	"lw %[temp5], 20(%[src]) \n\t"
		272	"lw %[temp6], 24(%[src]) \n\t"
		273	"lw %[temp7], 28(%[src]) \n\t"
		274	"addiu %[src], %[src], 32 \n\t"
		275	"sw %[temp0], 0(%[dst]) \n\t"
		276	"sw %[temp1], 4(%[dst]) \n\t"
		277	"sw %[temp2], 8(%[dst]) \n\t"
		278	"sw %[temp3], 12(%[dst]) \n\t"
		279	"sw %[temp4], 16(%[dst]) \n\t"
		280	"sw %[temp5], 20(%[dst]) \n\t"
		281	"sw %[temp6], 24(%[dst]) \n\t"
		282	"sw %[temp7], 28(%[dst]) \n\t"
		283	"bne %[src], %[loop_end], 1b \n\t"
		284	" addiu %[dst], %[dst], 32 \n\t"
		285	".set pop \n\t"
		286
		287	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		288	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		289	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		290	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		291	[loop_end]"=&r"(loop_end), [src]"+r"(buf1),
		292	[dst]"+r"(buf2)
		293	:
		294	: "memory"
		295	);
		296	}
		297	} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
		298	float *buf1 = buf + 512;
		299	float *buf2 = saved;
		300	int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
		301	int loop_end;
		302
		303	/* loop unrolled 8 times */
		304	__asm__ volatile (
		305	".set push \n\t"
		306	".set noreorder \n\t"
		307	"addiu %[loop_end], %[src], 1792 \n\t"
		308	"1: \n\t"
		309	"lw %[temp0], 0(%[src]) \n\t"
		310	"lw %[temp1], 4(%[src]) \n\t"
		311	"lw %[temp2], 8(%[src]) \n\t"
		312	"lw %[temp3], 12(%[src]) \n\t"
		313	"lw %[temp4], 16(%[src]) \n\t"
		314	"lw %[temp5], 20(%[src]) \n\t"
		315	"lw %[temp6], 24(%[src]) \n\t"
		316	"lw %[temp7], 28(%[src]) \n\t"
		317	"addiu %[src], %[src], 32 \n\t"
		318	"sw %[temp0], 0(%[dst]) \n\t"
		319	"sw %[temp1], 4(%[dst]) \n\t"
		320	"sw %[temp2], 8(%[dst]) \n\t"
		321	"sw %[temp3], 12(%[dst]) \n\t"
		322	"sw %[temp4], 16(%[dst]) \n\t"
		323	"sw %[temp5], 20(%[dst]) \n\t"
		324	"sw %[temp6], 24(%[dst]) \n\t"
		325	"sw %[temp7], 28(%[dst]) \n\t"
		326	"bne %[src], %[loop_end], 1b \n\t"
		327	" addiu %[dst], %[dst], 32 \n\t"
		328	".set pop \n\t"
		329
		330	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		331	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		332	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		333	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		334	[loop_end]"=&r"(loop_end), [src]"+r"(buf1),
		335	[dst]"+r"(buf2)
		336	:
		337	: "memory"
		338	);
		339	{
		340	float buf1 = buf + 7128 + 64;
		341	float *buf2 = saved + 448;
		342	int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
		343	int loop_end;
		344
		345	/* loop unrolled 8 times */
		346	__asm__ volatile (
		347	".set push \n\t"
		348	".set noreorder \n\t"
		349	"addiu %[loop_end], %[src], 256 \n\t"
		350	"1: \n\t"
		351	"lw %[temp0], 0(%[src]) \n\t"
		352	"lw %[temp1], 4(%[src]) \n\t"
		353	"lw %[temp2], 8(%[src]) \n\t"
		354	"lw %[temp3], 12(%[src]) \n\t"
		355	"lw %[temp4], 16(%[src]) \n\t"
		356	"lw %[temp5], 20(%[src]) \n\t"
		357	"lw %[temp6], 24(%[src]) \n\t"
		358	"lw %[temp7], 28(%[src]) \n\t"
		359	"addiu %[src], %[src], 32 \n\t"
		360	"sw %[temp0], 0(%[dst]) \n\t"
		361	"sw %[temp1], 4(%[dst]) \n\t"
		362	"sw %[temp2], 8(%[dst]) \n\t"
		363	"sw %[temp3], 12(%[dst]) \n\t"
		364	"sw %[temp4], 16(%[dst]) \n\t"
		365	"sw %[temp5], 20(%[dst]) \n\t"
		366	"sw %[temp6], 24(%[dst]) \n\t"
		367	"sw %[temp7], 28(%[dst]) \n\t"
		368	"bne %[src], %[loop_end], 1b \n\t"
		369	" addiu %[dst], %[dst], 32 \n\t"
		370	".set pop \n\t"
		371
		372	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		373	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		374	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		375	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		376	[loop_end]"=&r"(loop_end), [src]"+r"(buf1),
		377	[dst]"+r"(buf2)
		378	:
		379	: "memory"
		380	);
		381	}
		382	} else { // LONG_STOP or ONLY_LONG
		383	float *buf1 = buf + 512;
		384	float *buf2 = saved;
		385	int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
		386	int loop_end;
		387
		388	/* loop unrolled 8 times */
		389	__asm__ volatile (
		390	".set push \n\t"
		391	".set noreorder \n\t"
		392	"addiu %[loop_end], %[src], 2048 \n\t"
		393	"1: \n\t"
		394	"lw %[temp0], 0(%[src]) \n\t"
		395	"lw %[temp1], 4(%[src]) \n\t"
		396	"lw %[temp2], 8(%[src]) \n\t"
		397	"lw %[temp3], 12(%[src]) \n\t"
		398	"lw %[temp4], 16(%[src]) \n\t"
		399	"lw %[temp5], 20(%[src]) \n\t"
		400	"lw %[temp6], 24(%[src]) \n\t"
		401	"lw %[temp7], 28(%[src]) \n\t"
		402	"addiu %[src], %[src], 32 \n\t"
		403	"sw %[temp0], 0(%[dst]) \n\t"
		404	"sw %[temp1], 4(%[dst]) \n\t"
		405	"sw %[temp2], 8(%[dst]) \n\t"
		406	"sw %[temp3], 12(%[dst]) \n\t"
		407	"sw %[temp4], 16(%[dst]) \n\t"
		408	"sw %[temp5], 20(%[dst]) \n\t"
		409	"sw %[temp6], 24(%[dst]) \n\t"
		410	"sw %[temp7], 28(%[dst]) \n\t"
		411	"bne %[src], %[loop_end], 1b \n\t"
		412	" addiu %[dst], %[dst], 32 \n\t"
		413	".set pop \n\t"
		414
		415	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		416	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		417	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		418	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		419	[loop_end]"=&r"(loop_end), [src]"+r"(buf1),
		420	[dst]"+r"(buf2)
		421	:
		422	: "memory"
		423	);
		424	}
		425	}
		426
		427	static void apply_ltp_mips(AACContext ac, SingleChannelElement sce)
		428	{
		429	const LongTermPrediction *ltp = &sce->ics.ltp;
		430	const uint16_t *offsets = sce->ics.swb_offset;
		431	int i, sfb;
		432	int j, k;
		433
		434	if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
		435	float *predTime = sce->ret;
		436	float *predFreq = ac->buf_mdct;
		437	float *p_predTime;
		438	int16_t num_samples = 2048;
		439
		440	if (ltp->lag < 1024)
		441	num_samples = ltp->lag + 1024;
		442	j = (2048 - num_samples) >> 2;
		443	k = (2048 - num_samples) & 3;
		444	p_predTime = &predTime[num_samples];
		445
		446	for (i = 0; i < num_samples; i++)
		447	predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
		448	for (i = 0; i < j; i++) {
		449
		450	/* loop unrolled 4 times */
		451	__asm__ volatile (
		452	"sw $0, 0(%[p_predTime]) \n\t"
		453	"sw $0, 4(%[p_predTime]) \n\t"
		454	"sw $0, 8(%[p_predTime]) \n\t"
		455	"sw $0, 12(%[p_predTime]) \n\t"
		456	"addiu %[p_predTime], %[p_predTime], 16 \n\t"
		457
		458	: [p_predTime]"+r"(p_predTime)
		459	:
		460	: "memory"
		461	);
		462	}
		463	for (i = 0; i < k; i++) {
		464
		465	__asm__ volatile (
		466	"sw $0, 0(%[p_predTime]) \n\t"
		467	"addiu %[p_predTime], %[p_predTime], 4 \n\t"
		468
		469	: [p_predTime]"+r"(p_predTime)
		470	:
		471	: "memory"
		472	);
		473	}
		474
		475	ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
		476
		477	if (sce->tns.present)
		478	ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
		479
		480	for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
		481	if (ltp->used[sfb])
		482	for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
		483	sce->coeffs[i] += predFreq[i];
		484	}
		485	}
		486
		487	#if HAVE_MIPSFPU
		488	static void update_ltp_mips(AACContext ac, SingleChannelElement sce)
		489	{
		490	IndividualChannelStream *ics = &sce->ics;
		491	float *saved = sce->saved;
		492	float *saved_ltp = sce->coeffs;
		493	const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
		494	const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
		495	int i;
		496	int loop_end, loop_end1, loop_end2;
		497	float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11;
		498
		499	if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
		500	float *buf = saved;
		501	float *buf0 = saved_ltp;
		502	float *p_saved_ltp = saved_ltp + 576;
		503	float *ptr1 = &saved_ltp[512];
		504	float *ptr2 = &ac->buf_mdct[1023];
		505	float ptr3 = (float)&swindow[63];
		506	loop_end1 = (int)(p_saved_ltp + 448);
		507
		508	/* loop unrolled 8 times */
		509	__asm__ volatile (
		510	".set push \n\t"
		511	".set noreorder \n\t"
		512	"addiu %[loop_end], %[src], 2048 \n\t"
		513	"1: \n\t"
		514	"lw %[temp0], 0(%[src]) \n\t"
		515	"lw %[temp1], 4(%[src]) \n\t"
		516	"lw %[temp2], 8(%[src]) \n\t"
		517	"lw %[temp3], 12(%[src]) \n\t"
		518	"lw %[temp4], 16(%[src]) \n\t"
		519	"lw %[temp5], 20(%[src]) \n\t"
		520	"lw %[temp6], 24(%[src]) \n\t"
		521	"lw %[temp7], 28(%[src]) \n\t"
		522	"addiu %[src], %[src], 32 \n\t"
		523	"sw %[temp0], 0(%[dst]) \n\t"
		524	"sw %[temp1], 4(%[dst]) \n\t"
		525	"sw %[temp2], 8(%[dst]) \n\t"
		526	"sw %[temp3], 12(%[dst]) \n\t"
		527	"sw %[temp4], 16(%[dst]) \n\t"
		528	"sw %[temp5], 20(%[dst]) \n\t"
		529	"sw %[temp6], 24(%[dst]) \n\t"
		530	"sw %[temp7], 28(%[dst]) \n\t"
		531	"bne %[src], %[loop_end], 1b \n\t"
		532	" addiu %[dst], %[dst], 32 \n\t"
		533	".set pop \n\t"
		534
		535	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		536	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		537	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		538	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		539	[loop_end]"=&r"(loop_end), [src]"+r"(buf),
		540	[dst]"+r"(buf0)
		541	:
		542	: "memory"
		543	);
		544
		545	/* loop unrolled 8 times */
		546	__asm__ volatile (
		547	"1: \n\t"
		548	"sw $0, 0(%[p_saved_ltp]) \n\t"
		549	"sw $0, 4(%[p_saved_ltp]) \n\t"
		550	"sw $0, 8(%[p_saved_ltp]) \n\t"
		551	"sw $0, 12(%[p_saved_ltp]) \n\t"
		552	"sw $0, 16(%[p_saved_ltp]) \n\t"
		553	"sw $0, 20(%[p_saved_ltp]) \n\t"
		554	"sw $0, 24(%[p_saved_ltp]) \n\t"
		555	"sw $0, 28(%[p_saved_ltp]) \n\t"
		556	"addiu %[p_saved_ltp], %[p_saved_ltp], 32 \n\t"
		557	"bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
		558
		559	: [p_saved_ltp]"+r"(p_saved_ltp)
		560	: [loop_end1]"r"(loop_end1)
		561	: "memory"
		562	);
		563
		564	ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
		565	for (i = 0; i < 16; i++){
		566	/* loop unrolled 4 times */
		567	__asm__ volatile (
		568	"lwc1 %[temp0], 0(%[ptr2]) \n\t"
		569	"lwc1 %[temp1], -4(%[ptr2]) \n\t"
		570	"lwc1 %[temp2], -8(%[ptr2]) \n\t"
		571	"lwc1 %[temp3], -12(%[ptr2]) \n\t"
		572	"lwc1 %[temp4], 0(%[ptr3]) \n\t"
		573	"lwc1 %[temp5], -4(%[ptr3]) \n\t"
		574	"lwc1 %[temp6], -8(%[ptr3]) \n\t"
		575	"lwc1 %[temp7], -12(%[ptr3]) \n\t"
		576	"mul.s %[temp8], %[temp0], %[temp4] \n\t"
		577	"mul.s %[temp9], %[temp1], %[temp5] \n\t"
		578	"mul.s %[temp10], %[temp2], %[temp6] \n\t"
		579	"mul.s %[temp11], %[temp3], %[temp7] \n\t"
		580	"swc1 %[temp8], 0(%[ptr1]) \n\t"
		581	"swc1 %[temp9], 4(%[ptr1]) \n\t"
		582	"swc1 %[temp10], 8(%[ptr1]) \n\t"
		583	"swc1 %[temp11], 12(%[ptr1]) \n\t"
		584	"addiu %[ptr1], %[ptr1], 16 \n\t"
		585	"addiu %[ptr2], %[ptr2], -16 \n\t"
		586	"addiu %[ptr3], %[ptr3], -16 \n\t"
		587
		588	: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
		589	[temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
		590	[temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
		591	[temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
		592	[temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
		593	[temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
		594	[ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
		595	:
		596	: "memory"
		597	);
		598	}
		599	} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
		600	float *buff0 = saved;
		601	float *buff1 = saved_ltp;
		602	float *ptr1 = &saved_ltp[512];
		603	float *ptr2 = &ac->buf_mdct[1023];
		604	float ptr3 = (float)&swindow[63];
		605	loop_end = (int)(saved + 448);
		606
		607	/* loop unrolled 8 times */
		608	__asm__ volatile (
		609	".set push \n\t"
		610	".set noreorder \n\t"
		611	"1: \n\t"
		612	"lw %[temp0], 0(%[src]) \n\t"
		613	"lw %[temp1], 4(%[src]) \n\t"
		614	"lw %[temp2], 8(%[src]) \n\t"
		615	"lw %[temp3], 12(%[src]) \n\t"
		616	"lw %[temp4], 16(%[src]) \n\t"
		617	"lw %[temp5], 20(%[src]) \n\t"
		618	"lw %[temp6], 24(%[src]) \n\t"
		619	"lw %[temp7], 28(%[src]) \n\t"
		620	"addiu %[src], %[src], 32 \n\t"
		621	"sw %[temp0], 0(%[dst]) \n\t"
		622	"sw %[temp1], 4(%[dst]) \n\t"
		623	"sw %[temp2], 8(%[dst]) \n\t"
		624	"sw %[temp3], 12(%[dst]) \n\t"
		625	"sw %[temp4], 16(%[dst]) \n\t"
		626	"sw %[temp5], 20(%[dst]) \n\t"
		627	"sw %[temp6], 24(%[dst]) \n\t"
		628	"sw %[temp7], 28(%[dst]) \n\t"
		629	"sw $0, 2304(%[dst]) \n\t"
		630	"sw $0, 2308(%[dst]) \n\t"
		631	"sw $0, 2312(%[dst]) \n\t"
		632	"sw $0, 2316(%[dst]) \n\t"
		633	"sw $0, 2320(%[dst]) \n\t"
		634	"sw $0, 2324(%[dst]) \n\t"
		635	"sw $0, 2328(%[dst]) \n\t"
		636	"sw $0, 2332(%[dst]) \n\t"
		637	"bne %[src], %[loop_end], 1b \n\t"
		638	" addiu %[dst], %[dst], 32 \n\t"
		639	".set pop \n\t"
		640
		641	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		642	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		643	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		644	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		645	[src]"+r"(buff0), [dst]"+r"(buff1)
		646	: [loop_end]"r"(loop_end)
		647	: "memory"
		648	);
		649	ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
		650	for (i = 0; i < 16; i++){
		651	/* loop unrolled 8 times */
		652	__asm__ volatile (
		653	"lwc1 %[temp0], 0(%[ptr2]) \n\t"
		654	"lwc1 %[temp1], -4(%[ptr2]) \n\t"
		655	"lwc1 %[temp2], -8(%[ptr2]) \n\t"
		656	"lwc1 %[temp3], -12(%[ptr2]) \n\t"
		657	"lwc1 %[temp4], 0(%[ptr3]) \n\t"
		658	"lwc1 %[temp5], -4(%[ptr3]) \n\t"
		659	"lwc1 %[temp6], -8(%[ptr3]) \n\t"
		660	"lwc1 %[temp7], -12(%[ptr3]) \n\t"
		661	"mul.s %[temp8], %[temp0], %[temp4] \n\t"
		662	"mul.s %[temp9], %[temp1], %[temp5] \n\t"
		663	"mul.s %[temp10], %[temp2], %[temp6] \n\t"
		664	"mul.s %[temp11], %[temp3], %[temp7] \n\t"
		665	"swc1 %[temp8], 0(%[ptr1]) \n\t"
		666	"swc1 %[temp9], 4(%[ptr1]) \n\t"
		667	"swc1 %[temp10], 8(%[ptr1]) \n\t"
		668	"swc1 %[temp11], 12(%[ptr1]) \n\t"
		669	"addiu %[ptr1], %[ptr1], 16 \n\t"
		670	"addiu %[ptr2], %[ptr2], -16 \n\t"
		671	"addiu %[ptr3], %[ptr3], -16 \n\t"
		672
		673	: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
		674	[temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
		675	[temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
		676	[temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
		677	[temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
		678	[temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
		679	[ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
		680	:
		681	: "memory"
		682	);
		683	}
		684	} else { // LONG_STOP or ONLY_LONG
		685	float ptr1, ptr2, *ptr3;
		686	ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
		687
		688	ptr1 = &saved_ltp[512];
		689	ptr2 = &ac->buf_mdct[1023];
		690	ptr3 = (float*)&lwindow[511];
		691
		692	for (i = 0; i < 512; i+=4){
		693	/* loop unrolled 4 times */
		694	__asm__ volatile (
		695	"lwc1 %[temp0], 0(%[ptr2]) \n\t"
		696	"lwc1 %[temp1], -4(%[ptr2]) \n\t"
		697	"lwc1 %[temp2], -8(%[ptr2]) \n\t"
		698	"lwc1 %[temp3], -12(%[ptr2]) \n\t"
		699	"lwc1 %[temp4], 0(%[ptr3]) \n\t"
		700	"lwc1 %[temp5], -4(%[ptr3]) \n\t"
		701	"lwc1 %[temp6], -8(%[ptr3]) \n\t"
		702	"lwc1 %[temp7], -12(%[ptr3]) \n\t"
		703	"mul.s %[temp8], %[temp0], %[temp4] \n\t"
		704	"mul.s %[temp9], %[temp1], %[temp5] \n\t"
		705	"mul.s %[temp10], %[temp2], %[temp6] \n\t"
		706	"mul.s %[temp11], %[temp3], %[temp7] \n\t"
		707	"swc1 %[temp8], 0(%[ptr1]) \n\t"
		708	"swc1 %[temp9], 4(%[ptr1]) \n\t"
		709	"swc1 %[temp10], 8(%[ptr1]) \n\t"
		710	"swc1 %[temp11], 12(%[ptr1]) \n\t"
		711	"addiu %[ptr1], %[ptr1], 16 \n\t"
		712	"addiu %[ptr2], %[ptr2], -16 \n\t"
		713	"addiu %[ptr3], %[ptr3], -16 \n\t"
		714
		715	: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
		716	[temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
		717	[temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
		718	[temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
		719	[temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
		720	[temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
		721	[ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2),
		722	[ptr3]"+r"(ptr3)
		723	:
		724	: "memory"
		725	);
		726	}
		727	}
		728
		729	{
		730	float *buf1 = sce->ltp_state+1024;
		731	float *buf2 = sce->ltp_state;
		732	float *buf3 = sce->ret;
		733	float *buf4 = sce->ltp_state+1024;
		734	float *buf5 = saved_ltp;
		735	float *buf6 = sce->ltp_state+2048;
		736
		737	/* loops unrolled 8 times */
		738	__asm__ volatile (
		739	".set push \n\t"
		740	".set noreorder \n\t"
		741	"addiu %[loop_end], %[src], 4096 \n\t"
		742	"addiu %[loop_end1], %[src1], 4096 \n\t"
		743	"addiu %[loop_end2], %[src2], 4096 \n\t"
		744	"1: \n\t"
		745	"lw %[temp0], 0(%[src]) \n\t"
		746	"lw %[temp1], 4(%[src]) \n\t"
		747	"lw %[temp2], 8(%[src]) \n\t"
		748	"lw %[temp3], 12(%[src]) \n\t"
		749	"lw %[temp4], 16(%[src]) \n\t"
		750	"lw %[temp5], 20(%[src]) \n\t"
		751	"lw %[temp6], 24(%[src]) \n\t"
		752	"lw %[temp7], 28(%[src]) \n\t"
		753	"addiu %[src], %[src], 32 \n\t"
		754	"sw %[temp0], 0(%[dst]) \n\t"
		755	"sw %[temp1], 4(%[dst]) \n\t"
		756	"sw %[temp2], 8(%[dst]) \n\t"
		757	"sw %[temp3], 12(%[dst]) \n\t"
		758	"sw %[temp4], 16(%[dst]) \n\t"
		759	"sw %[temp5], 20(%[dst]) \n\t"
		760	"sw %[temp6], 24(%[dst]) \n\t"
		761	"sw %[temp7], 28(%[dst]) \n\t"
		762	"bne %[src], %[loop_end], 1b \n\t"
		763	" addiu %[dst], %[dst], 32 \n\t"
		764	"2: \n\t"
		765	"lw %[temp0], 0(%[src1]) \n\t"
		766	"lw %[temp1], 4(%[src1]) \n\t"
		767	"lw %[temp2], 8(%[src1]) \n\t"
		768	"lw %[temp3], 12(%[src1]) \n\t"
		769	"lw %[temp4], 16(%[src1]) \n\t"
		770	"lw %[temp5], 20(%[src1]) \n\t"
		771	"lw %[temp6], 24(%[src1]) \n\t"
		772	"lw %[temp7], 28(%[src1]) \n\t"
		773	"addiu %[src1], %[src1], 32 \n\t"
		774	"sw %[temp0], 0(%[dst1]) \n\t"
		775	"sw %[temp1], 4(%[dst1]) \n\t"
		776	"sw %[temp2], 8(%[dst1]) \n\t"
		777	"sw %[temp3], 12(%[dst1]) \n\t"
		778	"sw %[temp4], 16(%[dst1]) \n\t"
		779	"sw %[temp5], 20(%[dst1]) \n\t"
		780	"sw %[temp6], 24(%[dst1]) \n\t"
		781	"sw %[temp7], 28(%[dst1]) \n\t"
		782	"bne %[src1], %[loop_end1], 2b \n\t"
		783	" addiu %[dst1], %[dst1], 32 \n\t"
		784	"3: \n\t"
		785	"lw %[temp0], 0(%[src2]) \n\t"
		786	"lw %[temp1], 4(%[src2]) \n\t"
		787	"lw %[temp2], 8(%[src2]) \n\t"
		788	"lw %[temp3], 12(%[src2]) \n\t"
		789	"lw %[temp4], 16(%[src2]) \n\t"
		790	"lw %[temp5], 20(%[src2]) \n\t"
		791	"lw %[temp6], 24(%[src2]) \n\t"
		792	"lw %[temp7], 28(%[src2]) \n\t"
		793	"addiu %[src2], %[src2], 32 \n\t"
		794	"sw %[temp0], 0(%[dst2]) \n\t"
		795	"sw %[temp1], 4(%[dst2]) \n\t"
		796	"sw %[temp2], 8(%[dst2]) \n\t"
		797	"sw %[temp3], 12(%[dst2]) \n\t"
		798	"sw %[temp4], 16(%[dst2]) \n\t"
		799	"sw %[temp5], 20(%[dst2]) \n\t"
		800	"sw %[temp6], 24(%[dst2]) \n\t"
		801	"sw %[temp7], 28(%[dst2]) \n\t"
		802	"bne %[src2], %[loop_end2], 3b \n\t"
		803	" addiu %[dst2], %[dst2], 32 \n\t"
		804	".set pop \n\t"
		805
		806	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
		807	[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
		808	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
		809	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
		810	[loop_end]"=&r"(loop_end), [loop_end1]"=&r"(loop_end1),
		811	[loop_end2]"=&r"(loop_end2), [src]"+r"(buf1),
		812	[dst]"+r"(buf2), [src1]"+r"(buf3), [dst1]"+r"(buf4),
		813	[src2]"+r"(buf5), [dst2]"+r"(buf6)
		814	:
		815	: "memory"
		816	);
		817	}
		818	}
		819	#endif /* HAVE_MIPSFPU */
		820	#endif /* HAVE_INLINE_ASM */
		821
		822	void ff_aacdec_init_mips(AACContext *c)
		823	{
		824	#if HAVE_INLINE_ASM
		825	c->imdct_and_windowing = imdct_and_windowing_mips;
		826	c->apply_ltp = apply_ltp_mips;
		827	#if HAVE_MIPSFPU
		828	c->update_ltp = update_ltp_mips;
		829	#endif /* HAVE_MIPSFPU */
		830	#endif /* HAVE_INLINE_ASM */
		831	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavcodec/mips/aacdec_mips.c – Rev 4826