WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavutil/arm/float_dsp_vfp.S

Rev	Author	Line No.	Line
6148	serge	1	/*
		2	* Copyright (c) 2008 Siarhei Siamashka
		3	*
		4	* This file is part of FFmpeg
		5	*
		6	* FFmpeg is free software; you can redistribute it and/or
		7	* modify it under the terms of the GNU Lesser General Public
		8	* License as published by the Free Software Foundation; either
		9	* version 2.1 of the License, or (at your option) any later version.
		10	*
		11	* FFmpeg is distributed in the hope that it will be useful,
		12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		14	* Lesser General Public License for more details.
		15	*
		16	* You should have received a copy of the GNU Lesser General Public
		17	* License along with FFmpeg; if not, write to the Free Software
		18	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		19	*/
		20
		21	#include "config.h"
		22	#include "asm.S"
		23
		24	/**
		25	* Assume that len is a positive number and is a multiple of 8
		26	*/
		27	@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
		28	function ff_vector_fmul_vfp, export=1 /* dst[i] = src0[i] * src1[i]; r0 = dst, r1 = src0, r2 = src1, r3 = len */
		29	vpush {d8-d15} /* preserve d8-d15 (s16-s31); restored by the vpop before return */
		30	fmrx r12, fpscr /* r12 = current FPSCR */
		31	orr r12, r12, #(3 << 16) /* set vector size to 4 */
		32	fmxr fpscr, r12 /* from here on each vmul.f32 below works on 4 consecutive registers */
		33
		34	vldmia r1!, {s0-s3} /* prologue: s0-s3 = src0[0..3] */
		35	vldmia r2!, {s8-s11} /* s8-s11 = src1[0..3] */
		36	vldmia r1!, {s4-s7} /* s4-s7 = src0[4..7] */
		37	vldmia r2!, {s12-s15} /* s12-s15 = src1[4..7] */
		38	vmul.f32 s8, s0, s8 /* s8-s11 = s0-s3 * s8-s11 */
		39	1: /* loop entry: s8-s11 hold products, s4-s7 and s12-s15 hold inputs not yet multiplied */
		40	subs r3, r3, #16 /* one pass covers up to 16 elements; every ge/gt below tests this result */
		41	vmul.f32 s12, s4, s12 /* s12-s15 = s4-s7 * s12-s15, completing the first group of 8 */
		42	itttt ge /* ge: a second group of 8 elements exists */
		43	vldmiage r1!, {s16-s19} /* second group inputs go to s16-s31 so s8-s15 stay intact until stored */
		44	vldmiage r2!, {s24-s27}
		45	vldmiage r1!, {s20-s23}
		46	vldmiage r2!, {s28-s31}
		47	it ge
		48	vmulge.f32 s24, s16, s24 /* s24-s27 = s16-s19 * s24-s27 */
		49	vstmia r0!, {s8-s11} /* store the first group of 8 products (always present) */
		50	vstmia r0!, {s12-s15}
		51	it ge
		52	vmulge.f32 s28, s20, s28 /* s28-s31 = s20-s23 * s28-s31 */
		53	itttt gt /* gt: elements remain after this pass, so preload the next first group */
		54	vldmiagt r1!, {s0-s3}
		55	vldmiagt r2!, {s8-s11} /* s8-s15 were stored above and are free to reload */
		56	vldmiagt r1!, {s4-s7}
		57	vldmiagt r2!, {s12-s15}
		58	ittt ge
		59	vmulge.f32 s8, s0, s8 /* first multiply of the next pass; when r3 == 0 it runs on stale registers and its result is never stored */
		60	vstmiage r0!, {s24-s27} /* store the second group of 8 products */
		61	vstmiage r0!, {s28-s31}
		62	bgt 1b /* repeat while r3 > 0; the flags are still those set by the subs above */
		63
		64	bic r12, r12, #(7 << 16) /* set vector size back to 1 */
		65	fmxr fpscr, r12 /* write FPSCR back with the vector size field cleared */
		66	vpop {d8-d15} /* restore the registers saved on entry */
		67	bx lr /* return to caller */
		68	endfunc
		69
		70	/**
		71	* ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
		72	* Assume that len is a positive number and is a multiple of 8
		73	*/
		74	@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
		75	@ const float *src1, int len)
		76	function ff_vector_fmul_reverse_vfp, export=1 /* dst[i] = src0[i] * src1[len - 1 - i]; r0 = dst, r1 = src0, r2 = src1, r3 = len */
		77	vpush {d8-d15} /* preserve d8-d15 (s16-s31); restored by the vpop before return */
		78	add r2, r2, r3, lsl #2 /* r2 = src1 + len * 4 bytes: one past the last float, src1 is read downwards from here */
		79	vldmdb r2!, {s0-s3} /* s0-s3 = last 4 floats of src1 (s3 is the very last one) */
		80	vldmia r1!, {s8-s11} /* s8-s11 = src0[0..3] */
		81	vldmdb r2!, {s4-s7} /* s4-s7 = the 4 floats of src1 preceding those */
		82	vldmia r1!, {s12-s15} /* s12-s15 = src0[4..7] */
		83	vmul.f32 s8, s3, s8 /* element 0: src0[0] * src1[len-1]. NOTE(review): FPSCR is not modified in this function, so one product per vmul assumes vector size 1 on entry - confirm */
		84	vmul.f32 s9, s2, s9 /* src1 registers are paired in descending order to reverse it */
		85	vmul.f32 s10, s1, s10
		86	vmul.f32 s11, s0, s11
		87	1: /* loop entry: s8-s11 hold products, s4-s7 and s12-s15 hold inputs not yet multiplied */
		88	subs r3, r3, #16 /* one pass covers up to 16 elements; every ge/gt below tests this result */
		89	it ge /* ge: a second group of 8 elements exists; its inputs go to s16-s31 */
		90	vldmdbge r2!, {s16-s19}
		91	vmul.f32 s12, s7, s12 /* products 4..7 of the first group are interleaved with the second group loads */
		92	it ge
		93	vldmiage r1!, {s24-s27}
		94	vmul.f32 s13, s6, s13
		95	it ge
		96	vldmdbge r2!, {s20-s23}
		97	vmul.f32 s14, s5, s14
		98	it ge
		99	vldmiage r1!, {s28-s31}
		100	vmul.f32 s15, s4, s15
		101	it ge
		102	vmulge.f32 s24, s19, s24 /* second group products start here, again pairing src1 registers in descending order */
		103	it gt /* gt: elements remain after this pass, so start preloading the next first group */
		104	vldmdbgt r2!, {s0-s3} /* s0-s3 were last read by the multiplies before the loop label / at the end of the previous pass */
		105	it ge
		106	vmulge.f32 s25, s18, s25
		107	vstmia r0!, {s8-s13} /* store the first 6 products of the first group */
		108	it ge
		109	vmulge.f32 s26, s17, s26
		110	it gt
		111	vldmiagt r1!, {s8-s11} /* s8-s11 were stored just above and are free to reload */
		112	itt ge
		113	vmulge.f32 s27, s16, s27
		114	vmulge.f32 s28, s23, s28
		115	it gt
		116	vldmdbgt r2!, {s4-s7} /* s4-s7 were consumed by the s12-s15 multiplies above */
		117	it ge
		118	vmulge.f32 s29, s22, s29
		119	vstmia r0!, {s14-s15} /* store the remaining 2 products of the first group */
		120	ittt ge
		121	vmulge.f32 s30, s21, s30
		122	vmulge.f32 s31, s20, s31
		123	vmulge.f32 s8, s3, s8 /* first product of the next pass; when r3 == 0 the operands are stale and s8-s11 are never stored again */
		124	it gt
		125	vldmiagt r1!, {s12-s15} /* s12-s15 have all been stored by now */
		126	itttt ge
		127	vmulge.f32 s9, s2, s9
		128	vmulge.f32 s10, s1, s10
		129	vstmiage r0!, {s24-s27} /* store the first 4 products of the second group */
		130	vmulge.f32 s11, s0, s11
		131	it ge
		132	vstmiage r0!, {s28-s31} /* store the last 4 products of the second group */
		133	bgt 1b /* repeat while r3 > 0; the flags are still those set by the subs above */
		134
		135	vpop {d8-d15} /* restore the registers saved on entry */
		136	bx lr /* return to caller */
		137	endfunc

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavutil/arm/float_dsp_vfp.S – Rev 6148