WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavcodec/ppc/int_altivec.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* Copyright (c) 2007 Luca Barbato
		3	*
		4	* This file is part of FFmpeg.
		5	*
		6	* FFmpeg is free software; you can redistribute it and/or
		7	* modify it under the terms of the GNU Lesser General Public
		8	* License as published by the Free Software Foundation; either
		9	* version 2.1 of the License, or (at your option) any later version.
		10	*
		11	* FFmpeg is distributed in the hope that it will be useful,
		12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		14	* Lesser General Public License for more details.
		15	*
		16	* You should have received a copy of the GNU Lesser General Public
		17	* License along with FFmpeg; if not, write to the Free Software
		18	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		19	*/
		20
		21	/**
		22	** @file
		23	** integer misc ops.
		24	**/
		25
#include "config.h"
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif

#include "libavutil/attributes.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavcodec/dsputil.h"

#include "dsputil_altivec.h"
		36
		37	static int ssd_int8_vs_int16_altivec(const int8_t pix1, const int16_t pix2,
		38	int size) {
		39	int i, size16;
		40	vector signed char vpix1;
		41	vector signed short vpix2, vdiff, vpix1l,vpix1h;
		42	union { vector signed int vscore;
		43	int32_t score[4];
		44	} u;
		45	u.vscore = vec_splat_s32(0);
		46	//
		47	//XXX lazy way, fix it later
		48
		49	#define vec_unaligned_load(b) \
		50	vec_perm(vec_ld(0,b),vec_ld(15,b),vec_lvsl(0, b));
		51
		52	size16 = size >> 4;
		53	while(size16) {
		54	// score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
		55	//load pix1 and the first batch of pix2
		56
		57	vpix1 = vec_unaligned_load(pix1);
		58	vpix2 = vec_unaligned_load(pix2);
		59	pix2 += 8;
		60	//unpack
		61	vpix1h = vec_unpackh(vpix1);
		62	vdiff = vec_sub(vpix1h, vpix2);
		63	vpix1l = vec_unpackl(vpix1);
		64	// load another batch from pix2
		65	vpix2 = vec_unaligned_load(pix2);
		66	u.vscore = vec_msum(vdiff, vdiff, u.vscore);
		67	vdiff = vec_sub(vpix1l, vpix2);
		68	u.vscore = vec_msum(vdiff, vdiff, u.vscore);
		69	pix1 += 16;
		70	pix2 += 8;
		71	size16--;
		72	}
		73	u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
		74
		75	size %= 16;
		76	for (i = 0; i < size; i++) {
		77	u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
		78	}
		79	return u.score[3];
		80	}
		81
		82	static int32_t scalarproduct_int16_altivec(const int16_t v1, const int16_t v2,
		83	int order)
		84	{
		85	int i;
		86	LOAD_ZERO;
		87	register vec_s16 vec1;
		88	register vec_s32 res = vec_splat_s32(0), t;
		89	int32_t ires;
		90
		91	for(i = 0; i < order; i += 8){
		92	vec1 = vec_unaligned_load(v1);
		93	t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
		94	res = vec_sums(t, res);
		95	v1 += 8;
		96	v2 += 8;
		97	}
		98	res = vec_splat(res, 3);
		99	vec_ste(res, 0, &ires);
		100	return ires;
		101	}
		102
		103	static int32_t scalarproduct_and_madd_int16_altivec(int16_t v1, const int16_t v2, const int16_t *v3, int order, int mul)
		104	{
		105	LOAD_ZERO;
		106	vec_s16 pv1 = (vec_s16)v1;
		107	register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul};
		108	register vec_s16 t0, t1, i0, i1, i4;
		109	register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3);
		110	register vec_s32 res = zero_s32v;
		111	register vec_u8 align = vec_lvsl(0, v2);
		112	int32_t ires;
		113	order >>= 4;
		114	do {
		115	i1 = vec_ld(16, v2);
		116	t0 = vec_perm(i2, i1, align);
		117	i2 = vec_ld(32, v2);
		118	t1 = vec_perm(i1, i2, align);
		119	i0 = pv1[0];
		120	i1 = pv1[1];
		121	res = vec_msum(t0, i0, res);
		122	res = vec_msum(t1, i1, res);
		123	i4 = vec_ld(16, v3);
		124	t0 = vec_perm(i3, i4, align);
		125	i3 = vec_ld(32, v3);
		126	t1 = vec_perm(i4, i3, align);
		127	pv1[0] = vec_mladd(t0, muls, i0);
		128	pv1[1] = vec_mladd(t1, muls, i1);
		129	pv1 += 2;
		130	v2 += 16;
		131	v3 += 16;
		132	} while(--order);
		133	res = vec_splat(vec_sums(res, zero_s32v), 3);
		134	vec_ste(res, 0, &ires);
		135	return ires;
		136	}
		137
		138	av_cold void ff_int_init_altivec(DSPContext c, AVCodecContext avctx)
		139	{
		140	c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
		141	c->scalarproduct_int16 = scalarproduct_int16_altivec;
		142	c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
		143	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavcodec/ppc/int_altivec.c – Rev 4349