/* NOTE(review): this file was exported through a repository web viewer
 * (rev 6147); the viewer's table markup has been stripped below. */
/*
 * Copyright (c) 2002 Brian Foley
 * Copyright (c) 2002 Dieter Shirley
 * Copyright (c) 2003-2004 Romain Dolbeau
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 | |||
#include "config.h"

#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"

#include "libavcodec/avcodec.h"
#include "libavcodec/pixblockdsp.h"

36 | #if HAVE_ALTIVEC |
||
37 | |||
38 | #if HAVE_VSX |
||
39 | static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, |
||
40 | ptrdiff_t line_size) |
||
41 | { |
||
42 | int i; |
||
43 | vector unsigned char perm = |
||
44 | (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\ |
||
45 | 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17}; |
||
46 | const vector unsigned char zero = |
||
47 | (const vector unsigned char) vec_splat_u8(0); |
||
48 | |||
49 | for (i = 0; i < 8; i++) { |
||
50 | /* Read potentially unaligned pixels. |
||
51 | * We're reading 16 pixels, and actually only want 8, |
||
52 | * but we simply ignore the extras. */ |
||
53 | vector unsigned char bytes = vec_vsx_ld(0, pixels); |
||
54 | |||
55 | // Convert the bytes into shorts. |
||
56 | //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm); |
||
57 | vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm); |
||
58 | |||
59 | // Save the data to the block, we assume the block is 16-byte aligned. |
||
60 | vec_vsx_st(shorts, i * 16, (vector signed short *) block); |
||
61 | |||
62 | pixels += line_size; |
||
63 | } |
||
64 | } |
||
65 | #else |
||
66 | static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, |
||
67 | ptrdiff_t line_size) |
||
68 | { |
||
69 | int i; |
||
70 | vec_u8 perm = vec_lvsl(0, pixels); |
||
71 | const vec_u8 zero = (const vec_u8)vec_splat_u8(0); |
||
72 | |||
73 | for (i = 0; i < 8; i++) { |
||
74 | /* Read potentially unaligned pixels. |
||
75 | * We're reading 16 pixels, and actually only want 8, |
||
76 | * but we simply ignore the extras. */ |
||
77 | vec_u8 pixl = vec_ld(0, pixels); |
||
78 | vec_u8 pixr = vec_ld(7, pixels); |
||
79 | vec_u8 bytes = vec_perm(pixl, pixr, perm); |
||
80 | |||
81 | // Convert the bytes into shorts. |
||
82 | vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes); |
||
83 | |||
84 | // Save the data to the block, we assume the block is 16-byte aligned. |
||
85 | vec_st(shorts, i * 16, (vec_s16 *)block); |
||
86 | |||
87 | pixels += line_size; |
||
88 | } |
||
89 | } |
||
90 | |||
91 | #endif /* HAVE_VSX */ |
||
92 | |||
93 | #if HAVE_VSX |
||
94 | static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, |
||
95 | const uint8_t *s2, int stride) |
||
96 | { |
||
97 | int i; |
||
98 | const vector unsigned char zero = |
||
99 | (const vector unsigned char) vec_splat_u8(0); |
||
100 | vector signed short shorts1, shorts2; |
||
101 | |||
102 | for (i = 0; i < 4; i++) { |
||
103 | /* Read potentially unaligned pixels. |
||
104 | * We're reading 16 pixels, and actually only want 8, |
||
105 | * but we simply ignore the extras. */ |
||
106 | vector unsigned char bytes = vec_vsx_ld(0, s1); |
||
107 | |||
108 | // Convert the bytes into shorts. |
||
109 | shorts1 = (vector signed short) vec_mergeh(bytes, zero); |
||
110 | |||
111 | // Do the same for the second block of pixels. |
||
112 | bytes =vec_vsx_ld(0, s2); |
||
113 | |||
114 | // Convert the bytes into shorts. |
||
115 | shorts2 = (vector signed short) vec_mergeh(bytes, zero); |
||
116 | |||
117 | // Do the subtraction. |
||
118 | shorts1 = vec_sub(shorts1, shorts2); |
||
119 | |||
120 | // Save the data to the block, we assume the block is 16-byte aligned. |
||
121 | vec_vsx_st(shorts1, 0, (vector signed short *) block); |
||
122 | |||
123 | s1 += stride; |
||
124 | s2 += stride; |
||
125 | block += 8; |
||
126 | |||
127 | /* The code below is a copy of the code above... |
||
128 | * This is a manual unroll. */ |
||
129 | |||
130 | /* Read potentially unaligned pixels. |
||
131 | * We're reading 16 pixels, and actually only want 8, |
||
132 | * but we simply ignore the extras. */ |
||
133 | bytes = vec_vsx_ld(0, s1); |
||
134 | |||
135 | // Convert the bytes into shorts. |
||
136 | shorts1 = (vector signed short) vec_mergeh(bytes, zero); |
||
137 | |||
138 | // Do the same for the second block of pixels. |
||
139 | bytes = vec_vsx_ld(0, s2); |
||
140 | |||
141 | // Convert the bytes into shorts. |
||
142 | shorts2 = (vector signed short) vec_mergeh(bytes, zero); |
||
143 | |||
144 | // Do the subtraction. |
||
145 | shorts1 = vec_sub(shorts1, shorts2); |
||
146 | |||
147 | // Save the data to the block, we assume the block is 16-byte aligned. |
||
148 | vec_vsx_st(shorts1, 0, (vector signed short *) block); |
||
149 | |||
150 | s1 += stride; |
||
151 | s2 += stride; |
||
152 | block += 8; |
||
153 | } |
||
154 | } |
||
155 | #else |
||
156 | static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, |
||
157 | const uint8_t *s2, int stride) |
||
158 | { |
||
159 | int i; |
||
160 | vec_u8 perm1 = vec_lvsl(0, s1); |
||
161 | vec_u8 perm2 = vec_lvsl(0, s2); |
||
162 | const vec_u8 zero = (const vec_u8)vec_splat_u8(0); |
||
163 | vec_s16 shorts1, shorts2; |
||
164 | |||
165 | for (i = 0; i < 4; i++) { |
||
166 | /* Read potentially unaligned pixels. |
||
167 | * We're reading 16 pixels, and actually only want 8, |
||
168 | * but we simply ignore the extras. */ |
||
169 | vec_u8 pixl = vec_ld(0, s1); |
||
170 | vec_u8 pixr = vec_ld(15, s1); |
||
171 | vec_u8 bytes = vec_perm(pixl, pixr, perm1); |
||
172 | |||
173 | // Convert the bytes into shorts. |
||
174 | shorts1 = (vec_s16)vec_mergeh(zero, bytes); |
||
175 | |||
176 | // Do the same for the second block of pixels. |
||
177 | pixl = vec_ld(0, s2); |
||
178 | pixr = vec_ld(15, s2); |
||
179 | bytes = vec_perm(pixl, pixr, perm2); |
||
180 | |||
181 | // Convert the bytes into shorts. |
||
182 | shorts2 = (vec_s16)vec_mergeh(zero, bytes); |
||
183 | |||
184 | // Do the subtraction. |
||
185 | shorts1 = vec_sub(shorts1, shorts2); |
||
186 | |||
187 | // Save the data to the block, we assume the block is 16-byte aligned. |
||
188 | vec_st(shorts1, 0, (vec_s16 *)block); |
||
189 | |||
190 | s1 += stride; |
||
191 | s2 += stride; |
||
192 | block += 8; |
||
193 | |||
194 | /* The code below is a copy of the code above... |
||
195 | * This is a manual unroll. */ |
||
196 | |||
197 | /* Read potentially unaligned pixels. |
||
198 | * We're reading 16 pixels, and actually only want 8, |
||
199 | * but we simply ignore the extras. */ |
||
200 | pixl = vec_ld(0, s1); |
||
201 | pixr = vec_ld(15, s1); |
||
202 | bytes = vec_perm(pixl, pixr, perm1); |
||
203 | |||
204 | // Convert the bytes into shorts. |
||
205 | shorts1 = (vec_s16)vec_mergeh(zero, bytes); |
||
206 | |||
207 | // Do the same for the second block of pixels. |
||
208 | pixl = vec_ld(0, s2); |
||
209 | pixr = vec_ld(15, s2); |
||
210 | bytes = vec_perm(pixl, pixr, perm2); |
||
211 | |||
212 | // Convert the bytes into shorts. |
||
213 | shorts2 = (vec_s16)vec_mergeh(zero, bytes); |
||
214 | |||
215 | // Do the subtraction. |
||
216 | shorts1 = vec_sub(shorts1, shorts2); |
||
217 | |||
218 | // Save the data to the block, we assume the block is 16-byte aligned. |
||
219 | vec_st(shorts1, 0, (vec_s16 *)block); |
||
220 | |||
221 | s1 += stride; |
||
222 | s2 += stride; |
||
223 | block += 8; |
||
224 | } |
||
225 | } |
||
226 | |||
227 | #endif /* HAVE_VSX */ |
||
228 | |||
229 | #endif /* HAVE_ALTIVEC */ |
||
230 | |||
231 | #if HAVE_VSX |
||
232 | static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels, |
||
233 | ptrdiff_t line_size) |
||
234 | { |
||
235 | int i; |
||
236 | for (i = 0; i < 8; i++) { |
||
237 | vec_s16 shorts = vsx_ld_u8_s16(0, pixels); |
||
238 | |||
239 | vec_vsx_st(shorts, i * 16, block); |
||
240 | |||
241 | pixels += line_size; |
||
242 | } |
||
243 | } |
||
244 | |||
245 | static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1, |
||
246 | const uint8_t *s2, int stride) |
||
247 | { |
||
248 | int i; |
||
249 | vec_s16 shorts1, shorts2; |
||
250 | for (i = 0; i < 8; i++) { |
||
251 | shorts1 = vsx_ld_u8_s16(0, s1); |
||
252 | shorts2 = vsx_ld_u8_s16(0, s2); |
||
253 | |||
254 | shorts1 = vec_sub(shorts1, shorts2); |
||
255 | |||
256 | vec_vsx_st(shorts1, 0, block); |
||
257 | |||
258 | s1 += stride; |
||
259 | s2 += stride; |
||
260 | block += 8; |
||
261 | } |
||
262 | } |
||
263 | #endif /* HAVE_VSX */ |
||
264 | |||
265 | av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, |
||
266 | AVCodecContext *avctx, |
||
267 | unsigned high_bit_depth) |
||
268 | { |
||
269 | #if HAVE_ALTIVEC |
||
270 | if (!PPC_ALTIVEC(av_get_cpu_flags())) |
||
271 | return; |
||
272 | |||
273 | c->diff_pixels = diff_pixels_altivec; |
||
274 | |||
275 | if (!high_bit_depth) { |
||
276 | c->get_pixels = get_pixels_altivec; |
||
277 | } |
||
278 | #endif /* HAVE_ALTIVEC */ |
||
279 | |||
280 | #if HAVE_VSX |
||
281 | if (!PPC_VSX(av_get_cpu_flags())) |
||
282 | return; |
||
283 | |||
284 | c->diff_pixels = diff_pixels_vsx; |
||
285 | |||
286 | if (!high_bit_depth) |
||
287 | c->get_pixels = get_pixels_vsx; |
||
288 | #endif /* HAVE_VSX */ |
||
289 | }>>>>>> |