WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/mesa-9.2.5/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

Rev	Author	Line No.	Line
5563	serge	1	/**************************************************************************
		2	*
		3	* Copyright 2009 VMware, Inc.
		4	* All Rights Reserved.
		5	*
		6	* Permission is hereby granted, free of charge, to any person obtaining a
		7	* copy of this software and associated documentation files (the
		8	* "Software"), to deal in the Software without restriction, including
		9	* without limitation the rights to use, copy, modify, merge, publish,
		10	* distribute, sub license, and/or sell copies of the Software, and to
		11	* permit persons to whom the Software is furnished to do so, subject to
		12	* the following conditions:
		13	*
		14	* The above copyright notice and this permission notice (including the
		15	* next paragraph) shall be included in all copies or substantial portions
		16	* of the Software.
		17	*
		18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
		19	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		20	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
		21	* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
		22	* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
		23	* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
		24	* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		25	*
		26	**************************************************************************/
		27
		28
		29	#include "pipe/p_defines.h"
		30
		31	#include "util/u_format.h"
		32	#include "util/u_memory.h"
		33	#include "util/u_string.h"
		34
		35	#include "lp_bld_type.h"
		36	#include "lp_bld_const.h"
		37	#include "lp_bld_conv.h"
		38	#include "lp_bld_swizzle.h"
		39	#include "lp_bld_gather.h"
		40	#include "lp_bld_debug.h"
		41	#include "lp_bld_format.h"
		42
		43
		44	void
		45	lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
		46	struct lp_build_context *bld,
		47	const LLVMValueRef *unswizzled,
		48	LLVMValueRef swizzled_out[4])
		49	{
		50	assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
		51	assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
		52
		53	if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
		54	enum util_format_swizzle swizzle;
		55	LLVMValueRef depth_or_stencil;
		56
		57	if (util_format_has_stencil(format_desc) &&
		58	!util_format_has_depth(format_desc)) {
		59	assert(!bld->type.floating);
		60	swizzle = format_desc->swizzle[1];
		61	}
		62	else {
		63	assert(bld->type.floating);
		64	swizzle = format_desc->swizzle[0];
		65	}
		66	/*
		67	* Return zzz1 or sss1 for depth-stencil formats here.
		68	* Correct swizzling will be handled by apply_sampler_swizzle() later.
		69	*/
		70	depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
		71
		72	swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
		73	swizzled_out[3] = bld->one;
		74	}
		75	else {
		76	unsigned chan;
		77	for (chan = 0; chan < 4; ++chan) {
		78	enum util_format_swizzle swizzle = format_desc->swizzle[chan];
		79	swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
		80	}
		81	}
		82	}
		83
		84
		85	/**
		86	* Unpack several pixels in SoA.
		87	*
		88	* It takes a vector of packed pixels:
		89	*
		90	* packed = {P0, P1, P2, P3, ..., Pn}
		91	*
		92	* And will produce four vectors:
		93	*
		94	* red = {R0, R1, R2, R3, ..., Rn}
		95	* green = {G0, G1, G2, G3, ..., Gn}
		96	* blue = {B0, B1, B2, B3, ..., Bn}
		97	* alpha = {A0, A1, A2, A3, ..., An}
		98	*
		99	* It requires that a packed pixel fits into an element of the output
		100	* channels. The common case is when converting pixel with a depth of 32 bit or
		101	* less into floats.
		102	*
		103	* \param format_desc the format of the 'packed' incoming pixel vector
		104	* \param type the desired type for rgba_out (type.length = n, above)
		105	* \param packed the incoming vector of packed pixels
		106	* \param rgba_out returns the SoA R,G,B,A vectors
		107	*/
		108	void
		109	lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
		110	const struct util_format_description *format_desc,
		111	struct lp_type type,
		112	LLVMValueRef packed,
		113	LLVMValueRef rgba_out[4])
		114	{
		115	LLVMBuilderRef builder = gallivm->builder;
		116	struct lp_build_context bld;
		117	LLVMValueRef inputs[4];
		118	unsigned chan;
		119
		120	assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
		121	assert(format_desc->block.width == 1);
		122	assert(format_desc->block.height == 1);
		123	assert(format_desc->block.bits <= type.width);
		124	/* FIXME: Support more output types */
		125	assert(type.width == 32);
		126
		127	lp_build_context_init(&bld, gallivm, type);
		128
		129	/* Decode the input vector components */
		130	for (chan = 0; chan < format_desc->nr_channels; ++chan) {
		131	const unsigned width = format_desc->channel[chan].size;
		132	const unsigned start = format_desc->channel[chan].shift;
		133	const unsigned stop = start + width;
		134	LLVMValueRef input;
		135
		136	input = packed;
		137
		138	switch(format_desc->channel[chan].type) {
		139	case UTIL_FORMAT_TYPE_VOID:
		140	input = lp_build_undef(gallivm, type);
		141	break;
		142
		143	case UTIL_FORMAT_TYPE_UNSIGNED:
		144	/*
		145	* Align the LSB
		146	*/
		147
		148	if (start) {
		149	input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
		150	}
		151
		152	/*
		153	* Zero the MSBs
		154	*/
		155
		156	if (stop < format_desc->block.bits) {
		157	unsigned mask = ((unsigned long long)1 << width) - 1;
		158	input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
		159	}
		160
		161	/*
		162	* Type conversion
		163	*/
		164
		165	if (type.floating) {
		166	if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
		167	assert(width == 8);
		168	if (format_desc->swizzle[3] == chan) {
		169	input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
		170	}
		171	else {
		172	struct lp_type conv_type = lp_uint_type(type);
		173	input = lp_build_srgb_to_linear(gallivm, conv_type, input);
		174	}
		175	}
		176	else {
		177	if(format_desc->channel[chan].normalized)
		178	input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
		179	else
		180	input = LLVMBuildSIToFP(builder, input,
		181	lp_build_vec_type(gallivm, type), "");
		182	}
		183	}
		184	else if (format_desc->channel[chan].pure_integer) {
		185	/* Nothing to do */
		186	} else {
		187	/* FIXME */
		188	assert(0);
		189	}
		190
		191	break;
		192
		193	case UTIL_FORMAT_TYPE_SIGNED:
		194	/*
		195	* Align the sign bit first.
		196	*/
		197
		198	if (stop < type.width) {
		199	unsigned bits = type.width - stop;
		200	LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
		201	input = LLVMBuildShl(builder, input, bits_val, "");
		202	}
		203
		204	/*
		205	* Align the LSB (with an arithmetic shift to preserve the sign)
		206	*/
		207
		208	if (format_desc->channel[chan].size < type.width) {
		209	unsigned bits = type.width - format_desc->channel[chan].size;
		210	LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
		211	input = LLVMBuildAShr(builder, input, bits_val, "");
		212	}
		213
		214	/*
		215	* Type conversion
		216	*/
		217
		218	if (type.floating) {
		219	input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
		220	if (format_desc->channel[chan].normalized) {
		221	double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
		222	LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
		223	input = LLVMBuildFMul(builder, input, scale_val, "");
		224	}
		225	}
		226	else if (format_desc->channel[chan].pure_integer) {
		227	/* Nothing to do */
		228	} else {
		229	/* FIXME */
		230	assert(0);
		231	}
		232
		233	break;
		234
		235	case UTIL_FORMAT_TYPE_FLOAT:
		236	if (type.floating) {
		237	assert(start == 0);
		238	assert(stop == 32);
		239	assert(type.width == 32);
		240	input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
		241	}
		242	else {
		243	/* FIXME */
		244	assert(0);
		245	input = lp_build_undef(gallivm, type);
		246	}
		247	break;
		248
		249	case UTIL_FORMAT_TYPE_FIXED:
		250	if (type.floating) {
		251	double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
		252	LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
		253	input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
		254	input = LLVMBuildFMul(builder, input, scale_val, "");
		255	}
		256	else {
		257	/* FIXME */
		258	assert(0);
		259	input = lp_build_undef(gallivm, type);
		260	}
		261	break;
		262
		263	default:
		264	assert(0);
		265	input = lp_build_undef(gallivm, type);
		266	break;
		267	}
		268
		269	inputs[chan] = input;
		270	}
		271
		272	lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
		273	}
		274
		275
		276	/**
		277	* Convert a vector of rgba8 values into 32bit wide SoA vectors.
		278	*
		279	* \param dst_type The desired return type. For pure integer formats
		280	* this should be a 32bit wide int or uint vector type,
		281	* otherwise a float vector type.
		282	*
		283	* \param packed The rgba8 values to pack.
		284	*
		285	* \param rgba The 4 SoA return vectors.
		286	*/
		287	void
		288	lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
		289	struct lp_type dst_type,
		290	LLVMValueRef packed,
		291	LLVMValueRef *rgba)
		292	{
		293	LLVMBuilderRef builder = gallivm->builder;
		294	LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
		295	unsigned chan;
		296
		297	/* XXX technically shouldn't use that for uint dst_type */
		298	packed = LLVMBuildBitCast(builder, packed,
		299	lp_build_int_vec_type(gallivm, dst_type), "");
		300
		301	/* Decode the input vector components */
		302	for (chan = 0; chan < 4; ++chan) {
		303	#ifdef PIPE_ARCH_LITTLE_ENDIAN
		304	unsigned start = chan*8;
		305	#else
		306	unsigned start = (3-chan)*8;
		307	#endif
		308	unsigned stop = start + 8;
		309	LLVMValueRef input;
		310
		311	input = packed;
		312
		313	if (start)
		314	input = LLVMBuildLShr(builder, input,
		315	lp_build_const_int_vec(gallivm, dst_type, start), "");
		316
		317	if (stop < 32)
		318	input = LLVMBuildAnd(builder, input, mask, "");
		319
		320	if (dst_type.floating)
		321	input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
		322
		323	rgba[chan] = input;
		324	}
		325	}
		326
		327
		328
		329	/**
		330	* Fetch a texels from a texture, returning them in SoA layout.
		331	*
		332	* \param type the desired return type for 'rgba'. The vector length
		333	* is the number of texels to fetch
		334	*
		335	* \param base_ptr points to the base of the texture mip tree.
		336	* \param offset offset to start of the texture image block. For non-
		337	* compressed formats, this simply is an offset to the texel.
		338	* For compressed formats, it is an offset to the start of the
		339	* compressed data block.
		340	*
		341	* \param i, j the sub-block pixel coordinates. For non-compressed formats
		342	* these will always be (0,0). For compressed formats, i will
		343	* be in [0, block_width-1] and j will be in [0, block_height-1].
		344	*/
		345	void
		346	lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
		347	const struct util_format_description *format_desc,
		348	struct lp_type type,
		349	LLVMValueRef base_ptr,
		350	LLVMValueRef offset,
		351	LLVMValueRef i,
		352	LLVMValueRef j,
		353	LLVMValueRef rgba_out[4])
		354	{
		355	LLVMBuilderRef builder = gallivm->builder;
		356
		357	if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
		358	(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB \|\|
		359	format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB \|\|
		360	format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
		361	format_desc->block.width == 1 &&
		362	format_desc->block.height == 1 &&
		363	format_desc->block.bits <= type.width &&
		364	(format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT \|\|
		365	format_desc->channel[0].size == 32))
		366	{
		367	/*
		368	* The packed pixel fits into an element of the destination format. Put
		369	* the packed pixels into a vector and extract each component for all
		370	* vector elements in parallel.
		371	*/
		372
		373	LLVMValueRef packed;
		374
		375	/*
		376	* gather the texels from the texture
		377	* Ex: packed = {XYZW, XYZW, XYZW, XYZW}
		378	*/
		379	assert(format_desc->block.bits <= type.width);
		380	packed = lp_build_gather(gallivm,
		381	type.length,
		382	format_desc->block.bits,
		383	type.width,
		384	base_ptr, offset, FALSE);
		385
		386	/*
		387	* convert texels to float rgba
		388	*/
		389	lp_build_unpack_rgba_soa(gallivm,
		390	format_desc,
		391	type,
		392	packed, rgba_out);
		393	return;
		394	}
		395
		396	if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT \|\|
		397	format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
		398	/*
		399	* similar conceptually to above but requiring special
		400	* AoS packed -> SoA float conversion code.
		401	*/
		402	LLVMValueRef packed;
		403
		404	assert(type.floating);
		405	assert(type.width == 32);
		406
		407	packed = lp_build_gather(gallivm, type.length,
		408	format_desc->block.bits,
		409	type.width, base_ptr, offset,
		410	FALSE);
		411	if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
		412	lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
		413	}
		414	else {
		415	lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
		416	}
		417	return;
		418	}
		419
		420	if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
		421	format_desc->block.bits == 64) {
		422	/*
		423	* special case the format is 64 bits but we only require
		424	* 32bit (or 8bit) from each block.
		425	*/
		426	LLVMValueRef packed;
		427
		428	if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
		429	/*
		430	* for stencil simply fix up offsets - could in fact change
		431	* base_ptr instead even outside the shader.
		432	*/
		433	unsigned mask = (1 << 8) - 1;
		434	LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
		435	offset = LLVMBuildAdd(builder, offset, s_offset, "");
		436	packed = lp_build_gather(gallivm, type.length,
		437	32, type.width, base_ptr, offset, FALSE);
		438	packed = LLVMBuildAnd(builder, packed,
		439	lp_build_const_int_vec(gallivm, type, mask), "");
		440	}
		441	else {
		442	assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
		443	packed = lp_build_gather(gallivm, type.length,
		444	32, type.width, base_ptr, offset, TRUE);
		445	packed = LLVMBuildBitCast(builder, packed,
		446	lp_build_vec_type(gallivm, type), "");
		447	}
		448	/* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
		449	rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
		450	rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
		451	return;
		452	}
		453
		454	/*
		455	* Try calling lp_build_fetch_rgba_aos for all pixels.
		456	*/
		457
		458	if (util_format_fits_8unorm(format_desc) &&
		459	type.floating && type.width == 32 &&
		460	(type.length == 1 \|\| (type.length % 4 == 0))) {
		461	struct lp_type tmp_type;
		462	LLVMValueRef tmp;
		463
		464	memset(&tmp_type, 0, sizeof tmp_type);
		465	tmp_type.width = 8;
		466	tmp_type.length = type.length * 4;
		467	tmp_type.norm = TRUE;
		468
		469	tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
		470	base_ptr, offset, i, j);
		471
		472	lp_build_rgba8_to_fi32_soa(gallivm,
		473	type,
		474	tmp,
		475	rgba_out);
		476
		477	return;
		478	}
		479
		480	/*
		481	* Fallback to calling lp_build_fetch_rgba_aos for each pixel.
		482	*
		483	* This is not the most efficient way of fetching pixels, as we
		484	* miss some opportunities to do vectorization, but this is
		485	* convenient for formats or scenarios for which there was no
		486	* opportunity or incentive to optimize.
		487	*/
		488
		489	{
		490	unsigned k, chan;
		491	struct lp_type tmp_type;
		492
		493	if (gallivm_debug & GALLIVM_DEBUG_PERF) {
		494	debug_printf("%s: scalar unpacking of %s\n",
		495	__FUNCTION__, format_desc->short_name);
		496	}
		497
		498	tmp_type = type;
		499	tmp_type.length = 4;
		500
		501	for (chan = 0; chan < 4; ++chan) {
		502	rgba_out[chan] = lp_build_undef(gallivm, type);
		503	}
		504
		505	/* loop over number of pixels */
		506	for(k = 0; k < type.length; ++k) {
		507	LLVMValueRef index = lp_build_const_int32(gallivm, k);
		508	LLVMValueRef offset_elem;
		509	LLVMValueRef i_elem, j_elem;
		510	LLVMValueRef tmp;
		511
		512	offset_elem = LLVMBuildExtractElement(builder, offset,
		513	index, "");
		514
		515	i_elem = LLVMBuildExtractElement(builder, i, index, "");
		516	j_elem = LLVMBuildExtractElement(builder, j, index, "");
		517
		518	/* Get a single float[4]={R,G,B,A} pixel */
		519	tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
		520	base_ptr, offset_elem,
		521	i_elem, j_elem);
		522
		523	/*
		524	* Insert the AoS tmp value channels into the SoA result vectors at
		525	* position = 'index'.
		526	*/
		527	for (chan = 0; chan < 4; ++chan) {
		528	LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
		529	tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
		530	rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
		531	tmp_chan, index, "");
		532	}
		533	}
		534	}
		535	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/mesa-9.2.5/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c – Rev 5563