WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/auxiliary/gallivm/lp_bld_format_float.c

Rev	Author	Line No.	Line
5564	serge	1	/**************************************************************************
		2	*
		3	* Copyright 2013 VMware, Inc.
		4	* All Rights Reserved.
		5	*
		6	* Permission is hereby granted, free of charge, to any person obtaining a
		7	* copy of this software and associated documentation files (the
		8	* "Software"), to deal in the Software without restriction, including
		9	* without limitation the rights to use, copy, modify, merge, publish,
		10	* distribute, sub license, and/or sell copies of the Software, and to
		11	* permit persons to whom the Software is furnished to do so, subject to
		12	* the following conditions:
		13	*
		14	* The above copyright notice and this permission notice (including the
		15	* next paragraph) shall be included in all copies or substantial portions
		16	* of the Software.
		17	*
		18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
		19	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		20	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
		21	* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
		22	* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
		23	* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
		24	* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		25	*
		26	**************************************************************************/
		27
		28
		29	/**
		30	* @file
		31	* Format conversion code for "special" float formats.
		32	*
		33	* @author Roland Scheidegger
		34	*/
		35
		36
		37	#include "util/u_debug.h"
		38
		39	#include "lp_bld_type.h"
		40	#include "lp_bld_const.h"
		41	#include "lp_bld_arit.h"
		42	#include "lp_bld_bitarit.h"
		43	#include "lp_bld_logic.h"
		44	#include "lp_bld_format.h"
		45
		46
		47	/**
		48	* Convert float32 to a float-like value with less exponent and mantissa
		49	* bits. The mantissa is still biased, and the mantissa still has an implied 1,
		50	* and there may be a sign bit.
		51	*
		52	* @param src (vector) float value to convert
		53	* @param mantissa_bits the number of mantissa bits
		54	* @param exponent_bits the number of exponent bits
		55	* @param mantissa_start the start position of the small float in result value
		56	* @param has_sign if the small float has a sign bit
		57	*
		58	* This implements round-towards-zero (trunc) hence too large numbers get
		59	* converted to largest representable number, not infinity.
		60	* Small numbers may get converted to denorms, depending on normal
		61	* float denorm handling of the cpu.
		62	* Note that compared to the references, below, we skip any rounding bias
		63	* since we do rounding towards zero - OpenGL allows rounding towards zero
		64	* (though not preferred) and DX10 even seems to require it.
		65	* Note that this will pack mantissa, exponent and sign bit (if any) together,
		66	* and shift the result to mantissa_start.
		67	*
		68	* ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
		69	* ref https://gist.github.com/rygorous/2156668
		70	*/
		71	LLVMValueRef
		72	lp_build_float_to_smallfloat(struct gallivm_state *gallivm,
		73	struct lp_type i32_type,
		74	LLVMValueRef src,
		75	unsigned mantissa_bits,
		76	unsigned exponent_bits,
		77	unsigned mantissa_start,
		78	boolean has_sign)
		79	{
		80	LLVMBuilderRef builder = gallivm->builder;
		81	LLVMValueRef i32_floatexpmask, i32_smallexpmask, magic, normal;
		82	LLVMValueRef rescale_src, i32_roundmask, small_max;
		83	LLVMValueRef i32_qnanbit, shift, res;
		84	LLVMValueRef is_nan_or_inf, nan_or_inf, mask, i32_src;
		85	struct lp_type f32_type = lp_type_float_vec(32, 32 * i32_type.length);
		86	struct lp_build_context f32_bld, i32_bld;
		87	LLVMValueRef zero = lp_build_const_vec(gallivm, f32_type, 0.0f);
		88	unsigned exponent_start = mantissa_start + mantissa_bits;
		89	boolean always_preserve_nans = true;
		90	boolean maybe_correct_denorm_rounding = true;
		91
		92	lp_build_context_init(&f32_bld, gallivm, f32_type);
		93	lp_build_context_init(&i32_bld, gallivm, i32_type);
		94
		95	i32_smallexpmask = lp_build_const_int_vec(gallivm, i32_type,
		96	((1 << exponent_bits) - 1) << 23);
		97	i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
		98
		99	i32_src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
		100
		101	if (has_sign) {
		102	rescale_src = src;
		103	}
		104	else {
		105	/* clamp to pos range (can still have sign bit if NaN or negative zero) */
		106	rescale_src = lp_build_max(&f32_bld, zero, src);
		107	}
		108	rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
		109
		110	/* "ordinary" number */
		111	/*
		112	* get rid of excess mantissa bits and sign bit
		113	* This is only really needed for correct rounding of denorms I think
		114	* but only if we use the preserve NaN path does using
		115	* src_abs instead save us any instruction.
		116	*/
		117	if (maybe_correct_denorm_rounding \|\| !always_preserve_nans) {
		118	i32_roundmask = lp_build_const_int_vec(gallivm, i32_type,
		119	~((1 << (23 - mantissa_bits)) - 1) &
		120	0x7fffffff);
		121	rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
		122	rescale_src = lp_build_and(&i32_bld, rescale_src, i32_roundmask);
		123	rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, "");
		124	}
		125	else {
		126	rescale_src = lp_build_abs(&f32_bld, src);
		127	}
		128
		129	/* bias exponent (and denormalize if necessary) */
		130	magic = lp_build_const_int_vec(gallivm, i32_type,
		131	((1 << (exponent_bits - 1)) - 1) << 23);
		132	magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
		133	normal = lp_build_mul(&f32_bld, rescale_src, magic);
		134
		135	/* clamp to max value - largest non-infinity number */
		136	small_max = lp_build_const_int_vec(gallivm, i32_type,
		137	(((1 << exponent_bits) - 2) << 23) \|
		138	(((1 << mantissa_bits) - 1) << (23 - mantissa_bits)));
		139	small_max = LLVMBuildBitCast(builder, small_max, f32_bld.vec_type, "");
		140	normal = lp_build_min(&f32_bld, normal, small_max);
		141	normal = LLVMBuildBitCast(builder, normal, i32_bld.vec_type, "");
		142
		143	/*
		144	* handle nan/inf cases
		145	* a little bit tricky since -Inf -> 0, +Inf -> +Inf, +-Nan -> +Nan
		146	* (for no sign) else ->Inf -> ->Inf too.
		147	* could use explicit "unordered" comparison checking for NaNs
		148	* which might save us from calculating src_abs too.
		149	* (Cannot actually save the comparison since we need to distinguish
		150	* Inf and NaN cases anyway, but it would be better for AVX.)
		151	*/
		152	if (always_preserve_nans) {
		153	LLVMValueRef infcheck_src, is_inf, is_nan;
		154	LLVMValueRef src_abs = lp_build_abs(&f32_bld, src);
		155	src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, "");
		156
		157	if (has_sign) {
		158	infcheck_src = src_abs;
		159	}
		160	else {
		161	infcheck_src = i32_src;
		162	}
		163	is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER,
		164	src_abs, i32_floatexpmask);
		165	is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL,
		166	infcheck_src, i32_floatexpmask);
		167	is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf);
		168	/* could also set more mantissa bits but need at least the highest mantissa bit */
		169	i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22);
		170	/* combine maxexp with qnanbit */
		171	nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask,
		172	lp_build_and(&i32_bld, is_nan, i32_qnanbit));
		173	}
		174	else {
		175	/*
		176	* A couple simplifications, with mostly 2 drawbacks (so disabled):
		177	* - it will promote some SNaNs (those which only had bits set
		178	* in the mantissa part which got chopped off) to +-Infinity.
		179	* (Those bits get chopped off anyway later so can as well use
		180	* rescale_src instead of src_abs here saving the calculation of that.)
		181	* - for no sign case, it relies on the max() being used for rescale_src
		182	* to give back the NaN (which is NOT ieee754r behavior, but should work
		183	* with sse2 on a full moon (rather if I got the operand order right) -
		184	* we _don't_ have well-defined behavior specified with min/max wrt NaNs,
		185	* however, and if it gets converted to cmp/select it may not work (we
		186	* don't really have specified behavior for cmp wrt NaNs neither).
		187	*/
		188	rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
		189	is_nan_or_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GEQUAL,
		190	rescale_src, i32_floatexpmask);
		191	/* note this will introduce excess exponent bits */
		192	nan_or_inf = rescale_src;
		193	}
		194	res = lp_build_select(&i32_bld, is_nan_or_inf, nan_or_inf, normal);
		195
		196	if (mantissa_start > 0 \|\| !always_preserve_nans) {
		197	/* mask off excess bits */
		198	unsigned maskbits = (1 << (mantissa_bits + exponent_bits)) - 1;
		199	mask = lp_build_const_int_vec(gallivm, i32_type,
		200	maskbits << (23 - mantissa_bits));
		201	res = lp_build_and(&i32_bld, res, mask);
		202	}
		203
		204	/* add back sign bit at right position */
		205	if (has_sign) {
		206	LLVMValueRef sign;
		207	struct lp_type u32_type = lp_type_uint_vec(32, 32 * i32_type.length);
		208	struct lp_build_context u32_bld;
		209	lp_build_context_init(&u32_bld, gallivm, u32_type);
		210
		211	mask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000);
		212	shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits);
		213	sign = lp_build_and(&i32_bld, mask, i32_src);
		214	sign = lp_build_shr(&u32_bld, sign, shift);
		215	res = lp_build_or(&i32_bld, sign, res);
		216	}
		217
		218	/* shift to final position */
		219	if (exponent_start < 23) {
		220	shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start);
		221	res = lp_build_shr(&i32_bld, res, shift);
		222	}
		223	else {
		224	shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23);
		225	res = lp_build_shl(&i32_bld, res, shift);
		226	}
		227	return res;
		228	}
		229
		230
		231	/**
		232	* Convert rgba float SoA values to packed r11g11b10 values.
		233	*
		234	* @param src SoA float (vector) values to convert.
		235	*/
		236	LLVMValueRef
		237	lp_build_float_to_r11g11b10(struct gallivm_state *gallivm,
		238	LLVMValueRef *src)
		239	{
		240	LLVMValueRef dst, rcomp, bcomp, gcomp;
		241	struct lp_build_context i32_bld;
		242	LLVMTypeRef src_type = LLVMTypeOf(*src);
		243	unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
		244	LLVMGetVectorSize(src_type) : 1;
		245	struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
		246
		247	lp_build_context_init(&i32_bld, gallivm, i32_type);
		248
		249	/* "rescale" and put in right position */
		250	rcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[0], 6, 5, 0, false);
		251	gcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[1], 6, 5, 11, false);
		252	bcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[2], 5, 5, 22, false);
		253
		254	/* combine the values */
		255	dst = lp_build_or(&i32_bld, rcomp, gcomp);
		256	return lp_build_or(&i32_bld, dst, bcomp);
		257	}
		258
		259
		260	/**
		261	* Convert a float-like value with less exponent and mantissa
		262	* bits than a normal float32 to a float32. The mantissa of
		263	* the source value is assumed to have an implied 1, and the exponent
		264	* is biased. There may be a sign bit.
		265	* The source value to extract must be in a 32bit int (bits not part of
		266	* the value to convert will be masked off).
		267	* This works for things like 11-bit floats or half-floats,
		268	* mantissa, exponent (and sign if present) must be packed
		269	* the same as they are in a ordinary float.
		270	*
		271	* @param src (vector) value to convert
		272	* @param mantissa_bits the number of mantissa bits
		273	* @param exponent_bits the number of exponent bits
		274	* @param mantissa_start the bit start position of the packed component
		275	* @param has_sign if the small float has a sign bit
		276	*
		277	* ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
		278	* ref https://gist.github.com/rygorous/2156668
		279	*/
		280	LLVMValueRef
		281	lp_build_smallfloat_to_float(struct gallivm_state *gallivm,
		282	struct lp_type f32_type,
		283	LLVMValueRef src,
		284	unsigned mantissa_bits,
		285	unsigned exponent_bits,
		286	unsigned mantissa_start,
		287	boolean has_sign)
		288	{
		289	LLVMBuilderRef builder = gallivm->builder;
		290	LLVMValueRef smallexpmask, i32_floatexpmask, magic;
		291	LLVMValueRef wasinfnan, tmp, res, shift, maskabs, srcabs, sign;
		292	unsigned exponent_start = mantissa_start + mantissa_bits;
		293	struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length);
		294	struct lp_build_context f32_bld, i32_bld;
		295
		296	lp_build_context_init(&f32_bld, gallivm, f32_type);
		297	lp_build_context_init(&i32_bld, gallivm, i32_type);
		298
		299	/* extract the component to "float position" */
		300	if (exponent_start < 23) {
		301	shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start);
		302	src = lp_build_shl(&i32_bld, src, shift);
		303	}
		304	else {
		305	shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23);
		306	src = lp_build_shr(&i32_bld, src, shift);
		307	}
		308	maskabs = lp_build_const_int_vec(gallivm, i32_type,
		309	((1 << (mantissa_bits + exponent_bits)) - 1)
		310	<< (23 - mantissa_bits));
		311	srcabs = lp_build_and(&i32_bld, src, maskabs);
		312
		313	/* now do the actual scaling */
		314	smallexpmask = lp_build_const_int_vec(gallivm, i32_type,
		315	((1 << exponent_bits) - 1) << 23);
		316	i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
		317
		318	if (0) {
		319	/*
		320	* Note that this code path, while simpler, will convert small
		321	* float denorms to floats according to current cpu denorm mode, if
		322	* denorms are disabled it will flush them to zero!
		323	* If cpu denorms are enabled, it should be faster though as long as
		324	* there's no denorms in the inputs, but if there are actually denorms
		325	* it's likely to be an order of magnitude slower (on x86 cpus).
		326	*/
		327
		328	srcabs = LLVMBuildBitCast(builder, srcabs, f32_bld.vec_type, "");
		329
		330	/*
		331	* magic number has exponent new exp bias + (new exp bias - old exp bias),
		332	* mantissa is 0.
		333	*/
		334	magic = lp_build_const_int_vec(gallivm, i32_type,
		335	(255 - (1 << (exponent_bits - 1))) << 23);
		336	magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
		337
		338	/* adjust exponent and fix denorms */
		339	res = lp_build_mul(&f32_bld, srcabs, magic);
		340
		341	/*
		342	* if exp was max (== NaN or Inf) set new exp to max (keep mantissa),
		343	* so a simple "or" will do (because exp adjust will leave mantissa intact)
		344	*/
		345	/* use float compare (better for AVX 8-wide / no AVX2 but else should use int) */
		346	smallexpmask = LLVMBuildBitCast(builder, smallexpmask, f32_bld.vec_type, "");
		347	wasinfnan = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL, srcabs, smallexpmask);
		348	res = LLVMBuildBitCast(builder, res, i32_bld.vec_type, "");
		349	tmp = lp_build_and(&i32_bld, i32_floatexpmask, wasinfnan);
		350	res = lp_build_or(&i32_bld, tmp, res);
		351	}
		352
		353	else {
		354	LLVMValueRef exp_one, isdenorm, denorm, normal, exp_adj;
		355
		356	/* denorm (or zero) if exponent is zero */
		357	exp_one = lp_build_const_int_vec(gallivm, i32_type, 1 << 23);
		358	isdenorm = lp_build_cmp(&i32_bld, PIPE_FUNC_LESS, srcabs, exp_one);
		359
		360	/* inf or nan if exponent is max */
		361	wasinfnan = lp_build_cmp(&i32_bld, PIPE_FUNC_GEQUAL, srcabs, smallexpmask);
		362
		363	/* for denormal (or zero), add (== or) magic exp to mantissa (== srcabs) (as int)
		364	* then subtract it (as float).
		365	* Another option would be to just do inttofp then do a rescale mul.
		366	*/
		367	magic = lp_build_const_int_vec(gallivm, i32_type,
		368	(127 - ((1 << (exponent_bits - 1)) - 2)) << 23);
		369	denorm = lp_build_or(&i32_bld, srcabs, magic);
		370	denorm = LLVMBuildBitCast(builder, denorm, f32_bld.vec_type, "");
		371	denorm = lp_build_sub(&f32_bld, denorm,
		372	LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""));
		373	denorm = LLVMBuildBitCast(builder, denorm, i32_bld.vec_type, "");
		374
		375	/* for normals, Infs, Nans fix up exponent */
		376	exp_adj = lp_build_const_int_vec(gallivm, i32_type,
		377	(127 - ((1 << (exponent_bits - 1)) - 1)) << 23);
		378	normal = lp_build_add(&i32_bld, srcabs, exp_adj);
		379	tmp = lp_build_and(&i32_bld, wasinfnan, i32_floatexpmask);
		380	normal = lp_build_or(&i32_bld, tmp, normal);
		381
		382	res = lp_build_select(&i32_bld, isdenorm, denorm, normal);
		383	}
		384
		385	if (has_sign) {
		386	LLVMValueRef signmask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000);
		387	shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits);
		388	sign = lp_build_shl(&i32_bld, src, shift);
		389	sign = lp_build_and(&i32_bld, signmask, sign);
		390	res = lp_build_or(&i32_bld, res, sign);
		391	}
		392
		393	return LLVMBuildBitCast(builder, res, f32_bld.vec_type, "");
		394	}
		395
		396
		397	/**
		398	* Convert packed float format (r11g11b10) value(s) to rgba float SoA values.
		399	*
		400	* @param src packed AoS r11g11b10 values (as (vector) int32)
		401	* @param dst pointer to the SoA result values
		402	*/
		403	void
		404	lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
		405	LLVMValueRef src,
		406	LLVMValueRef *dst)
		407	{
		408	LLVMTypeRef src_type = LLVMTypeOf(src);
		409	unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
		410	LLVMGetVectorSize(src_type) : 1;
		411	struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
		412
		413	dst[0] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 6, 5, 0, false);
		414	dst[1] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 6, 5, 11, false);
		415	dst[2] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 5, 5, 22, false);
		416
		417	/* Just set alpha to one */
		418	dst[3] = lp_build_one(gallivm, f32_type);
		419	}
		420
		421
		422	static LLVMValueRef
		423	lp_build_rgb9_to_float_helper(struct gallivm_state *gallivm,
		424	struct lp_type f32_type,
		425	LLVMValueRef src,
		426	LLVMValueRef scale,
		427	unsigned mantissa_start)
		428	{
		429	LLVMValueRef shift, mask;
		430
		431	struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length);
		432	struct lp_build_context i32_bld, f32_bld;
		433
		434	lp_build_context_init(&i32_bld, gallivm, i32_type);
		435	lp_build_context_init(&f32_bld, gallivm, f32_type);
		436
		437	/*
		438	* This is much easier as other weirdo float formats, since
		439	* there's no sign, no Inf/NaN, and there's nothing special
		440	* required for normals/denormals neither (as without the implied one
		441	* for the mantissa for other formats, everything looks like a denormal).
		442	* So just do (float)comp_bits * scale
		443	*/
		444	shift = lp_build_const_int_vec(gallivm, i32_type, mantissa_start);
		445	mask = lp_build_const_int_vec(gallivm, i32_type, 0x1ff);
		446	src = lp_build_shr(&i32_bld, src, shift);
		447	src = lp_build_and(&i32_bld, src, mask);
		448	src = lp_build_int_to_float(&f32_bld, src);
		449	return lp_build_mul(&f32_bld, src, scale);
		450	}
		451
		452
		453	/**
		454	* Convert shared exponent format (rgb9e5) value(s) to rgba float SoA values.
		455	*
		456	* @param src packed AoS rgb9e5 values (as (vector) int32)
		457	* @param dst pointer to the SoA result values
		458	*/
		459	void
		460	lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
		461	LLVMValueRef src,
		462	LLVMValueRef *dst)
		463	{
		464	LLVMBuilderRef builder = gallivm->builder;
		465	LLVMTypeRef src_type = LLVMTypeOf(src);
		466	LLVMValueRef shift, scale, bias, exp;
		467	unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
		468	LLVMGetVectorSize(src_type) : 1;
		469	struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
		470	struct lp_type u32_type = lp_type_uint_vec(32, 32 * src_length);
		471	struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
		472	struct lp_build_context i32_bld, u32_bld, f32_bld;
		473
		474	lp_build_context_init(&i32_bld, gallivm, i32_type);
		475	lp_build_context_init(&u32_bld, gallivm, u32_type);
		476	lp_build_context_init(&f32_bld, gallivm, f32_type);
		477
		478	/* extract exponent */
		479	shift = lp_build_const_int_vec(gallivm, i32_type, 27);
		480	/* this shift needs to be unsigned otherwise need mask */
		481	exp = lp_build_shr(&u32_bld, src, shift);
		482
		483	/*
		484	* scale factor is 2 ^ (exp - bias)
		485	* (and additionally corrected here for the mantissa bits)
		486	* not using shift because
		487	* a) don't have vector shift in a lot of cases
		488	* b) shift direction changes hence need 2 shifts + conditional
		489	* (or rotate instruction which is even more rare (for instance XOP))
		490	* so use whacky float 2 ^ function instead manipulating exponent
		491	* (saves us the float conversion at the end too)
		492	*/
		493	bias = lp_build_const_int_vec(gallivm, i32_type, 127 - (15 + 9));
		494	scale = lp_build_add(&i32_bld, exp, bias);
		495	shift = lp_build_const_int_vec(gallivm, i32_type, 23);
		496	scale = lp_build_shl(&i32_bld, scale, shift);
		497	scale = LLVMBuildBitCast(builder, scale, f32_bld.vec_type, "");
		498
		499	dst[0] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 0);
		500	dst[1] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 9);
		501	dst[2] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 18);
		502
		503	/* Just set alpha to one */
		504	dst[3] = f32_bld.one;
		505	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/auxiliary/gallivm/lp_bld_format_float.c – Rev 5564