WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/mesa-9.2.5/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c

Rev	Author	Line No.	Line
5563	serge	1	/**************************************************************************
		2	*
		3	* Copyright 2013 VMware, Inc.
		4	* All Rights Reserved.
		5	*
		6	* Permission is hereby granted, free of charge, to any person obtaining a
		7	* copy of this software and associated documentation files (the
		8	* "Software"), to deal in the Software without restriction, including
		9	* without limitation the rights to use, copy, modify, merge, publish,
		10	* distribute, sub license, and/or sell copies of the Software, and to
		11	* permit persons to whom the Software is furnished to do so, subject to
		12	* the following conditions:
		13	*
		14	* The above copyright notice and this permission notice (including the
		15	* next paragraph) shall be included in all copies or substantial portions
		16	* of the Software.
		17	*
		18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
		19	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		20	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
		21	* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
		22	* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
		23	* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
		24	* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		25	*
		26	**************************************************************************/
		27
		28
		29	/**
		30	* @file
		31	* Format conversion code for srgb formats.
		32	*
		33	* Functions for converting from srgb to linear and vice versa.
		34	* From http://www.opengl.org/registry/specs/EXT/texture_sRGB.txt:
		35	*
		36	* srgb->linear:
		37	* cl = cs / 12.92, cs <= 0.04045
		38	* cl = ((cs + 0.055)/1.055)^2.4, cs > 0.04045
		39	*
		40	* linear->srgb:
		41	* if (isnan(cl)) {
		42	* Map IEEE-754 Not-a-number to zero.
		43	* cs = 0.0;
		44	* } else if (cl > 1.0) {
		45	* cs = 1.0;
		46	* } else if (cl < 0.0) {
		47	* cs = 0.0;
		48	* } else if (cl < 0.0031308) {
		49	* cs = 12.92 * cl;
		50	* } else {
		51	* cs = 1.055 * pow(cl, 0.41666) - 0.055;
		52	* }
		53	*
		54	* This does not need to be accurate, however at least for d3d10
		55	* (http://msdn.microsoft.com/en-us/library/windows/desktop/dd607323%28v=vs.85%29.aspx):
		56	* 1) For srgb->linear, it is required that the error on the srgb side is
		57	* not larger than 0.5f, which I interpret that if you map the value back
		58	* to srgb from linear using the ideal conversion, it would not be off by
		59	* more than 0.5f (that is, it would map to the same 8-bit integer value
		60	* as it was before conversion to linear).
		61	* 2) linear->srgb is permitted 0.6f which luckily looks like quite a large
		62	* error is allowed.
		63	* 3) Additionally, all srgb values converted to linear and back must result
		64	* in the same value as they were originally.
		65	*
		66	* @author Roland Scheidegger
		67	*/
		68
		69
		70	#include "util/u_debug.h"
		71
		72	#include "lp_bld_type.h"
		73	#include "lp_bld_const.h"
		74	#include "lp_bld_arit.h"
		75	#include "lp_bld_bitarit.h"
		76	#include "lp_bld_logic.h"
		77	#include "lp_bld_format.h"
		78
		79
		80
		81	/**
		82	* Convert srgb int values to linear float values.
		83	* Several possibilities how to do this, e.g.
		84	* - table
		85	* - doing the pow() with int-to-float and float-to-int tricks
		86	* (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
		87	* - just using standard polynomial approximation
		88	* (3rd order polynomial is required for crappy but just sufficient accuracy)
		89	*
		90	* @param src integer (vector) value(s) to convert
		91	* (8 bit values unpacked to 32 bit already).
		92	*/
		93	LLVMValueRef
		94	lp_build_srgb_to_linear(struct gallivm_state *gallivm,
		95	struct lp_type src_type,
		96	LLVMValueRef src)
		97	{
		98	struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
		99	struct lp_build_context f32_bld;
		100	LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
		101	double coeffs[4] = {0.0023f,
		102	0.0030f / 255.0f,
		103	0.6935f / (255.0f * 255.0f),
		104	0.3012f / (255.0f * 255.0f * 255.0f)
		105	};
		106
		107	assert(src_type.width == 32);
		108
		109	lp_build_context_init(&f32_bld, gallivm, f32_type);
		110
		111	/*
		112	* using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
		113	* ( poly = 0.3012x^3 + 0.6935x^2 + 0.0030*x + 0.0023)
		114	* (found with octave polyfit and some magic as I couldn't get the error
		115	* function right). Using the above mentioned error function, the values stay
		116	* within +-0.35, except for the lowest values - hence tweaking linear segment
		117	* to cover the first 16 instead of the first 11 values (the error stays
		118	* just about acceptable there too).
		119	* Hence: lin = src > 15 ? poly : src / 12.6
		120	* This function really only makes sense for vectors, should use LUT otherwise.
		121	* All in all (including float conversion) 11 instructions (with sse4.1),
		122	* 6 constants (polynomial could be done with 1 instruction less at the cost
		123	* of slightly worse dependency chain, fma should also help).
		124	*/
		125	/* doing the 1/255 mul as part of the approximation */
		126	srcf = lp_build_int_to_float(&f32_bld, src);
		127	lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
		128	part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
		129
		130	part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);
		131
		132	lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
		133	is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh);
		134	return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
		135	}
		136
		137
		138	/**
		139	* Convert linear float values to srgb int values.
		140	* Several possibilities how to do this, e.g.
		141	* - use table (based on exponent/highest order mantissa bits) and do
		142	* linear interpolation (https://gist.github.com/rygorous/2203834)
		143	* - Chebyshev polynomial
		144	* - Approximation using reciprocals
		145	* - using int-to-float and float-to-int tricks for pow()
		146	* (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
		147	*
		148	* @param src float (vector) value(s) to convert.
		149	*/
		150	static LLVMValueRef
		151	lp_build_linear_to_srgb(struct gallivm_state *gallivm,
		152	struct lp_type src_type,
		153	LLVMValueRef src)
		154	{
		155	LLVMBuilderRef builder = gallivm->builder;
		156	struct lp_build_context f32_bld;
		157	LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final;
		158
		159	lp_build_context_init(&f32_bld, gallivm, src_type);
		160
		161	src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one);
		162
		163	if (0) {
		164	/*
		165	* using int-to-float and float-to-int trick for pow().
		166	* This is much more accurate than necessary thanks to the correction,
		167	* but it most certainly makes no sense without rsqrt available.
		168	* Bonus points if you understand how this works...
		169	* All in all (including min/max clamp, conversion) 19 instructions.
		170	*/
		171
		172	float exp_f = 2.0f / 3.0f;
		173	/* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */
		174	float exp2f_c = 1.30438178253e+19f;
		175	float coeff_f = 0.62996f;
		176	LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2;
		177	struct lp_type int_type = lp_int_type(src_type);
		178
		179	/*
		180	* First calculate approx x^8/12
		181	*/
		182	exponent = lp_build_const_vec(gallivm, src_type, exp_f);
		183	coeff = lp_build_const_vec(gallivm, src_type,
		184	exp2f_c * powf(coeff_f, 1.0f / exp_f));
		185
		186	/* premultiply src */
		187	tmp = lp_build_mul(&f32_bld, coeff, src);
		188	/* "log2" */
		189	tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), "");
		190	tmp = lp_build_int_to_float(&f32_bld, tmp);
		191	/* multiply for pow */
		192	tmp = lp_build_mul(&f32_bld, tmp, exponent);
		193	/* "exp2" */
		194	pow_approx = lp_build_itrunc(&f32_bld, tmp);
		195	pow_approx = LLVMBuildBitCast(builder, pow_approx,
		196	lp_build_vec_type(gallivm, src_type), "");
		197
		198	/*
		199	* Since that pow was inaccurate (like 3 bits, though each sqrt step would
		200	* give another bit), compensate the error (which is why we chose another
		201	* exponent in the first place).
		202	*/
		203	/* x * x^(8/12) = x^(20/12) */
		204	pow_1 = lp_build_mul(&f32_bld, pow_approx, src);
		205
		206	/* x * x * x^(-4/12) = x^(20/12) */
		207	/* Should avoid using rsqrt if it's not available, but
		208	* using x * x^(4/12) * x^(4/12) instead will change error weight */
		209	tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx);
		210	x2 = lp_build_mul(&f32_bld, src, src);
		211	pow_2 = lp_build_mul(&f32_bld, x2, tmp);
		212
		213	/* average the values so the errors cancel out, compensate bias,
		214	* we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul
		215	* for conversion to int in here */
		216	tmp = lp_build_add(&f32_bld, pow_1, pow_2);
		217	coeff = lp_build_const_vec(gallivm, src_type,
		218	1.0f / (3.0f * coeff_f) * 0.999852f *
		219	powf(1.055f * 255.0f, 4.0f));
		220	pow_final = lp_build_mul(&f32_bld, tmp, coeff);
		221
		222	/* x^(5/12) = rsqrt(rsqrt(x^20/12)) */
		223	if (lp_build_fast_rsqrt_available(src_type)) {
		224	pow_final = lp_build_fast_rsqrt(&f32_bld,
		225	lp_build_fast_rsqrt(&f32_bld, pow_final));
		226	}
		227	else {
		228	pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final));
		229	}
		230	pow_final = lp_build_add(&f32_bld, pow_final,
		231	lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f));
		232	}
		233
		234	else {
		235	/*
		236	* using "rational polynomial" approximation here.
		237	* Essentially y = ax^0.375 + bx^0.5 + c, with also
		238	* factoring in the 255.0 mul and the scaling mul.
		239	* (a is closer to actual value so has higher weight than b.)
		240	* Note: the constants are magic values. They were found empirically,
		241	* possibly could be improved but good enough (be VERY careful with
		242	* error metric if you'd want to tweak them, they also MUST fit with
		243	* the crappy polynomial above for srgb->linear since it is required
		244	* that each srgb value maps back to the same value).
		245	* This function has an error of max +-0.17 (and we'd only require +-0.6),
		246	* for the approximated srgb->linear values the error is naturally larger
		247	* (+-0.42) but still accurate enough (required +-0.5 essentially).
		248	* All in all (including min/max clamp, conversion) 15 instructions.
		249	* FMA would help (minus 2 instructions).
		250	*/
		251
		252	LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2;
		253
		254	if (lp_build_fast_rsqrt_available(src_type)) {
		255	tmp = lp_build_fast_rsqrt(&f32_bld, src);
		256	x05 = lp_build_mul(&f32_bld, src, tmp);
		257	}
		258	else {
		259	/*
		260	* I don't really expect this to be practical without rsqrt
		261	* but there's no reason for triple punishment so at least
		262	* save the otherwise resulting division and unnecessary mul...
		263	*/
		264	x05 = lp_build_sqrt(&f32_bld, src);
		265	}
		266
		267	tmp = lp_build_mul(&f32_bld, x05, src);
		268	if (lp_build_fast_rsqrt_available(src_type)) {
		269	x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp));
		270	}
		271	else {
		272	x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp));
		273	}
		274
		275	a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f);
		276	b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f);
		277	c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);
		278
		279	tmp = lp_build_mul(&f32_bld, a_const, x0375);
		280	tmp2 = lp_build_mul(&f32_bld, b_const, x05);
		281	tmp2 = lp_build_add(&f32_bld, tmp2, c_const);
		282	pow_final = lp_build_add(&f32_bld, tmp, tmp2);
		283	}
		284
		285	/* linear part is easy */
		286	lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f);
		287	lin = lp_build_mul(&f32_bld, src, lin_const);
		288
		289	lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f);
		290	is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
		291	tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
		292
		293	f32_bld.type.sign = 0;
		294	return lp_build_iround(&f32_bld, tmp);
		295	}
		296
		297
		298	/**
		299	* Convert linear float soa values to packed srgb AoS values.
		300	* This only handles packed formats which are 4x8bit in size
		301	* (rgba and rgbx plus swizzles).
		302	*
		303	* @param src float SoA (vector) values to convert.
		304	*/
		305	LLVMValueRef
		306	lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
		307	const struct util_format_description *dst_fmt,
		308	struct lp_type src_type,
		309	LLVMValueRef *src)
		310	{
		311	LLVMBuilderRef builder = gallivm->builder;
		312	unsigned chan;
		313	struct lp_build_context f32_bld;
		314	struct lp_type int32_type = lp_int_type(src_type);
		315	LLVMValueRef tmpsrgb[4], alpha, dst;
		316
		317	lp_build_context_init(&f32_bld, gallivm, src_type);
		318
		319	/* rgb is subject to linear->srgb conversion, alpha is not */
		320	for (chan = 0; chan < 3; chan++) {
		321	tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]);
		322	}
		323	/*
		324	* can't use lp_build_conv since we want to keep values as 32bit
		325	* here so we can interleave with rgb to go from SoA->AoS.
		326	*/
		327	alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one);
		328	alpha = lp_build_mul(&f32_bld, alpha,
		329	lp_build_const_vec(gallivm, src_type, 255.0f));
		330	tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
		331
		332	dst = lp_build_zero(gallivm, int32_type);
		333	for (chan = 0; chan < dst_fmt->nr_channels; chan++) {
		334	if (dst_fmt->swizzle[chan] <= UTIL_FORMAT_SWIZZLE_W) {
		335	unsigned ls;
		336	LLVMValueRef shifted, shift_val;
		337	ls = dst_fmt->channel[dst_fmt->swizzle[chan]].shift;
		338	shift_val = lp_build_const_int_vec(gallivm, int32_type, ls);
		339	shifted = LLVMBuildShl(builder, tmpsrgb[chan], shift_val, "");
		340	dst = LLVMBuildOr(builder, dst, shifted, "");
		341	}
		342	}
		343	return dst;
		344	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/mesa-9.2.5/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c – Rev 5563