Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2013 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | |||
29 | /** |
||
30 | * @file |
||
31 | * Format conversion code for "special" float formats. |
||
32 | * |
||
33 | * @author Roland Scheidegger |
||
34 | */ |
||
35 | |||
36 | |||
37 | #include "util/u_debug.h" |
||
38 | |||
39 | #include "lp_bld_type.h" |
||
40 | #include "lp_bld_const.h" |
||
41 | #include "lp_bld_arit.h" |
||
42 | #include "lp_bld_bitarit.h" |
||
43 | #include "lp_bld_logic.h" |
||
44 | #include "lp_bld_format.h" |
||
45 | |||
46 | |||
47 | /** |
||
48 | * Convert float32 to a float-like value with less exponent and mantissa |
||
49 | * bits. The mantissa is still biased, and the mantissa still has an implied 1, |
||
50 | * and there may be a sign bit. |
||
51 | * |
||
52 | * @param src (vector) float value to convert |
||
53 | * @param mantissa_bits the number of mantissa bits |
||
54 | * @param exponent_bits the number of exponent bits |
||
55 | * @param mantissa_start the start position of the small float in result value |
||
56 | * @param has_sign if the small float has a sign bit |
||
57 | * |
||
58 | * This implements round-towards-zero (trunc) hence too large numbers get |
||
59 | * converted to largest representable number, not infinity. |
||
60 | * Small numbers may get converted to denorms, depending on normal |
||
61 | * float denorm handling of the cpu. |
||
62 | * Note that compared to the references, below, we skip any rounding bias |
||
63 | * since we do rounding towards zero - OpenGL allows rounding towards zero |
||
64 | * (though not preferred) and DX10 even seems to require it. |
||
65 | * Note that this will pack mantissa, exponent and sign bit (if any) together, |
||
66 | * and shift the result to mantissa_start. |
||
67 | * |
||
68 | * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ |
||
69 | * ref https://gist.github.com/rygorous/2156668 |
||
70 | */ |
||
71 | LLVMValueRef |
||
72 | lp_build_float_to_smallfloat(struct gallivm_state *gallivm, |
||
73 | struct lp_type i32_type, |
||
74 | LLVMValueRef src, |
||
75 | unsigned mantissa_bits, |
||
76 | unsigned exponent_bits, |
||
77 | unsigned mantissa_start, |
||
78 | boolean has_sign) |
||
79 | { |
||
80 | LLVMBuilderRef builder = gallivm->builder; |
||
81 | LLVMValueRef i32_floatexpmask, i32_smallexpmask, magic, normal; |
||
82 | LLVMValueRef rescale_src, i32_roundmask, small_max; |
||
83 | LLVMValueRef i32_qnanbit, shift, res; |
||
84 | LLVMValueRef is_nan_or_inf, nan_or_inf, mask, i32_src; |
||
85 | struct lp_type f32_type = lp_type_float_vec(32, 32 * i32_type.length); |
||
86 | struct lp_build_context f32_bld, i32_bld; |
||
87 | LLVMValueRef zero = lp_build_const_vec(gallivm, f32_type, 0.0f); |
||
88 | unsigned exponent_start = mantissa_start + mantissa_bits; |
||
89 | boolean always_preserve_nans = true; |
||
90 | boolean maybe_correct_denorm_rounding = true; |
||
91 | |||
92 | lp_build_context_init(&f32_bld, gallivm, f32_type); |
||
93 | lp_build_context_init(&i32_bld, gallivm, i32_type); |
||
94 | |||
95 | i32_smallexpmask = lp_build_const_int_vec(gallivm, i32_type, |
||
96 | ((1 << exponent_bits) - 1) << 23); |
||
97 | i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23); |
||
98 | |||
99 | i32_src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, ""); |
||
100 | |||
101 | if (has_sign) { |
||
102 | rescale_src = src; |
||
103 | } |
||
104 | else { |
||
105 | /* clamp to pos range (can still have sign bit if NaN or negative zero) */ |
||
106 | rescale_src = lp_build_max(&f32_bld, zero, src); |
||
107 | } |
||
108 | rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, ""); |
||
109 | |||
110 | /* "ordinary" number */ |
||
111 | /* |
||
112 | * get rid of excess mantissa bits and sign bit |
||
113 | * This is only really needed for correct rounding of denorms I think |
||
114 | * but only if we use the preserve NaN path does using |
||
115 | * src_abs instead save us any instruction. |
||
116 | */ |
||
117 | if (maybe_correct_denorm_rounding || !always_preserve_nans) { |
||
118 | i32_roundmask = lp_build_const_int_vec(gallivm, i32_type, |
||
119 | ~((1 << (23 - mantissa_bits)) - 1) & |
||
120 | 0x7fffffff); |
||
121 | rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, ""); |
||
122 | rescale_src = lp_build_and(&i32_bld, rescale_src, i32_roundmask); |
||
123 | rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, ""); |
||
124 | } |
||
125 | else { |
||
126 | rescale_src = lp_build_abs(&f32_bld, src); |
||
127 | } |
||
128 | |||
129 | /* bias exponent (and denormalize if necessary) */ |
||
130 | magic = lp_build_const_int_vec(gallivm, i32_type, |
||
131 | ((1 << (exponent_bits - 1)) - 1) << 23); |
||
132 | magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""); |
||
133 | normal = lp_build_mul(&f32_bld, rescale_src, magic); |
||
134 | |||
135 | /* clamp to max value - largest non-infinity number */ |
||
136 | small_max = lp_build_const_int_vec(gallivm, i32_type, |
||
137 | (((1 << exponent_bits) - 2) << 23) | |
||
138 | (((1 << mantissa_bits) - 1) << (23 - mantissa_bits))); |
||
139 | small_max = LLVMBuildBitCast(builder, small_max, f32_bld.vec_type, ""); |
||
140 | normal = lp_build_min(&f32_bld, normal, small_max); |
||
141 | normal = LLVMBuildBitCast(builder, normal, i32_bld.vec_type, ""); |
||
142 | |||
143 | /* |
||
144 | * handle nan/inf cases |
||
145 | * a little bit tricky since -Inf -> 0, +Inf -> +Inf, +-Nan -> +Nan |
||
146 | * (for no sign) else ->Inf -> ->Inf too. |
||
147 | * could use explicit "unordered" comparison checking for NaNs |
||
148 | * which might save us from calculating src_abs too. |
||
149 | * (Cannot actually save the comparison since we need to distinguish |
||
150 | * Inf and NaN cases anyway, but it would be better for AVX.) |
||
151 | */ |
||
152 | if (always_preserve_nans) { |
||
153 | LLVMValueRef infcheck_src, is_inf, is_nan; |
||
154 | LLVMValueRef src_abs = lp_build_abs(&f32_bld, src); |
||
155 | src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, ""); |
||
156 | |||
157 | if (has_sign) { |
||
158 | infcheck_src = src_abs; |
||
159 | } |
||
160 | else { |
||
161 | infcheck_src = i32_src; |
||
162 | } |
||
163 | is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, |
||
164 | src_abs, i32_floatexpmask); |
||
165 | is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL, |
||
166 | infcheck_src, i32_floatexpmask); |
||
167 | is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf); |
||
168 | /* could also set more mantissa bits but need at least the highest mantissa bit */ |
||
169 | i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22); |
||
170 | /* combine maxexp with qnanbit */ |
||
171 | nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask, |
||
172 | lp_build_and(&i32_bld, is_nan, i32_qnanbit)); |
||
173 | } |
||
174 | else { |
||
175 | /* |
||
176 | * A couple simplifications, with mostly 2 drawbacks (so disabled): |
||
177 | * - it will promote some SNaNs (those which only had bits set |
||
178 | * in the mantissa part which got chopped off) to +-Infinity. |
||
179 | * (Those bits get chopped off anyway later so can as well use |
||
180 | * rescale_src instead of src_abs here saving the calculation of that.) |
||
181 | * - for no sign case, it relies on the max() being used for rescale_src |
||
182 | * to give back the NaN (which is NOT ieee754r behavior, but should work |
||
183 | * with sse2 on a full moon (rather if I got the operand order right) - |
||
184 | * we _don't_ have well-defined behavior specified with min/max wrt NaNs, |
||
185 | * however, and if it gets converted to cmp/select it may not work (we |
||
186 | * don't really have specified behavior for cmp wrt NaNs neither). |
||
187 | */ |
||
188 | rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, ""); |
||
189 | is_nan_or_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GEQUAL, |
||
190 | rescale_src, i32_floatexpmask); |
||
191 | /* note this will introduce excess exponent bits */ |
||
192 | nan_or_inf = rescale_src; |
||
193 | } |
||
194 | res = lp_build_select(&i32_bld, is_nan_or_inf, nan_or_inf, normal); |
||
195 | |||
196 | if (mantissa_start > 0 || !always_preserve_nans) { |
||
197 | /* mask off excess bits */ |
||
198 | unsigned maskbits = (1 << (mantissa_bits + exponent_bits)) - 1; |
||
199 | mask = lp_build_const_int_vec(gallivm, i32_type, |
||
200 | maskbits << (23 - mantissa_bits)); |
||
201 | res = lp_build_and(&i32_bld, res, mask); |
||
202 | } |
||
203 | |||
204 | /* add back sign bit at right position */ |
||
205 | if (has_sign) { |
||
206 | LLVMValueRef sign; |
||
207 | struct lp_type u32_type = lp_type_uint_vec(32, 32 * i32_type.length); |
||
208 | struct lp_build_context u32_bld; |
||
209 | lp_build_context_init(&u32_bld, gallivm, u32_type); |
||
210 | |||
211 | mask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000); |
||
212 | shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits); |
||
213 | sign = lp_build_and(&i32_bld, mask, i32_src); |
||
214 | sign = lp_build_shr(&u32_bld, sign, shift); |
||
215 | res = lp_build_or(&i32_bld, sign, res); |
||
216 | } |
||
217 | |||
218 | /* shift to final position */ |
||
219 | if (exponent_start < 23) { |
||
220 | shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start); |
||
221 | res = lp_build_shr(&i32_bld, res, shift); |
||
222 | } |
||
223 | else { |
||
224 | shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23); |
||
225 | res = lp_build_shl(&i32_bld, res, shift); |
||
226 | } |
||
227 | return res; |
||
228 | } |
||
229 | |||
230 | |||
231 | /** |
||
232 | * Convert rgba float SoA values to packed r11g11b10 values. |
||
233 | * |
||
234 | * @param src SoA float (vector) values to convert. |
||
235 | */ |
||
236 | LLVMValueRef |
||
237 | lp_build_float_to_r11g11b10(struct gallivm_state *gallivm, |
||
238 | LLVMValueRef *src) |
||
239 | { |
||
240 | LLVMValueRef dst, rcomp, bcomp, gcomp; |
||
241 | struct lp_build_context i32_bld; |
||
242 | LLVMTypeRef src_type = LLVMTypeOf(*src); |
||
243 | unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ? |
||
244 | LLVMGetVectorSize(src_type) : 1; |
||
245 | struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length); |
||
246 | |||
247 | lp_build_context_init(&i32_bld, gallivm, i32_type); |
||
248 | |||
249 | /* "rescale" and put in right position */ |
||
250 | rcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[0], 6, 5, 0, false); |
||
251 | gcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[1], 6, 5, 11, false); |
||
252 | bcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[2], 5, 5, 22, false); |
||
253 | |||
254 | /* combine the values */ |
||
255 | dst = lp_build_or(&i32_bld, rcomp, gcomp); |
||
256 | return lp_build_or(&i32_bld, dst, bcomp); |
||
257 | } |
||
258 | |||
259 | |||
260 | /** |
||
261 | * Convert a float-like value with less exponent and mantissa |
||
262 | * bits than a normal float32 to a float32. The mantissa of |
||
263 | * the source value is assumed to have an implied 1, and the exponent |
||
264 | * is biased. There may be a sign bit. |
||
265 | * The source value to extract must be in a 32bit int (bits not part of |
||
266 | * the value to convert will be masked off). |
||
267 | * This works for things like 11-bit floats or half-floats, |
||
268 | * mantissa, exponent (and sign if present) must be packed |
||
269 | * the same as they are in a ordinary float. |
||
270 | * |
||
271 | * @param src (vector) value to convert |
||
272 | * @param mantissa_bits the number of mantissa bits |
||
273 | * @param exponent_bits the number of exponent bits |
||
274 | * @param mantissa_start the bit start position of the packed component |
||
275 | * @param has_sign if the small float has a sign bit |
||
276 | * |
||
277 | * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ |
||
278 | * ref https://gist.github.com/rygorous/2156668 |
||
279 | */ |
||
280 | LLVMValueRef |
||
281 | lp_build_smallfloat_to_float(struct gallivm_state *gallivm, |
||
282 | struct lp_type f32_type, |
||
283 | LLVMValueRef src, |
||
284 | unsigned mantissa_bits, |
||
285 | unsigned exponent_bits, |
||
286 | unsigned mantissa_start, |
||
287 | boolean has_sign) |
||
288 | { |
||
289 | LLVMBuilderRef builder = gallivm->builder; |
||
290 | LLVMValueRef smallexpmask, i32_floatexpmask, magic; |
||
291 | LLVMValueRef wasinfnan, tmp, res, shift, maskabs, srcabs, sign; |
||
292 | unsigned exponent_start = mantissa_start + mantissa_bits; |
||
293 | struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length); |
||
294 | struct lp_build_context f32_bld, i32_bld; |
||
295 | |||
296 | lp_build_context_init(&f32_bld, gallivm, f32_type); |
||
297 | lp_build_context_init(&i32_bld, gallivm, i32_type); |
||
298 | |||
299 | /* extract the component to "float position" */ |
||
300 | if (exponent_start < 23) { |
||
301 | shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start); |
||
302 | src = lp_build_shl(&i32_bld, src, shift); |
||
303 | } |
||
304 | else { |
||
305 | shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23); |
||
306 | src = lp_build_shr(&i32_bld, src, shift); |
||
307 | } |
||
308 | maskabs = lp_build_const_int_vec(gallivm, i32_type, |
||
309 | ((1 << (mantissa_bits + exponent_bits)) - 1) |
||
310 | << (23 - mantissa_bits)); |
||
311 | srcabs = lp_build_and(&i32_bld, src, maskabs); |
||
312 | |||
313 | /* now do the actual scaling */ |
||
314 | smallexpmask = lp_build_const_int_vec(gallivm, i32_type, |
||
315 | ((1 << exponent_bits) - 1) << 23); |
||
316 | i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23); |
||
317 | |||
318 | if (0) { |
||
319 | /* |
||
320 | * Note that this code path, while simpler, will convert small |
||
321 | * float denorms to floats according to current cpu denorm mode, if |
||
322 | * denorms are disabled it will flush them to zero! |
||
323 | * If cpu denorms are enabled, it should be faster though as long as |
||
324 | * there's no denorms in the inputs, but if there are actually denorms |
||
325 | * it's likely to be an order of magnitude slower (on x86 cpus). |
||
326 | */ |
||
327 | |||
328 | srcabs = LLVMBuildBitCast(builder, srcabs, f32_bld.vec_type, ""); |
||
329 | |||
330 | /* |
||
331 | * magic number has exponent new exp bias + (new exp bias - old exp bias), |
||
332 | * mantissa is 0. |
||
333 | */ |
||
334 | magic = lp_build_const_int_vec(gallivm, i32_type, |
||
335 | (255 - (1 << (exponent_bits - 1))) << 23); |
||
336 | magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""); |
||
337 | |||
338 | /* adjust exponent and fix denorms */ |
||
339 | res = lp_build_mul(&f32_bld, srcabs, magic); |
||
340 | |||
341 | /* |
||
342 | * if exp was max (== NaN or Inf) set new exp to max (keep mantissa), |
||
343 | * so a simple "or" will do (because exp adjust will leave mantissa intact) |
||
344 | */ |
||
345 | /* use float compare (better for AVX 8-wide / no AVX2 but else should use int) */ |
||
346 | smallexpmask = LLVMBuildBitCast(builder, smallexpmask, f32_bld.vec_type, ""); |
||
347 | wasinfnan = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL, srcabs, smallexpmask); |
||
348 | res = LLVMBuildBitCast(builder, res, i32_bld.vec_type, ""); |
||
349 | tmp = lp_build_and(&i32_bld, i32_floatexpmask, wasinfnan); |
||
350 | res = lp_build_or(&i32_bld, tmp, res); |
||
351 | } |
||
352 | |||
353 | else { |
||
354 | LLVMValueRef exp_one, isdenorm, denorm, normal, exp_adj; |
||
355 | |||
356 | /* denorm (or zero) if exponent is zero */ |
||
357 | exp_one = lp_build_const_int_vec(gallivm, i32_type, 1 << 23); |
||
358 | isdenorm = lp_build_cmp(&i32_bld, PIPE_FUNC_LESS, srcabs, exp_one); |
||
359 | |||
360 | /* inf or nan if exponent is max */ |
||
361 | wasinfnan = lp_build_cmp(&i32_bld, PIPE_FUNC_GEQUAL, srcabs, smallexpmask); |
||
362 | |||
363 | /* for denormal (or zero), add (== or) magic exp to mantissa (== srcabs) (as int) |
||
364 | * then subtract it (as float). |
||
365 | * Another option would be to just do inttofp then do a rescale mul. |
||
366 | */ |
||
367 | magic = lp_build_const_int_vec(gallivm, i32_type, |
||
368 | (127 - ((1 << (exponent_bits - 1)) - 2)) << 23); |
||
369 | denorm = lp_build_or(&i32_bld, srcabs, magic); |
||
370 | denorm = LLVMBuildBitCast(builder, denorm, f32_bld.vec_type, ""); |
||
371 | denorm = lp_build_sub(&f32_bld, denorm, |
||
372 | LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "")); |
||
373 | denorm = LLVMBuildBitCast(builder, denorm, i32_bld.vec_type, ""); |
||
374 | |||
375 | /* for normals, Infs, Nans fix up exponent */ |
||
376 | exp_adj = lp_build_const_int_vec(gallivm, i32_type, |
||
377 | (127 - ((1 << (exponent_bits - 1)) - 1)) << 23); |
||
378 | normal = lp_build_add(&i32_bld, srcabs, exp_adj); |
||
379 | tmp = lp_build_and(&i32_bld, wasinfnan, i32_floatexpmask); |
||
380 | normal = lp_build_or(&i32_bld, tmp, normal); |
||
381 | |||
382 | res = lp_build_select(&i32_bld, isdenorm, denorm, normal); |
||
383 | } |
||
384 | |||
385 | if (has_sign) { |
||
386 | LLVMValueRef signmask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000); |
||
387 | shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits); |
||
388 | sign = lp_build_shl(&i32_bld, src, shift); |
||
389 | sign = lp_build_and(&i32_bld, signmask, sign); |
||
390 | res = lp_build_or(&i32_bld, res, sign); |
||
391 | } |
||
392 | |||
393 | return LLVMBuildBitCast(builder, res, f32_bld.vec_type, ""); |
||
394 | } |
||
395 | |||
396 | |||
397 | /** |
||
398 | * Convert packed float format (r11g11b10) value(s) to rgba float SoA values. |
||
399 | * |
||
400 | * @param src packed AoS r11g11b10 values (as (vector) int32) |
||
401 | * @param dst pointer to the SoA result values |
||
402 | */ |
||
403 | void |
||
404 | lp_build_r11g11b10_to_float(struct gallivm_state *gallivm, |
||
405 | LLVMValueRef src, |
||
406 | LLVMValueRef *dst) |
||
407 | { |
||
408 | LLVMTypeRef src_type = LLVMTypeOf(src); |
||
409 | unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ? |
||
410 | LLVMGetVectorSize(src_type) : 1; |
||
411 | struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length); |
||
412 | |||
413 | dst[0] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 6, 5, 0, false); |
||
414 | dst[1] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 6, 5, 11, false); |
||
415 | dst[2] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 5, 5, 22, false); |
||
416 | |||
417 | /* Just set alpha to one */ |
||
418 | dst[3] = lp_build_one(gallivm, f32_type); |
||
419 | } |
||
420 | |||
421 | |||
422 | static LLVMValueRef |
||
423 | lp_build_rgb9_to_float_helper(struct gallivm_state *gallivm, |
||
424 | struct lp_type f32_type, |
||
425 | LLVMValueRef src, |
||
426 | LLVMValueRef scale, |
||
427 | unsigned mantissa_start) |
||
428 | { |
||
429 | LLVMValueRef shift, mask; |
||
430 | |||
431 | struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length); |
||
432 | struct lp_build_context i32_bld, f32_bld; |
||
433 | |||
434 | lp_build_context_init(&i32_bld, gallivm, i32_type); |
||
435 | lp_build_context_init(&f32_bld, gallivm, f32_type); |
||
436 | |||
437 | /* |
||
438 | * This is much easier as other weirdo float formats, since |
||
439 | * there's no sign, no Inf/NaN, and there's nothing special |
||
440 | * required for normals/denormals neither (as without the implied one |
||
441 | * for the mantissa for other formats, everything looks like a denormal). |
||
442 | * So just do (float)comp_bits * scale |
||
443 | */ |
||
444 | shift = lp_build_const_int_vec(gallivm, i32_type, mantissa_start); |
||
445 | mask = lp_build_const_int_vec(gallivm, i32_type, 0x1ff); |
||
446 | src = lp_build_shr(&i32_bld, src, shift); |
||
447 | src = lp_build_and(&i32_bld, src, mask); |
||
448 | src = lp_build_int_to_float(&f32_bld, src); |
||
449 | return lp_build_mul(&f32_bld, src, scale); |
||
450 | } |
||
451 | |||
452 | |||
453 | /** |
||
454 | * Convert shared exponent format (rgb9e5) value(s) to rgba float SoA values. |
||
455 | * |
||
456 | * @param src packed AoS rgb9e5 values (as (vector) int32) |
||
457 | * @param dst pointer to the SoA result values |
||
458 | */ |
||
459 | void |
||
460 | lp_build_rgb9e5_to_float(struct gallivm_state *gallivm, |
||
461 | LLVMValueRef src, |
||
462 | LLVMValueRef *dst) |
||
463 | { |
||
464 | LLVMBuilderRef builder = gallivm->builder; |
||
465 | LLVMTypeRef src_type = LLVMTypeOf(src); |
||
466 | LLVMValueRef shift, scale, bias, exp; |
||
467 | unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ? |
||
468 | LLVMGetVectorSize(src_type) : 1; |
||
469 | struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length); |
||
470 | struct lp_type u32_type = lp_type_uint_vec(32, 32 * src_length); |
||
471 | struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length); |
||
472 | struct lp_build_context i32_bld, u32_bld, f32_bld; |
||
473 | |||
474 | lp_build_context_init(&i32_bld, gallivm, i32_type); |
||
475 | lp_build_context_init(&u32_bld, gallivm, u32_type); |
||
476 | lp_build_context_init(&f32_bld, gallivm, f32_type); |
||
477 | |||
478 | /* extract exponent */ |
||
479 | shift = lp_build_const_int_vec(gallivm, i32_type, 27); |
||
480 | /* this shift needs to be unsigned otherwise need mask */ |
||
481 | exp = lp_build_shr(&u32_bld, src, shift); |
||
482 | |||
483 | /* |
||
484 | * scale factor is 2 ^ (exp - bias) |
||
485 | * (and additionally corrected here for the mantissa bits) |
||
486 | * not using shift because |
||
487 | * a) don't have vector shift in a lot of cases |
||
488 | * b) shift direction changes hence need 2 shifts + conditional |
||
489 | * (or rotate instruction which is even more rare (for instance XOP)) |
||
490 | * so use whacky float 2 ^ function instead manipulating exponent |
||
491 | * (saves us the float conversion at the end too) |
||
492 | */ |
||
493 | bias = lp_build_const_int_vec(gallivm, i32_type, 127 - (15 + 9)); |
||
494 | scale = lp_build_add(&i32_bld, exp, bias); |
||
495 | shift = lp_build_const_int_vec(gallivm, i32_type, 23); |
||
496 | scale = lp_build_shl(&i32_bld, scale, shift); |
||
497 | scale = LLVMBuildBitCast(builder, scale, f32_bld.vec_type, ""); |
||
498 | |||
499 | dst[0] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 0); |
||
500 | dst[1] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 9); |
||
501 | dst[2] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 18); |
||
502 | |||
503 | /* Just set alpha to one */ |
||
504 | dst[3] = f32_bld.one; |
||
505 | }><>><>><>><>><>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>><>><>><>><>><> |