Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2013 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28.  
  29. /**
  30.  * @file
  31.  * Format conversion code for "special" float formats.
  32.  *
  33.  * @author Roland Scheidegger <sroland@vmware.com>
  34.  */
  35.  
  36.  
  37. #include "util/u_debug.h"
  38.  
  39. #include "lp_bld_type.h"
  40. #include "lp_bld_const.h"
  41. #include "lp_bld_arit.h"
  42. #include "lp_bld_bitarit.h"
  43. #include "lp_bld_logic.h"
  44. #include "lp_bld_format.h"
  45.  
  46.  
  47. /**
  48.  * Convert float32 to a float-like value with less exponent and mantissa
  49.  * bits. The mantissa is still biased, and the mantissa still has an implied 1,
  50.  * and there may be a sign bit.
  51.  *
  52.  * @param src             (vector) float value to convert
  53.  * @param mantissa_bits   the number of mantissa bits
  54.  * @param exponent_bits   the number of exponent bits
  55.  * @param mantissa_start  the start position of the small float in result value
  56.  * @param has_sign        if the small float has a sign bit
  57.  *
  58.  * This implements round-towards-zero (trunc) hence too large numbers get
  59.  * converted to largest representable number, not infinity.
  60.  * Small numbers may get converted to denorms, depending on normal
  61.  * float denorm handling of the cpu.
  62.  * Note that compared to the references, below, we skip any rounding bias
  63.  * since we do rounding towards zero - OpenGL allows rounding towards zero
  64.  * (though not preferred) and DX10 even seems to require it.
  65.  * Note that this will pack mantissa, exponent and sign bit (if any) together,
  66.  * and shift the result to mantissa_start.
  67.  *
  68.  * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
  69.  * ref https://gist.github.com/rygorous/2156668
  70.  */
  71. LLVMValueRef
  72. lp_build_float_to_smallfloat(struct gallivm_state *gallivm,
  73.                              struct lp_type i32_type,
  74.                              LLVMValueRef src,
  75.                              unsigned mantissa_bits,
  76.                              unsigned exponent_bits,
  77.                              unsigned mantissa_start,
  78.                              boolean has_sign)
  79. {
  80.    LLVMBuilderRef builder = gallivm->builder;
  81.    LLVMValueRef i32_floatexpmask, i32_smallexpmask, magic, normal;
  82.    LLVMValueRef rescale_src, i32_roundmask, small_max;
  83.    LLVMValueRef i32_qnanbit, shift, res;
  84.    LLVMValueRef is_nan_or_inf, nan_or_inf, mask, i32_src;
  85.    struct lp_type f32_type = lp_type_float_vec(32, 32 * i32_type.length);
  86.    struct lp_build_context f32_bld, i32_bld;
  87.    LLVMValueRef zero = lp_build_const_vec(gallivm, f32_type, 0.0f);
  88.    unsigned exponent_start = mantissa_start + mantissa_bits;
  89.    boolean always_preserve_nans = true;
  90.    boolean maybe_correct_denorm_rounding = true;
  91.  
  92.    lp_build_context_init(&f32_bld, gallivm, f32_type);
  93.    lp_build_context_init(&i32_bld, gallivm, i32_type);
  94.  
  95.    i32_smallexpmask = lp_build_const_int_vec(gallivm, i32_type,
  96.                                              ((1 << exponent_bits) - 1) << 23);
  97.    i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
  98.  
  99.    i32_src = LLVMBuildBitCast(builder, src, i32_bld.vec_type, "");
  100.  
  101.    if (has_sign) {
  102.       rescale_src = src;
  103.    }
  104.    else {
  105.       /* clamp to pos range (can still have sign bit if NaN or negative zero) */
  106.       rescale_src = lp_build_max(&f32_bld, zero, src);
  107.    }
  108.    rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
  109.  
  110.    /* "ordinary" number */
  111.    /*
  112.     * get rid of excess mantissa bits and sign bit
  113.     * This is only really needed for correct rounding of denorms I think
  114.     * but only if we use the preserve NaN path does using
  115.     * src_abs instead save us any instruction.
  116.     */
  117.    if (maybe_correct_denorm_rounding || !always_preserve_nans) {
  118.       i32_roundmask = lp_build_const_int_vec(gallivm, i32_type,
  119.                                              ~((1 << (23 - mantissa_bits)) - 1) &
  120.                                              0x7fffffff);
  121.       rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
  122.       rescale_src = lp_build_and(&i32_bld, rescale_src, i32_roundmask);
  123.       rescale_src = LLVMBuildBitCast(builder, rescale_src, f32_bld.vec_type, "");
  124.    }
  125.    else {
  126.       rescale_src = lp_build_abs(&f32_bld, src);
  127.    }
  128.  
  129.    /* bias exponent (and denormalize if necessary) */
  130.    magic = lp_build_const_int_vec(gallivm, i32_type,
  131.                                   ((1 << (exponent_bits - 1)) - 1) << 23);
  132.    magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
  133.    normal = lp_build_mul(&f32_bld, rescale_src, magic);
  134.  
  135.    /* clamp to max value - largest non-infinity number */
  136.    small_max = lp_build_const_int_vec(gallivm, i32_type,
  137.                                       (((1 << exponent_bits) - 2) << 23) |
  138.                                       (((1 << mantissa_bits) - 1) << (23 - mantissa_bits)));
  139.    small_max = LLVMBuildBitCast(builder, small_max, f32_bld.vec_type, "");
  140.    normal = lp_build_min(&f32_bld, normal, small_max);
  141.    normal = LLVMBuildBitCast(builder, normal, i32_bld.vec_type, "");
  142.  
  143.    /*
  144.     * handle nan/inf cases
  145.     * a little bit tricky since -Inf -> 0, +Inf -> +Inf, +-Nan -> +Nan
  146.     * (for no sign) else ->Inf -> ->Inf too.
  147.     * could use explicit "unordered" comparison checking for NaNs
  148.     * which might save us from calculating src_abs too.
  149.     * (Cannot actually save the comparison since we need to distinguish
  150.     * Inf and NaN cases anyway, but it would be better for AVX.)
  151.     */
  152.    if (always_preserve_nans) {
  153.       LLVMValueRef infcheck_src, is_inf, is_nan;
  154.       LLVMValueRef src_abs = lp_build_abs(&f32_bld, src);
  155.       src_abs = LLVMBuildBitCast(builder, src_abs, i32_bld.vec_type, "");
  156.  
  157.       if (has_sign) {
  158.          infcheck_src = src_abs;
  159.       }
  160.       else {
  161.          infcheck_src = i32_src;
  162.       }
  163.       is_nan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER,
  164.                                 src_abs, i32_floatexpmask);
  165.       is_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_EQUAL,
  166.                                 infcheck_src, i32_floatexpmask);
  167.       is_nan_or_inf = lp_build_or(&i32_bld, is_nan, is_inf);
  168.       /* could also set more mantissa bits but need at least the highest mantissa bit */
  169.       i32_qnanbit = lp_build_const_vec(gallivm, i32_type, 1 << 22);
  170.       /* combine maxexp with qnanbit */
  171.       nan_or_inf = lp_build_or(&i32_bld, i32_smallexpmask,
  172.                                lp_build_and(&i32_bld, is_nan, i32_qnanbit));
  173.    }
  174.    else {
  175.       /*
  176.        * A couple simplifications, with mostly 2 drawbacks (so disabled):
  177.        * - it will promote some SNaNs (those which only had bits set
  178.        * in the mantissa part which got chopped off) to +-Infinity.
  179.        * (Those bits get chopped off anyway later so can as well use
  180.        * rescale_src instead of src_abs here saving the calculation of that.)
  181.        * - for no sign case, it relies on the max() being used for rescale_src
  182.        * to give back the NaN (which is NOT ieee754r behavior, but should work
  183.        * with sse2 on a full moon (rather if I got the operand order right) -
  184.        * we _don't_ have well-defined behavior specified with min/max wrt NaNs,
  185.        * however, and if it gets converted to cmp/select it may not work (we
  186.        * don't really have specified behavior for cmp wrt NaNs neither).
  187.        */
  188.       rescale_src = LLVMBuildBitCast(builder, rescale_src, i32_bld.vec_type, "");
  189.       is_nan_or_inf = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GEQUAL,
  190.                                        rescale_src, i32_floatexpmask);
  191.       /* note this will introduce excess exponent bits */
  192.       nan_or_inf = rescale_src;
  193.    }
  194.    res = lp_build_select(&i32_bld, is_nan_or_inf, nan_or_inf, normal);
  195.  
  196.    if (mantissa_start > 0 || !always_preserve_nans) {
  197.       /* mask off excess bits */
  198.       unsigned maskbits = (1 << (mantissa_bits + exponent_bits)) - 1;
  199.       mask = lp_build_const_int_vec(gallivm, i32_type,
  200.                                     maskbits << (23 - mantissa_bits));
  201.       res = lp_build_and(&i32_bld, res, mask);
  202.    }
  203.  
  204.    /* add back sign bit at right position */
  205.    if (has_sign) {
  206.       LLVMValueRef sign;
  207.       struct lp_type u32_type = lp_type_uint_vec(32, 32 * i32_type.length);
  208.       struct lp_build_context u32_bld;
  209.       lp_build_context_init(&u32_bld, gallivm, u32_type);
  210.  
  211.       mask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000);
  212.       shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits);
  213.       sign = lp_build_and(&i32_bld, mask, i32_src);
  214.       sign = lp_build_shr(&u32_bld, sign, shift);
  215.       res = lp_build_or(&i32_bld, sign, res);
  216.    }
  217.  
  218.    /* shift to final position */
  219.    if (exponent_start < 23) {
  220.       shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start);
  221.       res = lp_build_shr(&i32_bld, res, shift);
  222.    }
  223.    else {
  224.       shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23);
  225.       res = lp_build_shl(&i32_bld, res, shift);
  226.    }
  227.    return res;
  228. }
  229.  
  230.  
  231. /**
  232.  * Convert rgba float SoA values to packed r11g11b10 values.
  233.  *
  234.  * @param src   SoA float (vector) values to convert.
  235.  */
  236. LLVMValueRef
  237. lp_build_float_to_r11g11b10(struct gallivm_state *gallivm,
  238.                             LLVMValueRef *src)
  239. {
  240.    LLVMValueRef dst, rcomp, bcomp, gcomp;
  241.    struct lp_build_context i32_bld;
  242.    LLVMTypeRef src_type = LLVMTypeOf(*src);
  243.    unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
  244.                             LLVMGetVectorSize(src_type) : 1;
  245.    struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
  246.  
  247.    lp_build_context_init(&i32_bld, gallivm, i32_type);
  248.  
  249.    /* "rescale" and put in right position */
  250.    rcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[0], 6, 5, 0, false);
  251.    gcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[1], 6, 5, 11, false);
  252.    bcomp = lp_build_float_to_smallfloat(gallivm, i32_type, src[2], 5, 5, 22, false);
  253.  
  254.    /* combine the values */
  255.    dst = lp_build_or(&i32_bld, rcomp, gcomp);
  256.    return lp_build_or(&i32_bld, dst, bcomp);
  257. }
  258.  
  259.  
  260. /**
  261.  * Convert a float-like value with less exponent and mantissa
  262.  * bits than a normal float32 to a float32. The mantissa of
  263.  * the source value is assumed to have an implied 1, and the exponent
  264.  * is biased. There may be a sign bit.
  265.  * The source value to extract must be in a 32bit int (bits not part of
  266.  * the value to convert will be masked off).
  267.  * This works for things like 11-bit floats or half-floats,
  268.  * mantissa, exponent (and sign if present) must be packed
  269.  * the same as they are in a ordinary float.
  270.  *
  271.  * @param src             (vector) value to convert
  272.  * @param mantissa_bits   the number of mantissa bits
  273.  * @param exponent_bits   the number of exponent bits
  274.  * @param mantissa_start  the bit start position of the packed component
  275.  * @param has_sign        if the small float has a sign bit
  276.  *
  277.  * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
  278.  * ref https://gist.github.com/rygorous/2156668
  279.  */
  280. LLVMValueRef
  281. lp_build_smallfloat_to_float(struct gallivm_state *gallivm,
  282.                              struct lp_type f32_type,
  283.                              LLVMValueRef src,
  284.                              unsigned mantissa_bits,
  285.                              unsigned exponent_bits,
  286.                              unsigned mantissa_start,
  287.                              boolean has_sign)
  288. {
  289.    LLVMBuilderRef builder = gallivm->builder;
  290.    LLVMValueRef smallexpmask, i32_floatexpmask, magic;
  291.    LLVMValueRef wasinfnan, tmp, res, shift, maskabs, srcabs, sign;
  292.    unsigned exponent_start = mantissa_start + mantissa_bits;
  293.    struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length);
  294.    struct lp_build_context f32_bld, i32_bld;
  295.  
  296.    lp_build_context_init(&f32_bld, gallivm, f32_type);
  297.    lp_build_context_init(&i32_bld, gallivm, i32_type);
  298.  
  299.    /* extract the component to "float position" */
  300.    if (exponent_start < 23) {
  301.       shift = lp_build_const_int_vec(gallivm, i32_type, 23 - exponent_start);
  302.       src = lp_build_shl(&i32_bld, src, shift);
  303.    }
  304.    else {
  305.       shift = lp_build_const_int_vec(gallivm, i32_type, exponent_start - 23);
  306.       src = lp_build_shr(&i32_bld, src, shift);
  307.    }
  308.    maskabs = lp_build_const_int_vec(gallivm, i32_type,
  309.                                     ((1 << (mantissa_bits + exponent_bits)) - 1)
  310.                                     << (23 - mantissa_bits));
  311.    srcabs = lp_build_and(&i32_bld, src, maskabs);
  312.  
  313.    /* now do the actual scaling */
  314.    smallexpmask = lp_build_const_int_vec(gallivm, i32_type,
  315.                                          ((1 << exponent_bits) - 1) << 23);
  316.    i32_floatexpmask = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
  317.  
  318.    if (0) {
  319.      /*
  320.       * Note that this code path, while simpler, will convert small
  321.       * float denorms to floats according to current cpu denorm mode, if
  322.       * denorms are disabled it will flush them to zero!
  323.       * If cpu denorms are enabled, it should be faster though as long as
  324.       * there's no denorms in the inputs, but if there are actually denorms
  325.       * it's likely to be an order of magnitude slower (on x86 cpus).
  326.       */
  327.  
  328.       srcabs = LLVMBuildBitCast(builder, srcabs, f32_bld.vec_type, "");
  329.  
  330.       /*
  331.        * magic number has exponent new exp bias + (new exp bias - old exp bias),
  332.        * mantissa is 0.
  333.        */
  334.       magic = lp_build_const_int_vec(gallivm, i32_type,
  335.                                      (255 - (1 << (exponent_bits - 1))) << 23);
  336.       magic = LLVMBuildBitCast(builder, magic, f32_bld.vec_type, "");
  337.  
  338.       /* adjust exponent and fix denorms */
  339.       res = lp_build_mul(&f32_bld, srcabs, magic);
  340.  
  341.       /*
  342.        * if exp was max (== NaN or Inf) set new exp to max (keep mantissa),
  343.        * so a simple "or" will do (because exp adjust will leave mantissa intact)
  344.        */
  345.       /* use float compare (better for AVX 8-wide / no AVX2 but else should use int) */
  346.       smallexpmask = LLVMBuildBitCast(builder, smallexpmask, f32_bld.vec_type, "");
  347.       wasinfnan = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GEQUAL, srcabs, smallexpmask);
  348.       res = LLVMBuildBitCast(builder, res, i32_bld.vec_type, "");
  349.       tmp = lp_build_and(&i32_bld, i32_floatexpmask, wasinfnan);
  350.       res = lp_build_or(&i32_bld, tmp, res);
  351.    }
  352.  
  353.    else {
  354.       LLVMValueRef exp_one, isdenorm, denorm, normal, exp_adj;
  355.  
  356.       /* denorm (or zero) if exponent is zero */
  357.       exp_one = lp_build_const_int_vec(gallivm, i32_type, 1 << 23);
  358.       isdenorm = lp_build_cmp(&i32_bld, PIPE_FUNC_LESS, srcabs, exp_one);
  359.  
  360.       /* inf or nan if exponent is max */
  361.       wasinfnan = lp_build_cmp(&i32_bld, PIPE_FUNC_GEQUAL, srcabs, smallexpmask);
  362.  
  363.       /* for denormal (or zero), add (== or) magic exp to mantissa (== srcabs) (as int)
  364.        * then subtract it (as float).
  365.        * Another option would be to just do inttofp then do a rescale mul.
  366.        */
  367.       magic = lp_build_const_int_vec(gallivm, i32_type,
  368.                                      (127 - ((1 << (exponent_bits - 1)) - 2)) << 23);
  369.       denorm = lp_build_or(&i32_bld, srcabs, magic);
  370.       denorm = LLVMBuildBitCast(builder, denorm, f32_bld.vec_type, "");
  371.       denorm = lp_build_sub(&f32_bld, denorm,
  372.                             LLVMBuildBitCast(builder, magic, f32_bld.vec_type, ""));
  373.       denorm = LLVMBuildBitCast(builder, denorm, i32_bld.vec_type, "");
  374.  
  375.       /* for normals, Infs, Nans fix up exponent */
  376.       exp_adj = lp_build_const_int_vec(gallivm, i32_type,
  377.                                       (127 - ((1 << (exponent_bits - 1)) - 1)) << 23);
  378.       normal = lp_build_add(&i32_bld, srcabs, exp_adj);
  379.       tmp = lp_build_and(&i32_bld, wasinfnan, i32_floatexpmask);
  380.       normal = lp_build_or(&i32_bld, tmp, normal);
  381.  
  382.       res = lp_build_select(&i32_bld, isdenorm, denorm, normal);
  383.    }
  384.  
  385.    if (has_sign) {
  386.       LLVMValueRef signmask = lp_build_const_int_vec(gallivm, i32_type, 0x80000000);
  387.       shift = lp_build_const_int_vec(gallivm, i32_type, 8 - exponent_bits);
  388.       sign = lp_build_shl(&i32_bld, src, shift);
  389.       sign = lp_build_and(&i32_bld, signmask, sign);
  390.       res = lp_build_or(&i32_bld, res, sign);
  391.    }
  392.  
  393.    return LLVMBuildBitCast(builder, res, f32_bld.vec_type, "");
  394. }
  395.  
  396.  
  397. /**
  398.  * Convert packed float format (r11g11b10) value(s) to rgba float SoA values.
  399.  *
  400.  * @param src   packed AoS r11g11b10 values (as (vector) int32)
  401.  * @param dst   pointer to the SoA result values
  402.  */
  403. void
  404. lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
  405.                             LLVMValueRef src,
  406.                             LLVMValueRef *dst)
  407. {
  408.    LLVMTypeRef src_type = LLVMTypeOf(src);
  409.    unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
  410.                             LLVMGetVectorSize(src_type) : 1;
  411.    struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
  412.  
  413.    dst[0] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 6, 5, 0, false);
  414.    dst[1] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 6, 5, 11, false);
  415.    dst[2] = lp_build_smallfloat_to_float(gallivm, f32_type, src, 5, 5, 22, false);
  416.  
  417.    /* Just set alpha to one */
  418.    dst[3] = lp_build_one(gallivm, f32_type);
  419. }
  420.  
  421.  
  422. static LLVMValueRef
  423. lp_build_rgb9_to_float_helper(struct gallivm_state *gallivm,
  424.                               struct lp_type f32_type,
  425.                               LLVMValueRef src,
  426.                               LLVMValueRef scale,
  427.                               unsigned mantissa_start)
  428. {
  429.    LLVMValueRef shift, mask;
  430.  
  431.    struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length);
  432.    struct lp_build_context i32_bld, f32_bld;
  433.  
  434.    lp_build_context_init(&i32_bld, gallivm, i32_type);
  435.    lp_build_context_init(&f32_bld, gallivm, f32_type);
  436.  
  437.    /*
  438.     * This is much easier as other weirdo float formats, since
  439.     * there's no sign, no Inf/NaN, and there's nothing special
  440.     * required for normals/denormals neither (as without the implied one
  441.     * for the mantissa for other formats, everything looks like a denormal).
  442.     * So just do (float)comp_bits * scale
  443.     */
  444.    shift = lp_build_const_int_vec(gallivm, i32_type, mantissa_start);
  445.    mask = lp_build_const_int_vec(gallivm, i32_type, 0x1ff);
  446.    src = lp_build_shr(&i32_bld, src, shift);
  447.    src = lp_build_and(&i32_bld, src, mask);
  448.    src = lp_build_int_to_float(&f32_bld, src);
  449.    return lp_build_mul(&f32_bld, src, scale);
  450. }
  451.  
  452.  
  453. /**
  454.  * Convert shared exponent format (rgb9e5) value(s) to rgba float SoA values.
  455.  *
  456.  * @param src   packed AoS rgb9e5 values (as (vector) int32)
  457.  * @param dst   pointer to the SoA result values
  458.  */
  459. void
  460. lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
  461.                          LLVMValueRef src,
  462.                          LLVMValueRef *dst)
  463. {
  464.    LLVMBuilderRef builder = gallivm->builder;
  465.    LLVMTypeRef src_type = LLVMTypeOf(src);
  466.    LLVMValueRef shift, scale, bias, exp;
  467.    unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
  468.                             LLVMGetVectorSize(src_type) : 1;
  469.    struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
  470.    struct lp_type u32_type = lp_type_uint_vec(32, 32 * src_length);
  471.    struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
  472.    struct lp_build_context i32_bld, u32_bld, f32_bld;
  473.  
  474.    lp_build_context_init(&i32_bld, gallivm, i32_type);
  475.    lp_build_context_init(&u32_bld, gallivm, u32_type);
  476.    lp_build_context_init(&f32_bld, gallivm, f32_type);
  477.  
  478.    /* extract exponent */
  479.    shift = lp_build_const_int_vec(gallivm, i32_type, 27);
  480.    /* this shift needs to be unsigned otherwise need mask */
  481.    exp = lp_build_shr(&u32_bld, src, shift);
  482.  
  483.    /*
  484.     * scale factor is 2 ^ (exp - bias)
  485.     * (and additionally corrected here for the mantissa bits)
  486.     * not using shift because
  487.     * a) don't have vector shift in a lot of cases
  488.     * b) shift direction changes hence need 2 shifts + conditional
  489.     *    (or rotate instruction which is even more rare (for instance XOP))
  490.     * so use whacky float 2 ^ function instead manipulating exponent
  491.     * (saves us the float conversion at the end too)
  492.     */
  493.    bias = lp_build_const_int_vec(gallivm, i32_type, 127 - (15 + 9));
  494.    scale = lp_build_add(&i32_bld, exp, bias);
  495.    shift = lp_build_const_int_vec(gallivm, i32_type, 23);
  496.    scale = lp_build_shl(&i32_bld, scale, shift);
  497.    scale = LLVMBuildBitCast(builder, scale, f32_bld.vec_type, "");
  498.  
  499.    dst[0] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 0);
  500.    dst[1] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 9);
  501.    dst[2] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 18);
  502.  
  503.    /* Just set alpha to one */
  504.    dst[3] = f32_bld.one;
  505. }
  506.