Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2013 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28.  
  29. /**
  30.  * @file
  31.  * Format conversion code for srgb formats.
  32.  *
  33.  * Functions for converting from srgb to linear and vice versa.
  34.  * From http://www.opengl.org/registry/specs/EXT/texture_sRGB.txt:
  35.  *
  36.  * srgb->linear:
  37.  * cl = cs / 12.92,                 cs <= 0.04045
  38.  * cl = ((cs + 0.055)/1.055)^2.4,   cs >  0.04045
  39.  *
  40.  * linear->srgb:
  41.  * if (isnan(cl)) {
  42.  *    Map IEEE-754 Not-a-number to zero.
  43.  *    cs = 0.0;
  44.  * } else if (cl > 1.0) {
  45.  *    cs = 1.0;
  46.  * } else if (cl < 0.0) {
  47.  *    cs = 0.0;
  48.  * } else if (cl < 0.0031308) {
  49.  *    cs = 12.92 * cl;
  50.  * } else {
  51.  *    cs = 1.055 * pow(cl, 0.41666) - 0.055;
  52.  * }
  53.  *
  54.  * This does not need to be accurate, however at least for d3d10
  55.  * (http://msdn.microsoft.com/en-us/library/windows/desktop/dd607323%28v=vs.85%29.aspx):
  56.  * 1) For srgb->linear, it is required that the error on the srgb side is
  57.  *    not larger than 0.5f, which I interpret that if you map the value back
  58.  *    to srgb from linear using the ideal conversion, it would not be off by
  59.  *    more than 0.5f (that is, it would map to the same 8-bit integer value
  60.  *    as it was before conversion to linear).
  61.  * 2) linear->srgb is permitted an error of 0.6f, which luckily looks like
  62.  *    quite a large tolerance.
  63.  * 3) Additionally, all srgb values converted to linear and back must result
  64.  *    in the same value as they were originally.
  65.  *
  66.  * @author Roland Scheidegger <sroland@vmware.com>
  67.  */
  68.  
  69.  
  70. #include "util/u_debug.h"
  71.  
  72. #include "lp_bld_type.h"
  73. #include "lp_bld_const.h"
  74. #include "lp_bld_arit.h"
  75. #include "lp_bld_bitarit.h"
  76. #include "lp_bld_logic.h"
  77. #include "lp_bld_format.h"
  78.  
  79.  
  80.  
  81. /**
  82.  * Convert srgb int values to linear float values.
  83.  * Several possibilities how to do this, e.g.
  84.  * - table
  85.  * - doing the pow() with int-to-float and float-to-int tricks
  86.  *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
  87.  * - just using standard polynomial approximation
  88.  *   (3rd order polynomial is required for crappy but just sufficient accuracy)
  89.  *
  90.  * @param src   integer (vector) value(s) to convert
  91.  *              (chan_bits bit values unpacked to 32 bit already).
  92.  */
  93. LLVMValueRef
  94. lp_build_srgb_to_linear(struct gallivm_state *gallivm,
  95.                         struct lp_type src_type,
  96.                         unsigned chan_bits,
  97.                         LLVMValueRef src)
  98. {
  99.    struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
  100.    struct lp_build_context f32_bld;
  101.    LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
  102.    double coeffs[4] = {0.0023f,
  103.                        0.0030f / 255.0f,
  104.                        0.6935f / (255.0f * 255.0f),
  105.                        0.3012f / (255.0f * 255.0f * 255.0f)
  106.    };
  107.  
  108.    assert(src_type.width == 32);
  109.    /* Technically this would work with more bits too but would be inaccurate. */
  110.    assert(chan_bits <= 8);
  111.  
  112.    lp_build_context_init(&f32_bld, gallivm, f32_type);
  113.  
  114.    /*
  115.     * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
  116.     * ( poly =  0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023)
  117.     * (found with octave polyfit and some magic as I couldn't get the error
  118.     * function right). Using the above mentioned error function, the values stay
  119.     * within +-0.35, except for the lowest values - hence tweaking linear segment
  120.     * to cover the first 16 instead of the first 11 values (the error stays
  121.     * just about acceptable there too).
  122.     * Hence: lin = src > 15 ? poly : src / 12.6
  123.     * This function really only makes sense for vectors, should use LUT otherwise.
  124.     * All in all (including float conversion) 11 instructions (with sse4.1),
  125.     * 6 constants (polynomial could be done with 1 instruction less at the cost
  126.     * of slightly worse dependency chain, fma should also help).
  127.     */
  128.    /* doing the 1/255 mul as part of the approximation */
  129.    srcf = lp_build_int_to_float(&f32_bld, src);
  130.    if (chan_bits != 8) {
  131.       /* could adjust all the constants instead */
  132.       LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type,
  133.                                                       255.0f / ((1 << chan_bits) - 1));
  134.       srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
  135.    }
  136.    lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
  137.    part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
  138.  
  139.    part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);
  140.  
  141.    lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
  142.    is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh);
  143.    return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
  144. }
  145.  
  146.  
  147. /**
  148.  * Convert linear float values to srgb int values.
  149.  * Several possibilities how to do this, e.g.
  150.  * - use table (based on exponent/highest order mantissa bits) and do
  151.  *   linear interpolation (https://gist.github.com/rygorous/2203834)
  152.  * - Chebyshev polynomial
  153.  * - Approximation using reciprocals
  154.  * - using int-to-float and float-to-int tricks for pow()
  155.  *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
  156.  *
  157.  * @param src   float (vector) value(s) to convert.
  158.  */
  159. static LLVMValueRef
  160. lp_build_linear_to_srgb(struct gallivm_state *gallivm,
  161.                         struct lp_type src_type,
  162.                         unsigned chan_bits,
  163.                         LLVMValueRef src)
  164. {
  165.    LLVMBuilderRef builder = gallivm->builder;
  166.    struct lp_build_context f32_bld;
  167.    LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final;
  168.  
  169.    lp_build_context_init(&f32_bld, gallivm, src_type);
  170.  
  171.    src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one);
  172.  
  173.    if (0) {
  174.       /*
  175.        * using int-to-float and float-to-int trick for pow().
  176.        * This is much more accurate than necessary thanks to the correction,
  177.        * but it most certainly makes no sense without rsqrt available.
  178.        * Bonus points if you understand how this works...
  179.        * All in all (including min/max clamp, conversion) 19 instructions.
  180.        */
  181.  
  182.       float exp_f = 2.0f / 3.0f;
  183.       /* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */
  184.       float exp2f_c = 1.30438178253e+19f;
  185.       float coeff_f = 0.62996f;
  186.       LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2;
  187.       struct lp_type int_type = lp_int_type(src_type);
  188.  
  189.       /*
  190.        * First calculate approx x^8/12
  191.        */
  192.       exponent = lp_build_const_vec(gallivm, src_type, exp_f);
  193.       coeff = lp_build_const_vec(gallivm, src_type,
  194.                                  exp2f_c * powf(coeff_f, 1.0f / exp_f));
  195.  
  196.       /* premultiply src */
  197.       tmp = lp_build_mul(&f32_bld, coeff, src);
  198.       /* "log2" */
  199.       tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), "");
  200.       tmp = lp_build_int_to_float(&f32_bld, tmp);
  201.       /* multiply for pow */
  202.       tmp = lp_build_mul(&f32_bld, tmp, exponent);
  203.       /* "exp2" */
  204.       pow_approx = lp_build_itrunc(&f32_bld, tmp);
  205.       pow_approx = LLVMBuildBitCast(builder, pow_approx,
  206.                                     lp_build_vec_type(gallivm, src_type), "");
  207.  
  208.       /*
  209.        * Since that pow was inaccurate (like 3 bits, though each sqrt step would
  210.        * give another bit), compensate the error (which is why we chose another
  211.        * exponent in the first place).
  212.        */
  213.       /* x * x^(8/12) = x^(20/12) */
  214.       pow_1 = lp_build_mul(&f32_bld, pow_approx, src);
  215.  
  216.       /* x * x * x^(-4/12) = x^(20/12) */
  217.       /* Should avoid using rsqrt if it's not available, but
  218.        * using x * x^(4/12) * x^(4/12) instead will change error weight */
  219.       tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx);
  220.       x2 = lp_build_mul(&f32_bld, src, src);
  221.       pow_2 = lp_build_mul(&f32_bld, x2, tmp);
  222.  
  223.       /* average the values so the errors cancel out, compensate bias,
  224.        * we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul
  225.        * for conversion to int in here */
  226.       tmp = lp_build_add(&f32_bld, pow_1, pow_2);
  227.       coeff = lp_build_const_vec(gallivm, src_type,
  228.                                  1.0f / (3.0f * coeff_f) * 0.999852f *
  229.                                  powf(1.055f * 255.0f, 4.0f));
  230.       pow_final = lp_build_mul(&f32_bld, tmp, coeff);
  231.  
  232.       /* x^(5/12) = rsqrt(rsqrt(x^20/12)) */
  233.       if (lp_build_fast_rsqrt_available(src_type)) {
  234.          pow_final = lp_build_fast_rsqrt(&f32_bld,
  235.                         lp_build_fast_rsqrt(&f32_bld, pow_final));
  236.       }
  237.       else {
  238.          pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final));
  239.       }
  240.       pow_final = lp_build_add(&f32_bld, pow_final,
  241.                                lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f));
  242.    }
  243.  
  244.    else {
  245.       /*
  246.        * using "rational polynomial" approximation here.
  247.        * Essentially y = a*x^0.375 + b*x^0.5 + c, with also
  248.        * factoring in the 255.0 mul and the scaling mul.
  249.        * (a is closer to actual value so has higher weight than b.)
  250.        * Note: the constants are magic values. They were found empirically,
  251.        * possibly could be improved but good enough (be VERY careful with
  252.        * error metric if you'd want to tweak them, they also MUST fit with
  253.        * the crappy polynomial above for srgb->linear since it is required
  254.        * that each srgb value maps back to the same value).
  255.        * This function has an error of max +-0.17. Not sure this is actually
  256.        * enough, we require +-0.6 but that may include the +-0.5 from integer
  257.        * conversion. Seems to pass all relevant tests though...
  258.        * For the approximated srgb->linear values the error is naturally larger
  259.        * (+-0.42) but still accurate enough (required +-0.5 essentially).
  260.        * All in all (including min/max clamp, conversion) 15 instructions.
  261.        * FMA would help (minus 2 instructions).
  262.        */
  263.  
  264.       LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2;
  265.  
  266.       if (lp_build_fast_rsqrt_available(src_type)) {
  267.          tmp = lp_build_fast_rsqrt(&f32_bld, src);
  268.          x05 = lp_build_mul(&f32_bld, src, tmp);
  269.       }
  270.       else {
  271.          /*
  272.           * I don't really expect this to be practical without rsqrt
  273.           * but there's no reason for triple punishment so at least
  274.           * save the otherwise resulting division and unnecessary mul...
  275.           */
  276.          x05 = lp_build_sqrt(&f32_bld, src);
  277.       }
  278.  
  279.       tmp = lp_build_mul(&f32_bld, x05, src);
  280.       if (lp_build_fast_rsqrt_available(src_type)) {
  281.          x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp));
  282.       }
  283.       else {
  284.          x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp));
  285.       }
  286.  
  287.       a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f);
  288.       b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f);
  289.       c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);
  290.  
  291.       tmp = lp_build_mul(&f32_bld, a_const, x0375);
  292.       tmp2 = lp_build_mul(&f32_bld, b_const, x05);
  293.       tmp2 = lp_build_add(&f32_bld, tmp2, c_const);
  294.       pow_final = lp_build_add(&f32_bld, tmp, tmp2);
  295.    }
  296.  
  297.    /* linear part is easy */
  298.    lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f);
  299.    lin = lp_build_mul(&f32_bld, src, lin_const);
  300.  
  301.    lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f);
  302.    is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
  303.    tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
  304.  
  305.    if (chan_bits != 8) {
  306.       /* could adjust all the constants instead */
  307.       LLVMValueRef rescale_const = lp_build_const_vec(gallivm, src_type,
  308.                                                       ((1 << chan_bits) - 1) / 255.0f);
  309.       tmp = lp_build_mul(&f32_bld, tmp, rescale_const);
  310.    }
  311.  
  312.    f32_bld.type.sign = 0;
  313.    return lp_build_iround(&f32_bld, tmp);
  314. }
  315.  
  316.  
  317. /**
  318.  * Convert linear float soa values to packed srgb AoS values.
  319.  * This only handles packed formats which are 4x8bit in size
  320.  * (rgba and rgbx plus swizzles), and 16bit 565-style formats
  321.  * with no alpha. (In the latter case the return values won't be
  322.  * fully packed, it will look like r5g6b5x16r5g6b5x16...)
  323.  *
  324.  * @param src   float SoA (vector) values to convert.
  325.  */
  326. LLVMValueRef
  327. lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
  328.                               const struct util_format_description *dst_fmt,
  329.                               struct lp_type src_type,
  330.                               LLVMValueRef *src)
  331. {
  332.    LLVMBuilderRef builder = gallivm->builder;
  333.    unsigned chan;
  334.    struct lp_build_context f32_bld;
  335.    struct lp_type int32_type = lp_int_type(src_type);
  336.    LLVMValueRef tmpsrgb[4], alpha, dst;
  337.  
  338.    lp_build_context_init(&f32_bld, gallivm, src_type);
  339.  
  340.    /* rgb is subject to linear->srgb conversion, alpha is not */
  341.    for (chan = 0; chan < 3; chan++) {
  342.       unsigned chan_bits = dst_fmt->channel[dst_fmt->swizzle[chan]].size;
  343.       tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, chan_bits, src[chan]);
  344.    }
  345.    /*
  346.     * can't use lp_build_conv since we want to keep values as 32bit
  347.     * here so we can interleave with rgb to go from SoA->AoS.
  348.     */
  349.    alpha = lp_build_clamp_zero_one_nanzero(&f32_bld, src[3]);
  350.    alpha = lp_build_mul(&f32_bld, alpha,
  351.                         lp_build_const_vec(gallivm, src_type, 255.0f));
  352.    tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
  353.  
  354.    dst = lp_build_zero(gallivm, int32_type);
  355.    for (chan = 0; chan < dst_fmt->nr_channels; chan++) {
  356.       if (dst_fmt->swizzle[chan] <= UTIL_FORMAT_SWIZZLE_W) {
  357.          unsigned ls;
  358.          LLVMValueRef shifted, shift_val;
  359.          ls = dst_fmt->channel[dst_fmt->swizzle[chan]].shift;
  360.          shift_val = lp_build_const_int_vec(gallivm, int32_type, ls);
  361.          shifted = LLVMBuildShl(builder, tmpsrgb[chan], shift_val, "");
  362.          dst = LLVMBuildOr(builder, dst, shifted, "");
  363.       }
  364.    }
  365.    return dst;
  366. }
  367.