Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5563 serge 1
/**************************************************************************
2
 *
3
 * Copyright 2013 VMware, Inc.
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sub license, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 **************************************************************************/
27
 
28
 
29
/**
30
 * @file
31
 * Format conversion code for srgb formats.
32
 *
33
 * Functions for converting from srgb to linear and vice versa.
34
 * From http://www.opengl.org/registry/specs/EXT/texture_sRGB.txt:
35
 *
36
 * srgb->linear:
37
 * cl = cs / 12.92,                 cs <= 0.04045
38
 * cl = ((cs + 0.055)/1.055)^2.4,   cs >  0.04045
39
 *
40
 * linear->srgb:
41
 * if (isnan(cl)) {
42
 *    Map IEEE-754 Not-a-number to zero.
43
 *    cs = 0.0;
44
 * } else if (cl > 1.0) {
45
 *    cs = 1.0;
46
 * } else if (cl < 0.0) {
47
 *    cs = 0.0;
48
 * } else if (cl < 0.0031308) {
49
 *    cs = 12.92 * cl;
50
 * } else {
51
 *    cs = 1.055 * pow(cl, 0.41666) - 0.055;
52
 * }
53
 *
54
 * This does not need to be accurate, however at least for d3d10
55
 * (http://msdn.microsoft.com/en-us/library/windows/desktop/dd607323%28v=vs.85%29.aspx):
56
 * 1) For srgb->linear, it is required that the error on the srgb side is
57
 *    not larger than 0.5f, which I interpret that if you map the value back
58
 *    to srgb from linear using the ideal conversion, it would not be off by
59
 *    more than 0.5f (that is, it would map to the same 8-bit integer value
60
 *    as it was before conversion to linear).
61
 * 2) linear->srgb is permitted an error of 0.6f, which luckily is quite a
62
 *    generous tolerance.
63
 * 3) Additionally, all srgb values converted to linear and back must result
64
 *    in the same value as they were originally.
65
 *
66
 * @author Roland Scheidegger 
67
 */
68
 
69
 
70
#include "util/u_debug.h"
71
 
72
#include "lp_bld_type.h"
73
#include "lp_bld_const.h"
74
#include "lp_bld_arit.h"
75
#include "lp_bld_bitarit.h"
76
#include "lp_bld_logic.h"
77
#include "lp_bld_format.h"
78
 
79
 
80
 
81
/**
82
 * Convert srgb int values to linear float values.
83
 * Several possibilities how to do this, e.g.
84
 * - table
85
 * - doing the pow() with int-to-float and float-to-int tricks
86
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
87
 * - just using standard polynomial approximation
88
 *   (3rd order polynomial is required for crappy but just sufficient accuracy)
89
 *
90
 * @param src   integer (vector) value(s) to convert
91
 *              (8 bit values unpacked to 32 bit already).
92
 */
93
LLVMValueRef
94
lp_build_srgb_to_linear(struct gallivm_state *gallivm,
95
                        struct lp_type src_type,
96
                        LLVMValueRef src)
97
{
98
   struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
99
   struct lp_build_context f32_bld;
100
   LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
101
   double coeffs[4] = {0.0023f,
102
                       0.0030f / 255.0f,
103
                       0.6935f / (255.0f * 255.0f),
104
                       0.3012f / (255.0f * 255.0f * 255.0f)
105
   };
106
 
107
   assert(src_type.width == 32);
108
 
109
   lp_build_context_init(&f32_bld, gallivm, f32_type);
110
 
111
   /*
112
    * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
113
    * ( poly =  0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023)
114
    * (found with octave polyfit and some magic as I couldn't get the error
115
    * function right). Using the above mentioned error function, the values stay
116
    * within +-0.35, except for the lowest values - hence tweaking linear segment
117
    * to cover the first 16 instead of the first 11 values (the error stays
118
    * just about acceptable there too).
119
    * Hence: lin = src > 15 ? poly : src / 12.6
120
    * This function really only makes sense for vectors, should use LUT otherwise.
121
    * All in all (including float conversion) 11 instructions (with sse4.1),
122
    * 6 constants (polynomial could be done with 1 instruction less at the cost
123
    * of slightly worse dependency chain, fma should also help).
124
    */
125
   /* doing the 1/255 mul as part of the approximation */
126
   srcf = lp_build_int_to_float(&f32_bld, src);
127
   lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
128
   part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
129
 
130
   part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);
131
 
132
   lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
133
   is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh);
134
   return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
135
}
136
 
137
 
138
/**
139
 * Convert linear float values to srgb int values.
140
 * Several possibilities how to do this, e.g.
141
 * - use table (based on exponent/highest order mantissa bits) and do
142
 *   linear interpolation (https://gist.github.com/rygorous/2203834)
143
 * - Chebyshev polynomial
144
 * - Approximation using reciprocals
145
 * - using int-to-float and float-to-int tricks for pow()
146
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
147
 *
148
 * @param src   float (vector) value(s) to convert.
149
 */
150
static LLVMValueRef
151
lp_build_linear_to_srgb(struct gallivm_state *gallivm,
152
                        struct lp_type src_type,
153
                        LLVMValueRef src)
154
{
155
   LLVMBuilderRef builder = gallivm->builder;
156
   struct lp_build_context f32_bld;
157
   LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final;
158
 
159
   lp_build_context_init(&f32_bld, gallivm, src_type);
160
 
161
   src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one);
162
 
163
   if (0) {
164
      /*
165
       * using int-to-float and float-to-int trick for pow().
166
       * This is much more accurate than necessary thanks to the correction,
167
       * but it most certainly makes no sense without rsqrt available.
168
       * Bonus points if you understand how this works...
169
       * All in all (including min/max clamp, conversion) 19 instructions.
170
       */
171
 
172
      float exp_f = 2.0f / 3.0f;
173
      /* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */
174
      float exp2f_c = 1.30438178253e+19f;
175
      float coeff_f = 0.62996f;
176
      LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2;
177
      struct lp_type int_type = lp_int_type(src_type);
178
 
179
      /*
180
       * First calculate approx x^8/12
181
       */
182
      exponent = lp_build_const_vec(gallivm, src_type, exp_f);
183
      coeff = lp_build_const_vec(gallivm, src_type,
184
                                 exp2f_c * powf(coeff_f, 1.0f / exp_f));
185
 
186
      /* premultiply src */
187
      tmp = lp_build_mul(&f32_bld, coeff, src);
188
      /* "log2" */
189
      tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), "");
190
      tmp = lp_build_int_to_float(&f32_bld, tmp);
191
      /* multiply for pow */
192
      tmp = lp_build_mul(&f32_bld, tmp, exponent);
193
      /* "exp2" */
194
      pow_approx = lp_build_itrunc(&f32_bld, tmp);
195
      pow_approx = LLVMBuildBitCast(builder, pow_approx,
196
                                    lp_build_vec_type(gallivm, src_type), "");
197
 
198
      /*
199
       * Since that pow was inaccurate (like 3 bits, though each sqrt step would
200
       * give another bit), compensate the error (which is why we chose another
201
       * exponent in the first place).
202
       */
203
      /* x * x^(8/12) = x^(20/12) */
204
      pow_1 = lp_build_mul(&f32_bld, pow_approx, src);
205
 
206
      /* x * x * x^(-4/12) = x^(20/12) */
207
      /* Should avoid using rsqrt if it's not available, but
208
       * using x * x^(4/12) * x^(4/12) instead will change error weight */
209
      tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx);
210
      x2 = lp_build_mul(&f32_bld, src, src);
211
      pow_2 = lp_build_mul(&f32_bld, x2, tmp);
212
 
213
      /* average the values so the errors cancel out, compensate bias,
214
       * we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul
215
       * for conversion to int in here */
216
      tmp = lp_build_add(&f32_bld, pow_1, pow_2);
217
      coeff = lp_build_const_vec(gallivm, src_type,
218
                                 1.0f / (3.0f * coeff_f) * 0.999852f *
219
                                 powf(1.055f * 255.0f, 4.0f));
220
      pow_final = lp_build_mul(&f32_bld, tmp, coeff);
221
 
222
      /* x^(5/12) = rsqrt(rsqrt(x^20/12)) */
223
      if (lp_build_fast_rsqrt_available(src_type)) {
224
         pow_final = lp_build_fast_rsqrt(&f32_bld,
225
                        lp_build_fast_rsqrt(&f32_bld, pow_final));
226
      }
227
      else {
228
         pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final));
229
      }
230
      pow_final = lp_build_add(&f32_bld, pow_final,
231
                               lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f));
232
   }
233
 
234
   else {
235
      /*
236
       * using "rational polynomial" approximation here.
237
       * Essentially y = a*x^0.375 + b*x^0.5 + c, with also
238
       * factoring in the 255.0 mul and the scaling mul.
239
       * (a is closer to actual value so has higher weight than b.)
240
       * Note: the constants are magic values. They were found empirically,
241
       * possibly could be improved but good enough (be VERY careful with
242
       * error metric if you'd want to tweak them, they also MUST fit with
243
       * the crappy polynomial above for srgb->linear since it is required
244
       * that each srgb value maps back to the same value).
245
       * This function has an error of max +-0.17 (and we'd only require +-0.6),
246
       * for the approximated srgb->linear values the error is naturally larger
247
       * (+-0.42) but still accurate enough (required +-0.5 essentially).
248
       * All in all (including min/max clamp, conversion) 15 instructions.
249
       * FMA would help (minus 2 instructions).
250
       */
251
 
252
      LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2;
253
 
254
      if (lp_build_fast_rsqrt_available(src_type)) {
255
         tmp = lp_build_fast_rsqrt(&f32_bld, src);
256
         x05 = lp_build_mul(&f32_bld, src, tmp);
257
      }
258
      else {
259
         /*
260
          * I don't really expect this to be practical without rsqrt
261
          * but there's no reason for triple punishment so at least
262
          * save the otherwise resulting division and unnecessary mul...
263
          */
264
         x05 = lp_build_sqrt(&f32_bld, src);
265
      }
266
 
267
      tmp = lp_build_mul(&f32_bld, x05, src);
268
      if (lp_build_fast_rsqrt_available(src_type)) {
269
         x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp));
270
      }
271
      else {
272
         x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp));
273
      }
274
 
275
      a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f);
276
      b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f);
277
      c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);
278
 
279
      tmp = lp_build_mul(&f32_bld, a_const, x0375);
280
      tmp2 = lp_build_mul(&f32_bld, b_const, x05);
281
      tmp2 = lp_build_add(&f32_bld, tmp2, c_const);
282
      pow_final = lp_build_add(&f32_bld, tmp, tmp2);
283
   }
284
 
285
   /* linear part is easy */
286
   lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f);
287
   lin = lp_build_mul(&f32_bld, src, lin_const);
288
 
289
   lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f);
290
   is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
291
   tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
292
 
293
   f32_bld.type.sign = 0;
294
   return lp_build_iround(&f32_bld, tmp);
295
}
296
 
297
 
298
/**
299
 * Convert linear float soa values to packed srgb AoS values.
300
 * This only handles packed formats which are 4x8bit in size
301
 * (rgba and rgbx plus swizzles).
302
 *
303
 * @param src   float SoA (vector) values to convert.
304
 */
305
LLVMValueRef
306
lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
307
                              const struct util_format_description *dst_fmt,
308
                              struct lp_type src_type,
309
                              LLVMValueRef *src)
310
{
311
   LLVMBuilderRef builder = gallivm->builder;
312
   unsigned chan;
313
   struct lp_build_context f32_bld;
314
   struct lp_type int32_type = lp_int_type(src_type);
315
   LLVMValueRef tmpsrgb[4], alpha, dst;
316
 
317
   lp_build_context_init(&f32_bld, gallivm, src_type);
318
 
319
   /* rgb is subject to linear->srgb conversion, alpha is not */
320
   for (chan = 0; chan < 3; chan++) {
321
      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]);
322
   }
323
   /*
324
    * can't use lp_build_conv since we want to keep values as 32bit
325
    * here so we can interleave with rgb to go from SoA->AoS.
326
    */
327
   alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one);
328
   alpha = lp_build_mul(&f32_bld, alpha,
329
                        lp_build_const_vec(gallivm, src_type, 255.0f));
330
   tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
331
 
332
   dst = lp_build_zero(gallivm, int32_type);
333
   for (chan = 0; chan < dst_fmt->nr_channels; chan++) {
334
      if (dst_fmt->swizzle[chan] <= UTIL_FORMAT_SWIZZLE_W) {
335
         unsigned ls;
336
         LLVMValueRef shifted, shift_val;
337
         ls = dst_fmt->channel[dst_fmt->swizzle[chan]].shift;
338
         shift_val = lp_build_const_int_vec(gallivm, int32_type, ls);
339
         shifted = LLVMBuildShl(builder, tmpsrgb[chan], shift_val, "");
340
         dst = LLVMBuildOr(builder, dst, shifted, "");
341
      }
342
   }
343
   return dst;
344
}