Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5563 serge 1
/**************************************************************************
2
 *
3
 * Copyright 2009 VMware, Inc.
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sub license, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 **************************************************************************/
27
 
28
 
29
#include "pipe/p_defines.h"
30
 
31
#include "util/u_format.h"
32
#include "util/u_memory.h"
33
#include "util/u_string.h"
34
 
35
#include "lp_bld_type.h"
36
#include "lp_bld_const.h"
37
#include "lp_bld_conv.h"
38
#include "lp_bld_swizzle.h"
39
#include "lp_bld_gather.h"
40
#include "lp_bld_debug.h"
41
#include "lp_bld_format.h"
42
 
43
 
44
void
45
lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
46
                            struct lp_build_context *bld,
47
                            const LLVMValueRef *unswizzled,
48
                            LLVMValueRef swizzled_out[4])
49
{
50
   assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
51
   assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
52
 
53
   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
54
      enum util_format_swizzle swizzle;
55
      LLVMValueRef depth_or_stencil;
56
 
57
      if (util_format_has_stencil(format_desc) &&
58
          !util_format_has_depth(format_desc)) {
59
         assert(!bld->type.floating);
60
         swizzle = format_desc->swizzle[1];
61
      }
62
      else {
63
         assert(bld->type.floating);
64
         swizzle = format_desc->swizzle[0];
65
      }
66
      /*
67
       * Return zzz1 or sss1 for depth-stencil formats here.
68
       * Correct swizzling will be handled by apply_sampler_swizzle() later.
69
       */
70
      depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
71
 
72
      swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
73
      swizzled_out[3] = bld->one;
74
   }
75
   else {
76
      unsigned chan;
77
      for (chan = 0; chan < 4; ++chan) {
78
         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
79
         swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
80
      }
81
   }
82
}
83
 
84
 
85
/**
86
 * Unpack several pixels in SoA.
87
 *
88
 * It takes a vector of packed pixels:
89
 *
90
 *   packed = {P0, P1, P2, P3, ..., Pn}
91
 *
92
 * And will produce four vectors:
93
 *
94
 *   red    = {R0, R1, R2, R3, ..., Rn}
95
 *   green  = {G0, G1, G2, G3, ..., Gn}
96
 *   blue   = {B0, B1, B2, B3, ..., Bn}
97
 *   alpha  = {A0, A1, A2, A3, ..., An}
98
 *
99
 * It requires that a packed pixel fits into an element of the output
100
 * channels. The common case is when converting pixel with a depth of 32 bit or
101
 * less into floats.
102
 *
103
 * \param format_desc  the format of the 'packed' incoming pixel vector
104
 * \param type  the desired type for rgba_out (type.length = n, above)
105
 * \param packed  the incoming vector of packed pixels
106
 * \param rgba_out  returns the SoA R,G,B,A vectors
107
 */
108
void
109
lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
110
                         const struct util_format_description *format_desc,
111
                         struct lp_type type,
112
                         LLVMValueRef packed,
113
                         LLVMValueRef rgba_out[4])
114
{
115
   LLVMBuilderRef builder = gallivm->builder;
116
   struct lp_build_context bld;
117
   LLVMValueRef inputs[4];
118
   unsigned chan;
119
 
120
   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
121
   assert(format_desc->block.width == 1);
122
   assert(format_desc->block.height == 1);
123
   assert(format_desc->block.bits <= type.width);
124
   /* FIXME: Support more output types */
125
   assert(type.width == 32);
126
 
127
   lp_build_context_init(&bld, gallivm, type);
128
 
129
   /* Decode the input vector components */
130
   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
131
      const unsigned width = format_desc->channel[chan].size;
132
      const unsigned start = format_desc->channel[chan].shift;
133
      const unsigned stop = start + width;
134
      LLVMValueRef input;
135
 
136
      input = packed;
137
 
138
      switch(format_desc->channel[chan].type) {
139
      case UTIL_FORMAT_TYPE_VOID:
140
         input = lp_build_undef(gallivm, type);
141
         break;
142
 
143
      case UTIL_FORMAT_TYPE_UNSIGNED:
144
         /*
145
          * Align the LSB
146
          */
147
 
148
         if (start) {
149
            input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
150
         }
151
 
152
         /*
153
          * Zero the MSBs
154
          */
155
 
156
         if (stop < format_desc->block.bits) {
157
            unsigned mask = ((unsigned long long)1 << width) - 1;
158
            input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
159
         }
160
 
161
         /*
162
          * Type conversion
163
          */
164
 
165
         if (type.floating) {
166
            if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
167
               assert(width == 8);
168
               if (format_desc->swizzle[3] == chan) {
169
                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
170
               }
171
               else {
172
                  struct lp_type conv_type = lp_uint_type(type);
173
                  input = lp_build_srgb_to_linear(gallivm, conv_type, input);
174
               }
175
            }
176
            else {
177
               if(format_desc->channel[chan].normalized)
178
                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
179
               else
180
                  input = LLVMBuildSIToFP(builder, input,
181
                                          lp_build_vec_type(gallivm, type), "");
182
            }
183
         }
184
         else if (format_desc->channel[chan].pure_integer) {
185
            /* Nothing to do */
186
         } else {
187
             /* FIXME */
188
             assert(0);
189
         }
190
 
191
         break;
192
 
193
      case UTIL_FORMAT_TYPE_SIGNED:
194
         /*
195
          * Align the sign bit first.
196
          */
197
 
198
         if (stop < type.width) {
199
            unsigned bits = type.width - stop;
200
            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
201
            input = LLVMBuildShl(builder, input, bits_val, "");
202
         }
203
 
204
         /*
205
          * Align the LSB (with an arithmetic shift to preserve the sign)
206
          */
207
 
208
         if (format_desc->channel[chan].size < type.width) {
209
            unsigned bits = type.width - format_desc->channel[chan].size;
210
            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
211
            input = LLVMBuildAShr(builder, input, bits_val, "");
212
         }
213
 
214
         /*
215
          * Type conversion
216
          */
217
 
218
         if (type.floating) {
219
            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
220
            if (format_desc->channel[chan].normalized) {
221
               double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
222
               LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
223
               input = LLVMBuildFMul(builder, input, scale_val, "");
224
            }
225
         }
226
         else if (format_desc->channel[chan].pure_integer) {
227
            /* Nothing to do */
228
         } else {
229
             /* FIXME */
230
             assert(0);
231
         }
232
 
233
         break;
234
 
235
      case UTIL_FORMAT_TYPE_FLOAT:
236
         if (type.floating) {
237
            assert(start == 0);
238
            assert(stop == 32);
239
            assert(type.width == 32);
240
            input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
241
         }
242
         else {
243
            /* FIXME */
244
            assert(0);
245
            input = lp_build_undef(gallivm, type);
246
         }
247
         break;
248
 
249
      case UTIL_FORMAT_TYPE_FIXED:
250
         if (type.floating) {
251
            double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
252
            LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
253
            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
254
            input = LLVMBuildFMul(builder, input, scale_val, "");
255
         }
256
         else {
257
            /* FIXME */
258
            assert(0);
259
            input = lp_build_undef(gallivm, type);
260
         }
261
         break;
262
 
263
      default:
264
         assert(0);
265
         input = lp_build_undef(gallivm, type);
266
         break;
267
      }
268
 
269
      inputs[chan] = input;
270
   }
271
 
272
   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
273
}
274
 
275
 
276
/**
277
 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
278
 *
279
 * \param dst_type  The desired return type. For pure integer formats
280
 *                  this should be a 32bit wide int or uint vector type,
281
 *                  otherwise a float vector type.
282
 *
283
 * \param packed    The rgba8 values to pack.
284
 *
285
 * \param rgba      The 4 SoA return vectors.
286
 */
287
void
288
lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
289
                           struct lp_type dst_type,
290
                           LLVMValueRef packed,
291
                           LLVMValueRef *rgba)
292
{
293
   LLVMBuilderRef builder = gallivm->builder;
294
   LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
295
   unsigned chan;
296
 
297
   /* XXX technically shouldn't use that for uint dst_type */
298
   packed = LLVMBuildBitCast(builder, packed,
299
                             lp_build_int_vec_type(gallivm, dst_type), "");
300
 
301
   /* Decode the input vector components */
302
   for (chan = 0; chan < 4; ++chan) {
303
#ifdef PIPE_ARCH_LITTLE_ENDIAN
304
      unsigned start = chan*8;
305
#else
306
      unsigned start = (3-chan)*8;
307
#endif
308
      unsigned stop = start + 8;
309
      LLVMValueRef input;
310
 
311
      input = packed;
312
 
313
      if (start)
314
         input = LLVMBuildLShr(builder, input,
315
                               lp_build_const_int_vec(gallivm, dst_type, start), "");
316
 
317
      if (stop < 32)
318
         input = LLVMBuildAnd(builder, input, mask, "");
319
 
320
      if (dst_type.floating)
321
         input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
322
 
323
      rgba[chan] = input;
324
   }
325
}
326
 
327
 
328
 
329
/**
330
 * Fetch a texels from a texture, returning them in SoA layout.
331
 *
332
 * \param type  the desired return type for 'rgba'.  The vector length
333
 *              is the number of texels to fetch
334
 *
335
 * \param base_ptr  points to the base of the texture mip tree.
336
 * \param offset    offset to start of the texture image block.  For non-
337
 *                  compressed formats, this simply is an offset to the texel.
338
 *                  For compressed formats, it is an offset to the start of the
339
 *                  compressed data block.
340
 *
341
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
342
 *              these will always be (0,0).  For compressed formats, i will
343
 *              be in [0, block_width-1] and j will be in [0, block_height-1].
344
 */
345
void
346
lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
347
                        const struct util_format_description *format_desc,
348
                        struct lp_type type,
349
                        LLVMValueRef base_ptr,
350
                        LLVMValueRef offset,
351
                        LLVMValueRef i,
352
                        LLVMValueRef j,
353
                        LLVMValueRef rgba_out[4])
354
{
355
   LLVMBuilderRef builder = gallivm->builder;
356
 
357
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
358
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
359
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
360
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
361
       format_desc->block.width == 1 &&
362
       format_desc->block.height == 1 &&
363
       format_desc->block.bits <= type.width &&
364
       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
365
        format_desc->channel[0].size == 32))
366
   {
367
      /*
368
       * The packed pixel fits into an element of the destination format. Put
369
       * the packed pixels into a vector and extract each component for all
370
       * vector elements in parallel.
371
       */
372
 
373
      LLVMValueRef packed;
374
 
375
      /*
376
       * gather the texels from the texture
377
       * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
378
       */
379
      assert(format_desc->block.bits <= type.width);
380
      packed = lp_build_gather(gallivm,
381
                               type.length,
382
                               format_desc->block.bits,
383
                               type.width,
384
                               base_ptr, offset, FALSE);
385
 
386
      /*
387
       * convert texels to float rgba
388
       */
389
      lp_build_unpack_rgba_soa(gallivm,
390
                               format_desc,
391
                               type,
392
                               packed, rgba_out);
393
      return;
394
   }
395
 
396
   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
397
       format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
398
      /*
399
       * similar conceptually to above but requiring special
400
       * AoS packed -> SoA float conversion code.
401
       */
402
      LLVMValueRef packed;
403
 
404
      assert(type.floating);
405
      assert(type.width == 32);
406
 
407
      packed = lp_build_gather(gallivm, type.length,
408
                               format_desc->block.bits,
409
                               type.width, base_ptr, offset,
410
                               FALSE);
411
      if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
412
         lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
413
      }
414
      else {
415
         lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
416
      }
417
      return;
418
   }
419
 
420
   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
421
       format_desc->block.bits == 64) {
422
      /*
423
       * special case the format is 64 bits but we only require
424
       * 32bit (or 8bit) from each block.
425
       */
426
      LLVMValueRef packed;
427
 
428
      if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
429
         /*
430
          * for stencil simply fix up offsets - could in fact change
431
          * base_ptr instead even outside the shader.
432
          */
433
         unsigned mask = (1 << 8) - 1;
434
         LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
435
         offset = LLVMBuildAdd(builder, offset, s_offset, "");
436
         packed = lp_build_gather(gallivm, type.length,
437
                                  32, type.width, base_ptr, offset, FALSE);
438
         packed = LLVMBuildAnd(builder, packed,
439
                               lp_build_const_int_vec(gallivm, type, mask), "");
440
      }
441
      else {
442
         assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
443
         packed = lp_build_gather(gallivm, type.length,
444
                                  32, type.width, base_ptr, offset, TRUE);
445
         packed = LLVMBuildBitCast(builder, packed,
446
                                   lp_build_vec_type(gallivm, type), "");
447
      }
448
      /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
449
      rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
450
      rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
451
      return;
452
   }
453
 
454
   /*
455
    * Try calling lp_build_fetch_rgba_aos for all pixels.
456
    */
457
 
458
   if (util_format_fits_8unorm(format_desc) &&
459
       type.floating && type.width == 32 &&
460
       (type.length == 1 || (type.length % 4 == 0))) {
461
      struct lp_type tmp_type;
462
      LLVMValueRef tmp;
463
 
464
      memset(&tmp_type, 0, sizeof tmp_type);
465
      tmp_type.width = 8;
466
      tmp_type.length = type.length * 4;
467
      tmp_type.norm = TRUE;
468
 
469
      tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
470
                                    base_ptr, offset, i, j);
471
 
472
      lp_build_rgba8_to_fi32_soa(gallivm,
473
                                type,
474
                                tmp,
475
                                rgba_out);
476
 
477
      return;
478
   }
479
 
480
   /*
481
    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
482
    *
483
    * This is not the most efficient way of fetching pixels, as we
484
    * miss some opportunities to do vectorization, but this is
485
    * convenient for formats or scenarios for which there was no
486
    * opportunity or incentive to optimize.
487
    */
488
 
489
   {
490
      unsigned k, chan;
491
      struct lp_type tmp_type;
492
 
493
      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
494
         debug_printf("%s: scalar unpacking of %s\n",
495
                      __FUNCTION__, format_desc->short_name);
496
      }
497
 
498
      tmp_type = type;
499
      tmp_type.length = 4;
500
 
501
      for (chan = 0; chan < 4; ++chan) {
502
         rgba_out[chan] = lp_build_undef(gallivm, type);
503
      }
504
 
505
      /* loop over number of pixels */
506
      for(k = 0; k < type.length; ++k) {
507
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
508
         LLVMValueRef offset_elem;
509
         LLVMValueRef i_elem, j_elem;
510
         LLVMValueRef tmp;
511
 
512
         offset_elem = LLVMBuildExtractElement(builder, offset,
513
                                               index, "");
514
 
515
         i_elem = LLVMBuildExtractElement(builder, i, index, "");
516
         j_elem = LLVMBuildExtractElement(builder, j, index, "");
517
 
518
         /* Get a single float[4]={R,G,B,A} pixel */
519
         tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
520
                                       base_ptr, offset_elem,
521
                                       i_elem, j_elem);
522
 
523
         /*
524
          * Insert the AoS tmp value channels into the SoA result vectors at
525
          * position = 'index'.
526
          */
527
         for (chan = 0; chan < 4; ++chan) {
528
            LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
529
            tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
530
            rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
531
                                                    tmp_chan, index, "");
532
         }
533
      }
534
   }
535
}