Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28.  
  29. #include "pipe/p_defines.h"
  30.  
  31. #include "util/u_format.h"
  32. #include "util/u_memory.h"
  33. #include "util/u_string.h"
  34.  
  35. #include "lp_bld_type.h"
  36. #include "lp_bld_const.h"
  37. #include "lp_bld_conv.h"
  38. #include "lp_bld_swizzle.h"
  39. #include "lp_bld_gather.h"
  40. #include "lp_bld_debug.h"
  41. #include "lp_bld_format.h"
  42. #include "lp_bld_arit.h"
  43.  
  44.  
  45. void
  46. lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  47.                             struct lp_build_context *bld,
  48.                             const LLVMValueRef *unswizzled,
  49.                             LLVMValueRef swizzled_out[4])
  50. {
  51.    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
  52.    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
  53.  
  54.    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  55.       enum util_format_swizzle swizzle;
  56.       LLVMValueRef depth_or_stencil;
  57.  
  58.       if (util_format_has_stencil(format_desc) &&
  59.           !util_format_has_depth(format_desc)) {
  60.          assert(!bld->type.floating);
  61.          swizzle = format_desc->swizzle[1];
  62.       }
  63.       else {
  64.          assert(bld->type.floating);
  65.          swizzle = format_desc->swizzle[0];
  66.       }
  67.       /*
  68.        * Return zzz1 or sss1 for depth-stencil formats here.
  69.        * Correct swizzling will be handled by apply_sampler_swizzle() later.
  70.        */
  71.       depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  72.  
  73.       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
  74.       swizzled_out[3] = bld->one;
  75.    }
  76.    else {
  77.       unsigned chan;
  78.       for (chan = 0; chan < 4; ++chan) {
  79.          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
  80.          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  81.       }
  82.    }
  83. }
  84.  
  85.  
  86. /**
  87.  * Unpack several pixels in SoA.
  88.  *
  89.  * It takes a vector of packed pixels:
  90.  *
  91.  *   packed = {P0, P1, P2, P3, ..., Pn}
  92.  *
  93.  * And will produce four vectors:
  94.  *
  95.  *   red    = {R0, R1, R2, R3, ..., Rn}
  96.  *   green  = {G0, G1, G2, G3, ..., Gn}
  97.  *   blue   = {B0, B1, B2, B3, ..., Bn}
  98.  *   alpha  = {A0, A1, A2, A3, ..., An}
  99.  *
  100.  * It requires that a packed pixel fits into an element of the output
  101.  * channels. The common case is when converting pixel with a depth of 32 bit or
  102.  * less into floats.
  103.  *
  104.  * \param format_desc  the format of the 'packed' incoming pixel vector
  105.  * \param type  the desired type for rgba_out (type.length = n, above)
  106.  * \param packed  the incoming vector of packed pixels
  107.  * \param rgba_out  returns the SoA R,G,B,A vectors
  108.  */
  109. void
  110. lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
  111.                          const struct util_format_description *format_desc,
  112.                          struct lp_type type,
  113.                          LLVMValueRef packed,
  114.                          LLVMValueRef rgba_out[4])
  115. {
  116.    LLVMBuilderRef builder = gallivm->builder;
  117.    struct lp_build_context bld;
  118.    LLVMValueRef inputs[4];
  119.    unsigned chan;
  120.  
  121.    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  122.    assert(format_desc->block.width == 1);
  123.    assert(format_desc->block.height == 1);
  124.    assert(format_desc->block.bits <= type.width);
  125.    /* FIXME: Support more output types */
  126.    assert(type.width == 32);
  127.  
  128.    lp_build_context_init(&bld, gallivm, type);
  129.  
  130.    /* Decode the input vector components */
  131.    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
  132.       const unsigned width = format_desc->channel[chan].size;
  133.       const unsigned start = format_desc->channel[chan].shift;
  134.       const unsigned stop = start + width;
  135.       LLVMValueRef input;
  136.  
  137.       input = packed;
  138.  
  139.       switch(format_desc->channel[chan].type) {
  140.       case UTIL_FORMAT_TYPE_VOID:
  141.          input = lp_build_undef(gallivm, type);
  142.          break;
  143.  
  144.       case UTIL_FORMAT_TYPE_UNSIGNED:
  145.          /*
  146.           * Align the LSB
  147.           */
  148.  
  149.          if (start) {
  150.             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
  151.          }
  152.  
  153.          /*
  154.           * Zero the MSBs
  155.           */
  156.  
  157.          if (stop < format_desc->block.bits) {
  158.             unsigned mask = ((unsigned long long)1 << width) - 1;
  159.             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
  160.          }
  161.  
  162.          /*
  163.           * Type conversion
  164.           */
  165.  
  166.          if (type.floating) {
  167.             if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
  168.                if (format_desc->swizzle[3] == chan) {
  169.                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
  170.                }
  171.                else {
  172.                   struct lp_type conv_type = lp_uint_type(type);
  173.                   input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
  174.                }
  175.             }
  176.             else {
  177.                if(format_desc->channel[chan].normalized)
  178.                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
  179.                else
  180.                   input = LLVMBuildSIToFP(builder, input,
  181.                                           lp_build_vec_type(gallivm, type), "");
  182.             }
  183.          }
  184.          else if (format_desc->channel[chan].pure_integer) {
  185.             /* Nothing to do */
  186.          } else {
  187.              /* FIXME */
  188.              assert(0);
  189.          }
  190.  
  191.          break;
  192.  
  193.       case UTIL_FORMAT_TYPE_SIGNED:
  194.          /*
  195.           * Align the sign bit first.
  196.           */
  197.  
  198.          if (stop < type.width) {
  199.             unsigned bits = type.width - stop;
  200.             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
  201.             input = LLVMBuildShl(builder, input, bits_val, "");
  202.          }
  203.  
  204.          /*
  205.           * Align the LSB (with an arithmetic shift to preserve the sign)
  206.           */
  207.  
  208.          if (format_desc->channel[chan].size < type.width) {
  209.             unsigned bits = type.width - format_desc->channel[chan].size;
  210.             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
  211.             input = LLVMBuildAShr(builder, input, bits_val, "");
  212.          }
  213.  
  214.          /*
  215.           * Type conversion
  216.           */
  217.  
  218.          if (type.floating) {
  219.             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
  220.             if (format_desc->channel[chan].normalized) {
  221.                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
  222.                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
  223.                input = LLVMBuildFMul(builder, input, scale_val, "");
  224.                /* the formula above will produce value below -1.0 for most negative
  225.                 * value but everything seems happy with that hence disable for now */
  226.                if (0)
  227.                   input = lp_build_max(&bld, input,
  228.                                        lp_build_const_vec(gallivm, type, -1.0f));
  229.             }
  230.          }
  231.          else if (format_desc->channel[chan].pure_integer) {
  232.             /* Nothing to do */
  233.          } else {
  234.              /* FIXME */
  235.              assert(0);
  236.          }
  237.  
  238.          break;
  239.  
  240.       case UTIL_FORMAT_TYPE_FLOAT:
  241.          if (type.floating) {
  242.             assert(start == 0);
  243.             assert(stop == 32);
  244.             assert(type.width == 32);
  245.             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
  246.          }
  247.          else {
  248.             /* FIXME */
  249.             assert(0);
  250.             input = lp_build_undef(gallivm, type);
  251.          }
  252.          break;
  253.  
  254.       case UTIL_FORMAT_TYPE_FIXED:
  255.          if (type.floating) {
  256.             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
  257.             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
  258.             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
  259.             input = LLVMBuildFMul(builder, input, scale_val, "");
  260.          }
  261.          else {
  262.             /* FIXME */
  263.             assert(0);
  264.             input = lp_build_undef(gallivm, type);
  265.          }
  266.          break;
  267.  
  268.       default:
  269.          assert(0);
  270.          input = lp_build_undef(gallivm, type);
  271.          break;
  272.       }
  273.  
  274.       inputs[chan] = input;
  275.    }
  276.  
  277.    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
  278. }
  279.  
  280.  
  281. /**
  282.  * Convert a vector of rgba8 values into 32bit wide SoA vectors.
  283.  *
  284.  * \param dst_type  The desired return type. For pure integer formats
  285.  *                  this should be a 32bit wide int or uint vector type,
  286.  *                  otherwise a float vector type.
  287.  *
  288.  * \param packed    The rgba8 values to pack.
  289.  *
  290.  * \param rgba      The 4 SoA return vectors.
  291.  */
  292. void
  293. lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
  294.                            struct lp_type dst_type,
  295.                            LLVMValueRef packed,
  296.                            LLVMValueRef *rgba)
  297. {
  298.    LLVMBuilderRef builder = gallivm->builder;
  299.    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
  300.    unsigned chan;
  301.  
  302.    /* XXX technically shouldn't use that for uint dst_type */
  303.    packed = LLVMBuildBitCast(builder, packed,
  304.                              lp_build_int_vec_type(gallivm, dst_type), "");
  305.  
  306.    /* Decode the input vector components */
  307.    for (chan = 0; chan < 4; ++chan) {
  308. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  309.       unsigned start = chan*8;
  310. #else
  311.       unsigned start = (3-chan)*8;
  312. #endif
  313.       unsigned stop = start + 8;
  314.       LLVMValueRef input;
  315.  
  316.       input = packed;
  317.  
  318.       if (start)
  319.          input = LLVMBuildLShr(builder, input,
  320.                                lp_build_const_int_vec(gallivm, dst_type, start), "");
  321.  
  322.       if (stop < 32)
  323.          input = LLVMBuildAnd(builder, input, mask, "");
  324.  
  325.       if (dst_type.floating)
  326.          input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
  327.  
  328.       rgba[chan] = input;
  329.    }
  330. }
  331.  
  332.  
  333.  
  334. /**
  335.  * Fetch a texels from a texture, returning them in SoA layout.
  336.  *
  337.  * \param type  the desired return type for 'rgba'.  The vector length
  338.  *              is the number of texels to fetch
  339.  *
  340.  * \param base_ptr  points to the base of the texture mip tree.
  341.  * \param offset    offset to start of the texture image block.  For non-
  342.  *                  compressed formats, this simply is an offset to the texel.
  343.  *                  For compressed formats, it is an offset to the start of the
  344.  *                  compressed data block.
  345.  *
  346.  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
  347.  *              these will always be (0,0).  For compressed formats, i will
  348.  *              be in [0, block_width-1] and j will be in [0, block_height-1].
  349.  */
  350. void
  351. lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
  352.                         const struct util_format_description *format_desc,
  353.                         struct lp_type type,
  354.                         LLVMValueRef base_ptr,
  355.                         LLVMValueRef offset,
  356.                         LLVMValueRef i,
  357.                         LLVMValueRef j,
  358.                         LLVMValueRef rgba_out[4])
  359. {
  360.    LLVMBuilderRef builder = gallivm->builder;
  361.  
  362.    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
  363.        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
  364.         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
  365.         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
  366.        format_desc->block.width == 1 &&
  367.        format_desc->block.height == 1 &&
  368.        format_desc->block.bits <= type.width &&
  369.        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
  370.         format_desc->channel[0].size == 32))
  371.    {
  372.       /*
  373.        * The packed pixel fits into an element of the destination format. Put
  374.        * the packed pixels into a vector and extract each component for all
  375.        * vector elements in parallel.
  376.        */
  377.  
  378.       LLVMValueRef packed;
  379.  
  380.       /*
  381.        * gather the texels from the texture
  382.        * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
  383.        */
  384.       assert(format_desc->block.bits <= type.width);
  385.       packed = lp_build_gather(gallivm,
  386.                                type.length,
  387.                                format_desc->block.bits,
  388.                                type.width,
  389.                                TRUE,
  390.                                base_ptr, offset, FALSE);
  391.  
  392.       /*
  393.        * convert texels to float rgba
  394.        */
  395.       lp_build_unpack_rgba_soa(gallivm,
  396.                                format_desc,
  397.                                type,
  398.                                packed, rgba_out);
  399.       return;
  400.    }
  401.  
  402.    if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
  403.        format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
  404.       /*
  405.        * similar conceptually to above but requiring special
  406.        * AoS packed -> SoA float conversion code.
  407.        */
  408.       LLVMValueRef packed;
  409.  
  410.       assert(type.floating);
  411.       assert(type.width == 32);
  412.  
  413.       packed = lp_build_gather(gallivm, type.length,
  414.                                format_desc->block.bits,
  415.                                type.width, TRUE,
  416.                                base_ptr, offset, FALSE);
  417.       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
  418.          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
  419.       }
  420.       else {
  421.          lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
  422.       }
  423.       return;
  424.    }
  425.  
  426.    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
  427.        format_desc->block.bits == 64) {
  428.       /*
  429.        * special case the format is 64 bits but we only require
  430.        * 32bit (or 8bit) from each block.
  431.        */
  432.       LLVMValueRef packed;
  433.  
  434.       if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
  435.          /*
  436.           * for stencil simply fix up offsets - could in fact change
  437.           * base_ptr instead even outside the shader.
  438.           */
  439.          unsigned mask = (1 << 8) - 1;
  440.          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
  441.          offset = LLVMBuildAdd(builder, offset, s_offset, "");
  442.          packed = lp_build_gather(gallivm, type.length, 32, type.width,
  443.                                   TRUE, base_ptr, offset, FALSE);
  444.          packed = LLVMBuildAnd(builder, packed,
  445.                                lp_build_const_int_vec(gallivm, type, mask), "");
  446.       }
  447.       else {
  448.          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
  449.          packed = lp_build_gather(gallivm, type.length, 32, type.width,
  450.                                   TRUE, base_ptr, offset, TRUE);
  451.          packed = LLVMBuildBitCast(builder, packed,
  452.                                    lp_build_vec_type(gallivm, type), "");
  453.       }
  454.       /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
  455.       rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
  456.       rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
  457.       return;
  458.    }
  459.  
  460.    /*
  461.     * Try calling lp_build_fetch_rgba_aos for all pixels.
  462.     */
  463.  
  464.    if (util_format_fits_8unorm(format_desc) &&
  465.        type.floating && type.width == 32 &&
  466.        (type.length == 1 || (type.length % 4 == 0))) {
  467.       struct lp_type tmp_type;
  468.       LLVMValueRef tmp;
  469.  
  470.       memset(&tmp_type, 0, sizeof tmp_type);
  471.       tmp_type.width = 8;
  472.       tmp_type.length = type.length * 4;
  473.       tmp_type.norm = TRUE;
  474.  
  475.       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
  476.                                     TRUE, base_ptr, offset, i, j);
  477.  
  478.       lp_build_rgba8_to_fi32_soa(gallivm,
  479.                                 type,
  480.                                 tmp,
  481.                                 rgba_out);
  482.  
  483.       return;
  484.    }
  485.  
  486.    /*
  487.     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
  488.     *
  489.     * This is not the most efficient way of fetching pixels, as we
  490.     * miss some opportunities to do vectorization, but this is
  491.     * convenient for formats or scenarios for which there was no
  492.     * opportunity or incentive to optimize.
  493.     */
  494.  
  495.    {
  496.       unsigned k, chan;
  497.       struct lp_type tmp_type;
  498.  
  499.       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  500.          debug_printf("%s: scalar unpacking of %s\n",
  501.                       __FUNCTION__, format_desc->short_name);
  502.       }
  503.  
  504.       tmp_type = type;
  505.       tmp_type.length = 4;
  506.  
  507.       for (chan = 0; chan < 4; ++chan) {
  508.          rgba_out[chan] = lp_build_undef(gallivm, type);
  509.       }
  510.  
  511.       /* loop over number of pixels */
  512.       for(k = 0; k < type.length; ++k) {
  513.          LLVMValueRef index = lp_build_const_int32(gallivm, k);
  514.          LLVMValueRef offset_elem;
  515.          LLVMValueRef i_elem, j_elem;
  516.          LLVMValueRef tmp;
  517.  
  518.          offset_elem = LLVMBuildExtractElement(builder, offset,
  519.                                                index, "");
  520.  
  521.          i_elem = LLVMBuildExtractElement(builder, i, index, "");
  522.          j_elem = LLVMBuildExtractElement(builder, j, index, "");
  523.  
  524.          /* Get a single float[4]={R,G,B,A} pixel */
  525.          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
  526.                                        TRUE, base_ptr, offset_elem,
  527.                                        i_elem, j_elem);
  528.  
  529.          /*
  530.           * Insert the AoS tmp value channels into the SoA result vectors at
  531.           * position = 'index'.
  532.           */
  533.          for (chan = 0; chan < 4; ++chan) {
  534.             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
  535.             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
  536.             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
  537.                                                     tmp_chan, index, "");
  538.          }
  539.       }
  540.    }
  541. }
  542.