Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28.  
  29. #include "pipe/p_defines.h"
  30.  
  31. #include "util/u_format.h"
  32. #include "util/u_memory.h"
  33. #include "util/u_string.h"
  34.  
  35. #include "lp_bld_type.h"
  36. #include "lp_bld_const.h"
  37. #include "lp_bld_conv.h"
  38. #include "lp_bld_swizzle.h"
  39. #include "lp_bld_gather.h"
  40. #include "lp_bld_debug.h"
  41. #include "lp_bld_format.h"
  42.  
  43.  
  44. void
  45. lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  46.                             struct lp_build_context *bld,
  47.                             const LLVMValueRef *unswizzled,
  48.                             LLVMValueRef swizzled_out[4])
  49. {
  50.    assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
  51.    assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
  52.  
  53.    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  54.       enum util_format_swizzle swizzle;
  55.       LLVMValueRef depth_or_stencil;
  56.  
  57.       if (util_format_has_stencil(format_desc) &&
  58.           !util_format_has_depth(format_desc)) {
  59.          assert(!bld->type.floating);
  60.          swizzle = format_desc->swizzle[1];
  61.       }
  62.       else {
  63.          assert(bld->type.floating);
  64.          swizzle = format_desc->swizzle[0];
  65.       }
  66.       /*
  67.        * Return zzz1 or sss1 for depth-stencil formats here.
  68.        * Correct swizzling will be handled by apply_sampler_swizzle() later.
  69.        */
  70.       depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  71.  
  72.       swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
  73.       swizzled_out[3] = bld->one;
  74.    }
  75.    else {
  76.       unsigned chan;
  77.       for (chan = 0; chan < 4; ++chan) {
  78.          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
  79.          swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
  80.       }
  81.    }
  82. }
  83.  
  84.  
  85. /**
  86.  * Unpack several pixels in SoA.
  87.  *
  88.  * It takes a vector of packed pixels:
  89.  *
  90.  *   packed = {P0, P1, P2, P3, ..., Pn}
  91.  *
  92.  * And will produce four vectors:
  93.  *
  94.  *   red    = {R0, R1, R2, R3, ..., Rn}
  95.  *   green  = {G0, G1, G2, G3, ..., Gn}
  96.  *   blue   = {B0, B1, B2, B3, ..., Bn}
  97.  *   alpha  = {A0, A1, A2, A3, ..., An}
  98.  *
  99.  * It requires that a packed pixel fits into an element of the output
  100.  * channels. The common case is when converting pixel with a depth of 32 bit or
  101.  * less into floats.
  102.  *
  103.  * \param format_desc  the format of the 'packed' incoming pixel vector
  104.  * \param type  the desired type for rgba_out (type.length = n, above)
  105.  * \param packed  the incoming vector of packed pixels
  106.  * \param rgba_out  returns the SoA R,G,B,A vectors
  107.  */
  108. void
  109. lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
  110.                          const struct util_format_description *format_desc,
  111.                          struct lp_type type,
  112.                          LLVMValueRef packed,
  113.                          LLVMValueRef rgba_out[4])
  114. {
  115.    LLVMBuilderRef builder = gallivm->builder;
  116.    struct lp_build_context bld;
  117.    LLVMValueRef inputs[4];
  118.    unsigned chan;
  119.  
  120.    assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  121.    assert(format_desc->block.width == 1);
  122.    assert(format_desc->block.height == 1);
  123.    assert(format_desc->block.bits <= type.width);
  124.    /* FIXME: Support more output types */
  125.    assert(type.width == 32);
  126.  
  127.    lp_build_context_init(&bld, gallivm, type);
  128.  
  129.    /* Decode the input vector components */
  130.    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
  131.       const unsigned width = format_desc->channel[chan].size;
  132.       const unsigned start = format_desc->channel[chan].shift;
  133.       const unsigned stop = start + width;
  134.       LLVMValueRef input;
  135.  
  136.       input = packed;
  137.  
  138.       switch(format_desc->channel[chan].type) {
  139.       case UTIL_FORMAT_TYPE_VOID:
  140.          input = lp_build_undef(gallivm, type);
  141.          break;
  142.  
  143.       case UTIL_FORMAT_TYPE_UNSIGNED:
  144.          /*
  145.           * Align the LSB
  146.           */
  147.  
  148.          if (start) {
  149.             input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
  150.          }
  151.  
  152.          /*
  153.           * Zero the MSBs
  154.           */
  155.  
  156.          if (stop < format_desc->block.bits) {
  157.             unsigned mask = ((unsigned long long)1 << width) - 1;
  158.             input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
  159.          }
  160.  
  161.          /*
  162.           * Type conversion
  163.           */
  164.  
  165.          if (type.floating) {
  166.             if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
  167.                assert(width == 8);
  168.                if (format_desc->swizzle[3] == chan) {
  169.                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
  170.                }
  171.                else {
  172.                   struct lp_type conv_type = lp_uint_type(type);
  173.                   input = lp_build_srgb_to_linear(gallivm, conv_type, input);
  174.                }
  175.             }
  176.             else {
  177.                if(format_desc->channel[chan].normalized)
  178.                   input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
  179.                else
  180.                   input = LLVMBuildSIToFP(builder, input,
  181.                                           lp_build_vec_type(gallivm, type), "");
  182.             }
  183.          }
  184.          else if (format_desc->channel[chan].pure_integer) {
  185.             /* Nothing to do */
  186.          } else {
  187.              /* FIXME */
  188.              assert(0);
  189.          }
  190.  
  191.          break;
  192.  
  193.       case UTIL_FORMAT_TYPE_SIGNED:
  194.          /*
  195.           * Align the sign bit first.
  196.           */
  197.  
  198.          if (stop < type.width) {
  199.             unsigned bits = type.width - stop;
  200.             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
  201.             input = LLVMBuildShl(builder, input, bits_val, "");
  202.          }
  203.  
  204.          /*
  205.           * Align the LSB (with an arithmetic shift to preserve the sign)
  206.           */
  207.  
  208.          if (format_desc->channel[chan].size < type.width) {
  209.             unsigned bits = type.width - format_desc->channel[chan].size;
  210.             LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
  211.             input = LLVMBuildAShr(builder, input, bits_val, "");
  212.          }
  213.  
  214.          /*
  215.           * Type conversion
  216.           */
  217.  
  218.          if (type.floating) {
  219.             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
  220.             if (format_desc->channel[chan].normalized) {
  221.                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
  222.                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
  223.                input = LLVMBuildFMul(builder, input, scale_val, "");
  224.             }
  225.          }
  226.          else if (format_desc->channel[chan].pure_integer) {
  227.             /* Nothing to do */
  228.          } else {
  229.              /* FIXME */
  230.              assert(0);
  231.          }
  232.  
  233.          break;
  234.  
  235.       case UTIL_FORMAT_TYPE_FLOAT:
  236.          if (type.floating) {
  237.             assert(start == 0);
  238.             assert(stop == 32);
  239.             assert(type.width == 32);
  240.             input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
  241.          }
  242.          else {
  243.             /* FIXME */
  244.             assert(0);
  245.             input = lp_build_undef(gallivm, type);
  246.          }
  247.          break;
  248.  
  249.       case UTIL_FORMAT_TYPE_FIXED:
  250.          if (type.floating) {
  251.             double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
  252.             LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
  253.             input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
  254.             input = LLVMBuildFMul(builder, input, scale_val, "");
  255.          }
  256.          else {
  257.             /* FIXME */
  258.             assert(0);
  259.             input = lp_build_undef(gallivm, type);
  260.          }
  261.          break;
  262.  
  263.       default:
  264.          assert(0);
  265.          input = lp_build_undef(gallivm, type);
  266.          break;
  267.       }
  268.  
  269.       inputs[chan] = input;
  270.    }
  271.  
  272.    lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
  273. }
  274.  
  275.  
  276. /**
  277.  * Convert a vector of rgba8 values into 32bit wide SoA vectors.
  278.  *
  279.  * \param dst_type  The desired return type. For pure integer formats
  280.  *                  this should be a 32bit wide int or uint vector type,
  281.  *                  otherwise a float vector type.
  282.  *
  283.  * \param packed    The rgba8 values to pack.
  284.  *
  285.  * \param rgba      The 4 SoA return vectors.
  286.  */
  287. void
  288. lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
  289.                            struct lp_type dst_type,
  290.                            LLVMValueRef packed,
  291.                            LLVMValueRef *rgba)
  292. {
  293.    LLVMBuilderRef builder = gallivm->builder;
  294.    LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
  295.    unsigned chan;
  296.  
  297.    /* XXX technically shouldn't use that for uint dst_type */
  298.    packed = LLVMBuildBitCast(builder, packed,
  299.                              lp_build_int_vec_type(gallivm, dst_type), "");
  300.  
  301.    /* Decode the input vector components */
  302.    for (chan = 0; chan < 4; ++chan) {
  303. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  304.       unsigned start = chan*8;
  305. #else
  306.       unsigned start = (3-chan)*8;
  307. #endif
  308.       unsigned stop = start + 8;
  309.       LLVMValueRef input;
  310.  
  311.       input = packed;
  312.  
  313.       if (start)
  314.          input = LLVMBuildLShr(builder, input,
  315.                                lp_build_const_int_vec(gallivm, dst_type, start), "");
  316.  
  317.       if (stop < 32)
  318.          input = LLVMBuildAnd(builder, input, mask, "");
  319.  
  320.       if (dst_type.floating)
  321.          input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
  322.  
  323.       rgba[chan] = input;
  324.    }
  325. }
  326.  
  327.  
  328.  
  329. /**
  330.  * Fetch a texels from a texture, returning them in SoA layout.
  331.  *
  332.  * \param type  the desired return type for 'rgba'.  The vector length
  333.  *              is the number of texels to fetch
  334.  *
  335.  * \param base_ptr  points to the base of the texture mip tree.
  336.  * \param offset    offset to start of the texture image block.  For non-
  337.  *                  compressed formats, this simply is an offset to the texel.
  338.  *                  For compressed formats, it is an offset to the start of the
  339.  *                  compressed data block.
  340.  *
  341.  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
  342.  *              these will always be (0,0).  For compressed formats, i will
  343.  *              be in [0, block_width-1] and j will be in [0, block_height-1].
  344.  */
  345. void
  346. lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
  347.                         const struct util_format_description *format_desc,
  348.                         struct lp_type type,
  349.                         LLVMValueRef base_ptr,
  350.                         LLVMValueRef offset,
  351.                         LLVMValueRef i,
  352.                         LLVMValueRef j,
  353.                         LLVMValueRef rgba_out[4])
  354. {
  355.    LLVMBuilderRef builder = gallivm->builder;
  356.  
  357.    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
  358.        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
  359.         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
  360.         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
  361.        format_desc->block.width == 1 &&
  362.        format_desc->block.height == 1 &&
  363.        format_desc->block.bits <= type.width &&
  364.        (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
  365.         format_desc->channel[0].size == 32))
  366.    {
  367.       /*
  368.        * The packed pixel fits into an element of the destination format. Put
  369.        * the packed pixels into a vector and extract each component for all
  370.        * vector elements in parallel.
  371.        */
  372.  
  373.       LLVMValueRef packed;
  374.  
  375.       /*
  376.        * gather the texels from the texture
  377.        * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
  378.        */
  379.       assert(format_desc->block.bits <= type.width);
  380.       packed = lp_build_gather(gallivm,
  381.                                type.length,
  382.                                format_desc->block.bits,
  383.                                type.width,
  384.                                base_ptr, offset, FALSE);
  385.  
  386.       /*
  387.        * convert texels to float rgba
  388.        */
  389.       lp_build_unpack_rgba_soa(gallivm,
  390.                                format_desc,
  391.                                type,
  392.                                packed, rgba_out);
  393.       return;
  394.    }
  395.  
  396.    if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
  397.        format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
  398.       /*
  399.        * similar conceptually to above but requiring special
  400.        * AoS packed -> SoA float conversion code.
  401.        */
  402.       LLVMValueRef packed;
  403.  
  404.       assert(type.floating);
  405.       assert(type.width == 32);
  406.  
  407.       packed = lp_build_gather(gallivm, type.length,
  408.                                format_desc->block.bits,
  409.                                type.width, base_ptr, offset,
  410.                                FALSE);
  411.       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
  412.          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
  413.       }
  414.       else {
  415.          lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
  416.       }
  417.       return;
  418.    }
  419.  
  420.    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
  421.        format_desc->block.bits == 64) {
  422.       /*
  423.        * special case the format is 64 bits but we only require
  424.        * 32bit (or 8bit) from each block.
  425.        */
  426.       LLVMValueRef packed;
  427.  
  428.       if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
  429.          /*
  430.           * for stencil simply fix up offsets - could in fact change
  431.           * base_ptr instead even outside the shader.
  432.           */
  433.          unsigned mask = (1 << 8) - 1;
  434.          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
  435.          offset = LLVMBuildAdd(builder, offset, s_offset, "");
  436.          packed = lp_build_gather(gallivm, type.length,
  437.                                   32, type.width, base_ptr, offset, FALSE);
  438.          packed = LLVMBuildAnd(builder, packed,
  439.                                lp_build_const_int_vec(gallivm, type, mask), "");
  440.       }
  441.       else {
  442.          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
  443.          packed = lp_build_gather(gallivm, type.length,
  444.                                   32, type.width, base_ptr, offset, TRUE);
  445.          packed = LLVMBuildBitCast(builder, packed,
  446.                                    lp_build_vec_type(gallivm, type), "");
  447.       }
  448.       /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
  449.       rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
  450.       rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
  451.       return;
  452.    }
  453.  
  454.    /*
  455.     * Try calling lp_build_fetch_rgba_aos for all pixels.
  456.     */
  457.  
  458.    if (util_format_fits_8unorm(format_desc) &&
  459.        type.floating && type.width == 32 &&
  460.        (type.length == 1 || (type.length % 4 == 0))) {
  461.       struct lp_type tmp_type;
  462.       LLVMValueRef tmp;
  463.  
  464.       memset(&tmp_type, 0, sizeof tmp_type);
  465.       tmp_type.width = 8;
  466.       tmp_type.length = type.length * 4;
  467.       tmp_type.norm = TRUE;
  468.  
  469.       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
  470.                                     base_ptr, offset, i, j);
  471.  
  472.       lp_build_rgba8_to_fi32_soa(gallivm,
  473.                                 type,
  474.                                 tmp,
  475.                                 rgba_out);
  476.  
  477.       return;
  478.    }
  479.  
  480.    /*
  481.     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
  482.     *
  483.     * This is not the most efficient way of fetching pixels, as we
  484.     * miss some opportunities to do vectorization, but this is
  485.     * convenient for formats or scenarios for which there was no
  486.     * opportunity or incentive to optimize.
  487.     */
  488.  
  489.    {
  490.       unsigned k, chan;
  491.       struct lp_type tmp_type;
  492.  
  493.       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  494.          debug_printf("%s: scalar unpacking of %s\n",
  495.                       __FUNCTION__, format_desc->short_name);
  496.       }
  497.  
  498.       tmp_type = type;
  499.       tmp_type.length = 4;
  500.  
  501.       for (chan = 0; chan < 4; ++chan) {
  502.          rgba_out[chan] = lp_build_undef(gallivm, type);
  503.       }
  504.  
  505.       /* loop over number of pixels */
  506.       for(k = 0; k < type.length; ++k) {
  507.          LLVMValueRef index = lp_build_const_int32(gallivm, k);
  508.          LLVMValueRef offset_elem;
  509.          LLVMValueRef i_elem, j_elem;
  510.          LLVMValueRef tmp;
  511.  
  512.          offset_elem = LLVMBuildExtractElement(builder, offset,
  513.                                                index, "");
  514.  
  515.          i_elem = LLVMBuildExtractElement(builder, i, index, "");
  516.          j_elem = LLVMBuildExtractElement(builder, j, index, "");
  517.  
  518.          /* Get a single float[4]={R,G,B,A} pixel */
  519.          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
  520.                                        base_ptr, offset_elem,
  521.                                        i_elem, j_elem);
  522.  
  523.          /*
  524.           * Insert the AoS tmp value channels into the SoA result vectors at
  525.           * position = 'index'.
  526.           */
  527.          for (chan = 0; chan < 4; ++chan) {
  528.             LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
  529.             tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
  530.             rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
  531.                                                     tmp_chan, index, "");
  532.          }
  533.       }
  534.    }
  535. }
  536.