Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * @file
  30.  * AoS pixel format manipulation.
  31.  *
  32.  * @author Jose Fonseca <jfonseca@vmware.com>
  33.  */
  34.  
  35.  
  36. #include "util/u_format.h"
  37. #include "util/u_memory.h"
  38. #include "util/u_math.h"
  39. #include "util/u_pointer.h"
  40. #include "util/u_string.h"
  41.  
  42. #include "lp_bld_arit.h"
  43. #include "lp_bld_init.h"
  44. #include "lp_bld_type.h"
  45. #include "lp_bld_flow.h"
  46. #include "lp_bld_const.h"
  47. #include "lp_bld_conv.h"
  48. #include "lp_bld_swizzle.h"
  49. #include "lp_bld_gather.h"
  50. #include "lp_bld_debug.h"
  51. #include "lp_bld_format.h"
  52. #include "lp_bld_intr.h"
  53.  
  54.  
  55. /**
  56.  * Basic swizzling.  Rearrange the order of the unswizzled array elements
  57.  * according to the format description.  PIPE_SWIZZLE_ZERO/ONE are supported
  58.  * too.
  59.  * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
  60.  */
  61. LLVMValueRef
  62. lp_build_format_swizzle_aos(const struct util_format_description *desc,
  63.                             struct lp_build_context *bld,
  64.                             LLVMValueRef unswizzled)
  65. {
  66.    unsigned char swizzles[4];
  67.    unsigned chan;
  68.  
  69.    assert(bld->type.length % 4 == 0);
  70.  
  71.    for (chan = 0; chan < 4; ++chan) {
  72.       enum util_format_swizzle swizzle;
  73.  
  74.       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  75.          /*
  76.           * For ZS formats do RGBA = ZZZ1
  77.           */
  78.          if (chan == 3) {
  79.             swizzle = UTIL_FORMAT_SWIZZLE_1;
  80.          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
  81.             swizzle = UTIL_FORMAT_SWIZZLE_0;
  82.          } else {
  83.             swizzle = desc->swizzle[0];
  84.          }
  85.       } else {
  86.          swizzle = desc->swizzle[chan];
  87.       }
  88.       swizzles[chan] = swizzle;
  89.    }
  90.  
  91.    return lp_build_swizzle_aos(bld, unswizzled, swizzles);
  92. }
  93.  
  94.  
  95. /**
  96.  * Whether the format matches the vector type, apart of swizzles.
  97.  */
  98. static INLINE boolean
  99. format_matches_type(const struct util_format_description *desc,
  100.                     struct lp_type type)
  101. {
  102.    enum util_format_type chan_type;
  103.    unsigned chan;
  104.  
  105.    assert(type.length % 4 == 0);
  106.  
  107.    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
  108.        desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
  109.        desc->block.width != 1 ||
  110.        desc->block.height != 1) {
  111.       return FALSE;
  112.    }
  113.  
  114.    if (type.floating) {
  115.       chan_type = UTIL_FORMAT_TYPE_FLOAT;
  116.    } else if (type.fixed) {
  117.       chan_type = UTIL_FORMAT_TYPE_FIXED;
  118.    } else if (type.sign) {
  119.       chan_type = UTIL_FORMAT_TYPE_SIGNED;
  120.    } else {
  121.       chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
  122.    }
  123.  
  124.    for (chan = 0; chan < desc->nr_channels; ++chan) {
  125.       if (desc->channel[chan].size != type.width) {
  126.          return FALSE;
  127.       }
  128.  
  129.       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
  130.          if (desc->channel[chan].type != chan_type ||
  131.              desc->channel[chan].normalized != type.norm) {
  132.             return FALSE;
  133.          }
  134.       }
  135.    }
  136.  
  137.    return TRUE;
  138. }
  139.  
  140.  
  141. /**
  142.  * Unpack a single pixel into its XYZW components.
  143.  *
  144.  * @param desc  the pixel format for the packed pixel value
  145.  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
  146.  *
  147.  * @return XYZW in a float[4] or ubyte[4] or ushort[4] vector.
  148.  */
  149. static INLINE LLVMValueRef
  150. lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
  151.                                const struct util_format_description *desc,
  152.                                LLVMValueRef packed)
  153. {
  154.    LLVMBuilderRef builder = gallivm->builder;
  155.    LLVMValueRef shifted, casted, scaled, masked;
  156.    LLVMValueRef shifts[4];
  157.    LLVMValueRef masks[4];
  158.    LLVMValueRef scales[4];
  159.  
  160.    boolean normalized;
  161.    boolean needs_uitofp;
  162.    unsigned i;
  163.  
  164.    /* TODO: Support more formats */
  165.    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  166.    assert(desc->block.width == 1);
  167.    assert(desc->block.height == 1);
  168.    assert(desc->block.bits <= 32);
  169.  
  170.    /* Do the intermediate integer computations with 32bit integers since it
  171.     * matches floating point size */
  172.    assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));
  173.  
  174.    /* Broadcast the packed value to all four channels
  175.     * before: packed = BGRA
  176.     * after: packed = {BGRA, BGRA, BGRA, BGRA}
  177.     */
  178.    packed = LLVMBuildInsertElement(builder,
  179.                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
  180.                                    packed,
  181.                                    LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
  182.                                    "");
  183.    packed = LLVMBuildShuffleVector(builder,
  184.                                    packed,
  185.                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
  186.                                    LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
  187.                                    "");
  188.  
  189.    /* Initialize vector constants */
  190.    normalized = FALSE;
  191.    needs_uitofp = FALSE;
  192.  
  193.    /* Loop over 4 color components */
  194.    for (i = 0; i < 4; ++i) {
  195.       unsigned bits = desc->channel[i].size;
  196.       unsigned shift = desc->channel[i].shift;
  197.  
  198.       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
  199.          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  200.          masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
  201.          scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
  202.       }
  203.       else {
  204.          unsigned long long mask = (1ULL << bits) - 1;
  205.  
  206.          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
  207.  
  208.          if (bits == 32) {
  209.             needs_uitofp = TRUE;
  210.          }
  211.  
  212.          shifts[i] = lp_build_const_int32(gallivm, shift);
  213.          masks[i] = lp_build_const_int32(gallivm, mask);
  214.  
  215.          if (desc->channel[i].normalized) {
  216.             scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
  217.             normalized = TRUE;
  218.          }
  219.          else
  220.             scales[i] =  lp_build_const_float(gallivm, 1.0);
  221.       }
  222.    }
  223.  
  224.    /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW}
  225.     * into masked = {X, Y, Z, W}
  226.     */
  227.    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
  228.    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
  229.  
  230.    if (!needs_uitofp) {
  231.       /* UIToFP can't be expressed in SSE2 */
  232.       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
  233.    } else {
  234.       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
  235.    }
  236.  
  237.    /* At this point 'casted' may be a vector of floats such as
  238.     * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
  239.     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
  240.     */
  241.  
  242.    if (normalized)
  243.       scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
  244.    else
  245.       scaled = casted;
  246.  
  247.    return scaled;
  248. }
  249.  
  250.  
  251. /**
  252.  * Pack a single pixel.
  253.  *
  254.  * @param rgba 4 float vector with the unpacked components.
  255.  *
  256.  * XXX: This is mostly for reference and testing -- operating a single pixel at
  257.  * a time is rarely if ever needed.
  258.  */
  259. LLVMValueRef
  260. lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
  261.                        const struct util_format_description *desc,
  262.                        LLVMValueRef rgba)
  263. {
  264.    LLVMBuilderRef builder = gallivm->builder;
  265.    LLVMTypeRef type;
  266.    LLVMValueRef packed = NULL;
  267.    LLVMValueRef swizzles[4];
  268.    LLVMValueRef shifted, casted, scaled, unswizzled;
  269.    LLVMValueRef shifts[4];
  270.    LLVMValueRef scales[4];
  271.    boolean normalized;
  272.    unsigned i, j;
  273.  
  274.    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  275.    assert(desc->block.width == 1);
  276.    assert(desc->block.height == 1);
  277.  
  278.    type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);
  279.  
  280.    /* Unswizzle the color components into the source vector. */
  281.    for (i = 0; i < 4; ++i) {
  282.       for (j = 0; j < 4; ++j) {
  283.          if (desc->swizzle[j] == i)
  284.             break;
  285.       }
  286.       if (j < 4)
  287.          swizzles[i] = lp_build_const_int32(gallivm, j);
  288.       else
  289.          swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  290.    }
  291.  
  292.    unswizzled = LLVMBuildShuffleVector(builder, rgba,
  293.                                        LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
  294.                                        LLVMConstVector(swizzles, 4), "");
  295.  
  296.    normalized = FALSE;
  297.    for (i = 0; i < 4; ++i) {
  298.       unsigned bits = desc->channel[i].size;
  299.       unsigned shift = desc->channel[i].shift;
  300.  
  301.       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
  302.          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  303.          scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
  304.       }
  305.       else {
  306.          unsigned mask = (1 << bits) - 1;
  307.  
  308.          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
  309.          assert(bits < 32);
  310.  
  311.          shifts[i] = lp_build_const_int32(gallivm, shift);
  312.  
  313.          if (desc->channel[i].normalized) {
  314.             scales[i] = lp_build_const_float(gallivm, mask);
  315.             normalized = TRUE;
  316.          }
  317.          else
  318.             scales[i] = lp_build_const_float(gallivm, 1.0);
  319.       }
  320.    }
  321.  
  322.    if (normalized)
  323.       scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
  324.    else
  325.       scaled = unswizzled;
  326.  
  327.    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");
  328.  
  329.    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
  330.    
  331.    /* Bitwise or all components */
  332.    for (i = 0; i < 4; ++i) {
  333.       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
  334.          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
  335.                                                lp_build_const_int32(gallivm, i), "");
  336.          if (packed)
  337.             packed = LLVMBuildOr(builder, packed, component, "");
  338.          else
  339.             packed = component;
  340.       }
  341.    }
  342.  
  343.    if (!packed)
  344.       packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  345.  
  346.    if (desc->block.bits < 32)
  347.       packed = LLVMBuildTrunc(builder, packed, type, "");
  348.  
  349.    return packed;
  350. }
  351.  
  352.  
  353.  
  354.  
  355. /**
  356.  * Fetch a pixel into a 4 float AoS.
  357.  *
  358.  * \param format_desc  describes format of the image we're fetching from
  359.  * \param aligned  whether the data is guaranteed to be aligned
  360.  * \param ptr  address of the pixel block (or the texel if uncompressed)
  361.  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
  362.  *              these will always be (0, 0).
  363.  * \return  a 4 element vector with the pixel's RGBA values.
  364.  */
  365. LLVMValueRef
  366. lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
  367.                         const struct util_format_description *format_desc,
  368.                         struct lp_type type,
  369.                         boolean aligned,
  370.                         LLVMValueRef base_ptr,
  371.                         LLVMValueRef offset,
  372.                         LLVMValueRef i,
  373.                         LLVMValueRef j)
  374. {
  375.    LLVMBuilderRef builder = gallivm->builder;
  376.    unsigned num_pixels = type.length / 4;
  377.    struct lp_build_context bld;
  378.  
  379.    assert(type.length <= LP_MAX_VECTOR_LENGTH);
  380.    assert(type.length % 4 == 0);
  381.  
  382.    lp_build_context_init(&bld, gallivm, type);
  383.  
  384.    /*
  385.     * Trivial case
  386.     *
  387.     * The format matches the type (apart of a swizzle) so no need for
  388.     * scaling or converting.
  389.     */
  390.  
  391.    if (format_matches_type(format_desc, type) &&
  392.        format_desc->block.bits <= type.width * 4 &&
  393.        util_is_power_of_two(format_desc->block.bits)) {
  394.       LLVMValueRef packed;
  395.       LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);
  396.       unsigned vec_len = type.width * type.length;
  397.  
  398.       /*
  399.        * The format matches the type (apart of a swizzle) so no need for
  400.        * scaling or converting.
  401.        */
  402.  
  403.       packed = lp_build_gather(gallivm, type.length/4,
  404.                                format_desc->block.bits, type.width*4,
  405.                                aligned, base_ptr, offset, TRUE);
  406.  
  407.       assert(format_desc->block.bits <= vec_len);
  408.  
  409.       packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
  410.       return lp_build_format_swizzle_aos(format_desc, &bld, packed);
  411.    }
  412.  
  413.    /*
  414.     * Bit arithmetic
  415.     */
  416.  
  417.    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
  418.        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
  419.         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
  420.        format_desc->block.width == 1 &&
  421.        format_desc->block.height == 1 &&
  422.        util_is_power_of_two(format_desc->block.bits) &&
  423.        format_desc->block.bits <= 32 &&
  424.        format_desc->is_bitmask &&
  425.        !format_desc->is_mixed &&
  426.        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
  427.         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) &&
  428.        !format_desc->channel[0].pure_integer) {
  429.  
  430.       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
  431.       LLVMValueRef res;
  432.       unsigned k;
  433.  
  434.       /*
  435.        * Unpack a pixel at a time into a <4 x float> RGBA vector
  436.        */
  437.  
  438.       for (k = 0; k < num_pixels; ++k) {
  439.          LLVMValueRef packed;
  440.  
  441.          packed = lp_build_gather_elem(gallivm, num_pixels,
  442.                                        format_desc->block.bits, 32, aligned,
  443.                                        base_ptr, offset, k, FALSE);
  444.  
  445.          tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
  446.                                                   format_desc,
  447.                                                   packed);
  448.       }
  449.  
  450.       /*
  451.        * Type conversion.
  452.        *
  453.        * TODO: We could avoid floating conversion for integer to
  454.        * integer conversions.
  455.        */
  456.  
  457.       if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
  458.          debug_printf("%s: unpacking %s with floating point\n",
  459.                       __FUNCTION__, format_desc->short_name);
  460.       }
  461.  
  462.       lp_build_conv(gallivm,
  463.                     lp_float32_vec4_type(),
  464.                     type,
  465.                     tmps, num_pixels, &res, 1);
  466.  
  467.       return lp_build_format_swizzle_aos(format_desc, &bld, res);
  468.    }
  469.  
  470.    /* If all channels are of same type and we are not using half-floats */
  471.    if (format_desc->is_array &&
  472.        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) {
  473.       assert(!format_desc->is_mixed);
  474.       return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset);
  475.    }
  476.  
  477.    /*
  478.     * YUV / subsampled formats
  479.     */
  480.  
  481.    if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
  482.       struct lp_type tmp_type;
  483.       LLVMValueRef tmp;
  484.  
  485.       memset(&tmp_type, 0, sizeof tmp_type);
  486.       tmp_type.width = 8;
  487.       tmp_type.length = num_pixels * 4;
  488.       tmp_type.norm = TRUE;
  489.  
  490.       tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
  491.                                                format_desc,
  492.                                                num_pixels,
  493.                                                base_ptr,
  494.                                                offset,
  495.                                                i, j);
  496.  
  497.       lp_build_conv(gallivm,
  498.                     tmp_type, type,
  499.                     &tmp, 1, &tmp, 1);
  500.  
  501.       return tmp;
  502.    }
  503.  
  504.    /*
  505.     * Fallback to util_format_description::fetch_rgba_8unorm().
  506.     */
  507.  
  508.    if (format_desc->fetch_rgba_8unorm &&
  509.        !type.floating && type.width == 8 && !type.sign && type.norm) {
  510.       /*
  511.        * Fallback to calling util_format_description::fetch_rgba_8unorm.
  512.        *
  513.        * This is definitely not the most efficient way of fetching pixels, as
  514.        * we miss the opportunity to do vectorization, but this it is a
  515.        * convenient for formats or scenarios for which there was no opportunity
  516.        * or incentive to optimize.
  517.        */
  518.  
  519.       LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
  520.       LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
  521.       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  522.       LLVMValueRef function;
  523.       LLVMValueRef tmp_ptr;
  524.       LLVMValueRef tmp;
  525.       LLVMValueRef res;
  526.       unsigned k;
  527.  
  528.       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  529.          debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
  530.                       __FUNCTION__, format_desc->short_name);
  531.       }
  532.  
  533.       /*
  534.        * Declare and bind format_desc->fetch_rgba_8unorm().
  535.        */
  536.  
  537.       {
  538.          /*
  539.           * Function to call looks like:
  540.           *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
  541.           */
  542.          LLVMTypeRef ret_type;
  543.          LLVMTypeRef arg_types[4];
  544.          LLVMTypeRef function_type;
  545.  
  546.          ret_type = LLVMVoidTypeInContext(gallivm->context);
  547.          arg_types[0] = pi8t;
  548.          arg_types[1] = pi8t;
  549.          arg_types[2] = i32t;
  550.          arg_types[3] = i32t;
  551.          function_type = LLVMFunctionType(ret_type, arg_types,
  552.                                           Elements(arg_types), 0);
  553.  
  554.          /* make const pointer for the C fetch_rgba_8unorm function */
  555.          function = lp_build_const_int_pointer(gallivm,
  556.             func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
  557.  
  558.          /* cast the callee pointer to the function's type */
  559.          function = LLVMBuildBitCast(builder, function,
  560.                                      LLVMPointerType(function_type, 0),
  561.                                      "cast callee");
  562.       }
  563.  
  564.       tmp_ptr = lp_build_alloca(gallivm, i32t, "");
  565.  
  566.       res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
  567.  
  568.       /*
  569.        * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
  570.        * in the SoA vectors.
  571.        */
  572.  
  573.       for (k = 0; k < num_pixels; ++k) {
  574.          LLVMValueRef index = lp_build_const_int32(gallivm, k);
  575.          LLVMValueRef args[4];
  576.  
  577.          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
  578.          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
  579.                                             base_ptr, offset, k);
  580.  
  581.          if (num_pixels == 1) {
  582.             args[2] = i;
  583.             args[3] = j;
  584.          }
  585.          else {
  586.             args[2] = LLVMBuildExtractElement(builder, i, index, "");
  587.             args[3] = LLVMBuildExtractElement(builder, j, index, "");
  588.          }
  589.  
  590.          LLVMBuildCall(builder, function, args, Elements(args), "");
  591.  
  592.          tmp = LLVMBuildLoad(builder, tmp_ptr, "");
  593.  
  594.          if (num_pixels == 1) {
  595.             res = tmp;
  596.          }
  597.          else {
  598.             res = LLVMBuildInsertElement(builder, res, tmp, index, "");
  599.          }
  600.       }
  601.  
  602.       /* Bitcast from <n x i32> to <4n x i8> */
  603.       res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
  604.  
  605.       return res;
  606.    }
  607.  
  608.    /*
  609.     * Fallback to util_format_description::fetch_rgba_float().
  610.     */
  611.  
  612.    if (format_desc->fetch_rgba_float) {
  613.       /*
  614.        * Fallback to calling util_format_description::fetch_rgba_float.
  615.        *
  616.        * This is definitely not the most efficient way of fetching pixels, as
  617.        * we miss the opportunity to do vectorization, but this it is a
  618.        * convenient for formats or scenarios for which there was no opportunity
  619.        * or incentive to optimize.
  620.        */
  621.  
  622.       LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
  623.       LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
  624.       LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
  625.       LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
  626.       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  627.       LLVMValueRef function;
  628.       LLVMValueRef tmp_ptr;
  629.       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
  630.       LLVMValueRef res;
  631.       unsigned k;
  632.  
  633.       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  634.          debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
  635.                       __FUNCTION__, format_desc->short_name);
  636.       }
  637.  
  638.       /*
  639.        * Declare and bind format_desc->fetch_rgba_float().
  640.        */
  641.  
  642.       {
  643.          /*
  644.           * Function to call looks like:
  645.           *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
  646.           */
  647.          LLVMTypeRef ret_type;
  648.          LLVMTypeRef arg_types[4];
  649.  
  650.          ret_type = LLVMVoidTypeInContext(gallivm->context);
  651.          arg_types[0] = pf32t;
  652.          arg_types[1] = pi8t;
  653.          arg_types[2] = i32t;
  654.          arg_types[3] = i32t;
  655.  
  656.          function = lp_build_const_func_pointer(gallivm,
  657.                                                 func_to_pointer((func_pointer) format_desc->fetch_rgba_float),
  658.                                                 ret_type,
  659.                                                 arg_types, Elements(arg_types),
  660.                                                 format_desc->short_name);
  661.       }
  662.  
  663.       tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
  664.  
  665.       /*
  666.        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
  667.        * in the SoA vectors.
  668.        */
  669.  
  670.       for (k = 0; k < num_pixels; ++k) {
  671.          LLVMValueRef args[4];
  672.  
  673.          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
  674.          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
  675.                                             base_ptr, offset, k);
  676.  
  677.          if (num_pixels == 1) {
  678.             args[2] = i;
  679.             args[3] = j;
  680.          }
  681.          else {
  682.             LLVMValueRef index = lp_build_const_int32(gallivm, k);
  683.             args[2] = LLVMBuildExtractElement(builder, i, index, "");
  684.             args[3] = LLVMBuildExtractElement(builder, j, index, "");
  685.          }
  686.  
  687.          LLVMBuildCall(builder, function, args, Elements(args), "");
  688.  
  689.          tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
  690.       }
  691.  
  692.       lp_build_conv(gallivm,
  693.                     lp_float32_vec4_type(),
  694.                     type,
  695.                     tmps, num_pixels, &res, 1);
  696.  
  697.       return res;
  698.    }
  699.  
  700.    assert(!util_format_is_pure_integer(format_desc->format));
  701.  
  702.    assert(0);
  703.    return lp_build_undef(gallivm, type);
  704. }
  705.