Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * @file
  30.  * AoS pixel format manipulation.
  31.  *
  32.  * @author Jose Fonseca <jfonseca@vmware.com>
  33.  */
  34.  
  35.  
  36. #include "util/u_format.h"
  37. #include "util/u_memory.h"
  38. #include "util/u_math.h"
  39. #include "util/u_pointer.h"
  40. #include "util/u_string.h"
  41.  
  42. #include "lp_bld_arit.h"
  43. #include "lp_bld_init.h"
  44. #include "lp_bld_type.h"
  45. #include "lp_bld_flow.h"
  46. #include "lp_bld_const.h"
  47. #include "lp_bld_conv.h"
  48. #include "lp_bld_swizzle.h"
  49. #include "lp_bld_gather.h"
  50. #include "lp_bld_debug.h"
  51. #include "lp_bld_format.h"
  52. #include "lp_bld_intr.h"
  53.  
  54.  
  55. /**
  56.  * Basic swizzling.  Rearrange the order of the unswizzled array elements
  57.  * according to the format description.  PIPE_SWIZZLE_ZERO/ONE are supported
  58.  * too.
  59.  * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
  60.  */
  61. LLVMValueRef
  62. lp_build_format_swizzle_aos(const struct util_format_description *desc,
  63.                             struct lp_build_context *bld,
  64.                             LLVMValueRef unswizzled)
  65. {
  66.    unsigned char swizzles[4];
  67.    unsigned chan;
  68.  
  69.    assert(bld->type.length % 4 == 0);
  70.  
  71.    for (chan = 0; chan < 4; ++chan) {
  72.       enum util_format_swizzle swizzle;
  73.  
  74.       if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
  75.          /*
  76.           * For ZS formats do RGBA = ZZZ1
  77.           */
  78.          if (chan == 3) {
  79.             swizzle = UTIL_FORMAT_SWIZZLE_1;
  80.          } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
  81.             swizzle = UTIL_FORMAT_SWIZZLE_0;
  82.          } else {
  83.             swizzle = desc->swizzle[0];
  84.          }
  85.       } else {
  86.          swizzle = desc->swizzle[chan];
  87.       }
  88.       swizzles[chan] = swizzle;
  89.    }
  90.  
  91.    return lp_build_swizzle_aos(bld, unswizzled, swizzles);
  92. }
  93.  
  94.  
  95. /**
  96.  * Whether the format matches the vector type, apart of swizzles.
  97.  */
  98. static INLINE boolean
  99. format_matches_type(const struct util_format_description *desc,
  100.                     struct lp_type type)
  101. {
  102.    enum util_format_type chan_type;
  103.    unsigned chan;
  104.  
  105.    assert(type.length % 4 == 0);
  106.  
  107.    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
  108.        desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
  109.        desc->block.width != 1 ||
  110.        desc->block.height != 1) {
  111.       return FALSE;
  112.    }
  113.  
  114.    if (type.floating) {
  115.       chan_type = UTIL_FORMAT_TYPE_FLOAT;
  116.    } else if (type.fixed) {
  117.       chan_type = UTIL_FORMAT_TYPE_FIXED;
  118.    } else if (type.sign) {
  119.       chan_type = UTIL_FORMAT_TYPE_SIGNED;
  120.    } else {
  121.       chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
  122.    }
  123.  
  124.    for (chan = 0; chan < desc->nr_channels; ++chan) {
  125.       if (desc->channel[chan].size != type.width) {
  126.          return FALSE;
  127.       }
  128.  
  129.       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
  130.          if (desc->channel[chan].type != chan_type ||
  131.              desc->channel[chan].normalized != type.norm) {
  132.             return FALSE;
  133.          }
  134.       }
  135.    }
  136.  
  137.    return TRUE;
  138. }
  139.  
  140.  
  141. /**
  142.  * Unpack a single pixel into its XYZW components.
  143.  *
  144.  * @param desc  the pixel format for the packed pixel value
  145.  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
  146.  *
  147.  * @return XYZW in a float[4] or ubyte[4] or ushort[4] vector.
  148.  */
  149. static INLINE LLVMValueRef
  150. lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
  151.                                const struct util_format_description *desc,
  152.                                LLVMValueRef packed)
  153. {
  154.    LLVMBuilderRef builder = gallivm->builder;
  155.    LLVMValueRef shifted, casted, scaled, masked;
  156.    LLVMValueRef shifts[4];
  157.    LLVMValueRef masks[4];
  158.    LLVMValueRef scales[4];
  159.  
  160.    boolean normalized;
  161.    boolean needs_uitofp;
  162.    unsigned i;
  163.  
  164.    /* TODO: Support more formats */
  165.    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  166.    assert(desc->block.width == 1);
  167.    assert(desc->block.height == 1);
  168.    assert(desc->block.bits <= 32);
  169.  
  170.    /* Do the intermediate integer computations with 32bit integers since it
  171.     * matches floating point size */
  172.    assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));
  173.  
  174.    /* Broadcast the packed value to all four channels
  175.     * before: packed = BGRA
  176.     * after: packed = {BGRA, BGRA, BGRA, BGRA}
  177.     */
  178.    packed = LLVMBuildInsertElement(builder,
  179.                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
  180.                                    packed,
  181.                                    LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
  182.                                    "");
  183.    packed = LLVMBuildShuffleVector(builder,
  184.                                    packed,
  185.                                    LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
  186.                                    LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
  187.                                    "");
  188.  
  189.    /* Initialize vector constants */
  190.    normalized = FALSE;
  191.    needs_uitofp = FALSE;
  192.  
  193.    /* Loop over 4 color components */
  194.    for (i = 0; i < 4; ++i) {
  195.       unsigned bits = desc->channel[i].size;
  196.       unsigned shift = desc->channel[i].shift;
  197.  
  198.       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
  199.          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  200.          masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
  201.          scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
  202.       }
  203.       else {
  204.          unsigned long long mask = (1ULL << bits) - 1;
  205.  
  206.          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
  207.  
  208.          if (bits == 32) {
  209.             needs_uitofp = TRUE;
  210.          }
  211.  
  212.          shifts[i] = lp_build_const_int32(gallivm, shift);
  213.          masks[i] = lp_build_const_int32(gallivm, mask);
  214.  
  215.          if (desc->channel[i].normalized) {
  216.             scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
  217.             normalized = TRUE;
  218.          }
  219.          else
  220.             scales[i] =  lp_build_const_float(gallivm, 1.0);
  221.       }
  222.    }
  223.  
  224.    /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW}
  225.     * into masked = {X, Y, Z, W}
  226.     */
  227.    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
  228.    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
  229.  
  230.    if (!needs_uitofp) {
  231.       /* UIToFP can't be expressed in SSE2 */
  232.       casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
  233.    } else {
  234.       casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
  235.    }
  236.  
  237.    /* At this point 'casted' may be a vector of floats such as
  238.     * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
  239.     * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
  240.     */
  241.  
  242.    if (normalized)
  243.       scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
  244.    else
  245.       scaled = casted;
  246.  
  247.    return scaled;
  248. }
  249.  
  250.  
  251. /**
  252.  * Pack a single pixel.
  253.  *
  254.  * @param rgba 4 float vector with the unpacked components.
  255.  *
  256.  * XXX: This is mostly for reference and testing -- operating a single pixel at
  257.  * a time is rarely if ever needed.
  258.  */
  259. LLVMValueRef
  260. lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
  261.                        const struct util_format_description *desc,
  262.                        LLVMValueRef rgba)
  263. {
  264.    LLVMBuilderRef builder = gallivm->builder;
  265.    LLVMTypeRef type;
  266.    LLVMValueRef packed = NULL;
  267.    LLVMValueRef swizzles[4];
  268.    LLVMValueRef shifted, casted, scaled, unswizzled;
  269.    LLVMValueRef shifts[4];
  270.    LLVMValueRef scales[4];
  271.    boolean normalized;
  272.    unsigned i, j;
  273.  
  274.    assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
  275.    assert(desc->block.width == 1);
  276.    assert(desc->block.height == 1);
  277.  
  278.    type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);
  279.  
  280.    /* Unswizzle the color components into the source vector. */
  281.    for (i = 0; i < 4; ++i) {
  282.       for (j = 0; j < 4; ++j) {
  283.          if (desc->swizzle[j] == i)
  284.             break;
  285.       }
  286.       if (j < 4)
  287.          swizzles[i] = lp_build_const_int32(gallivm, j);
  288.       else
  289.          swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  290.    }
  291.  
  292.    unswizzled = LLVMBuildShuffleVector(builder, rgba,
  293.                                        LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
  294.                                        LLVMConstVector(swizzles, 4), "");
  295.  
  296.    normalized = FALSE;
  297.    for (i = 0; i < 4; ++i) {
  298.       unsigned bits = desc->channel[i].size;
  299.       unsigned shift = desc->channel[i].shift;
  300.  
  301.       if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
  302.          shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  303.          scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
  304.       }
  305.       else {
  306.          unsigned mask = (1 << bits) - 1;
  307.  
  308.          assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
  309.          assert(bits < 32);
  310.  
  311.          shifts[i] = lp_build_const_int32(gallivm, shift);
  312.  
  313.          if (desc->channel[i].normalized) {
  314.             scales[i] = lp_build_const_float(gallivm, mask);
  315.             normalized = TRUE;
  316.          }
  317.          else
  318.             scales[i] = lp_build_const_float(gallivm, 1.0);
  319.       }
  320.    }
  321.  
  322.    if (normalized)
  323.       scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
  324.    else
  325.       scaled = unswizzled;
  326.  
  327.    casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");
  328.  
  329.    shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
  330.    
  331.    /* Bitwise or all components */
  332.    for (i = 0; i < 4; ++i) {
  333.       if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
  334.          LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
  335.                                                lp_build_const_int32(gallivm, i), "");
  336.          if (packed)
  337.             packed = LLVMBuildOr(builder, packed, component, "");
  338.          else
  339.             packed = component;
  340.       }
  341.    }
  342.  
  343.    if (!packed)
  344.       packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  345.  
  346.    if (desc->block.bits < 32)
  347.       packed = LLVMBuildTrunc(builder, packed, type, "");
  348.  
  349.    return packed;
  350. }
  351.  
  352.  
  353.  
  354.  
  355. /**
  356.  * Fetch a pixel into a 4 float AoS.
  357.  *
  358.  * \param format_desc  describes format of the image we're fetching from
  359.  * \param ptr  address of the pixel block (or the texel if uncompressed)
  360.  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
  361.  *              these will always be (0, 0).
  362.  * \return  a 4 element vector with the pixel's RGBA values.
  363.  */
  364. LLVMValueRef
  365. lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
  366.                         const struct util_format_description *format_desc,
  367.                         struct lp_type type,
  368.                         LLVMValueRef base_ptr,
  369.                         LLVMValueRef offset,
  370.                         LLVMValueRef i,
  371.                         LLVMValueRef j)
  372. {
  373.    LLVMBuilderRef builder = gallivm->builder;
  374.    unsigned num_pixels = type.length / 4;
  375.    struct lp_build_context bld;
  376.  
  377.    assert(type.length <= LP_MAX_VECTOR_LENGTH);
  378.    assert(type.length % 4 == 0);
  379.  
  380.    lp_build_context_init(&bld, gallivm, type);
  381.  
  382.    /*
  383.     * Trivial case
  384.     *
  385.     * The format matches the type (apart of a swizzle) so no need for
  386.     * scaling or converting.
  387.     */
  388.  
  389.    if (format_matches_type(format_desc, type) &&
  390.        format_desc->block.bits <= type.width * 4 &&
  391.        util_is_power_of_two(format_desc->block.bits)) {
  392.       LLVMValueRef packed;
  393.       LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);
  394.       unsigned vec_len = type.width * type.length;
  395.  
  396.       /*
  397.        * The format matches the type (apart of a swizzle) so no need for
  398.        * scaling or converting.
  399.        */
  400.  
  401.       packed = lp_build_gather(gallivm, type.length/4,
  402.                                format_desc->block.bits, type.width*4,
  403.                                base_ptr, offset, TRUE);
  404.  
  405.       assert(format_desc->block.bits <= vec_len);
  406.  
  407.       packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
  408.       return lp_build_format_swizzle_aos(format_desc, &bld, packed);
  409.    }
  410.  
  411.    /*
  412.     * Bit arithmetic
  413.     */
  414.  
  415.    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
  416.        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
  417.         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
  418.        format_desc->block.width == 1 &&
  419.        format_desc->block.height == 1 &&
  420.        util_is_power_of_two(format_desc->block.bits) &&
  421.        format_desc->block.bits <= 32 &&
  422.        format_desc->is_bitmask &&
  423.        !format_desc->is_mixed &&
  424.        (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
  425.         format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) &&
  426.        !format_desc->channel[0].pure_integer) {
  427.  
  428.       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
  429.       LLVMValueRef res;
  430.       unsigned k;
  431.  
  432.       /*
  433.        * Unpack a pixel at a time into a <4 x float> RGBA vector
  434.        */
  435.  
  436.       for (k = 0; k < num_pixels; ++k) {
  437.          LLVMValueRef packed;
  438.  
  439.          packed = lp_build_gather_elem(gallivm, num_pixels,
  440.                                        format_desc->block.bits, 32,
  441.                                        base_ptr, offset, k, FALSE);
  442.  
  443.          tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
  444.                                                   format_desc,
  445.                                                   packed);
  446.       }
  447.  
  448.       /*
  449.        * Type conversion.
  450.        *
  451.        * TODO: We could avoid floating conversion for integer to
  452.        * integer conversions.
  453.        */
  454.  
  455.       if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
  456.          debug_printf("%s: unpacking %s with floating point\n",
  457.                       __FUNCTION__, format_desc->short_name);
  458.       }
  459.  
  460.       lp_build_conv(gallivm,
  461.                     lp_float32_vec4_type(),
  462.                     type,
  463.                     tmps, num_pixels, &res, 1);
  464.  
  465.       return lp_build_format_swizzle_aos(format_desc, &bld, res);
  466.    }
  467.  
  468.    /* If all channels are of same type and we are not using half-floats */
  469.    if (format_desc->is_array &&
  470.        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) {
  471.       assert(!format_desc->is_mixed);
  472.       return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset);
  473.    }
  474.  
  475.    /*
  476.     * YUV / subsampled formats
  477.     */
  478.  
  479.    if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
  480.       struct lp_type tmp_type;
  481.       LLVMValueRef tmp;
  482.  
  483.       memset(&tmp_type, 0, sizeof tmp_type);
  484.       tmp_type.width = 8;
  485.       tmp_type.length = num_pixels * 4;
  486.       tmp_type.norm = TRUE;
  487.  
  488.       tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
  489.                                                format_desc,
  490.                                                num_pixels,
  491.                                                base_ptr,
  492.                                                offset,
  493.                                                i, j);
  494.  
  495.       lp_build_conv(gallivm,
  496.                     tmp_type, type,
  497.                     &tmp, 1, &tmp, 1);
  498.  
  499.       return tmp;
  500.    }
  501.  
  502.    /*
  503.     * Fallback to util_format_description::fetch_rgba_8unorm().
  504.     */
  505.  
  506.    if (format_desc->fetch_rgba_8unorm &&
  507.        !type.floating && type.width == 8 && !type.sign && type.norm) {
  508.       /*
  509.        * Fallback to calling util_format_description::fetch_rgba_8unorm.
  510.        *
  511.        * This is definitely not the most efficient way of fetching pixels, as
  512.        * we miss the opportunity to do vectorization, but this it is a
  513.        * convenient for formats or scenarios for which there was no opportunity
  514.        * or incentive to optimize.
  515.        */
  516.  
  517.       LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
  518.       LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
  519.       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  520.       LLVMValueRef function;
  521.       LLVMValueRef tmp_ptr;
  522.       LLVMValueRef tmp;
  523.       LLVMValueRef res;
  524.       unsigned k;
  525.  
  526.       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  527.          debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
  528.                       __FUNCTION__, format_desc->short_name);
  529.       }
  530.  
  531.       /*
  532.        * Declare and bind format_desc->fetch_rgba_8unorm().
  533.        */
  534.  
  535.       {
  536.          /*
  537.           * Function to call looks like:
  538.           *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
  539.           */
  540.          LLVMTypeRef ret_type;
  541.          LLVMTypeRef arg_types[4];
  542.          LLVMTypeRef function_type;
  543.  
  544.          ret_type = LLVMVoidTypeInContext(gallivm->context);
  545.          arg_types[0] = pi8t;
  546.          arg_types[1] = pi8t;
  547.          arg_types[2] = i32t;
  548.          arg_types[3] = i32t;
  549.          function_type = LLVMFunctionType(ret_type, arg_types,
  550.                                           Elements(arg_types), 0);
  551.  
  552.          /* make const pointer for the C fetch_rgba_8unorm function */
  553.          function = lp_build_const_int_pointer(gallivm,
  554.             func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
  555.  
  556.          /* cast the callee pointer to the function's type */
  557.          function = LLVMBuildBitCast(builder, function,
  558.                                      LLVMPointerType(function_type, 0),
  559.                                      "cast callee");
  560.       }
  561.  
  562.       tmp_ptr = lp_build_alloca(gallivm, i32t, "");
  563.  
  564.       res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
  565.  
  566.       /*
  567.        * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
  568.        * in the SoA vectors.
  569.        */
  570.  
  571.       for (k = 0; k < num_pixels; ++k) {
  572.          LLVMValueRef index = lp_build_const_int32(gallivm, k);
  573.          LLVMValueRef args[4];
  574.  
  575.          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
  576.          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
  577.                                             base_ptr, offset, k);
  578.  
  579.          if (num_pixels == 1) {
  580.             args[2] = i;
  581.             args[3] = j;
  582.          }
  583.          else {
  584.             args[2] = LLVMBuildExtractElement(builder, i, index, "");
  585.             args[3] = LLVMBuildExtractElement(builder, j, index, "");
  586.          }
  587.  
  588.          LLVMBuildCall(builder, function, args, Elements(args), "");
  589.  
  590.          tmp = LLVMBuildLoad(builder, tmp_ptr, "");
  591.  
  592.          if (num_pixels == 1) {
  593.             res = tmp;
  594.          }
  595.          else {
  596.             res = LLVMBuildInsertElement(builder, res, tmp, index, "");
  597.          }
  598.       }
  599.  
  600.       /* Bitcast from <n x i32> to <4n x i8> */
  601.       res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
  602.  
  603.       return res;
  604.    }
  605.  
  606.    /*
  607.     * Fallback to util_format_description::fetch_rgba_float().
  608.     */
  609.  
  610.    if (format_desc->fetch_rgba_float) {
  611.       /*
  612.        * Fallback to calling util_format_description::fetch_rgba_float.
  613.        *
  614.        * This is definitely not the most efficient way of fetching pixels, as
  615.        * we miss the opportunity to do vectorization, but this it is a
  616.        * convenient for formats or scenarios for which there was no opportunity
  617.        * or incentive to optimize.
  618.        */
  619.  
  620.       LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
  621.       LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
  622.       LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
  623.       LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
  624.       LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  625.       LLVMValueRef function;
  626.       LLVMValueRef tmp_ptr;
  627.       LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
  628.       LLVMValueRef res;
  629.       unsigned k;
  630.  
  631.       if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  632.          debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
  633.                       __FUNCTION__, format_desc->short_name);
  634.       }
  635.  
  636.       /*
  637.        * Declare and bind format_desc->fetch_rgba_float().
  638.        */
  639.  
  640.       {
  641.          /*
  642.           * Function to call looks like:
  643.           *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
  644.           */
  645.          LLVMTypeRef ret_type;
  646.          LLVMTypeRef arg_types[4];
  647.  
  648.          ret_type = LLVMVoidTypeInContext(gallivm->context);
  649.          arg_types[0] = pf32t;
  650.          arg_types[1] = pi8t;
  651.          arg_types[2] = i32t;
  652.          arg_types[3] = i32t;
  653.  
  654.          function = lp_build_const_func_pointer(gallivm,
  655.                                                 func_to_pointer((func_pointer) format_desc->fetch_rgba_float),
  656.                                                 ret_type,
  657.                                                 arg_types, Elements(arg_types),
  658.                                                 format_desc->short_name);
  659.       }
  660.  
  661.       tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
  662.  
  663.       /*
  664.        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
  665.        * in the SoA vectors.
  666.        */
  667.  
  668.       for (k = 0; k < num_pixels; ++k) {
  669.          LLVMValueRef args[4];
  670.  
  671.          args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
  672.          args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
  673.                                             base_ptr, offset, k);
  674.  
  675.          if (num_pixels == 1) {
  676.             args[2] = i;
  677.             args[3] = j;
  678.          }
  679.          else {
  680.             LLVMValueRef index = lp_build_const_int32(gallivm, k);
  681.             args[2] = LLVMBuildExtractElement(builder, i, index, "");
  682.             args[3] = LLVMBuildExtractElement(builder, j, index, "");
  683.          }
  684.  
  685.          LLVMBuildCall(builder, function, args, Elements(args), "");
  686.  
  687.          tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
  688.       }
  689.  
  690.       lp_build_conv(gallivm,
  691.                     lp_float32_vec4_type(),
  692.                     type,
  693.                     tmps, num_pixels, &res, 1);
  694.  
  695.       return res;
  696.    }
  697.  
  698.    assert(!util_format_is_pure_integer(format_desc->format));
  699.  
  700.    assert(0);
  701.    return lp_build_undef(gallivm, type);
  702. }
  703.