/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
#include "util/u_cpu_detect.h"
#include "util/u_format_rgb9e5.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_printf.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_struct.h"
#include "lp_bld_quad.h"
#include "lp_bld_pack.h"


/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
 * The result, texel, will be float vectors:
 *   texel[0] = red values
 *   texel[1] = green values
 *   texel[2] = blue values
 *   texel[3] = alpha values
 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x, y, z, y_stride, z_stride,
                          &offset, &i, &j);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image.  We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture coords with !use_border.  This will cause
       * coords which are out of bounds to become zero.  Zero is guaranteed
       * to be inside the texture image.
       */
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
   }

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type,
                           data_ptr, offset,
                           i, j,
                           texel_out);

   /*
    * Note: if we find an app which frequently samples the texture border
    * we might want to implement a true conditional here to avoid sampling
    * the texture whenever possible (since that's quite a bit of code).
    * Ex:
    *   if (use_border) {
    *      texel = border_color;
    *   }
    *   else {
    *      texel = sample_texture(coord);
    *   }
    * As it is now, we always sample the texture, then selectively replace
    * the texel color results with the border color.
    */

   if (use_border) {
      /* select texel color or border color depending on use_border. */
      const struct util_format_description *format_desc = bld->format_desc;
      int chan;
      struct lp_type border_type = bld->texel_type;
      border_type.length = 4;
      /*
       * Only replace channels which are actually present. The others should
       * get optimized away eventually by the sampler_view swizzle anyway,
       * but this way is easier.
       */
      for (chan = 0; chan < 4; chan++) {
         unsigned chan_s;
         /* reverse-map channel... */
         for (chan_s = 0; chan_s < 4; chan_s++) {
            if (chan_s == format_desc->swizzle[chan]) {
               break;
            }
         }
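         /*
          * After the loop, chan_s <= 3 means swizzle[chan] refers to an
          * actual texel channel (X/Y/Z/W) rather than the constants 0 or 1,
          * so only those channels get the border color selected in.
          */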
         if (chan_s <= 3) {
            /* use the already clamped color */
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
            LLVMValueRef border_chan;

            border_chan = lp_build_extract_broadcast(bld->gallivm,
                                                     border_type,
                                                     bld->texel_type,
                                                     bld->border_color_clamped,
                                                     idx);
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
                                              border_chan, texel_out[chan]);
         }
      }
   }
}


/**
 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
 */
static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context *bld,
                      LLVMValueRef coord)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef fract, flr, isOdd;

   lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);

   /* isOdd = flr & 1 */
   isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");

   /* make coord positive or negative depending on isOdd */
   coord = lp_build_set_sign(coord_bld, fract, isOdd);

   /* convert isOdd to float */
   isOdd = lp_build_int_to_float(coord_bld, isOdd);

   /* add isOdd to coord */
   coord = lp_build_add(coord_bld, coord, isOdd);
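   /* e.g. coord 1.3 -> flr 1 (odd), fract 0.3 -> -0.3 + 1.0 = 0.7;
    *      coord 2.7 -> flr 2 (even), fract 0.7 -> 0.7 + 0.0 = 0.7
    */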

   return coord;
}


/**
 * Helper to compute the first coord and the weight for
 * linear wrap repeat npot textures
 */
void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
                                  LLVMValueRef coord_f,
                                  LLVMValueRef length_i,
                                  LLVMValueRef length_f,
                                  LLVMValueRef *coord0_i,
                                  LLVMValueRef *weight_f)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
                                                int_coord_bld->one);
   LLVMValueRef mask;
   /* wrap with normalized floats is just fract */
   coord_f = lp_build_fract(coord_bld, coord_f);
   /* mul by size and subtract 0.5 */
   coord_f = lp_build_mul(coord_bld, coord_f, length_f);
   coord_f = lp_build_sub(coord_bld, coord_f, half);
   /*
    * we avoided the 0.5/length division before the repeat wrap,
    * now need to fix up edge cases with selects
    */
   /* convert to int, compute lerp weight */
   lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                           PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
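   /*
    * Worked example for length 3: coord 0.4 -> fract 0.4 -> 1.2 - 0.5 = 0.7,
    * so coord0 = 0 with weight 0.7; coord 0.1 -> 0.3 - 0.5 = -0.2, floor is -1,
    * which the select above wraps to length - 1 = 2 (weight 0.8).
    */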
}


/**
 * Build LLVM code for texture wrap mode for linear filtering.
 * \param x0_out  returns first integer texcoord
 * \param x1_out  returns second integer texcoord
 * \param weight_out  returns linear interpolation weight
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            boolean is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
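         /*
          * For power-of-two sizes the AND with length - 1 is the repeat
          * (mod) step, e.g. with length 8 a coord0 of -1 wraps to 7 and
          * 8 wraps to 0.
          */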
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /* clamp to [0, length] */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min(coord_bld, coord, length_f);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
      /* can skip clamp (though might not work for very large coord values) */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      coord = lp_build_mul(coord_bld, coord, length_f);
      coord = lp_build_sub(coord_bld, coord, half);
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      /* coord0 = max(coord0, 0) */
      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
      /* coord1 = min(coord1, length-1) */
      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* clamp to [0, length] */
      coord = lp_build_min(coord_bld, coord, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         coord = lp_build_abs(coord_bld, coord);

         /* clamp to length max */
         coord = lp_build_min(coord_bld, coord, length_f);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         coord = lp_build_abs(coord_bld, coord);

         /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
         /* skip clamp - always positive, and other side
            only potentially matters for very large coords */
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}


/**
 * Build LLVM code for texture wrap mode for nearest filtering.
 * \param coord  the incoming texcoord (nominally in [0,1])
 * \param length  the texture size along one dimension, as int vector
 * \param length_f  the texture size along one dimension, as float vector
 * \param offset  texel offset along one dimension (as int vector)
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             boolean is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      }
      else {
          if (offset) {
             offset = lp_build_int_to_float(coord_bld, offset);
             offset = lp_build_div(coord_bld, offset, length_f);
             coord = lp_build_add(coord_bld, coord, offset);
          }
          /* take fraction, unnormalize */
          coord = lp_build_fract_safe(coord_bld, coord);
          coord = lp_build_mul(coord_bld, coord, length_f);
          icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}


/**
 * Do shadow test/comparison.
 * \param p shadow ref value
 * \param texel  the texel to compare against
 */
static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
                            LLVMValueRef p,
                            LLVMValueRef texel)
{
   struct lp_build_context *texel_bld = &bld->texel_bld;
   LLVMValueRef res;

   if (0) {
      //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
      lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
   }

   /* result = (p FUNC texel) ? 1 : 0 */
   /*
    * honor d3d10 floating point rules here, which state that comparisons
    * are ordered except NOT_EQUAL which is unordered.
    */
   if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
      res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
                                 p, texel);
   }
   else {
      res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
                         p, texel);
   }
   return res;
}


/**
 * Generate code to sample a mipmap level with nearest filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   if (has_layer_coord(bld->static_texture_state->target)) {
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* add cube layer to face */
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
      }
      else {
         z = coords[2];
      }
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, colors_out);

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just an AND of 1.0 with cmpval, but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}


/**
 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
 */
static LLVMValueRef
lp_build_masklerp(struct lp_build_context *bld,
                 LLVMValueRef weight,
                 LLVMValueRef mask0,
                 LLVMValueRef mask1)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef weight2;

   weight2 = lp_build_sub(bld, bld->one, weight);
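   /*
    * mask0/mask1 are all-zero or all-one bit patterns, so bitcasting the
    * float weights to ints and ANDing with the masks yields either the
    * original weight or 0.0 -- a branchless select per element.
    */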
   weight = LLVMBuildBitCast(builder, weight,
                              lp_build_int_vec_type(gallivm, bld->type), "");
   weight2 = LLVMBuildBitCast(builder, weight2,
                              lp_build_int_vec_type(gallivm, bld->type), "");
   weight = LLVMBuildAnd(builder, weight, mask1, "");
   weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
   weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
   weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
   return lp_build_add(bld, weight, weight2);
}

/**
 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
 */
static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context *bld,
                    LLVMValueRef weight0,
                    LLVMValueRef weight1,
                    LLVMValueRef mask00,
                    LLVMValueRef mask01,
                    LLVMValueRef mask10,
                    LLVMValueRef mask11)
{
   LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
   LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
   return lp_build_lerp(bld, weight1, val0, val1, 0);
}

/*
 * this is a bit excessive code for something OpenGL just recommends
 * but does not require.
 */
#define ACCURATE_CUBE_CORNERS 1

/**
 * Generate code to sample a mipmap level with linear filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 * If linear_mask is present, only pixels having their mask set
 * will receive linear filtering, the rest will use nearest.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             boolean is_gather,
                             LLVMValueRef size,
                             LLVMValueRef linear_mask,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef *coords,
                             const LLVMValueRef *offsets,
                             LLVMValueRef colors_out[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *ivec_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *texel_bld = &bld->texel_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef fall_off[4], have_corners;
   LLVMValueRef z1 = NULL;
   LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
   LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
   LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
   LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
   LLVMValueRef xs[4], ys[4], zs[4];
   LLVMValueRef neighbors[2][2][4];
   int chan, texel_index;
   boolean seamless_cube_filter, accurate_cube_corners;

   seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
                           bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
                          bld->static_sampler_state->seamless_cube_map;
   /*
    * XXX I don't know how this is really supposed to work with gather. From GL
    * spec wording (not gather specific) it sounds like the 4th missing texel
    * should be an average of the other 3, hence for gather could return this.
    * This is however NOT how the code here works, which just fixes up the
    * weights used for filtering instead. And of course for gather there is
    * no filter to tweak...
    */
   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
                           !is_gather;

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */

   if (!seamless_cube_filter) {
      lp_build_sample_wrap_linear(bld, coords[0], width_vec,
                                  flt_width_vec, offsets[0],
                                  bld->static_texture_state->pot_width,
                                  bld->static_sampler_state->wrap_s,
                                  &x00, &x01, &s_fpart);
      lp_build_name(x00, "tex.x0.wrapped");
      lp_build_name(x01, "tex.x1.wrapped");
      x10 = x00;
      x11 = x01;

      if (dims >= 2) {
         lp_build_sample_wrap_linear(bld, coords[1], height_vec,
                                     flt_height_vec, offsets[1],
                                     bld->static_texture_state->pot_height,
                                     bld->static_sampler_state->wrap_t,
                                     &y00, &y10, &t_fpart);
         lp_build_name(y00, "tex.y0.wrapped");
         lp_build_name(y10, "tex.y1.wrapped");
         y01 = y00;
         y11 = y10;

         if (dims == 3) {
            lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
                                        flt_depth_vec, offsets[2],
                                        bld->static_texture_state->pot_depth,
                                        bld->static_sampler_state->wrap_r,
                                        &z00, &z1, &r_fpart);
            z01 = z10 = z11 = z00;
            lp_build_name(z00, "tex.z0.wrapped");
            lp_build_name(z1, "tex.z1.wrapped");
         }
      }
      if (has_layer_coord(bld->static_texture_state->target)) {
         if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
            /* add cube layer to face */
            z00 = z01 = z10 = z11 = z1 =
               lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
         }
         else {
            z00 = z01 = z10 = z11 = z1 = coords[2];  /* cube face or layer */
         }
         lp_build_name(z00, "tex.z0.layer");
         lp_build_name(z1, "tex.z1.layer");
      }
   }
   else {
      struct lp_build_if_state edge_if;
      LLVMTypeRef int1t;
      LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
      LLVMValueRef coord, have_edge, have_corner;
      LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
      LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
      LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
      LLVMValueRef face = coords[2];
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
      LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
      /* XXX drop height calcs. Could (should) do this without seamless filtering too */
      height_vec = width_vec;
      flt_height_vec = flt_width_vec;

      /* XXX the overflow logic is actually sort of duplicated with trilinear,
       * since an overflow in one mip should also have a corresponding overflow
       * in another.
       */
      /* should always have normalized coords, and offsets are undefined */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
      /* instead of clamp, build mask if overflowed */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      /* not ideal with AVX (and no AVX2) */
      lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
      x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
      coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
      coord = lp_build_sub(coord_bld, coord, half);
      lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
      y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);

      fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
      fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
      fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
      fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
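      /*
       * fall_off[0..3]: texel x0/x1/y0/y1 fell off the -x, +x, -y, +y edge
       * of the cube face, respectively.
       */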

      fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
      fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
      have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
      have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);

      /* needed for accurate corner filtering branch later, rely on 0 init */
      int1t = LLVMInt1TypeInContext(bld->gallivm->context);
      have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");

      for (texel_index = 0; texel_index < 4; texel_index++) {
         xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
         ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
         zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
      }

      lp_build_if(&edge_if, bld->gallivm, have_edge);

      have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
      have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
      LLVMBuildStore(builder, have_corner, have_corners);

      /*
       * Need to feed clamped values here for cheap corner handling,
       * but only for y coord (as when falling off both edges we only
       * fall off the x one) - this should be sufficient.
       */
      y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
      y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);

      /*
       * Get all possible new coords.
       */
      lp_build_cube_new_coords(ivec_bld, face,
                               x0, x1, y0_clamped, y1_clamped,
                               length_minus_one,
                               new_faces, new_xcoords, new_ycoords);

      /* handle fall off x-, x+ direction */
      /* determine new coords, face (both fall_off vars cannot be true at the same time) */
      x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
      y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
      x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
      y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
      x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
      y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
      x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
      y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);

      z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
      z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);

      /* handle fall off y-, y+ direction */
      /*
       * Cheap corner logic: just hack up things so a texel doesn't fall
       * off both sides (which means filter weights will be wrong but we'll only
       * use valid texels in the filter).
       * This means however (y) coords must additionally be clamped (see above).
       * This corner handling should be fully OpenGL (but not d3d10) compliant.
       */
      fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
      fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
      fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
      fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);

      x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
      y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
      x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
      y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
      x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
      y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
      x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
      y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);

      z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
      z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
      z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
      z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);

      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* now can add cube layer to face (per sample) */
         z00 = lp_build_add(ivec_bld, z00, coords[3]);
         z01 = lp_build_add(ivec_bld, z01, coords[3]);
         z10 = lp_build_add(ivec_bld, z10, coords[3]);
         z11 = lp_build_add(ivec_bld, z11, coords[3]);
      }

      LLVMBuildStore(builder, x00, xs[0]);
      LLVMBuildStore(builder, x01, xs[1]);
      LLVMBuildStore(builder, x10, xs[2]);
      LLVMBuildStore(builder, x11, xs[3]);
      LLVMBuildStore(builder, y00, ys[0]);
      LLVMBuildStore(builder, y01, ys[1]);
      LLVMBuildStore(builder, y10, ys[2]);
      LLVMBuildStore(builder, y11, ys[3]);
      LLVMBuildStore(builder, z00, zs[0]);
      LLVMBuildStore(builder, z01, zs[1]);
      LLVMBuildStore(builder, z10, zs[2]);
      LLVMBuildStore(builder, z11, zs[3]);

      lp_build_else(&edge_if);

      LLVMBuildStore(builder, x0, xs[0]);
      LLVMBuildStore(builder, x1, xs[1]);
      LLVMBuildStore(builder, x0, xs[2]);
      LLVMBuildStore(builder, x1, xs[3]);
      LLVMBuildStore(builder, y0, ys[0]);
      LLVMBuildStore(builder, y0, ys[1]);
      LLVMBuildStore(builder, y1, ys[2]);
      LLVMBuildStore(builder, y1, ys[3]);
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
         LLVMBuildStore(builder, cube_layer, zs[0]);
         LLVMBuildStore(builder, cube_layer, zs[1]);
         LLVMBuildStore(builder, cube_layer, zs[2]);
         LLVMBuildStore(builder, cube_layer, zs[3]);
      }
      else {
         LLVMBuildStore(builder, face, zs[0]);
         LLVMBuildStore(builder, face, zs[1]);
         LLVMBuildStore(builder, face, zs[2]);
         LLVMBuildStore(builder, face, zs[3]);
      }

      lp_build_endif(&edge_if);

      x00 = LLVMBuildLoad(builder, xs[0], "");
      x01 = LLVMBuildLoad(builder, xs[1], "");
      x10 = LLVMBuildLoad(builder, xs[2], "");
      x11 = LLVMBuildLoad(builder, xs[3], "");
      y00 = LLVMBuildLoad(builder, ys[0], "");
      y01 = LLVMBuildLoad(builder, ys[1], "");
      y10 = LLVMBuildLoad(builder, ys[2], "");
      y11 = LLVMBuildLoad(builder, ys[3], "");
      z00 = LLVMBuildLoad(builder, zs[0], "");
      z01 = LLVMBuildLoad(builder, zs[1], "");
      z10 = LLVMBuildLoad(builder, zs[2], "");
      z11 = LLVMBuildLoad(builder, zs[3], "");
   }

   if (linear_mask) {
      /*
       * Whack filter weights into place. Whatever texel had more weight is
       * the one which should have been selected by nearest filtering hence
       * just use 100% weight for it.
       */
      struct lp_build_context *c_bld = &bld->coord_bld;
      LLVMValueRef w1_mask, w1_weight;
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);

      w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
      /* this select is really just an "and" */
  1144.       w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
  1145.       s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
  1146.       if (dims >= 2) {
  1147.          w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
  1148.          w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
  1149.          t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
  1150.          if (dims == 3) {
  1151.             w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
  1152.             w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
  1153.             r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
  1154.          }
  1155.       }
  1156.    }
  1157.  
  1158.    /*
  1159.     * Get texture colors.
  1160.     */
  1161.    /* get x0/x1 texels */
  1162.    lp_build_sample_texel_soa(bld,
  1163.                              width_vec, height_vec, depth_vec,
  1164.                              x00, y00, z00,
  1165.                              row_stride_vec, img_stride_vec,
  1166.                              data_ptr, mipoffsets, neighbors[0][0]);
  1167.    lp_build_sample_texel_soa(bld,
  1168.                              width_vec, height_vec, depth_vec,
  1169.                              x01, y01, z01,
  1170.                              row_stride_vec, img_stride_vec,
  1171.                              data_ptr, mipoffsets, neighbors[0][1]);
  1172.  
  1173.    if (dims == 1) {
  1174.       assert(!is_gather);
  1175.       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
  1176.          /* Interpolate two samples from 1D image to produce one color */
  1177.          for (chan = 0; chan < 4; chan++) {
  1178.             colors_out[chan] = lp_build_lerp(texel_bld, s_fpart,
  1179.                                              neighbors[0][0][chan],
  1180.                                              neighbors[0][1][chan],
  1181.                                              0);
  1182.          }
  1183.       }
  1184.       else {
  1185.          LLVMValueRef cmpval0, cmpval1;
  1186.          cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
  1187.          cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
  1188.          /* simplified lerp, AND mask with weight and add */
  1189.          colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
  1190.                                            cmpval0, cmpval1);
  1191.          colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
  1192.       }
  1193.    }
  1194.    else {
  1195.       /* 2D/3D texture */
  1196.       struct lp_build_if_state corner_if;
  1197.       LLVMValueRef colors0[4], colorss[4];
  1198.  
  1199.       /* get x0/x1 texels at y1 */
  1200.       lp_build_sample_texel_soa(bld,
  1201.                                 width_vec, height_vec, depth_vec,
  1202.                                 x10, y10, z10,
  1203.                                 row_stride_vec, img_stride_vec,
  1204.                                 data_ptr, mipoffsets, neighbors[1][0]);
  1205.       lp_build_sample_texel_soa(bld,
  1206.                                 width_vec, height_vec, depth_vec,
  1207.                                 x11, y11, z11,
  1208.                                 row_stride_vec, img_stride_vec,
  1209.                                 data_ptr, mipoffsets, neighbors[1][1]);
  1210.  
   1211.       /*
   1212.        * To avoid having to duplicate the linear_mask / fetch code, use
   1213.        * another branch here (keyed on the corner condition, though the
   1214.        * edge condition would work as well).
   1215.        */
  1216.       if (accurate_cube_corners) {
  1217.          LLVMValueRef w00, w01, w10, w11, wx0, wy0;
  1218.          LLVMValueRef c_weight, c00, c01, c10, c11;
  1219.          LLVMValueRef have_corner, one_third, tmp;
  1220.  
  1221.          colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
  1222.          colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
  1223.          colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
  1224.          colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
  1225.  
  1226.          have_corner = LLVMBuildLoad(builder, have_corners, "");
  1227.  
  1228.          lp_build_if(&corner_if, bld->gallivm, have_corner);
  1229.  
   1230.          /*
   1231.           * We can't use the standard 2d lerp as we need per-element weights
   1232.           * in the corner case, so just calculate the bilinear result as
   1233.           * w00*s00 + w01*s01 + w10*s10 + w11*s11.
   1234.           * (This is actually less work than a 2d lerp, 7 vs. 9 instructions,
   1235.           * but calculating the weights needs another 6; still, for 4 channels it
   1236.           * is probably no slower than the 2d lerp since the weights only need
   1237.           * to be calculated once - of course fixing up the weights has additional cost.)
   1238.           */
  1239.          wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
  1240.          wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
  1241.          w00 = lp_build_mul(coord_bld, wx0, wy0);
  1242.          w01 = lp_build_mul(coord_bld, s_fpart, wy0);
  1243.          w10 = lp_build_mul(coord_bld, wx0, t_fpart);
  1244.          w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
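         /*
          * For reference (scalar form, illustrative only): with s and t the
          * fractional coordinates,
          *    w00 = (1-s)*(1-t),   w01 = s*(1-t),
          *    w10 = (1-s)*t,       w11 = s*t.
          */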
  1245.  
  1246.          /* find corner weight */
  1247.          c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
  1248.          c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
  1249.          c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
  1250.          c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
  1251.          c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
  1252.          c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
  1253.          c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
  1254.          c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
  1255.  
  1256.          /*
  1257.           * add 1/3 of the corner weight to each of the 3 other samples
  1258.           * and null out corner weight
  1259.           */
  1260.          one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
  1261.          c_weight = lp_build_mul(coord_bld, c_weight, one_third);
  1262.          w00 = lp_build_add(coord_bld, w00, c_weight);
  1263.          c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
  1264.          w00 = lp_build_andnot(coord_bld, w00, c00);
  1265.          w01 = lp_build_add(coord_bld, w01, c_weight);
  1266.          c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
  1267.          w01 = lp_build_andnot(coord_bld, w01, c01);
  1268.          w10 = lp_build_add(coord_bld, w10, c_weight);
  1269.          c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
  1270.          w10 = lp_build_andnot(coord_bld, w10, c10);
  1271.          w11 = lp_build_add(coord_bld, w11, c_weight);
  1272.          c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
  1273.          w11 = lp_build_andnot(coord_bld, w11, c11);
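         /*
          * Scalar sketch of the redistribution above (illustrative only;
          * is_corner[i] / w[i] are hypothetical per-sample names):
          *
          *    float cw = 0.0f;
          *    for (int i = 0; i < 4; i++)
          *       if (is_corner[i])
          *          cw = w[i];          // at most one sample falls on the corner
          *    cw *= 1.0f / 3.0f;
          *    for (int i = 0; i < 4; i++)
          *       w[i] = is_corner[i] ? 0.0f : w[i] + cw;
          */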
  1274.  
  1275.          if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
  1276.             for (chan = 0; chan < 4; chan++) {
  1277.                colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
  1278.                tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
  1279.                colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
  1280.                tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
  1281.                colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
  1282.                tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
  1283.                colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
  1284.             }
  1285.          }
  1286.          else {
  1287.             LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
  1288.             cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
  1289.             cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
  1290.             cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
  1291.             cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
   1292.             /* the interpolation inputs are just masks, so simply add the masked weights together */
  1293.             cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
  1294.             cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
  1295.             cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
  1296.             cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
  1297.             colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
  1298.             tmp = lp_build_and(coord_bld, w01, cmpval01);
  1299.             colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
  1300.             tmp = lp_build_and(coord_bld, w10, cmpval10);
  1301.             colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
  1302.             tmp = lp_build_and(coord_bld, w11, cmpval11);
  1303.             colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
  1304.             colors0[1] = colors0[2] = colors0[3] = colors0[0];
  1305.          }
  1306.  
  1307.          LLVMBuildStore(builder, colors0[0], colorss[0]);
  1308.          LLVMBuildStore(builder, colors0[1], colorss[1]);
  1309.          LLVMBuildStore(builder, colors0[2], colorss[2]);
  1310.          LLVMBuildStore(builder, colors0[3], colorss[3]);
  1311.  
  1312.          lp_build_else(&corner_if);
  1313.       }
  1314.  
  1315.       if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
  1316.          if (is_gather) {
   1317.             /*
   1318.              * Just assign the red channel (no component selection yet).
   1319.              * This is a bit hackish; we usually do the swizzle at the
   1320.              * end of sampling (far fewer values to swizzle), but that
   1321.              * obviously cannot work when using gather.
   1322.              */
  1323.             unsigned chan_swiz = bld->static_texture_state->swizzle_r;
  1324.             colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
  1325.                                                       neighbors[1][0],
  1326.                                                       chan_swiz);
  1327.             colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
  1328.                                                       neighbors[1][1],
  1329.                                                       chan_swiz);
  1330.             colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
  1331.                                                       neighbors[0][1],
  1332.                                                       chan_swiz);
  1333.             colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
  1334.                                                       neighbors[0][0],
  1335.                                                       chan_swiz);
  1336.          }
  1337.          else {
  1338.             /* Bilinear interpolate the four samples from the 2D image / 3D slice */
  1339.             for (chan = 0; chan < 4; chan++) {
  1340.                colors0[chan] = lp_build_lerp_2d(texel_bld,
  1341.                                                 s_fpart, t_fpart,
  1342.                                                 neighbors[0][0][chan],
  1343.                                                 neighbors[0][1][chan],
  1344.                                                 neighbors[1][0][chan],
  1345.                                                 neighbors[1][1][chan],
  1346.                                                 0);
  1347.             }
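            /*
             * Scalar reference for the 2d lerp above (illustrative only):
             *    tmp0   = lerp(s, n00, n01);
             *    tmp1   = lerp(s, n10, n11);
             *    result = lerp(t, tmp0, tmp1);
             * with lerp(w, a, b) = a + w*(b - a).
             */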
  1348.          }
  1349.       }
  1350.       else {
  1351.          LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
  1352.          cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
  1353.          cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
  1354.          cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
  1355.          cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
  1356.  
  1357.          if (is_gather) {
  1358.             /* more hacks for swizzling, should be X, ONE or ZERO... */
  1359.             unsigned chan_swiz = bld->static_texture_state->swizzle_r;
  1360.             if (chan_swiz <= PIPE_SWIZZLE_ALPHA) {
  1361.                colors0[0] = lp_build_select(texel_bld, cmpval10,
  1362.                                             texel_bld->one, texel_bld->zero);
  1363.                colors0[1] = lp_build_select(texel_bld, cmpval11,
  1364.                                             texel_bld->one, texel_bld->zero);
  1365.                colors0[2] = lp_build_select(texel_bld, cmpval01,
  1366.                                             texel_bld->one, texel_bld->zero);
  1367.                colors0[3] = lp_build_select(texel_bld, cmpval00,
  1368.                                             texel_bld->one, texel_bld->zero);
  1369.             }
  1370.             else if (chan_swiz == PIPE_SWIZZLE_ZERO) {
  1371.                colors0[0] = colors0[1] = colors0[2] = colors0[3] =
  1372.                             texel_bld->zero;
  1373.             }
  1374.             else {
  1375.                colors0[0] = colors0[1] = colors0[2] = colors0[3] =
  1376.                             texel_bld->one;
  1377.             }
  1378.          }
  1379.          else {
  1380.             colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
  1381.                                              cmpval00, cmpval01, cmpval10, cmpval11);
  1382.             colors0[1] = colors0[2] = colors0[3] = colors0[0];
  1383.          }
  1384.       }
  1385.  
  1386.       if (accurate_cube_corners) {
  1387.          LLVMBuildStore(builder, colors0[0], colorss[0]);
  1388.          LLVMBuildStore(builder, colors0[1], colorss[1]);
  1389.          LLVMBuildStore(builder, colors0[2], colorss[2]);
  1390.          LLVMBuildStore(builder, colors0[3], colorss[3]);
  1391.  
  1392.          lp_build_endif(&corner_if);
  1393.  
  1394.          colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
  1395.          colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
  1396.          colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
  1397.          colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
  1398.       }
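      /*
       * Note (illustrative): the stores before lp_build_endif and the loads
       * after it are the usual gallivm way of merging values across an
       * if/else - the colorss[] stack slots are expected to be promoted to
       * phi nodes by LLVM's mem2reg pass.
       */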
  1399.  
  1400.       if (dims == 3) {
  1401.          LLVMValueRef neighbors1[2][2][4];
  1402.          LLVMValueRef colors1[4];
  1403.  
  1404.          assert(!is_gather);
  1405.  
  1406.          /* get x0/x1/y0/y1 texels at z1 */
  1407.          lp_build_sample_texel_soa(bld,
  1408.                                    width_vec, height_vec, depth_vec,
  1409.                                    x00, y00, z1,
  1410.                                    row_stride_vec, img_stride_vec,
  1411.                                    data_ptr, mipoffsets, neighbors1[0][0]);
  1412.          lp_build_sample_texel_soa(bld,
  1413.                                    width_vec, height_vec, depth_vec,
  1414.                                    x01, y01, z1,
  1415.                                    row_stride_vec, img_stride_vec,
  1416.                                    data_ptr, mipoffsets, neighbors1[0][1]);
  1417.          lp_build_sample_texel_soa(bld,
  1418.                                    width_vec, height_vec, depth_vec,
  1419.                                    x10, y10, z1,
  1420.                                    row_stride_vec, img_stride_vec,
  1421.                                    data_ptr, mipoffsets, neighbors1[1][0]);
  1422.          lp_build_sample_texel_soa(bld,
  1423.                                    width_vec, height_vec, depth_vec,
  1424.                                    x11, y11, z1,
  1425.                                    row_stride_vec, img_stride_vec,
  1426.                                    data_ptr, mipoffsets, neighbors1[1][1]);
  1427.  
  1428.          if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
  1429.             /* Bilinear interpolate the four samples from the second Z slice */
  1430.             for (chan = 0; chan < 4; chan++) {
  1431.                colors1[chan] = lp_build_lerp_2d(texel_bld,
  1432.                                                 s_fpart, t_fpart,
  1433.                                                 neighbors1[0][0][chan],
  1434.                                                 neighbors1[0][1][chan],
  1435.                                                 neighbors1[1][0][chan],
  1436.                                                 neighbors1[1][1][chan],
  1437.                                                 0);
  1438.             }
  1439.             /* Linearly interpolate the two samples from the two 3D slices */
  1440.             for (chan = 0; chan < 4; chan++) {
  1441.                colors_out[chan] = lp_build_lerp(texel_bld,
  1442.                                                 r_fpart,
  1443.                                                 colors0[chan], colors1[chan],
  1444.                                                 0);
  1445.             }
  1446.          }
  1447.          else {
  1448.             LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
  1449.             cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
  1450.             cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
  1451.             cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
  1452.             cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
  1453.             colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
  1454.                                              cmpval00, cmpval01, cmpval10, cmpval11);
  1455.             /* Linearly interpolate the two samples from the two 3D slices */
  1456.             colors_out[0] = lp_build_lerp(texel_bld,
  1457.                                           r_fpart,
  1458.                                           colors0[0], colors1[0],
  1459.                                           0);
  1460.             colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
  1461.          }
  1462.       }
  1463.       else {
  1464.          /* 2D tex */
  1465.          for (chan = 0; chan < 4; chan++) {
  1466.             colors_out[chan] = colors0[chan];
  1467.          }
  1468.       }
  1469.    }
  1470. }
  1471.  
  1472.  
  1473. /**
  1474.  * Sample the texture/mipmap using given image filter and mip filter.
  1475.  * ilevel0 and ilevel1 indicate the two mipmap levels to sample
  1476.  * from (vectors or scalars).
  1477.  * If we're using nearest miplevel sampling the '1' values will be null/unused.
  1478.  */
  1479. static void
  1480. lp_build_sample_mipmap(struct lp_build_sample_context *bld,
  1481.                        unsigned img_filter,
  1482.                        unsigned mip_filter,
  1483.                        boolean is_gather,
  1484.                        LLVMValueRef *coords,
  1485.                        const LLVMValueRef *offsets,
  1486.                        LLVMValueRef ilevel0,
  1487.                        LLVMValueRef ilevel1,
  1488.                        LLVMValueRef lod_fpart,
  1489.                        LLVMValueRef *colors_out)
  1490. {
  1491.    LLVMBuilderRef builder = bld->gallivm->builder;
  1492.    LLVMValueRef size0 = NULL;
  1493.    LLVMValueRef size1 = NULL;
  1494.    LLVMValueRef row_stride0_vec = NULL;
  1495.    LLVMValueRef row_stride1_vec = NULL;
  1496.    LLVMValueRef img_stride0_vec = NULL;
  1497.    LLVMValueRef img_stride1_vec = NULL;
  1498.    LLVMValueRef data_ptr0 = NULL;
  1499.    LLVMValueRef data_ptr1 = NULL;
  1500.    LLVMValueRef mipoff0 = NULL;
  1501.    LLVMValueRef mipoff1 = NULL;
  1502.    LLVMValueRef colors0[4], colors1[4];
  1503.    unsigned chan;
  1504.  
  1505.    /* sample the first mipmap level */
  1506.    lp_build_mipmap_level_sizes(bld, ilevel0,
  1507.                                &size0,
  1508.                                &row_stride0_vec, &img_stride0_vec);
  1509.    if (bld->num_mips == 1) {
  1510.       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
  1511.    }
  1512.    else {
   1513.       /* This path should work for num_lods == 1 too, but is slightly less efficient */
  1514.       data_ptr0 = bld->base_ptr;
  1515.       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
  1516.    }
  1517.    if (img_filter == PIPE_TEX_FILTER_NEAREST) {
  1518.       lp_build_sample_image_nearest(bld, size0,
  1519.                                     row_stride0_vec, img_stride0_vec,
  1520.                                     data_ptr0, mipoff0, coords, offsets,
  1521.                                     colors0);
  1522.    }
  1523.    else {
  1524.       assert(img_filter == PIPE_TEX_FILTER_LINEAR);
  1525.       lp_build_sample_image_linear(bld, is_gather, size0, NULL,
  1526.                                    row_stride0_vec, img_stride0_vec,
  1527.                                    data_ptr0, mipoff0, coords, offsets,
  1528.                                    colors0);
  1529.    }
  1530.  
  1531.    /* Store the first level's colors in the output variables */
  1532.    for (chan = 0; chan < 4; chan++) {
  1533.        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
  1534.    }
  1535.  
  1536.    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
  1537.       struct lp_build_if_state if_ctx;
  1538.       LLVMValueRef need_lerp;
  1539.  
  1540.       /* need_lerp = lod_fpart > 0 */
  1541.       if (bld->num_lods == 1) {
  1542.          need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
  1543.                                    lod_fpart, bld->lodf_bld.zero,
  1544.                                    "need_lerp");
  1545.       }
  1546.       else {
   1547.          /*
   1548.           * We'll do mip filtering if any of the quads (or individual
   1549.           * pixels in case of per-pixel lod) need it.
   1550.           * It might be better to split the vectors here and only fetch/filter
   1551.           * the quads which need it (if there's one lod per quad).
   1552.           */
  1553.          need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
  1554.                                       PIPE_FUNC_GREATER,
  1555.                                       lod_fpart, bld->lodf_bld.zero);
  1556.          need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
  1557.       }
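      /*
       * Scalar sketch of the multi-lod case above (illustrative only):
       *
       *    bool need_lerp = false;
       *    for (unsigned i = 0; i < num_lods; i++)
       *       need_lerp |= (lod_fpart[i] > 0.0f);
       */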
  1558.  
  1559.       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
  1560.       {
  1561.          /*
  1562.           * We unfortunately need to clamp lod_fpart here since we can get
  1563.           * negative values which would screw up filtering if not all
   1564.           * lod_fpart values have the same sign.
  1565.           */
  1566.          lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
  1567.                                   bld->lodf_bld.zero);
  1568.          /* sample the second mipmap level */
  1569.          lp_build_mipmap_level_sizes(bld, ilevel1,
  1570.                                      &size1,
  1571.                                      &row_stride1_vec, &img_stride1_vec);
  1572.          if (bld->num_mips == 1) {
  1573.             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
  1574.          }
  1575.          else {
  1576.             data_ptr1 = bld->base_ptr;
  1577.             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
  1578.          }
  1579.          if (img_filter == PIPE_TEX_FILTER_NEAREST) {
  1580.             lp_build_sample_image_nearest(bld, size1,
  1581.                                           row_stride1_vec, img_stride1_vec,
  1582.                                           data_ptr1, mipoff1, coords, offsets,
  1583.                                           colors1);
  1584.          }
  1585.          else {
  1586.             lp_build_sample_image_linear(bld, FALSE, size1, NULL,
  1587.                                          row_stride1_vec, img_stride1_vec,
  1588.                                          data_ptr1, mipoff1, coords, offsets,
  1589.                                          colors1);
  1590.          }
  1591.  
  1592.          /* interpolate samples from the two mipmap levels */
  1593.  
  1594.          if (bld->num_lods != bld->coord_type.length)
  1595.             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
  1596.                                                               bld->lodf_bld.type,
  1597.                                                               bld->texel_bld.type,
  1598.                                                               lod_fpart);
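         /*
          * Illustrative example (hypothetical values): with two per-quad lods
          * and 8-wide texel vectors, the broadcast above conceptually turns
          *    lod_fpart = {a, b}
          * into
          *    lod_fpart = {a, a, a, a, b, b, b, b}
          * so it lines up with the per-pixel texel channels in the lerp below.
          */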
  1599.  
  1600.          for (chan = 0; chan < 4; chan++) {
  1601.             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
  1602.                                           colors0[chan], colors1[chan],
  1603.                                           0);
  1604.             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
  1605.          }
  1606.       }
  1607.       lp_build_endif(&if_ctx);
  1608.    }
  1609. }
  1610.  
  1611.  
  1612. /**
  1613.  * Sample the texture/mipmap using given mip filter, and using
  1614.  * both nearest and linear filtering at the same time depending
  1615.  * on linear_mask.
  1616.  * lod can be per quad but linear_mask is always per pixel.
  1617.  * ilevel0 and ilevel1 indicate the two mipmap levels to sample
  1618.  * from (vectors or scalars).
  1619.  * If we're using nearest miplevel sampling the '1' values will be null/unused.
  1620.  */
  1621. static void
  1622. lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
  1623.                             LLVMValueRef linear_mask,
  1624.                             unsigned mip_filter,
  1625.                             LLVMValueRef *coords,
  1626.                             const LLVMValueRef *offsets,
  1627.                             LLVMValueRef ilevel0,
  1628.                             LLVMValueRef ilevel1,
  1629.                             LLVMValueRef lod_fpart,
  1630.                             LLVMValueRef lod_positive,
  1631.                             LLVMValueRef *colors_out)
  1632. {
  1633.    LLVMBuilderRef builder = bld->gallivm->builder;
  1634.    LLVMValueRef size0 = NULL;
  1635.    LLVMValueRef size1 = NULL;
  1636.    LLVMValueRef row_stride0_vec = NULL;
  1637.    LLVMValueRef row_stride1_vec = NULL;
  1638.    LLVMValueRef img_stride0_vec = NULL;
  1639.    LLVMValueRef img_stride1_vec = NULL;
  1640.    LLVMValueRef data_ptr0 = NULL;
  1641.    LLVMValueRef data_ptr1 = NULL;
  1642.    LLVMValueRef mipoff0 = NULL;
  1643.    LLVMValueRef mipoff1 = NULL;
  1644.    LLVMValueRef colors0[4], colors1[4];
  1645.    unsigned chan;
  1646.  
  1647.    /* sample the first mipmap level */
  1648.    lp_build_mipmap_level_sizes(bld, ilevel0,
  1649.                                &size0,
  1650.                                &row_stride0_vec, &img_stride0_vec);
  1651.    if (bld->num_mips == 1) {
  1652.       data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
  1653.    }
  1654.    else {
   1655.       /* This path should work for num_lods == 1 too, but is slightly less efficient */
  1656.       data_ptr0 = bld->base_ptr;
  1657.       mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
  1658.    }
  1659.  
  1660.    lp_build_sample_image_linear(bld, FALSE, size0, linear_mask,
  1661.                                 row_stride0_vec, img_stride0_vec,
  1662.                                 data_ptr0, mipoff0, coords, offsets,
  1663.                                 colors0);
  1664.  
  1665.    /* Store the first level's colors in the output variables */
  1666.    for (chan = 0; chan < 4; chan++) {
  1667.        LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
  1668.    }
  1669.  
  1670.    if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
  1671.       struct lp_build_if_state if_ctx;
  1672.       LLVMValueRef need_lerp;
  1673.  
   1674.       /*
   1675.        * We'll do mip filtering if any of the quads (or individual
   1676.        * pixels in case of per-pixel lod) need it.
   1677.        * Note we use lod_positive here, not lod_fpart, since it may be the same
   1678.        * condition as the one used in the outer "if" in the caller, hence llvm
   1679.        * should be able to merge the branches in this case.
   1680.        */
  1681.       need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
  1682.  
  1683.       lp_build_if(&if_ctx, bld->gallivm, need_lerp);
  1684.       {
  1685.          /*
  1686.           * We unfortunately need to clamp lod_fpart here since we can get
  1687.           * negative values which would screw up filtering if not all
   1688.           * lod_fpart values have the same sign.
  1689.           */
  1690.          lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
  1691.                                   bld->lodf_bld.zero);
  1692.          /* sample the second mipmap level */
  1693.          lp_build_mipmap_level_sizes(bld, ilevel1,
  1694.                                      &size1,
  1695.                                      &row_stride1_vec, &img_stride1_vec);
  1696.          if (bld->num_mips == 1) {
  1697.             data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
  1698.          }
  1699.          else {
  1700.             data_ptr1 = bld->base_ptr;
  1701.             mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
  1702.          }
  1703.  
  1704.          lp_build_sample_image_linear(bld, FALSE, size1, linear_mask,
  1705.                                       row_stride1_vec, img_stride1_vec,
  1706.                                       data_ptr1, mipoff1, coords, offsets,
  1707.                                       colors1);
  1708.  
  1709.          /* interpolate samples from the two mipmap levels */
  1710.  
  1711.          if (bld->num_lods != bld->coord_type.length)
  1712.             lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
  1713.                                                               bld->lodf_bld.type,
  1714.                                                               bld->texel_bld.type,
  1715.                                                               lod_fpart);
  1716.  
  1717.          for (chan = 0; chan < 4; chan++) {
  1718.             colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
  1719.                                           colors0[chan], colors1[chan],
  1720.                                           0);
  1721.             LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
  1722.          }
  1723.       }
  1724.       lp_build_endif(&if_ctx);
  1725.    }
  1726. }
  1727.  
  1728.  
  1729. /**
  1730.  * Build (per-coord) layer value.
   1731.  * Either clamp the layer to valid values, or fill in the optional
   1732.  * out_of_bounds value and just return the layer unclamped.
  1733.  */
  1734. static LLVMValueRef
  1735. lp_build_layer_coord(struct lp_build_sample_context *bld,
  1736.                      unsigned texture_unit,
  1737.                      boolean is_cube_array,
  1738.                      LLVMValueRef layer,
  1739.                      LLVMValueRef *out_of_bounds)
  1740. {
  1741.    LLVMValueRef num_layers;
  1742.    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
  1743.  
  1744.    num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm,
  1745.                                           bld->context_ptr, texture_unit);
  1746.  
  1747.    if (out_of_bounds) {
  1748.       LLVMValueRef out1, out;
  1749.       assert(!is_cube_array);
  1750.       num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
  1751.       out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
  1752.       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
  1753.       *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
  1754.       return layer;
  1755.    }
  1756.    else {
  1757.       LLVMValueRef maxlayer;
  1758.       LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
  1759.                                        bld->int_bld.one;
  1760.       maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
  1761.       maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
  1762.       return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
  1763.    }
  1764. }
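/*
 * Scalar reference for the above (illustrative only): with N layers,
 *    clamped       = CLAMP(layer, 0, N - 1);           // non-cube-array case
 *    out_of_bounds = (layer < 0) || (layer >= N);
 * and for cube arrays the maximum valid starting layer is N - 6, since a
 * whole cube (six faces) must fit.
 */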
  1765.  
  1766.  
  1767. /**
  1768.  * Calculate cube face, lod, mip levels.
  1769.  */
  1770. static void
  1771. lp_build_sample_common(struct lp_build_sample_context *bld,
  1772.                        unsigned texture_index,
  1773.                        unsigned sampler_index,
  1774.                        LLVMValueRef *coords,
  1775.                        const struct lp_derivatives *derivs, /* optional */
  1776.                        LLVMValueRef lod_bias, /* optional */
  1777.                        LLVMValueRef explicit_lod, /* optional */
  1778.                        LLVMValueRef *lod_pos_or_zero,
  1779.                        LLVMValueRef *lod_fpart,
  1780.                        LLVMValueRef *ilevel0,
  1781.                        LLVMValueRef *ilevel1)
  1782. {
  1783.    const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
  1784.    const unsigned min_filter = bld->static_sampler_state->min_img_filter;
  1785.    const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
  1786.    const unsigned target = bld->static_texture_state->target;
  1787.    LLVMValueRef first_level, cube_rho = NULL;
  1788.    LLVMValueRef lod_ipart = NULL;
  1789.    struct lp_derivatives cube_derivs;
  1790.  
  1791.    /*
  1792.    printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
  1793.           mip_filter, min_filter, mag_filter);
  1794.    */
  1795.  
  1796.    /*
  1797.     * Choose cube face, recompute texcoords for the chosen face and
  1798.     * compute rho here too (as it requires transform of derivatives).
  1799.     */
  1800.    if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
  1801.       boolean need_derivs;
  1802.       need_derivs = ((min_filter != mag_filter ||
  1803.                       mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
  1804.                       !bld->static_sampler_state->min_max_lod_equal &&
  1805.                       !explicit_lod);
  1806.       lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
  1807.       derivs = &cube_derivs;
  1808.       if (target == PIPE_TEXTURE_CUBE_ARRAY) {
  1809.          /* calculate cube layer coord now */
  1810.          LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
  1811.          LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
  1812.          layer = lp_build_mul(&bld->int_coord_bld, layer, six);
  1813.          coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
   1814.          /* because of seamless filtering we can't add it to the face (coords[2]) here. */
  1815.       }
  1816.    }
  1817.    else if (target == PIPE_TEXTURE_1D_ARRAY ||
  1818.             target == PIPE_TEXTURE_2D_ARRAY) {
  1819.       coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
  1820.       coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
  1821.    }
  1822.  
  1823.    if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
   1824.       /*
   1825.        * Clamp p coords to [0,1] for fixed-function depth texture formats here.
   1826.        * Technically this is not entirely correct for unorm depth as the ref value
   1827.        * should be converted to the depth format (quantization!) and the comparison
   1828.        * then done in the texture format. This would actually help performance (since
   1829.        * we'd only need to do it once and could skip the per-sample conversion of
   1830.        * texels to floats), but it would need messier code (we'd need to push
   1831.        * at least some bits down to the actual fetch so the conversion could be skipped,
   1832.        * and it would interact badly with the border color, which we'd need to convert
   1833.        * to that format too or handle with some other tricks to make it work).
   1834.        */
  1835.       const struct util_format_description *format_desc = bld->format_desc;
  1836.       unsigned chan_type;
  1837.       /* not entirely sure we couldn't end up with non-valid swizzle here */
  1838.       chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
  1839.                      format_desc->channel[format_desc->swizzle[0]].type :
  1840.                      UTIL_FORMAT_TYPE_FLOAT;
  1841.       if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
  1842.          coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
  1843.                                     bld->coord_bld.zero, bld->coord_bld.one);
  1844.       }
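      /*
       * Illustrative example: for a unorm depth format such as Z16_UNORM the
       * texel values end up in [0,1] after conversion to float, so a ref
       * value of e.g. 1.5 is clamped to 1.0 before the comparison; for
       * Z32_FLOAT the ref value is left untouched.
       */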
  1845.    }
  1846.  
  1847.    /*
  1848.     * Compute the level of detail (float).
  1849.     */
  1850.    if (min_filter != mag_filter ||
  1851.        mip_filter != PIPE_TEX_MIPFILTER_NONE) {
  1852.       /* Need to compute lod either to choose mipmap levels or to
  1853.        * distinguish between minification/magnification with one mipmap level.
  1854.        */
  1855.       lp_build_lod_selector(bld, texture_index, sampler_index,
  1856.                             coords[0], coords[1], coords[2], cube_rho,
  1857.                             derivs, lod_bias, explicit_lod,
  1858.                             mip_filter,
  1859.                             &lod_ipart, lod_fpart, lod_pos_or_zero);
  1860.    } else {
  1861.       lod_ipart = bld->lodi_bld.zero;
  1862.       *lod_pos_or_zero = bld->lodi_bld.zero;
  1863.    }
  1864.  
  1865.    if (bld->num_lods != bld->num_mips) {
  1866.       /* only makes sense if there's just a single mip level */
  1867.       assert(bld->num_mips == 1);
  1868.       lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
  1869.    }
  1870.  
  1871.    /*
  1872.     * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
  1873.     */
  1874.    switch (mip_filter) {
  1875.    default:
  1876.       assert(0 && "bad mip_filter value in lp_build_sample_soa()");
  1877.       /* fall-through */
  1878.    case PIPE_TEX_MIPFILTER_NONE:
  1879.       /* always use mip level 0 */
  1880.       first_level = bld->dynamic_state->first_level(bld->dynamic_state,
  1881.                                                     bld->gallivm, bld->context_ptr,
  1882.                                                     texture_index);
  1883.       first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
  1884.       *ilevel0 = first_level;
  1885.       break;
  1886.    case PIPE_TEX_MIPFILTER_NEAREST:
  1887.       assert(lod_ipart);
  1888.       lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
  1889.       break;
  1890.    case PIPE_TEX_MIPFILTER_LINEAR:
  1891.       assert(lod_ipart);
  1892.       assert(*lod_fpart);
  1893.       lp_build_linear_mip_levels(bld, texture_index,
  1894.                                  lod_ipart, lod_fpart,
  1895.                                  ilevel0, ilevel1);
  1896.       break;
  1897.    }
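   /*
    * Illustrative example (hypothetical numbers): with a computed lod of 1.3
    * and PIPE_TEX_MIPFILTER_LINEAR, lod_ipart is 1 and *lod_fpart is 0.3, so
    * ilevel0/ilevel1 select (clamped) levels 1 and 2 and the two results are
    * later blended with weight 0.3 towards level 2.
    */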
  1898. }
  1899.  
  1900. static void
  1901. lp_build_clamp_border_color(struct lp_build_sample_context *bld,
  1902.                             unsigned sampler_unit)
  1903. {
  1904.    struct gallivm_state *gallivm = bld->gallivm;
  1905.    LLVMBuilderRef builder = gallivm->builder;
  1906.    LLVMValueRef border_color_ptr =
  1907.       bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
  1908.                                        bld->context_ptr, sampler_unit);
  1909.    LLVMValueRef border_color;
  1910.    const struct util_format_description *format_desc = bld->format_desc;
  1911.    struct lp_type vec4_type = bld->texel_type;
  1912.    struct lp_build_context vec4_bld;
  1913.    LLVMValueRef min_clamp = NULL;
  1914.    LLVMValueRef max_clamp = NULL;
  1915.  
   1916.    /*
   1917.     * For normalized formats we need to clamp the border color (technically
   1918.     * we probably should also quantize the data). It really sucks doing this
   1919.     * here, but it can't be avoided, at least for now, since the border color is
   1920.     * part of the sampler state while the texture format is part of the
   1921.     * sampler_view state. GL also expects clamping for uint/sint formats, so do
   1922.     * that as well (d3d10 can't end up here with uint/sint since it only
   1923.     * supports them with ld).
   1924.     */
  1925.    vec4_type.length = 4;
  1926.    lp_build_context_init(&vec4_bld, gallivm, vec4_type);
  1927.  
   1928.    /*
   1929.     * Vectorized clamping of the border color. Loading is a bit of a hack since
   1930.     * we just cast the pointer to the float array to a pointer to vec4
   1931.     * (int or float).
   1932.     */
  1933.    border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
  1934.                                              lp_build_const_int32(gallivm, 0));
  1935.    border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
  1936.                                        LLVMPointerType(vec4_bld.vec_type, 0), "");
  1937.    border_color = LLVMBuildLoad(builder, border_color_ptr, "");
   1938.    /* we don't have an aligned type in the dynamic state unfortunately */
  1939.    lp_set_load_alignment(border_color, 4);
  1940.  
   1941.    /*
   1942.     * Instead of having some incredibly complex logic which tries to figure out
   1943.     * the clamping necessary for each channel, simply use the first channel, and
   1944.     * treat mixed signed/unsigned normalized formats specially.
   1945.     * (Mixed non-normalized formats, which wouldn't work at all here, do not exist
   1946.     * for a good reason.)
   1947.     */
  1948.    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
  1949.       int chan;
   1950.       /* d/s needs special handling because having both present means we just sample depth */
  1951.       if (util_format_is_depth_and_stencil(format_desc->format)) {
  1952.          chan = format_desc->swizzle[0];
  1953.       }
  1954.       else {
  1955.          chan = util_format_get_first_non_void_channel(format_desc->format);
  1956.       }
  1957.       if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
  1958.          unsigned chan_type = format_desc->channel[chan].type;
  1959.          unsigned chan_norm = format_desc->channel[chan].normalized;
  1960.          unsigned chan_pure = format_desc->channel[chan].pure_integer;
  1961.          if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
  1962.             if (chan_norm) {
  1963.                min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
  1964.                max_clamp = vec4_bld.one;
  1965.             }
  1966.             else if (chan_pure) {
   1967.                /*
   1968.                 * Border color was stored as int, hence we need a min/max clamp
   1969.                 * only if the channel has fewer than 32 bits.
   1970.                 */
  1971.                unsigned chan_size = format_desc->channel[chan].size;
  1972.                if (chan_size < 32) {
  1973.                   min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
  1974.                                                      0 - (1 << (chan_size - 1)));
  1975.                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
  1976.                                                      (1 << (chan_size - 1)) - 1);
  1977.                }
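               /*
                * E.g. (illustrative) for an 8-bit signed pure-integer channel
                * this clamps the stored border color to [-128, 127].
                */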
  1978.             }
  1979.             /* TODO: no idea about non-pure, non-normalized! */
  1980.          }
  1981.          else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
  1982.             if (chan_norm) {
  1983.                min_clamp = vec4_bld.zero;
  1984.                max_clamp = vec4_bld.one;
  1985.             }
   1986.             /*
   1987.              * Need an ugly hack here: because we don't have Z32_FLOAT_X8X24
   1988.              * we use Z32_FLOAT_S8X24 to imply sampling the depth component
   1989.              * and ignoring stencil, which will blow up here if we try to
   1990.              * do a uint clamp in a float texel build...
   1991.              * And even if we had that format, the mesa state tracker also thinks
   1992.              * using z24s8 means depth sampling ignoring stencil.
   1993.              */
  1994.             else if (chan_pure) {
   1995.                /*
   1996.                 * Border color was stored as uint, hence we never need a min
   1997.                 * clamp, and only need a max clamp if the channel has fewer than 32 bits.
   1998.                 */
  1999.                unsigned chan_size = format_desc->channel[chan].size;
  2000.                if (chan_size < 32) {
  2001.                   max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
  2002.                                                      (1 << chan_size) - 1);
  2003.                }
  2004.                /* TODO: no idea about non-pure, non-normalized! */
  2005.             }
  2006.          }
  2007.          else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
  2008.             /* TODO: I have no idea what clamp this would need if any! */
  2009.          }
  2010.       }
  2011.       /* mixed plain formats (or different pure size) */
  2012.       switch (format_desc->format) {
  2013.       case PIPE_FORMAT_B10G10R10A2_UINT:
  2014.       case PIPE_FORMAT_R10G10B10A2_UINT:
  2015.       {
  2016.          unsigned max10 = (1 << 10) - 1;
  2017.          max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
  2018.                                         max10, (1 << 2) - 1, NULL);
  2019.       }
  2020.          break;
  2021.       case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
  2022.          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
  2023.                                         -1.0F, 0.0F, NULL);
  2024.          max_clamp = vec4_bld.one;
  2025.          break;
  2026.       case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
  2027.       case PIPE_FORMAT_R5SG5SB6U_NORM:
  2028.          min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
  2029.                                         0.0F, 0.0F, NULL);
  2030.          max_clamp = vec4_bld.one;
  2031.          break;
  2032.       default:
  2033.          break;
  2034.       }
  2035.    }
  2036.    else {
  2037.       /* cannot figure this out from format description */
  2038.       if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
  2039.          /* s3tc formats are always unorm */
  2040.          min_clamp = vec4_bld.zero;
  2041.          max_clamp = vec4_bld.one;
  2042.       }
  2043.       else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
  2044.                format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
  2045.          switch (format_desc->format) {
  2046.          case PIPE_FORMAT_RGTC1_UNORM:
  2047.          case PIPE_FORMAT_RGTC2_UNORM:
  2048.          case PIPE_FORMAT_LATC1_UNORM:
  2049.          case PIPE_FORMAT_LATC2_UNORM:
  2050.          case PIPE_FORMAT_ETC1_RGB8:
  2051.             min_clamp = vec4_bld.zero;
  2052.             max_clamp = vec4_bld.one;
  2053.             break;
  2054.          case PIPE_FORMAT_RGTC1_SNORM:
  2055.          case PIPE_FORMAT_RGTC2_SNORM:
  2056.          case PIPE_FORMAT_LATC1_SNORM:
  2057.          case PIPE_FORMAT_LATC2_SNORM:
  2058.             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
  2059.             max_clamp = vec4_bld.one;
  2060.             break;
  2061.          default:
  2062.             assert(0);
  2063.             break;
  2064.          }
  2065.       }
   2066.       /*
   2067.        * all others are from the subsampled/other group, though we don't care
   2068.        * about yuv (and should not have anything from zs here)
   2069.        */
  2070.       else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
  2071.          switch (format_desc->format) {
  2072.          case PIPE_FORMAT_R8G8_B8G8_UNORM:
  2073.          case PIPE_FORMAT_G8R8_G8B8_UNORM:
  2074.          case PIPE_FORMAT_G8R8_B8R8_UNORM:
  2075.          case PIPE_FORMAT_R8G8_R8B8_UNORM:
  2076.          case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
  2077.             min_clamp = vec4_bld.zero;
  2078.             max_clamp = vec4_bld.one;
  2079.             break;
  2080.          case PIPE_FORMAT_R8G8Bx_SNORM:
  2081.             min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
  2082.             max_clamp = vec4_bld.one;
  2083.             break;
  2084.             /*
  2085.              * Note smallfloat formats usually don't need clamping
  2086.              * (they still have infinite range) however this is not
  2087.              * true for r11g11b10 and r9g9b9e5, which can't represent
  2088.              * negative numbers (and additionally r9g9b9e5 can't represent
  2089.              * very large numbers). d3d10 seems happy without clamping in
  2090.              * this case, but gl spec is pretty clear: "for floating
  2091.              * point and integer formats, border values are clamped to
  2092.              * the representable range of the format" so do that here.
  2093.              */
  2094.          case PIPE_FORMAT_R11G11B10_FLOAT:
  2095.             min_clamp = vec4_bld.zero;
  2096.             break;
  2097.          case PIPE_FORMAT_R9G9B9E5_FLOAT:
  2098.             min_clamp = vec4_bld.zero;
  2099.             max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
  2100.             break;
  2101.          default:
  2102.             assert(0);
  2103.             break;
  2104.          }
  2105.       }
  2106.    }
  2107.  
  2108.    if (min_clamp) {
  2109.       border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
  2110.    }
  2111.    if (max_clamp) {
  2112.       border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
  2113.    }
  2114.  
  2115.    bld->border_color_clamped = border_color;
  2116. }
  2117.  
  2118.  
  2119. /**
  2120.  * General texture sampling codegen.
  2121.  * This function handles texture sampling for all texture targets (1D,
  2122.  * 2D, 3D, cube) and all filtering modes.
  2123.  */
  2124. static void
  2125. lp_build_sample_general(struct lp_build_sample_context *bld,
  2126.                         unsigned sampler_unit,
  2127.                         boolean is_gather,
  2128.                         LLVMValueRef *coords,
  2129.                         const LLVMValueRef *offsets,
  2130.                         LLVMValueRef lod_positive,
  2131.                         LLVMValueRef lod_fpart,
  2132.                         LLVMValueRef ilevel0,
  2133.                         LLVMValueRef ilevel1,
  2134.                         LLVMValueRef *colors_out)
  2135. {
  2136.    LLVMBuilderRef builder = bld->gallivm->builder;
  2137.    const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
  2138.    const unsigned mip_filter = sampler_state->min_mip_filter;
  2139.    const unsigned min_filter = sampler_state->min_img_filter;
  2140.    const unsigned mag_filter = sampler_state->mag_img_filter;
  2141.    LLVMValueRef texels[4];
  2142.    unsigned chan;
  2143.  
  2144.    /* if we need border color, (potentially) clamp it now */
  2145.    if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
  2146.                                               min_filter,
  2147.                                               mag_filter) ||
  2148.        (bld->dims > 1 &&
  2149.            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
  2150.                                                   min_filter,
  2151.                                                   mag_filter)) ||
  2152.        (bld->dims > 2 &&
  2153.            lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
  2154.                                                   min_filter,
  2155.                                                   mag_filter))) {
  2156.       lp_build_clamp_border_color(bld, sampler_unit);
  2157.    }
  2158.  
  2159.  
  2160.    /*
  2161.     * Get/interpolate texture colors.
  2162.     */
  2163.  
  2164.    for (chan = 0; chan < 4; ++chan) {
  2165.      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
  2166.      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
  2167.    }
  2168.  
  2169.    if (min_filter == mag_filter) {
  2170.       /* no need to distinguish between minification and magnification */
  2171.       lp_build_sample_mipmap(bld, min_filter, mip_filter,
  2172.                              is_gather,
  2173.                              coords, offsets,
  2174.                              ilevel0, ilevel1, lod_fpart,
  2175.                              texels);
  2176.    }
  2177.    else {
   2178.       /*
   2179.        * Could also get rid of the if-logic and always use mipmap_both, for
   2180.        * both the single-lod and the multi-lod case, if nothing really uses this.
   2181.        */
  2182.       if (bld->num_lods == 1) {
  2183.          /* Emit conditional to choose min image filter or mag image filter
  2184.           * depending on the lod being > 0 or <= 0, respectively.
  2185.           */
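         /*
          * Scalar sketch (illustrative only):
          *    if (lod > 0)
          *       color = sample(min_filter, mip_filter);
          *    else
          *       color = sample(mag_filter, PIPE_TEX_MIPFILTER_NONE);
          */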
  2186.          struct lp_build_if_state if_ctx;
  2187.  
  2188.          lod_positive = LLVMBuildTrunc(builder, lod_positive,
  2189.                                        LLVMInt1TypeInContext(bld->gallivm->context), "");
  2190.  
  2191.          lp_build_if(&if_ctx, bld->gallivm, lod_positive);
  2192.          {
  2193.             /* Use the minification filter */
  2194.             lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE,
  2195.                                    coords, offsets,
  2196.                                    ilevel0, ilevel1, lod_fpart,
  2197.                                    texels);
  2198.          }
  2199.          lp_build_else(&if_ctx);
  2200.          {
  2201.             /* Use the magnification filter */
  2202.             lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
  2203.                                    FALSE,
  2204.                                    coords, offsets,
  2205.                                    ilevel0, NULL, NULL,
  2206.                                    texels);
  2207.          }
  2208.          lp_build_endif(&if_ctx);
  2209.       }
  2210.       else {
  2211.          LLVMValueRef need_linear, linear_mask;
  2212.          unsigned mip_filter_for_nearest;
  2213.          struct lp_build_if_state if_ctx;
  2214.  
  2215.          if (min_filter == PIPE_TEX_FILTER_LINEAR) {
  2216.             linear_mask = lod_positive;
  2217.             mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
  2218.          }
  2219.          else {
  2220.             linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
  2221.             mip_filter_for_nearest = mip_filter;
  2222.          }
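         /*
          * Illustrative truth table for the mask above: a pixel needs the
          * linear path when its active filter is LINEAR, and the active
          * filter is min_filter where lod > 0 (minification) and mag_filter
          * otherwise, hence:
          *    min LINEAR  / mag NEAREST:  linear_mask =  lod_positive
          *    min NEAREST / mag LINEAR:   linear_mask = !lod_positive
          */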
  2223.          need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
  2224.                                                linear_mask);
  2225.  
  2226.          if (bld->num_lods != bld->coord_type.length) {
  2227.             linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
  2228.                                                                 bld->lodi_type,
  2229.                                                                 bld->int_coord_type,
  2230.                                                                 linear_mask);
  2231.          }
  2232.  
  2233.          lp_build_if(&if_ctx, bld->gallivm, need_linear);
  2234.          {
   2235.             /*
   2236.              * Do sampling with both filters simultaneously. This means using
   2237.              * a linear filter and doing some tricks (with weights) for the pixels
   2238.              * which need the nearest filter.
   2239.              * Note that it's probably rare that some pixels need nearest and some
   2240.              * need linear filtering, but the fixups required for the nearest pixels
   2241.              * aren't all that complicated, so just always run the combined path
   2242.              * if at least some pixels require linear.
   2243.              */
  2244.             lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
  2245.                                         coords, offsets,
  2246.                                         ilevel0, ilevel1,
  2247.                                         lod_fpart, lod_positive,
  2248.                                         texels);
  2249.          }
  2250.          lp_build_else(&if_ctx);
  2251.          {
  2252.             /*
  2253.              * All pixels require just nearest filtering, which is way
  2254.              * cheaper than linear, hence do a separate path for that.
  2255.              */
   2256.             lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
   2257.                                    mip_filter_for_nearest, FALSE,
  2258.                                    coords, offsets,
  2259.                                    ilevel0, ilevel1, lod_fpart,
  2260.                                    texels);
  2261.          }
  2262.          lp_build_endif(&if_ctx);
  2263.       }
  2264.    }
  2265.  
  2266.    for (chan = 0; chan < 4; ++chan) {
  2267.      colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
  2268.      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
  2269.    }
  2270. }
  2271.  
  2272.  
  2273. /**
  2274.  * Texel fetch function.
   2275.  * In contrast to general sampling there is no filtering and no coord minification;
   2276.  * the lod (if any) is always an explicit uint, and the coords are uints (in texel
   2277.  * units) applied directly to the selected mip level (after adding the texel offsets).
  2278.  * This function handles texel fetch for all targets where texel fetch is supported
  2279.  * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
  2280.  */
  2281. static void
  2282. lp_build_fetch_texel(struct lp_build_sample_context *bld,
  2283.                      unsigned texture_unit,
  2284.                      const LLVMValueRef *coords,
  2285.                      LLVMValueRef explicit_lod,
  2286.                      const LLVMValueRef *offsets,
  2287.                      LLVMValueRef *colors_out)
  2288. {
  2289.    struct lp_build_context *perquadi_bld = &bld->lodi_bld;
  2290.    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
  2291.    unsigned dims = bld->dims, chan;
  2292.    unsigned target = bld->static_texture_state->target;
  2293.    boolean out_of_bound_ret_zero = TRUE;
  2294.    LLVMValueRef size, ilevel;
  2295.    LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
  2296.    LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
  2297.    LLVMValueRef width, height, depth, i, j;
  2298.    LLVMValueRef offset, out_of_bounds, out1;
  2299.  
  2300.    out_of_bounds = int_coord_bld->zero;
  2301.  
  2302.    if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
  2303.       if (bld->num_mips != int_coord_bld->type.length) {
  2304.          ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
  2305.                                             perquadi_bld->type, explicit_lod, 0);
  2306.       }
  2307.       else {
  2308.          ilevel = explicit_lod;
  2309.       }
  2310.       lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
  2311.                                  out_of_bound_ret_zero ? &out_of_bounds : NULL);
  2312.    }
  2313.    else {
  2314.       assert(bld->num_mips == 1);
  2315.       if (bld->static_texture_state->target != PIPE_BUFFER) {
  2316.          ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
  2317.                                                   bld->context_ptr, texture_unit);
  2318.       }
  2319.       else {
  2320.          ilevel = lp_build_const_int32(bld->gallivm, 0);
  2321.       }
  2322.    }
  2323.    lp_build_mipmap_level_sizes(bld, ilevel,
  2324.                                &size,
  2325.                                &row_stride_vec, &img_stride_vec);
  2326.    lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
  2327.                                 size, &width, &height, &depth);
  2328.  
  2329.    if (target == PIPE_TEXTURE_1D_ARRAY ||
  2330.        target == PIPE_TEXTURE_2D_ARRAY) {
  2331.       if (out_of_bound_ret_zero) {
  2332.          z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
  2333.          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
  2334.       }
  2335.       else {
  2336.          z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
  2337.       }
  2338.    }
  2339.  
  2340.    /* This is a lot like border sampling */
  2341.    if (offsets[0]) {
  2342.       /*
  2343.        * coords are really unsigned, offsets are signed, but I don't think
  2344.        * exceeding 31 bits is possible
  2345.        */
  2346.       x = lp_build_add(int_coord_bld, x, offsets[0]);
  2347.    }
  2348.    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
  2349.    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
  2350.    out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
  2351.    out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
  2352.  
  2353.    if (dims >= 2) {
  2354.       if (offsets[1]) {
  2355.          y = lp_build_add(int_coord_bld, y, offsets[1]);
  2356.       }
  2357.       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
  2358.       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
  2359.       out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
  2360.       out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
  2361.  
  2362.       if (dims >= 3) {
  2363.          if (offsets[2]) {
  2364.             z = lp_build_add(int_coord_bld, z, offsets[2]);
  2365.          }
  2366.          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
  2367.          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
  2368.          out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
  2369.          out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
  2370.       }
  2371.    }
  2372.  
  2373.    lp_build_sample_offset(int_coord_bld,
  2374.                           bld->format_desc,
  2375.                           x, y, z, row_stride_vec, img_stride_vec,
  2376.                           &offset, &i, &j);
  2377.  
  2378.    if (bld->static_texture_state->target != PIPE_BUFFER) {
  2379.       offset = lp_build_add(int_coord_bld, offset,
  2380.                             lp_build_get_mip_offsets(bld, ilevel));
  2381.    }
  2382.  
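           /*
            * Zero out the offset for any out-of-bounds lanes (andnot with the
            * all-ones compare mask) so the fetch below stays within the resource;
            * the values returned for those lanes are forced to zero afterwards.
            */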
  2383.    offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
  2384.  
  2385.    lp_build_fetch_rgba_soa(bld->gallivm,
  2386.                            bld->format_desc,
  2387.                            bld->texel_type,
  2388.                            bld->base_ptr, offset,
  2389.                            i, j,
  2390.                            colors_out);
  2391.  
  2392.    if (out_of_bound_ret_zero) {
  2393.       /*
  2394.        * Only needed for ARB_robust_buffer_access_behavior and d3d10.
  2395.        * Could use min/max above instead of out-of-bounds comparisons
  2396.        * if we don't care about the result returned for out-of-bounds.
  2397.        */
  2398.       for (chan = 0; chan < 4; chan++) {
  2399.          colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
  2400.                                             bld->texel_bld.zero, colors_out[chan]);
  2401.       }
  2402.    }
  2403. }
  2404.  
  2405.  
  2406. /**
  2407.  * Just set texels to white instead of actually sampling the texture.
  2408.  * For debugging.
  2409.  */
  2410. void
  2411. lp_build_sample_nop(struct gallivm_state *gallivm,
  2412.                     struct lp_type type,
  2413.                     const LLVMValueRef *coords,
  2414.                     LLVMValueRef texel_out[4])
  2415. {
  2416.    LLVMValueRef one = lp_build_one(gallivm, type);
  2417.    unsigned chan;
  2418.  
  2419.    for (chan = 0; chan < 4; chan++) {
  2420.       texel_out[chan] = one;
  2421.    }  
  2422. }
  2423.  
  2424.  
  2425. /**
  2426.  * Build the actual texture sampling code.
  2427.  * 'texel' will return a vector of four LLVMValueRefs corresponding to
  2428.  * R, G, B, A.
  2429.  * \param type  vector float type to use for coords, etc.
  2430.  * \param sample_key  packed description of the sampling op (op type, lod control/property, shadow, offsets)
  2431.  * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
  2432.  */
  2433. static void
  2434. lp_build_sample_soa_code(struct gallivm_state *gallivm,
  2435.                          const struct lp_static_texture_state *static_texture_state,
  2436.                          const struct lp_static_sampler_state *static_sampler_state,
  2437.                          struct lp_sampler_dynamic_state *dynamic_state,
  2438.                          struct lp_type type,
  2439.                          unsigned sample_key,
  2440.                          unsigned texture_index,
  2441.                          unsigned sampler_index,
  2442.                          LLVMValueRef context_ptr,
  2443.                          const LLVMValueRef *coords,
  2444.                          const LLVMValueRef *offsets,
  2445.                          const struct lp_derivatives *derivs, /* optional */
  2446.                          LLVMValueRef lod, /* optional */
  2447.                          LLVMValueRef texel_out[4])
  2448. {
  2449.    unsigned target = static_texture_state->target;
  2450.    unsigned dims = texture_dims(target);
  2451.    unsigned num_quads = type.length / 4;
  2452.    unsigned mip_filter, min_img_filter, mag_img_filter, i;
  2453.    struct lp_build_sample_context bld;
  2454.    struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
  2455.    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  2456.    LLVMBuilderRef builder = gallivm->builder;
  2457.    LLVMValueRef tex_width, newcoords[5];
  2458.    enum lp_sampler_lod_property lod_property;
  2459.    enum lp_sampler_lod_control lod_control;
  2460.    enum lp_sampler_op_type op_type;
  2461.    LLVMValueRef lod_bias = NULL;
  2462.    LLVMValueRef explicit_lod = NULL;
  2463.    boolean op_is_tex;
  2464.  
  2465.    if (0) {
  2466.       enum pipe_format fmt = static_texture_state->format;
  2467.       debug_printf("Sample from %s\n", util_format_name(fmt));
  2468.    }
  2469.  
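           /*
            * Decode the sample_key bitfield: lod property (per-element/per-quad/
            * scalar), lod control (bias/explicit/derivatives or none) and the op
            * type (texture/fetch/gather) are all packed into it.
            */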
  2470.    lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
  2471.                      LP_SAMPLER_LOD_PROPERTY_SHIFT;
  2472.    lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
  2473.                     LP_SAMPLER_LOD_CONTROL_SHIFT;
  2474.    op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
  2475.                  LP_SAMPLER_OP_TYPE_SHIFT;
  2476.  
  2477.    op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
  2478.  
  2479.    if (lod_control == LP_SAMPLER_LOD_BIAS) {
  2480.       lod_bias = lod;
  2481.       assert(lod);
  2482.       assert(derivs == NULL);
  2483.    }
  2484.    else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
  2485.       explicit_lod = lod;
  2486.       assert(lod);
  2487.       assert(derivs == NULL);
  2488.    }
  2489.    else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
  2490.       assert(derivs);
  2491.       assert(lod == NULL);
  2492.    }
  2493.    else {
  2494.       assert(derivs == NULL);
  2495.       assert(lod == NULL);
  2496.    }
  2497.  
  2498.    if (static_texture_state->format == PIPE_FORMAT_NONE) {
  2499.       /*
  2500.        * If there's nothing bound, format is NONE, and we must return
  2501.        * all zero as mandated by d3d10 in this case.
  2502.        */
  2503.       unsigned chan;
  2504.       LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
  2505.       for (chan = 0; chan < 4; chan++) {
  2506.          texel_out[chan] = zero;
  2507.       }
  2508.       return;
  2509.    }
  2510.  
  2511.    assert(type.floating);
  2512.  
  2513.    /* Setup our build context */
  2514.    memset(&bld, 0, sizeof bld);
  2515.    bld.gallivm = gallivm;
  2516.    bld.context_ptr = context_ptr;
  2517.    bld.static_sampler_state = &derived_sampler_state;
  2518.    bld.static_texture_state = static_texture_state;
  2519.    bld.dynamic_state = dynamic_state;
  2520.    bld.format_desc = util_format_description(static_texture_state->format);
  2521.    bld.dims = dims;
  2522.  
  2523.    bld.vector_width = lp_type_width(type);
  2524.  
  2525.    bld.float_type = lp_type_float(32);
  2526.    bld.int_type = lp_type_int(32);
  2527.    bld.coord_type = type;
  2528.    bld.int_coord_type = lp_int_type(type);
  2529.    bld.float_size_in_type = lp_type_float(32);
  2530.    bld.float_size_in_type.length = dims > 1 ? 4 : 1;
  2531.    bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
  2532.    bld.texel_type = type;
  2533.  
  2534.    /* Always using the first channel should hopefully be safe;
  2535.     * if not, things WILL break in other places anyway.
  2536.     */
  2537.    if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
  2538.        bld.format_desc->channel[0].pure_integer) {
  2539.       if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
  2540.          bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
  2541.       }
  2542.       else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
  2543.          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
  2544.       }
  2545.    }
  2546.    else if (util_format_has_stencil(bld.format_desc) &&
  2547.        !util_format_has_depth(bld.format_desc)) {
  2548.       /* for stencil only formats, sample stencil (uint) */
  2549.       bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
  2550.    }
  2551.  
  2552.    if (!static_texture_state->level_zero_only) {
  2553.       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
  2554.    } else {
  2555.       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
  2556.    }
  2557.    if (op_type == LP_SAMPLER_OP_GATHER) {
  2558.       /*
  2559.        * gather4 is exactly like GL_LINEAR filtering but skips the actual
  2560.        * filtering at the end. It mostly uses the same paths, so cube face
  2561.        * selection, coord wrapping etc. all naturally use the same code.
  2562.        */
  2563.       derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
  2564.       derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
  2565.       derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
  2566.    }
  2567.    mip_filter = derived_sampler_state.min_mip_filter;
  2568.  
  2569.    if (0) {
  2570.       debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
  2571.    }
  2572.  
  2573.    if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
  2574.        static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
  2575.    {
  2576.       /*
  2577.        * Seamless filtering ignores wrap modes.
  2578.        * Setting to CLAMP_TO_EDGE is correct for nearest filtering; for
  2579.        * bilinear it's not correct but way better than using, for instance, repeat.
  2580.        * Note we even set this for non-seamless. Technically GL allows any wrap
  2581.        * mode, which made sense when true borders were supported (a seamless
  2582.        * effect can be had with border and CLAMP_TO_BORDER), but gallium doesn't
  2583.        * support borders, d3d9 requires wrap modes to be ignored, and it's a pain
  2584.        * to fix up the sampler state (as it makes it texture dependent).
  2585.        */
  2586.       derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
  2587.       derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
  2588.    }
  2589.  
  2590.    min_img_filter = derived_sampler_state.min_img_filter;
  2591.    mag_img_filter = derived_sampler_state.mag_img_filter;
  2592.  
  2593.  
  2594.    /*
  2595.     * This is all a bit complicated; different paths are chosen for performance
  2596.     * reasons.
  2597.     * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
  2598.     * everything (the last two options are equivalent for the 4-wide case).
  2599.     * If there's per-quad lod but we split to 4-wide so we can use AoS, the
  2600.     * per-quad lod is calculated first and the lod value extracted afterwards,
  2601.     * making this case, as far as lod handling in the subsequent sample/filter
  2602.     * code is concerned, basically the same as the 1-lod-for-everything case.
  2603.     * Different lod handling mostly shows up when building mipmap sizes
  2604.     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
  2605.     * (getting the fractional part of the lod to the right texels).
  2606.     */
  2607.  
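           /*
            * For illustration, with an 8-wide vector (num_quads == 2) the cases
            * below roughly work out to:
            *   per-element lod:  num_lods == 8 (num_mips == 8 too if mip filtering)
            *   per-quad lod:     num_lods == 2
            *   single lod:       num_lods == 1
            */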
  2608.    /*
  2609.     * There are other situations where at least the multiple int lods could be
  2610.     * avoided like min and max lod being equal.
  2611.     */
  2612.    bld.num_mips = bld.num_lods = 1;
  2613.  
  2614.    if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
  2615.        (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
  2616.        (static_texture_state->target == PIPE_TEXTURE_CUBE ||
  2617.         static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
  2618.        (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
  2619.       /*
  2620.        * Special case: use per-pixel lod even for implicit lod. This is
  2621.        * generally never required (APIs are ok with per-quad lod) except to
  2622.        * please some (somewhat broken imho) tests, because per-pixel face
  2623.        * selection can cause derivatives to differ for pixels outside the
  2624.        * primitive due to the major axis division, even if the pre-projection
  2625.        * derivatives look normal.
  2626.        */
  2627.       bld.num_mips = type.length;
  2628.       bld.num_lods = type.length;
  2629.    }
  2630.    else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
  2631.        (explicit_lod || lod_bias || derivs)) {
  2632.       if ((!op_is_tex && target != PIPE_BUFFER) ||
  2633.           (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
  2634.          bld.num_mips = type.length;
  2635.          bld.num_lods = type.length;
  2636.       }
  2637.       else if (op_is_tex && min_img_filter != mag_img_filter) {
  2638.          bld.num_mips = 1;
  2639.          bld.num_lods = type.length;
  2640.       }
  2641.    }
  2642.    /* TODO: for true scalar_lod should only use 1 lod value */
  2643.    else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
  2644.             (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
  2645.       bld.num_mips = num_quads;
  2646.       bld.num_lods = num_quads;
  2647.    }
  2648.    else if (op_is_tex && min_img_filter != mag_img_filter) {
  2649.       bld.num_mips = 1;
  2650.       bld.num_lods = num_quads;
  2651.    }
  2652.  
  2653.  
  2654.    bld.lodf_type = type;
  2655.    /* we want native vector size to be able to use our intrinsics */
  2656.    if (bld.num_lods != type.length) {
  2657.       /* TODO: this currently always has to be per-quad or per-element */
  2658.       bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
  2659.    }
  2660.    bld.lodi_type = lp_int_type(bld.lodf_type);
  2661.    bld.levelf_type = bld.lodf_type;
  2662.    if (bld.num_mips == 1) {
  2663.       bld.levelf_type.length = 1;
  2664.    }
  2665.    bld.leveli_type = lp_int_type(bld.levelf_type);
  2666.    bld.float_size_type = bld.float_size_in_type;
  2667.    /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
  2668.     * with per-element lod that is w0/h0/d0/_/w1/h1/d1/_/... so up to 8x4f32 */
  2669.    if (bld.num_mips > 1) {
  2670.       bld.float_size_type.length = bld.num_mips == type.length ?
  2671.                                       bld.num_mips * bld.float_size_in_type.length :
  2672.                                       type.length;
  2673.    }
  2674.    bld.int_size_type = lp_int_type(bld.float_size_type);
  2675.  
  2676.    lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
  2677.    lp_build_context_init(&bld.float_vec_bld, gallivm, type);
  2678.    lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
  2679.    lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
  2680.    lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
  2681.    lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
  2682.    lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
  2683.    lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
  2684.    lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
  2685.    lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
  2686.    lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
  2687.    lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
  2688.    lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
  2689.    lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
  2690.  
  2691.    /* Get the dynamic state */
  2692.    tex_width = dynamic_state->width(dynamic_state, gallivm,
  2693.                                     context_ptr, texture_index);
  2694.    bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
  2695.                                                     context_ptr, texture_index);
  2696.    bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
  2697.                                                     context_ptr, texture_index);
  2698.    bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
  2699.                                           context_ptr, texture_index);
  2700.    bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
  2701.                                                 context_ptr, texture_index);
  2702.    /* Note that mip_offsets is an array[level] of offsets to texture images */
  2703.  
  2704.    /* width, height, depth as single int vector */
  2705.    if (dims <= 1) {
  2706.       bld.int_size = tex_width;
  2707.    }
  2708.    else {
  2709.       bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
  2710.                                             tex_width,
  2711.                                             LLVMConstInt(i32t, 0, 0), "");
  2712.       if (dims >= 2) {
  2713.          LLVMValueRef tex_height =
  2714.             dynamic_state->height(dynamic_state, gallivm,
  2715.                                   context_ptr, texture_index);
  2716.          bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
  2717.                                                tex_height,
  2718.                                                LLVMConstInt(i32t, 1, 0), "");
  2719.          if (dims >= 3) {
  2720.             LLVMValueRef tex_depth =
  2721.                dynamic_state->depth(dynamic_state, gallivm, context_ptr,
  2722.                                     texture_index);
  2723.             bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
  2724.                                                   tex_depth,
  2725.                                                   LLVMConstInt(i32t, 2, 0), "");
  2726.          }
  2727.       }
  2728.    }
  2729.  
  2730.    for (i = 0; i < 5; i++) {
  2731.       newcoords[i] = coords[i];
  2732.    }
  2733.  
  2734.    if (0) {
  2735.       /* For debug: no-op texture sampling */
  2736.       lp_build_sample_nop(gallivm,
  2737.                           bld.texel_type,
  2738.                           newcoords,
  2739.                           texel_out);
  2740.    }
  2741.  
  2742.    else if (op_type == LP_SAMPLER_OP_FETCH) {
  2743.       lp_build_fetch_texel(&bld, texture_index, newcoords,
  2744.                            lod, offsets,
  2745.                            texel_out);
  2746.    }
  2747.  
  2748.    else {
  2749.       LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
  2750.       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
  2751.       boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
  2752.                         op_is_tex &&
  2753.                         /* not sure this is strictly needed or simply impossible */
  2754.                         derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
  2755.                         lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
  2756.  
  2757.       use_aos &= bld.num_lods <= num_quads ||
  2758.                  derived_sampler_state.min_img_filter ==
  2759.                     derived_sampler_state.mag_img_filter;
  2760.       if (dims > 1) {
  2761.          use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
  2762.          if (dims > 2) {
  2763.             use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
  2764.          }
  2765.       }
  2766.       if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
  2767.            static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
  2768.           derived_sampler_state.seamless_cube_map &&
  2769.           (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
  2770.            derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
  2771.          /* theoretically possible with AoS filtering but not implemented (complex!) */
  2772.          use_aos = 0;
  2773.       }
  2774.  
  2775.       if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
  2776.           !use_aos && util_format_fits_8unorm(bld.format_desc)) {
  2777.          debug_printf("%s: using floating point linear filtering for %s\n",
  2778.                       __FUNCTION__, bld.format_desc->short_name);
  2779.          debug_printf("  min_img %d  mag_img %d  mip %d  target %d  seamless %d"
  2780.                       "  wraps %d  wrapt %d  wrapr %d\n",
  2781.                       derived_sampler_state.min_img_filter,
  2782.                       derived_sampler_state.mag_img_filter,
  2783.                       derived_sampler_state.min_mip_filter,
  2784.                       static_texture_state->target,
  2785.                       derived_sampler_state.seamless_cube_map,
  2786.                       derived_sampler_state.wrap_s,
  2787.                       derived_sampler_state.wrap_t,
  2788.                       derived_sampler_state.wrap_r);
  2789.       }
  2790.  
  2791.       lp_build_sample_common(&bld, texture_index, sampler_index,
  2792.                              newcoords,
  2793.                              derivs, lod_bias, explicit_lod,
  2794.                              &lod_positive, &lod_fpart,
  2795.                              &ilevel0, &ilevel1);
  2796.  
  2797.       if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
  2798.          /* The aos path doesn't do seamless filtering so simply add cube layer
  2799.           * to face now.
  2800.           */
  2801.          newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
  2802.       }
  2803.  
  2804.       /*
  2805.        * We only try 8-wide sampling with soa, as it appears to
  2806.        * be a loss with aos on AVX (it should work, though, except
  2807.        * for conformance if min_filter != mag_filter and num_lods > 1).
  2808.        * (It should be faster if we supported avx2.)
  2809.        */
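              /*
               * Roughly: a single quad (or the non-AoS / SoA path) is handled
               * directly below; otherwise the work is split into 4-wide chunks,
               * each sampled with the separate bld4 context, and the partial
               * results are concatenated at the end.
               */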
  2810.       if (num_quads == 1 || !use_aos) {
  2811.          if (use_aos) {
  2812.             /* do sampling/filtering with fixed pt arithmetic */
  2813.             lp_build_sample_aos(&bld, sampler_index,
  2814.                                 newcoords[0], newcoords[1],
  2815.                                 newcoords[2],
  2816.                                 offsets, lod_positive, lod_fpart,
  2817.                                 ilevel0, ilevel1,
  2818.                                 texel_out);
  2819.          }
  2820.  
  2821.          else {
  2822.             lp_build_sample_general(&bld, sampler_index,
  2823.                                     op_type == LP_SAMPLER_OP_GATHER,
  2824.                                     newcoords, offsets,
  2825.                                     lod_positive, lod_fpart,
  2826.                                     ilevel0, ilevel1,
  2827.                                     texel_out);
  2828.          }
  2829.       }
  2830.       else {
  2831.          unsigned j;
  2832.          struct lp_build_sample_context bld4;
  2833.          struct lp_type type4 = type;
  2834.          unsigned i;
  2835.          LLVMValueRef texelout4[4];
  2836.          LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
  2837.  
  2838.          type4.length = 4;
  2839.  
  2840.          /* Setup our build context */
  2841.          memset(&bld4, 0, sizeof bld4);
  2842.          bld4.gallivm = bld.gallivm;
  2843.          bld4.context_ptr = bld.context_ptr;
  2844.          bld4.static_texture_state = bld.static_texture_state;
  2845.          bld4.static_sampler_state = bld.static_sampler_state;
  2846.          bld4.dynamic_state = bld.dynamic_state;
  2847.          bld4.format_desc = bld.format_desc;
  2848.          bld4.dims = bld.dims;
  2849.          bld4.row_stride_array = bld.row_stride_array;
  2850.          bld4.img_stride_array = bld.img_stride_array;
  2851.          bld4.base_ptr = bld.base_ptr;
  2852.          bld4.mip_offsets = bld.mip_offsets;
  2853.          bld4.int_size = bld.int_size;
  2854.  
  2855.          bld4.vector_width = lp_type_width(type4);
  2856.  
  2857.          bld4.float_type = lp_type_float(32);
  2858.          bld4.int_type = lp_type_int(32);
  2859.          bld4.coord_type = type4;
  2860.          bld4.int_coord_type = lp_int_type(type4);
  2861.          bld4.float_size_in_type = lp_type_float(32);
  2862.          bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
  2863.          bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
  2864.          bld4.texel_type = bld.texel_type;
  2865.          bld4.texel_type.length = 4;
  2866.  
  2867.          bld4.num_mips = bld4.num_lods = 1;
  2868.          if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
  2869.              (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
  2870.              (static_texture_state->target == PIPE_TEXTURE_CUBE ||
  2871.               static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
  2872.              (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
  2873.             bld4.num_mips = type4.length;
  2874.             bld4.num_lods = type4.length;
  2875.          }
  2876.          if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
  2877.              (explicit_lod || lod_bias || derivs)) {
  2878.             if ((!op_is_tex && target != PIPE_BUFFER) ||
  2879.                 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
  2880.                bld4.num_mips = type4.length;
  2881.                bld4.num_lods = type4.length;
  2882.             }
  2883.             else if (op_is_tex && min_img_filter != mag_img_filter) {
  2884.                bld4.num_mips = 1;
  2885.                bld4.num_lods = type4.length;
  2886.             }
  2887.          }
  2888.  
  2889.          /* we want native vector size to be able to use our intrinsics */
  2890.          bld4.lodf_type = type4;
  2891.          if (bld4.num_lods != type4.length) {
  2892.             bld4.lodf_type.length = 1;
  2893.          }
  2894.          bld4.lodi_type = lp_int_type(bld4.lodf_type);
  2895.          bld4.levelf_type = type4;
  2896.          if (bld4.num_mips != type4.length) {
  2897.             bld4.levelf_type.length = 1;
  2898.          }
  2899.          bld4.leveli_type = lp_int_type(bld4.levelf_type);
  2900.          bld4.float_size_type = bld4.float_size_in_type;
  2901.          if (bld4.num_mips > 1) {
  2902.             bld4.float_size_type.length = bld4.num_mips == type4.length ?
  2903.                                             bld4.num_mips * bld4.float_size_in_type.length :
  2904.                                             type4.length;
  2905.          }
  2906.          bld4.int_size_type = lp_int_type(bld4.float_size_type);
  2907.  
  2908.          lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
  2909.          lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
  2910.          lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
  2911.          lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
  2912.          lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
  2913.          lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
  2914.          lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
  2915.          lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
  2916.          lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
  2917.          lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
  2918.          lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
  2919.          lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
  2920.          lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
  2921.          lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
  2922.  
  2923.          for (i = 0; i < num_quads; i++) {
  2924.             LLVMValueRef s4, t4, r4;
  2925.             LLVMValueRef lod_positive4, lod_fpart4 = NULL;
  2926.             LLVMValueRef ilevel04, ilevel14 = NULL;
  2927.             LLVMValueRef offsets4[4] = { NULL };
  2928.             unsigned num_lods = bld4.num_lods;
  2929.  
  2930.             s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
  2931.             t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
  2932.             r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
  2933.  
  2934.             if (offsets[0]) {
  2935.                offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
  2936.                if (dims > 1) {
  2937.                   offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
  2938.                   if (dims > 2) {
  2939.                      offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
  2940.                   }
  2941.                }
  2942.             }
  2943.             lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
  2944.             ilevel04 = bld.num_mips == 1 ? ilevel0 :
  2945.                           lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
  2946.             if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
  2947.                ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
  2948.                lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
  2949.             }
  2950.  
  2951.             if (use_aos) {
  2952.                /* do sampling/filtering with fixed pt arithmetic */
  2953.                lp_build_sample_aos(&bld4, sampler_index,
  2954.                                    s4, t4, r4, offsets4,
  2955.                                    lod_positive4, lod_fpart4,
  2956.                                    ilevel04, ilevel14,
  2957.                                    texelout4);
  2958.             }
  2959.  
  2960.             else {
  2961.                /* this path is currently unreachable and hence might break easily... */
  2962.                LLVMValueRef newcoords4[5];
  2963.                newcoords4[0] = s4;
  2964.                newcoords4[1] = t4;
  2965.                newcoords4[2] = r4;
  2966.                newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
  2967.                newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
  2968.  
  2969.                lp_build_sample_general(&bld4, sampler_index,
  2970.                                        op_type == LP_SAMPLER_OP_GATHER,
  2971.                                        newcoords4, offsets4,
  2972.                                        lod_positive4, lod_fpart4,
  2973.                                        ilevel04, ilevel14,
  2974.                                        texelout4);
  2975.             }
  2976.             for (j = 0; j < 4; j++) {
  2977.                texelouttmp[j][i] = texelout4[j];
  2978.             }
  2979.          }
  2980.  
  2981.          for (j = 0; j < 4; j++) {
  2982.             texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
  2983.          }
  2984.       }
  2985.    }
  2986.  
  2987.    if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
  2988.       apply_sampler_swizzle(&bld, texel_out);
  2989.    }
  2990.  
  2991.    /*
  2992.     * texel type can be a (32bit) int/uint (for pure int formats only);
  2993.     * however, we are expected to always return floats (storage is untyped).
  2994.     */
  2995.    if (!bld.texel_type.floating) {
  2996.       unsigned chan;
  2997.       for (chan = 0; chan < 4; chan++) {
  2998.          texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
  2999.                                             lp_build_vec_type(gallivm, type), "");
  3000.       }
  3001.    }
  3002. }
  3003.  
  3004.  
  3005. #define USE_TEX_FUNC_CALL 1
  3006.  
  3007. #define LP_MAX_TEX_FUNC_ARGS 32
  3008.  
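        /**
         * Return the number of coords, derivatives and offsets (plus the index
         * of the layer coord, if any) used by the given texture target.
         */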
  3009. static inline void
  3010. get_target_info(enum pipe_texture_target target,
  3011.                 unsigned *num_coords, unsigned *num_derivs,
  3012.                 unsigned *num_offsets, unsigned *layer)
  3013. {
  3014.    unsigned dims = texture_dims(target);
  3015.    *num_coords = dims;
  3016.    *num_offsets = dims;
  3017.    *num_derivs = (target == PIPE_TEXTURE_CUBE ||
  3018.                   target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
  3019.    *layer = has_layer_coord(target) ? 2: 0;
  3020.    if (target == PIPE_TEXTURE_CUBE_ARRAY) {
  3021.       /*
  3022.        * dims doesn't include r coord for cubes - this is handled
  3023.        * by layer instead, but need to fix up for cube arrays...
  3024.        */
  3025.       *layer = 3;
  3026.       *num_coords = 3;
  3027.    }
  3028. }
  3029.  
  3030.  
  3031. /**
  3032.  * Generate the function body for a texture sampling function.
  3033.  */
  3034. static void
  3035. lp_build_sample_gen_func(struct gallivm_state *gallivm,
  3036.                          const struct lp_static_texture_state *static_texture_state,
  3037.                          const struct lp_static_sampler_state *static_sampler_state,
  3038.                          struct lp_sampler_dynamic_state *dynamic_state,
  3039.                          struct lp_type type,
  3040.                          unsigned texture_index,
  3041.                          unsigned sampler_index,
  3042.                          LLVMValueRef function,
  3043.                          unsigned num_args,
  3044.                          unsigned sample_key)
  3045. {
  3046.    LLVMBuilderRef old_builder;
  3047.    LLVMBasicBlockRef block;
  3048.    LLVMValueRef coords[5];
  3049.    LLVMValueRef offsets[3] = { NULL };
  3050.    LLVMValueRef lod = NULL;
  3051.    LLVMValueRef context_ptr;
  3052.    LLVMValueRef texel_out[4];
  3053.    struct lp_derivatives derivs;
  3054.    struct lp_derivatives *deriv_ptr = NULL;
  3055.    unsigned num_param = 0;
  3056.    unsigned i, num_coords, num_derivs, num_offsets, layer;
  3057.    enum lp_sampler_lod_control lod_control;
  3058.  
  3059.    lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
  3060.                     LP_SAMPLER_LOD_CONTROL_SHIFT;
  3061.  
  3062.    get_target_info(static_texture_state->target,
  3063.                    &num_coords, &num_derivs, &num_offsets, &layer);
  3064.  
  3065.    /* "unpack" arguments */
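           /* Note the parameter order here has to stay in sync with the prototype
            * and call constructed in lp_build_sample_soa_func() below.
            */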
  3066.    context_ptr = LLVMGetParam(function, num_param++);
  3067.    for (i = 0; i < num_coords; i++) {
  3068.       coords[i] = LLVMGetParam(function, num_param++);
  3069.    }
  3070.    for (i = num_coords; i < 5; i++) {
  3071.       /* This is rather unfortunate... */
  3072.       coords[i] = lp_build_undef(gallivm, type);
  3073.    }
  3074.    if (layer) {
  3075.       coords[layer] = LLVMGetParam(function, num_param++);
  3076.    }
  3077.    if (sample_key & LP_SAMPLER_SHADOW) {
  3078.       coords[4] = LLVMGetParam(function, num_param++);
  3079.    }
  3080.    if (sample_key & LP_SAMPLER_OFFSETS) {
  3081.       for (i = 0; i < num_offsets; i++) {
  3082.          offsets[i] = LLVMGetParam(function, num_param++);
  3083.       }
  3084.    }
  3085.    if (lod_control == LP_SAMPLER_LOD_BIAS ||
  3086.        lod_control == LP_SAMPLER_LOD_EXPLICIT) {
  3087.       lod = LLVMGetParam(function, num_param++);
  3088.    }
  3089.    else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
  3090.       for (i = 0; i < num_derivs; i++) {
  3091.          derivs.ddx[i] = LLVMGetParam(function, num_param++);
  3092.          derivs.ddy[i] = LLVMGetParam(function, num_param++);
  3093.       }
  3094.       deriv_ptr = &derivs;
  3095.    }
  3096.  
  3097.    assert(num_args == num_param);
  3098.  
  3099.    /*
  3100.     * Function body
  3101.     */
  3102.  
  3103.    old_builder = gallivm->builder;
  3104.    block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
  3105.    gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
  3106.    LLVMPositionBuilderAtEnd(gallivm->builder, block);
  3107.  
  3108.    lp_build_sample_soa_code(gallivm,
  3109.                             static_texture_state,
  3110.                             static_sampler_state,
  3111.                             dynamic_state,
  3112.                             type,
  3113.                             sample_key,
  3114.                             texture_index,
  3115.                             sampler_index,
  3116.                             context_ptr,
  3117.                             coords,
  3118.                             offsets,
  3119.                             deriv_ptr,
  3120.                             lod,
  3121.                             texel_out);
  3122.  
  3123.    LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
  3124.  
  3125.    LLVMDisposeBuilder(gallivm->builder);
  3126.    gallivm->builder = old_builder;
  3127.  
  3128.    gallivm_verify_function(gallivm, function);
  3129. }
  3130.  
  3131.  
  3132. /**
  3133.  * Call the matching function for texture sampling.
  3134.  * If there's no match, generate a new one.
  3135.  */
  3136. static void
  3137. lp_build_sample_soa_func(struct gallivm_state *gallivm,
  3138.                          const struct lp_static_texture_state *static_texture_state,
  3139.                          const struct lp_static_sampler_state *static_sampler_state,
  3140.                          struct lp_sampler_dynamic_state *dynamic_state,
  3141.                          const struct lp_sampler_params *params)
  3142. {
  3143.    LLVMBuilderRef builder = gallivm->builder;
  3144.    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
  3145.                              LLVMGetInsertBlock(builder)));
  3146.    LLVMValueRef function, inst;
  3147.    LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
  3148.    LLVMBasicBlockRef bb;
  3149.    LLVMValueRef tex_ret;
  3150.    unsigned num_args = 0;
  3151.    char func_name[64];
  3152.    unsigned i, num_coords, num_derivs, num_offsets, layer;
  3153.    unsigned texture_index = params->texture_index;
  3154.    unsigned sampler_index = params->sampler_index;
  3155.    unsigned sample_key = params->sample_key;
  3156.    const LLVMValueRef *coords = params->coords;
  3157.    const LLVMValueRef *offsets = params->offsets;
  3158.    const struct lp_derivatives *derivs = params->derivs;
  3159.    enum lp_sampler_lod_control lod_control;
  3160.  
  3161.    lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
  3162.                     LP_SAMPLER_LOD_CONTROL_SHIFT;
  3163.  
  3164.    get_target_info(static_texture_state->target,
  3165.                    &num_coords, &num_derivs, &num_offsets, &layer);
  3166.  
  3167.    /*
  3168.     * Texture function matches are found by name.
  3169.     * Thus the name has to include both the texture and sampler unit
  3170.     * (which covers all static state) plus the actual texture function
  3171.     * (including things like offsets, shadow coord, lod control).
  3172.     * The lod_property has to be included as well.
  3173.     */
  3174.  
  3175.    util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
  3176.                  texture_index, sampler_index, sample_key);
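           /* Hypothetical example: texture unit 1, sampler unit 0 and a sample_key
            * of 0x44 would yield the name "texfunc_res_1_sam_0_44".
            */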
  3177.  
  3178.    function = LLVMGetNamedFunction(module, func_name);
  3179.  
  3180.    if(!function) {
  3181.       LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
  3182.       LLVMTypeRef ret_type;
  3183.       LLVMTypeRef function_type;
  3184.       LLVMTypeRef val_type[4];
  3185.       unsigned num_param = 0;
  3186.  
  3187.       /*
  3188.        * Generate the function prototype.
  3189.        */
  3190.  
  3191.       arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
  3192.       for (i = 0; i < num_coords; i++) {
  3193.          arg_types[num_param++] = LLVMTypeOf(coords[0]);
  3194.          assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
  3195.       }
  3196.       if (layer) {
  3197.          arg_types[num_param++] = LLVMTypeOf(coords[layer]);
  3198.          assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
  3199.       }
  3200.       if (sample_key & LP_SAMPLER_SHADOW) {
  3201.          arg_types[num_param++] = LLVMTypeOf(coords[0]);
  3202.       }
  3203.       if (sample_key & LP_SAMPLER_OFFSETS) {
  3204.          for (i = 0; i < num_offsets; i++) {
  3205.             arg_types[num_param++] = LLVMTypeOf(offsets[0]);
  3206.             assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
  3207.          }
  3208.       }
  3209.       if (lod_control == LP_SAMPLER_LOD_BIAS ||
  3210.           lod_control == LP_SAMPLER_LOD_EXPLICIT) {
  3211.          arg_types[num_param++] = LLVMTypeOf(params->lod);
  3212.       }
  3213.       else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
  3214.          for (i = 0; i < num_derivs; i++) {
  3215.             arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
  3216.             arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
  3217.             assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
  3218.             assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
  3219.          }
  3220.       }
  3221.  
  3222.       val_type[0] = val_type[1] = val_type[2] = val_type[3] =
  3223.          lp_build_vec_type(gallivm, params->type);
  3224.       ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
  3225.       function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
  3226.       function = LLVMAddFunction(module, func_name, function_type);
  3227.  
  3228.       for (i = 0; i < num_param; ++i) {
  3229.          if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
  3230.             LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
  3231.          }
  3232.       }
  3233.  
  3234.       LLVMSetFunctionCallConv(function, LLVMFastCallConv);
  3235.       LLVMSetLinkage(function, LLVMPrivateLinkage);
  3236.  
  3237.       lp_build_sample_gen_func(gallivm,
  3238.                                static_texture_state,
  3239.                                static_sampler_state,
  3240.                                dynamic_state,
  3241.                                params->type,
  3242.                                texture_index,
  3243.                                sampler_index,
  3244.                                function,
  3245.                                num_param,
  3246.                                sample_key);
  3247.    }
  3248.  
  3249.    num_args = 0;
  3250.    args[num_args++] = params->context_ptr;
  3251.    for (i = 0; i < num_coords; i++) {
  3252.       args[num_args++] = coords[i];
  3253.    }
  3254.    if (layer) {
  3255.       args[num_args++] = coords[layer];
  3256.    }
  3257.    if (sample_key & LP_SAMPLER_SHADOW) {
  3258.       args[num_args++] = coords[4];
  3259.    }
  3260.    if (sample_key & LP_SAMPLER_OFFSETS) {
  3261.       for (i = 0; i < num_offsets; i++) {
  3262.          args[num_args++] = offsets[i];
  3263.       }
  3264.    }
  3265.    if (lod_control == LP_SAMPLER_LOD_BIAS ||
  3266.        lod_control == LP_SAMPLER_LOD_EXPLICIT) {
  3267.       args[num_args++] = params->lod;
  3268.    }
  3269.    else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
  3270.       for (i = 0; i < num_derivs; i++) {
  3271.          args[num_args++] = derivs->ddx[i];
  3272.          args[num_args++] = derivs->ddy[i];
  3273.       }
  3274.    }
  3275.  
  3276.    assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
  3277.  
  3278.    tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
  3279.    bb = LLVMGetInsertBlock(builder);
  3280.    inst = LLVMGetLastInstruction(bb);
  3281.    LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
  3282.  
  3283.    for (i = 0; i < 4; i++) {
  3284.       params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
  3285.    }
  3286. }
  3287.  
  3288.  
  3289. /**
  3290.  * Build texture sampling code.
  3291.  * Either via a function call or inline it directly.
  3292.  */
  3293. void
  3294. lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
  3295.                     const struct lp_static_sampler_state *static_sampler_state,
  3296.                     struct lp_sampler_dynamic_state *dynamic_state,
  3297.                     struct gallivm_state *gallivm,
  3298.                     const struct lp_sampler_params *params)
  3299. {
  3300.    boolean use_tex_func = FALSE;
  3301.  
  3302.    /*
  3303.     * Do not use a function call if the sampling is "simple enough".
  3304.     * We define this by
  3305.     * a) format
  3306.     * b) no mips (either one level only or no mip filter)
  3307.     * No mips will definitely make the code smaller, though
  3308.     * the format requirement is a bit iffy - there are some (SoA) formats
  3309.     * which definitely generate less code. This does happen to catch
  3310.     * some important cases, though, which are hurt quite a bit by using
  3311.     * a call (not really because of the call overhead but because
  3312.     * they reuse the same texture unit with some of the same
  3313.     * parameters).
  3314.     * Ideally we'd let llvm recognize this stuff by doing IPO passes.
  3315.     */
  3316.  
  3317.    if (USE_TEX_FUNC_CALL) {
  3318.       const struct util_format_description *format_desc;
  3319.       boolean simple_format;
  3320.       boolean simple_tex;
  3321.       enum lp_sampler_op_type op_type;
  3322.       format_desc = util_format_description(static_texture_state->format);
  3323.       simple_format = !format_desc ||
  3324.                          (util_format_is_rgba8_variant(format_desc) &&
  3325.                           format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
  3326.  
  3327.       op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
  3328.                     LP_SAMPLER_OP_TYPE_SHIFT;
  3329.       simple_tex =
  3330.          op_type != LP_SAMPLER_OP_TEXTURE ||
  3331.            ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
  3332.              static_texture_state->level_zero_only == TRUE) &&
  3333.             static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
  3334.  
  3335.       use_tex_func = format_desc && !(simple_format && simple_tex);
  3336.    }
  3337.  
  3338.    if (use_tex_func) {
  3339.       lp_build_sample_soa_func(gallivm,
  3340.                                static_texture_state,
  3341.                                static_sampler_state,
  3342.                                dynamic_state,
  3343.                                params);
  3344.    }
  3345.    else {
  3346.       lp_build_sample_soa_code(gallivm,
  3347.                                static_texture_state,
  3348.                                static_sampler_state,
  3349.                                dynamic_state,
  3350.                                params->type,
  3351.                                params->sample_key,
  3352.                                params->texture_index,
  3353.                                params->sampler_index,
  3354.                                params->context_ptr,
  3355.                                params->coords,
  3356.                                params->offsets,
  3357.                                params->derivs,
  3358.                                params->lod,
  3359.                                params->texel);
  3360.    }
  3361. }
  3362.  
  3363.  
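        /**
         * Build code for a texture size query (e.g. SVIEWINFO / textureSize-style
         * queries). The lod-minified width/height/depth is returned in sizes_out,
         * with the layer count appended for array targets.
         */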
  3364. void
  3365. lp_build_size_query_soa(struct gallivm_state *gallivm,
  3366.                         const struct lp_static_texture_state *static_state,
  3367.                         struct lp_sampler_dynamic_state *dynamic_state,
  3368.                         struct lp_type int_type,
  3369.                         unsigned texture_unit,
  3370.                         unsigned target,
  3371.                         LLVMValueRef context_ptr,
  3372.                         boolean is_sviewinfo,
  3373.                         enum lp_sampler_lod_property lod_property,
  3374.                         LLVMValueRef explicit_lod,
  3375.                         LLVMValueRef *sizes_out)
  3376. {
  3377.    LLVMValueRef lod, level, size;
  3378.    LLVMValueRef first_level = NULL;
  3379.    int dims, i;
  3380.    boolean has_array;
  3381.    unsigned num_lods = 1;
  3382.    struct lp_build_context bld_int_vec4;
  3383.  
  3384.    if (static_state->format == PIPE_FORMAT_NONE) {
  3385.       /*
  3386.        * If there's nothing bound, format is NONE, and we must return
  3387.        * all zero as mandated by d3d10 in this case.
  3388.        */
  3389.       unsigned chan;
  3390.       LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F);
  3391.       for (chan = 0; chan < 4; chan++) {
  3392.          sizes_out[chan] = zero;
  3393.       }
  3394.       return;
  3395.    }
  3396.  
  3397.    /*
  3398.     * Do some sanity verification about bound texture and shader dcl target.
  3399.     * Not entirely sure what's possible but assume array/non-array
  3400.     * always compatible (probably not ok for OpenGL but d3d10 has no
  3401.     * distinction of arrays at the resource level).
  3402.     * Everything else looks bogus (though not entirely sure about rect/2d).
  3403.     * Currently disabled because it causes assertion failures if there's
  3404.     * nothing bound (or rather a dummy texture, not that this case would
  3405.     * return the right values).
  3406.     */
  3407.    if (0 && static_state->target != target) {
  3408.       if (static_state->target == PIPE_TEXTURE_1D)
  3409.          assert(target == PIPE_TEXTURE_1D_ARRAY);
  3410.       else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
  3411.          assert(target == PIPE_TEXTURE_1D);
  3412.       else if (static_state->target == PIPE_TEXTURE_2D)
  3413.          assert(target == PIPE_TEXTURE_2D_ARRAY);
  3414.       else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
  3415.          assert(target == PIPE_TEXTURE_2D);
  3416.       else if (static_state->target == PIPE_TEXTURE_CUBE)
  3417.          assert(target == PIPE_TEXTURE_CUBE_ARRAY);
  3418.       else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
  3419.          assert(target == PIPE_TEXTURE_CUBE);
  3420.       else
  3421.          assert(0);
  3422.    }
  3423.  
  3424.    dims = texture_dims(target);
  3425.  
  3426.    switch (target) {
  3427.    case PIPE_TEXTURE_1D_ARRAY:
  3428.    case PIPE_TEXTURE_2D_ARRAY:
  3429.    case PIPE_TEXTURE_CUBE_ARRAY:
  3430.       has_array = TRUE;
  3431.       break;
  3432.    default:
  3433.       has_array = FALSE;
  3434.       break;
  3435.    }
  3436.  
  3437.    assert(!int_type.floating);
  3438.  
  3439.    lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
  3440.  
  3441.    if (explicit_lod) {
  3442.       /* FIXME: this needs to honor per-element lod */
  3443.       lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod,
  3444.                                     lp_build_const_int32(gallivm, 0), "");
  3445.       first_level = dynamic_state->first_level(dynamic_state, gallivm,
  3446.                                                context_ptr, texture_unit);
  3447.       level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
  3448.       lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
  3449.    } else {
  3450.       lod = bld_int_vec4.zero;
  3451.    }
  3452.  
  3453.    size = bld_int_vec4.undef;
  3454.  
  3455.    size = LLVMBuildInsertElement(gallivm->builder, size,
  3456.                                  dynamic_state->width(dynamic_state, gallivm,
  3457.                                                       context_ptr, texture_unit),
  3458.                                  lp_build_const_int32(gallivm, 0), "");
  3459.  
  3460.    if (dims >= 2) {
  3461.       size = LLVMBuildInsertElement(gallivm->builder, size,
  3462.                                     dynamic_state->height(dynamic_state, gallivm,
  3463.                                                           context_ptr, texture_unit),
  3464.                                     lp_build_const_int32(gallivm, 1), "");
  3465.    }
  3466.  
  3467.    if (dims >= 3) {
  3468.       size = LLVMBuildInsertElement(gallivm->builder, size,
  3469.                                     dynamic_state->depth(dynamic_state, gallivm,
  3470.                                                          context_ptr, texture_unit),
  3471.                                     lp_build_const_int32(gallivm, 2), "");
  3472.    }
  3473.  
  3474.    size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);
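   /*
    * Note (illustrative, relying on the u_minify()-style semantics of
    * lp_build_minify): each size component becomes
    *    MAX2(base_size >> level, 1)
    * so e.g. a 1024x768 base level queried at level 3 reports 128x96.
    */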
  3475.  
  3476.    if (has_array) {
  3477.       LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
  3478.                                                  context_ptr, texture_unit);
  3479.       if (target == PIPE_TEXTURE_CUBE_ARRAY) {
  3480.          /*
  3481.           * It looks like GL wants the number of cubes; d3d10.1 leaves it undefined.
  3482.           * Could avoid this by passing in the number of cubes instead of the
  3483.           * total number of layers (might make things easier elsewhere too).
  3484.           */
  3485.          LLVMValueRef six = lp_build_const_int32(gallivm, 6);
  3486.          layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
  3487.       }
  3488.       size = LLVMBuildInsertElement(gallivm->builder, size, layers,
  3489.                                     lp_build_const_int32(gallivm, dims), "");
  3490.    }
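   /*
    * Example (added for illustration): a cube array created with 12 layers
    * (layer-faces) reports 2 here, i.e. the number of cubes, matching the GL
    * expectation noted in the comment above.
    */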
  3491.  
  3492.    /*
  3493.     * d3d10 requires returning zero for the x/y/z values (but not w, i.e.
  3494.     * the mip count) if the level is out of bounds (note this can't cover
  3495.     * the unbound texture case here, which also requires returning zero).
  3496.     */
  3497.    if (explicit_lod && is_sviewinfo) {
  3498.       LLVMValueRef last_level, out, out1;
  3499.       struct lp_build_context leveli_bld;
  3500.  
  3501.       /* everything is scalar for now */
  3502.       lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
  3503.       last_level = dynamic_state->last_level(dynamic_state, gallivm,
  3504.                                              context_ptr, texture_unit);
  3505.  
  3506.       out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
  3507.       out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
  3508.       out = lp_build_or(&leveli_bld, out, out1);
  3509.       if (num_lods == 1) {
  3510.          out = lp_build_broadcast_scalar(&bld_int_vec4, out);
  3511.       }
  3512.       else {
  3513.          /* TODO */
  3514.          assert(0);
  3515.       }
  3516.       size = lp_build_andnot(&bld_int_vec4, size, out);
  3517.    }
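   /*
    * Sketch of the masking above (illustrative): 'out' is an all-ones vector
    * whenever level < first_level or level > last_level, so the andnot
    *    size = size & ~out
    * zeroes width/height/depth (and layers) for an out-of-range mip; the mip
    * count channel is written separately below and is not affected.
    */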
  3518.    for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
  3519.       sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
  3520.                                                 size,
  3521.                                                 lp_build_const_int32(gallivm, i));
  3522.    }
  3523.    if (is_sviewinfo) {
  3524.       for (; i < 4; i++) {
  3525.          sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
  3526.       }
  3527.    }
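   /*
    * Resulting channel layout as seen by the shader (summary, not new
    * behaviour): the first dims (+1 for array targets) channels carry
    * width/height/depth and the layer count in that order; for sviewinfo
    * queries the remaining channels are zeroed here, and channel 3 is
    * overwritten with the mip count below when an explicit lod is present.
    */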
  3528.  
  3529.    /*
  3530.     * If there's no explicit_lod (buffers, rects), queries requiring the
  3531.     * number of mips would be illegal.
  3532.     */
  3533.    if (is_sviewinfo && explicit_lod) {
  3534.       struct lp_build_context bld_int_scalar;
  3535.       LLVMValueRef num_levels;
  3536.       lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
  3537.  
  3538.       if (static_state->level_zero_only) {
  3539.          num_levels = bld_int_scalar.one;
  3540.       }
  3541.       else {
  3542.          LLVMValueRef last_level;
  3543.  
  3544.          last_level = dynamic_state->last_level(dynamic_state, gallivm,
  3545.                                                 context_ptr, texture_unit);
  3546.          num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
  3547.          num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
  3548.       }
  3549.       sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
  3550.                                         num_levels);
  3551.    }
  3552. }
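
/*
 * Purely illustrative scalar sketch (added for exposition, not used by the
 * query code above, and the helper name is hypothetical): the per-level
 * sizes built vectorially above follow the usual u_minify() rule.
 */
static inline int
lp_size_query_ref_level_size(int base_size, int level)
{
   /* halve the base size per mip level, but never report less than one texel */
   return MAX2(base_size >> level, 1);
}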
  3553.