Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * @file
  30.  * Helper functions for logical operations.
  31.  *
  32.  * @author Jose Fonseca <jfonseca@vmware.com>
  33.  */
  34.  
  35.  
  36. #include "util/u_cpu_detect.h"
  37. #include "util/u_memory.h"
  38. #include "util/u_debug.h"
  39.  
  40. #include "lp_bld_type.h"
  41. #include "lp_bld_const.h"
  42. #include "lp_bld_init.h"
  43. #include "lp_bld_intr.h"
  44. #include "lp_bld_debug.h"
  45. #include "lp_bld_logic.h"
  46.  
  47.  
  48. /*
  49.  * XXX
  50.  *
  51.  * Selection with vector conditional like
  52.  *
  53.  *    select <4 x i1> %C, %A, %B
  54.  *
  55.  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
  56.  * supported on some backends (x86) starting with llvm 3.1.
  57.  *
  58.  * Expanding the boolean vector to full SIMD register width, as in
  59.  *
  60.  *    sext <4 x i1> %C to <4 x i32>
  61.  *
  62.  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
  63.  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
  64.  * LLVM 2.7.
  65.  */
  66.  
  67.  
  68. /**
  69.  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  70.  * \param func  one of PIPE_FUNC_x
  71.  * The result values will be 0 for false or ~0 for true.
  72.  */
  73. LLVMValueRef
  74. lp_build_compare(struct gallivm_state *gallivm,
  75.                  const struct lp_type type,
  76.                  unsigned func,
  77.                  LLVMValueRef a,
  78.                  LLVMValueRef b)
  79. {
  80.    LLVMBuilderRef builder = gallivm->builder;
  81.    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
  82.    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
  83.    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
  84.    LLVMValueRef cond;
  85.    LLVMValueRef res;
  86.  
  87.    assert(func >= PIPE_FUNC_NEVER);
  88.    assert(func <= PIPE_FUNC_ALWAYS);
  89.    assert(lp_check_value(type, a));
  90.    assert(lp_check_value(type, b));
  91.  
  92.    if(func == PIPE_FUNC_NEVER)
  93.       return zeros;
  94.    if(func == PIPE_FUNC_ALWAYS)
  95.       return ones;
  96.  
  97. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  98.    /*
  99.     * There are no unsigned integer comparison instructions in SSE.
  100.     */
  101.  
  102.    if (!type.floating && !type.sign &&
  103.        type.width * type.length == 128 &&
  104.        util_cpu_caps.has_sse2 &&
  105.        (func == PIPE_FUNC_LESS ||
  106.         func == PIPE_FUNC_LEQUAL ||
  107.         func == PIPE_FUNC_GREATER ||
  108.         func == PIPE_FUNC_GEQUAL) &&
  109.        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
  110.          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
  111.                       __FUNCTION__, type.length, type.width);
  112.    }
  113. #endif
  114.  
  115. #if HAVE_LLVM < 0x0207
  116. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  117.    if(type.width * type.length == 128) {
  118.       if(type.floating && util_cpu_caps.has_sse) {
  119.          /* float[4] comparison */
  120.          LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
  121.          LLVMValueRef args[3];
  122.          unsigned cc;
  123.          boolean swap;
  124.  
  125.          swap = FALSE;
  126.          switch(func) {
  127.          case PIPE_FUNC_EQUAL:
  128.             cc = 0;
  129.             break;
  130.          case PIPE_FUNC_NOTEQUAL:
  131.             cc = 4;
  132.             break;
  133.          case PIPE_FUNC_LESS:
  134.             cc = 1;
  135.             break;
  136.          case PIPE_FUNC_LEQUAL:
  137.             cc = 2;
  138.             break;
  139.          case PIPE_FUNC_GREATER:
  140.             cc = 1;
  141.             swap = TRUE;
  142.             break;
  143.          case PIPE_FUNC_GEQUAL:
  144.             cc = 2;
  145.             swap = TRUE;
  146.             break;
  147.          default:
  148.             assert(0);
  149.             return lp_build_undef(gallivm, type);
  150.          }
  151.  
  152.          if(swap) {
  153.             args[0] = b;
  154.             args[1] = a;
  155.          }
  156.          else {
  157.             args[0] = a;
  158.             args[1] = b;
  159.          }
  160.  
  161.          args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
  162.          res = lp_build_intrinsic(builder,
  163.                                   "llvm.x86.sse.cmp.ps",
  164.                                   vec_type,
  165.                                   args, 3);
  166.          res = LLVMBuildBitCast(builder, res, int_vec_type, "");
  167.          return res;
  168.       }
  169.       else if(util_cpu_caps.has_sse2) {
  170.          /* int[4] comparison */
  171.          static const struct {
  172.             unsigned swap:1;
  173.             unsigned eq:1;
  174.             unsigned gt:1;
  175.             unsigned not:1;
  176.          } table[] = {
  177.             {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
  178.             {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
  179.             {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
  180.             {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
  181.             {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
  182.             {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
  183.             {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
  184.             {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
  185.          };
  186.          const char *pcmpeq;
  187.          const char *pcmpgt;
  188.          LLVMValueRef args[2];
  189.          LLVMValueRef res;
  190.          LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
  191.  
  192.          switch (type.width) {
  193.          case 8:
  194.             pcmpeq = "llvm.x86.sse2.pcmpeq.b";
  195.             pcmpgt = "llvm.x86.sse2.pcmpgt.b";
  196.             break;
  197.          case 16:
  198.             pcmpeq = "llvm.x86.sse2.pcmpeq.w";
  199.             pcmpgt = "llvm.x86.sse2.pcmpgt.w";
  200.             break;
  201.          case 32:
  202.             pcmpeq = "llvm.x86.sse2.pcmpeq.d";
  203.             pcmpgt = "llvm.x86.sse2.pcmpgt.d";
  204.             break;
  205.          default:
  206.             assert(0);
  207.             return lp_build_undef(gallivm, type);
  208.          }
  209.  
  210.          /* There are no unsigned comparison instructions. So flip the sign bit
  211.           * so that the results match.
  212.           */
  213.          if (table[func].gt && !type.sign) {
  214.             LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
  215.             a = LLVMBuildXor(builder, a, msb, "");
  216.             b = LLVMBuildXor(builder, b, msb, "");
  217.          }
  218.  
  219.          if(table[func].swap) {
  220.             args[0] = b;
  221.             args[1] = a;
  222.          }
  223.          else {
  224.             args[0] = a;
  225.             args[1] = b;
  226.          }
  227.  
  228.          if(table[func].eq)
  229.             res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
  230.          else if (table[func].gt)
  231.             res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
  232.          else
  233.             res = LLVMConstNull(vec_type);
  234.  
  235.          if(table[func].not)
  236.             res = LLVMBuildNot(builder, res, "");
  237.  
  238.          return res;
  239.       }
  240.    } /* if (type.width * type.length == 128) */
  241. #endif
  242. #endif /* HAVE_LLVM < 0x0207 */
  243.  
  244.    /* XXX: It is not clear if we should use the ordered or unordered operators */
  245.  
  246.    if(type.floating) {
  247.       LLVMRealPredicate op;
  248.       switch(func) {
  249.       case PIPE_FUNC_NEVER:
  250.          op = LLVMRealPredicateFalse;
  251.          break;
  252.       case PIPE_FUNC_ALWAYS:
  253.          op = LLVMRealPredicateTrue;
  254.          break;
  255.       case PIPE_FUNC_EQUAL:
  256.          op = LLVMRealUEQ;
  257.          break;
  258.       case PIPE_FUNC_NOTEQUAL:
  259.          op = LLVMRealUNE;
  260.          break;
  261.       case PIPE_FUNC_LESS:
  262.          op = LLVMRealULT;
  263.          break;
  264.       case PIPE_FUNC_LEQUAL:
  265.          op = LLVMRealULE;
  266.          break;
  267.       case PIPE_FUNC_GREATER:
  268.          op = LLVMRealUGT;
  269.          break;
  270.       case PIPE_FUNC_GEQUAL:
  271.          op = LLVMRealUGE;
  272.          break;
  273.       default:
  274.          assert(0);
  275.          return lp_build_undef(gallivm, type);
  276.       }
  277.  
  278. #if HAVE_LLVM >= 0x0207
  279.       cond = LLVMBuildFCmp(builder, op, a, b, "");
  280.       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
  281. #else
  282.       if (type.length == 1) {
  283.          cond = LLVMBuildFCmp(builder, op, a, b, "");
  284.          res = LLVMBuildSExt(builder, cond, int_vec_type, "");
  285.       }
  286.       else {
  287.          unsigned i;
  288.  
  289.          res = LLVMGetUndef(int_vec_type);
  290.  
  291.          debug_printf("%s: warning: using slow element-wise float"
  292.                       " vector comparison\n", __FUNCTION__);
  293.          for (i = 0; i < type.length; ++i) {
  294.             LLVMValueRef index = lp_build_const_int32(gallivm, i);
  295.             cond = LLVMBuildFCmp(builder, op,
  296.                                  LLVMBuildExtractElement(builder, a, index, ""),
  297.                                  LLVMBuildExtractElement(builder, b, index, ""),
  298.                                  "");
  299.             cond = LLVMBuildSelect(builder, cond,
  300.                                    LLVMConstExtractElement(ones, index),
  301.                                    LLVMConstExtractElement(zeros, index),
  302.                                    "");
  303.             res = LLVMBuildInsertElement(builder, res, cond, index, "");
  304.          }
  305.       }
  306. #endif
  307.    }
  308.    else {
  309.       LLVMIntPredicate op;
  310.       switch(func) {
  311.       case PIPE_FUNC_EQUAL:
  312.          op = LLVMIntEQ;
  313.          break;
  314.       case PIPE_FUNC_NOTEQUAL:
  315.          op = LLVMIntNE;
  316.          break;
  317.       case PIPE_FUNC_LESS:
  318.          op = type.sign ? LLVMIntSLT : LLVMIntULT;
  319.          break;
  320.       case PIPE_FUNC_LEQUAL:
  321.          op = type.sign ? LLVMIntSLE : LLVMIntULE;
  322.          break;
  323.       case PIPE_FUNC_GREATER:
  324.          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
  325.          break;
  326.       case PIPE_FUNC_GEQUAL:
  327.          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
  328.          break;
  329.       default:
  330.          assert(0);
  331.          return lp_build_undef(gallivm, type);
  332.       }
  333.  
  334. #if HAVE_LLVM >= 0x0207
  335.       cond = LLVMBuildICmp(builder, op, a, b, "");
  336.       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
  337. #else
  338.       if (type.length == 1) {
  339.          cond = LLVMBuildICmp(builder, op, a, b, "");
  340.          res = LLVMBuildSExt(builder, cond, int_vec_type, "");
  341.       }
  342.       else {
  343.          unsigned i;
  344.  
  345.          res = LLVMGetUndef(int_vec_type);
  346.  
  347.          if (gallivm_debug & GALLIVM_DEBUG_PERF) {
  348.             debug_printf("%s: using slow element-wise int"
  349.                          " vector comparison\n", __FUNCTION__);
  350.          }
  351.  
  352.          for(i = 0; i < type.length; ++i) {
  353.             LLVMValueRef index = lp_build_const_int32(gallivm, i);
  354.             cond = LLVMBuildICmp(builder, op,
  355.                                  LLVMBuildExtractElement(builder, a, index, ""),
  356.                                  LLVMBuildExtractElement(builder, b, index, ""),
  357.                                  "");
  358.             cond = LLVMBuildSelect(builder, cond,
  359.                                    LLVMConstExtractElement(ones, index),
  360.                                    LLVMConstExtractElement(zeros, index),
  361.                                    "");
  362.             res = LLVMBuildInsertElement(builder, res, cond, index, "");
  363.          }
  364.       }
  365. #endif
  366.    }
  367.  
  368.    return res;
  369. }
  370.  
  371.  
  372.  
  373. /**
  374.  * Build code to compare two values 'a' and 'b' using the given func.
  375.  * \param func  one of PIPE_FUNC_x
  376.  * The result values will be 0 for false or ~0 for true.
  377.  */
  378. LLVMValueRef
  379. lp_build_cmp(struct lp_build_context *bld,
  380.              unsigned func,
  381.              LLVMValueRef a,
  382.              LLVMValueRef b)
  383. {
  384.    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
  385. }
  386.  
  387.  
  388. /**
  389.  * Return (mask & a) | (~mask & b);
  390.  */
  391. LLVMValueRef
  392. lp_build_select_bitwise(struct lp_build_context *bld,
  393.                         LLVMValueRef mask,
  394.                         LLVMValueRef a,
  395.                         LLVMValueRef b)
  396. {
  397.    LLVMBuilderRef builder = bld->gallivm->builder;
  398.    struct lp_type type = bld->type;
  399.    LLVMValueRef res;
  400.  
  401.    assert(lp_check_value(type, a));
  402.    assert(lp_check_value(type, b));
  403.  
  404.    if (a == b) {
  405.       return a;
  406.    }
  407.  
  408.    if(type.floating) {
  409.       LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
  410.       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
  411.       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
  412.    }
  413.  
  414.    a = LLVMBuildAnd(builder, a, mask, "");
  415.  
  416.    /* This often gets translated to PANDN, but sometimes the NOT is
  417.     * pre-computed and stored in another constant. The best strategy depends
  418.     * on available registers, so it is not a big deal -- hopefully LLVM does
  419.     * the right decision attending the rest of the program.
  420.     */
  421.    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
  422.  
  423.    res = LLVMBuildOr(builder, a, b, "");
  424.  
  425.    if(type.floating) {
  426.       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
  427.       res = LLVMBuildBitCast(builder, res, vec_type, "");
  428.    }
  429.  
  430.    return res;
  431. }
  432.  
  433.  
  434. /**
  435.  * Return mask ? a : b;
  436.  *
  437.  * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
  438.  * will yield unpredictable results.
  439.  */
  440. LLVMValueRef
  441. lp_build_select(struct lp_build_context *bld,
  442.                 LLVMValueRef mask,
  443.                 LLVMValueRef a,
  444.                 LLVMValueRef b)
  445. {
  446.    LLVMBuilderRef builder = bld->gallivm->builder;
  447.    LLVMContextRef lc = bld->gallivm->context;
  448.    struct lp_type type = bld->type;
  449.    LLVMValueRef res;
  450.  
  451.    assert(lp_check_value(type, a));
  452.    assert(lp_check_value(type, b));
  453.  
  454.    if(a == b)
  455.       return a;
  456.  
  457.    if (type.length == 1) {
  458.       mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
  459.       res = LLVMBuildSelect(builder, mask, a, b, "");
  460.    }
  461.    else if (0) {
  462.       /* Generate a vector select.
  463.        *
  464.        * XXX: Using vector selects would avoid emitting intrinsics, but they aren't
  465.        * properly supported yet.
  466.        *
  467.        * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
  468.        *
  469.        * LLVM 3.0 includes experimental support provided the -promote-elements
  470.        * options is passed to LLVM's command line (e.g., via
  471.        * llvm::cl::ParseCommandLineOptions), but resulting code quality is much
  472.        * worse, probably because some optimization passes don't know how to
  473.        * handle vector selects.
  474.        *
  475.        * See also:
  476.        * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
  477.        */
  478.  
  479.       /* Convert the mask to a vector of booleans.
  480.        * XXX: There are two ways to do this. Decide what's best.
  481.        */
  482.       if (1) {
  483.          LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
  484.          mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
  485.       } else {
  486.          mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
  487.       }
  488.       res = LLVMBuildSelect(builder, mask, a, b, "");
  489.    }
  490.    else if (((util_cpu_caps.has_sse4_1 &&
  491.               type.width * type.length == 128) ||
  492.              (util_cpu_caps.has_avx &&
  493.               type.width * type.length == 256 && type.width >= 32)) &&
  494.             !LLVMIsConstant(a) &&
  495.             !LLVMIsConstant(b) &&
  496.             !LLVMIsConstant(mask)) {
  497.       const char *intrinsic;
  498.       LLVMTypeRef arg_type;
  499.       LLVMValueRef args[3];
  500.  
  501.       /*
  502.        *  There's only float blend in AVX but can just cast i32/i64
  503.        *  to float.
  504.        */
  505.       if (type.width * type.length == 256) {
  506.          if (type.width == 64) {
  507.            intrinsic = "llvm.x86.avx.blendv.pd.256";
  508.            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
  509.          }
  510.          else {
  511.             intrinsic = "llvm.x86.avx.blendv.ps.256";
  512.             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
  513.          }
  514.       }
  515.       else if (type.floating &&
  516.                type.width == 64) {
  517.          intrinsic = "llvm.x86.sse41.blendvpd";
  518.          arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
  519.       } else if (type.floating &&
  520.                  type.width == 32) {
  521.          intrinsic = "llvm.x86.sse41.blendvps";
  522.          arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
  523.       } else {
  524.          intrinsic = "llvm.x86.sse41.pblendvb";
  525.          arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
  526.       }
  527.  
  528.       if (arg_type != bld->int_vec_type) {
  529.          mask = LLVMBuildBitCast(builder, mask, arg_type, "");
  530.       }
  531.  
  532.       if (arg_type != bld->vec_type) {
  533.          a = LLVMBuildBitCast(builder, a, arg_type, "");
  534.          b = LLVMBuildBitCast(builder, b, arg_type, "");
  535.       }
  536.  
  537.       args[0] = b;
  538.       args[1] = a;
  539.       args[2] = mask;
  540.  
  541.       res = lp_build_intrinsic(builder, intrinsic,
  542.                                arg_type, args, Elements(args));
  543.  
  544.       if (arg_type != bld->vec_type) {
  545.          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
  546.       }
  547.    }
  548.    else {
  549.       res = lp_build_select_bitwise(bld, mask, a, b);
  550.    }
  551.  
  552.    return res;
  553. }
  554.  
  555.  
  556. /**
  557.  * Return mask ? a : b;
  558.  *
  559.  * mask is a TGSI_WRITEMASK_xxx.
  560.  */
  561. LLVMValueRef
  562. lp_build_select_aos(struct lp_build_context *bld,
  563.                     unsigned mask,
  564.                     LLVMValueRef a,
  565.                     LLVMValueRef b,
  566.                     unsigned num_channels)
  567. {
  568.    LLVMBuilderRef builder = bld->gallivm->builder;
  569.    const struct lp_type type = bld->type;
  570.    const unsigned n = type.length;
  571.    unsigned i, j;
  572.  
  573.    assert((mask & ~0xf) == 0);
  574.    assert(lp_check_value(type, a));
  575.    assert(lp_check_value(type, b));
  576.  
  577.    if(a == b)
  578.       return a;
  579.    if((mask & 0xf) == 0xf)
  580.       return a;
  581.    if((mask & 0xf) == 0x0)
  582.       return b;
  583.    if(a == bld->undef || b == bld->undef)
  584.       return bld->undef;
  585.  
  586.    /*
  587.     * There are two major ways of accomplishing this:
  588.     * - with a shuffle
  589.     * - with a select
  590.     *
  591.     * The flip between these is empirical and might need to be adjusted.
  592.     */
  593.    if (n <= 4) {
  594.       /*
  595.        * Shuffle.
  596.        */
  597.       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
  598.       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  599.  
  600.       for(j = 0; j < n; j += num_channels)
  601.          for(i = 0; i < num_channels; ++i)
  602.             shuffles[j + i] = LLVMConstInt(elem_type,
  603.                                            (mask & (1 << i) ? 0 : n) + j + i,
  604.                                            0);
  605.  
  606.       return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
  607.    }
  608.    else {
  609.       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
  610.       return lp_build_select(bld, mask_vec, a, b);
  611.    }
  612. }
  613.  
  614.  
  615. /**
  616.  * Return (scalar-cast)val ? true : false;
  617.  */
  618. LLVMValueRef
  619. lp_build_any_true_range(struct lp_build_context *bld,
  620.                         unsigned real_length,
  621.                         LLVMValueRef val)
  622. {
  623.    LLVMBuilderRef builder = bld->gallivm->builder;
  624.    LLVMTypeRef scalar_type;
  625.    LLVMTypeRef true_type;
  626.  
  627.    assert(real_length <= bld->type.length);
  628.  
  629.    true_type = LLVMIntTypeInContext(bld->gallivm->context,
  630.                                     bld->type.width * real_length);
  631.    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
  632.                                       bld->type.width * bld->type.length);
  633.    val = LLVMBuildBitCast(builder, val, scalar_type, "");
  634.    /*
  635.     * We're using always native types so we can use intrinsics.
  636.     * However, if we don't do per-element calculations, we must ensure
  637.     * the excess elements aren't used since they may contain garbage.
  638.     */
  639.    if (real_length < bld->type.length) {
  640.       val = LLVMBuildTrunc(builder, val, true_type, "");
  641.    }
  642.    return LLVMBuildICmp(builder, LLVMIntNE,
  643.                         val, LLVMConstNull(true_type), "");
  644. }
  645.