Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * @file
  30.  * Helper functions for logical operations.
  31.  *
  32.  * @author Jose Fonseca <jfonseca@vmware.com>
  33.  */
  34.  
  35.  
  36. #include "util/u_cpu_detect.h"
  37. #include "util/u_memory.h"
  38. #include "util/u_debug.h"
  39.  
  40. #include "lp_bld_type.h"
  41. #include "lp_bld_const.h"
  42. #include "lp_bld_init.h"
  43. #include "lp_bld_intr.h"
  44. #include "lp_bld_debug.h"
  45. #include "lp_bld_logic.h"
  46.  
  47.  
  48. /*
  49.  * XXX
  50.  *
  51.  * Selection with vector conditional like
  52.  *
  53.  *    select <4 x i1> %C, %A, %B
  54.  *
  55.  * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
  56.  * supported on some backends (x86) starting with llvm 3.1.
  57.  *
  58.  * Expanding the boolean vector to full SIMD register width, as in
  59.  *
  60.  *    sext <4 x i1> %C to <4 x i32>
  61.  *
  62.  * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
  63.  * it causes assertion failures in LLVM 2.6. It appears to work correctly on
  64.  * LLVM 2.7.
  65.  */
  66.  
  67.  
  68. /**
  69.  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  70.  * \param func  one of PIPE_FUNC_x
  71.  * If the ordered argument is true the function will use LLVM's ordered
  72.  * comparisons, otherwise unordered comparisons will be used.
  73.  * The result values will be 0 for false or ~0 for true.
  74.  */
  75. static LLVMValueRef
  76. lp_build_compare_ext(struct gallivm_state *gallivm,
  77.                      const struct lp_type type,
  78.                      unsigned func,
  79.                      LLVMValueRef a,
  80.                      LLVMValueRef b,
  81.                      boolean ordered)
  82. {
  83.    LLVMBuilderRef builder = gallivm->builder;
  84.    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
  85.    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
  86.    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
  87.    LLVMValueRef cond;
  88.    LLVMValueRef res;
  89.  
  90.    assert(func >= PIPE_FUNC_NEVER);
  91.    assert(func <= PIPE_FUNC_ALWAYS);
  92.    assert(lp_check_value(type, a));
  93.    assert(lp_check_value(type, b));
  94.  
  95.    if(func == PIPE_FUNC_NEVER)
  96.       return zeros;
  97.    if(func == PIPE_FUNC_ALWAYS)
  98.       return ones;
  99.  
  100.    if(type.floating) {
  101.       LLVMRealPredicate op;
  102.       switch(func) {
  103.       case PIPE_FUNC_EQUAL:
  104.          op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
  105.          break;
  106.       case PIPE_FUNC_NOTEQUAL:
  107.          op = ordered ? LLVMRealONE : LLVMRealUNE;
  108.          break;
  109.       case PIPE_FUNC_LESS:
  110.          op = ordered ? LLVMRealOLT : LLVMRealULT;
  111.          break;
  112.       case PIPE_FUNC_LEQUAL:
  113.          op = ordered ? LLVMRealOLE : LLVMRealULE;
  114.          break;
  115.       case PIPE_FUNC_GREATER:
  116.          op = ordered ? LLVMRealOGT : LLVMRealUGT;
  117.          break;
  118.       case PIPE_FUNC_GEQUAL:
  119.          op = ordered ? LLVMRealOGE : LLVMRealUGE;
  120.          break;
  121.       default:
  122.          assert(0);
  123.          return lp_build_undef(gallivm, type);
  124.       }
  125.  
  126.       cond = LLVMBuildFCmp(builder, op, a, b, "");
  127.       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
  128.    }
  129.    else {
  130.       LLVMIntPredicate op;
  131.       switch(func) {
  132.       case PIPE_FUNC_EQUAL:
  133.          op = LLVMIntEQ;
  134.          break;
  135.       case PIPE_FUNC_NOTEQUAL:
  136.          op = LLVMIntNE;
  137.          break;
  138.       case PIPE_FUNC_LESS:
  139.          op = type.sign ? LLVMIntSLT : LLVMIntULT;
  140.          break;
  141.       case PIPE_FUNC_LEQUAL:
  142.          op = type.sign ? LLVMIntSLE : LLVMIntULE;
  143.          break;
  144.       case PIPE_FUNC_GREATER:
  145.          op = type.sign ? LLVMIntSGT : LLVMIntUGT;
  146.          break;
  147.       case PIPE_FUNC_GEQUAL:
  148.          op = type.sign ? LLVMIntSGE : LLVMIntUGE;
  149.          break;
  150.       default:
  151.          assert(0);
  152.          return lp_build_undef(gallivm, type);
  153.       }
  154.  
  155.       cond = LLVMBuildICmp(builder, op, a, b, "");
  156.       res = LLVMBuildSExt(builder, cond, int_vec_type, "");
  157.    }
  158.  
  159.    return res;
  160. }
  161.  
  162. /**
  163.  * Build code to compare two values 'a' and 'b' of 'type' using the given func.
  164.  * \param func  one of PIPE_FUNC_x
  165.  * The result values will be 0 for false or ~0 for true.
  166.  */
  167. LLVMValueRef
  168. lp_build_compare(struct gallivm_state *gallivm,
  169.                  const struct lp_type type,
  170.                  unsigned func,
  171.                  LLVMValueRef a,
  172.                  LLVMValueRef b)
  173. {
  174.    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
  175.    LLVMValueRef zeros = LLVMConstNull(int_vec_type);
  176.    LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
  177.  
  178.    assert(func >= PIPE_FUNC_NEVER);
  179.    assert(func <= PIPE_FUNC_ALWAYS);
  180.    assert(lp_check_value(type, a));
  181.    assert(lp_check_value(type, b));
  182.  
  183.    if(func == PIPE_FUNC_NEVER)
  184.       return zeros;
  185.    if(func == PIPE_FUNC_ALWAYS)
  186.       return ones;
  187.  
  188. #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
  189.    /*
  190.     * There are no unsigned integer comparison instructions in SSE.
  191.     */
  192.  
  193.    if (!type.floating && !type.sign &&
  194.        type.width * type.length == 128 &&
  195.        util_cpu_caps.has_sse2 &&
  196.        (func == PIPE_FUNC_LESS ||
  197.         func == PIPE_FUNC_LEQUAL ||
  198.         func == PIPE_FUNC_GREATER ||
  199.         func == PIPE_FUNC_GEQUAL) &&
  200.        (gallivm_debug & GALLIVM_DEBUG_PERF)) {
  201.          debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
  202.                       __FUNCTION__, type.length, type.width);
  203.    }
  204. #endif
  205.  
  206.    return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
  207. }
  208.  
  209. /**
  210.  * Build code to compare two values 'a' and 'b' using the given func.
  211.  * \param func  one of PIPE_FUNC_x
  212.  * If the operands are floating point numbers, the function will use
  213.  * ordered comparison which means that it will return true if both
  214.  * operands are not a NaN and the specified condition evaluates to true.
  215.  * The result values will be 0 for false or ~0 for true.
  216.  */
  217. LLVMValueRef
  218. lp_build_cmp_ordered(struct lp_build_context *bld,
  219.                      unsigned func,
  220.                      LLVMValueRef a,
  221.                      LLVMValueRef b)
  222. {
  223.    return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
  224. }
  225.  
  226. /**
  227.  * Build code to compare two values 'a' and 'b' using the given func.
  228.  * \param func  one of PIPE_FUNC_x
  229.  * If the operands are floating point numbers, the function will use
  230.  * unordered comparison which means that it will return true if either
  231.  * operand is a NaN or the specified condition evaluates to true.
  232.  * The result values will be 0 for false or ~0 for true.
  233.  */
  234. LLVMValueRef
  235. lp_build_cmp(struct lp_build_context *bld,
  236.              unsigned func,
  237.              LLVMValueRef a,
  238.              LLVMValueRef b)
  239. {
  240.    return lp_build_compare(bld->gallivm, bld->type, func, a, b);
  241. }
  242.  
  243.  
  244. /**
  245.  * Return (mask & a) | (~mask & b);
  246.  */
  247. LLVMValueRef
  248. lp_build_select_bitwise(struct lp_build_context *bld,
  249.                         LLVMValueRef mask,
  250.                         LLVMValueRef a,
  251.                         LLVMValueRef b)
  252. {
  253.    LLVMBuilderRef builder = bld->gallivm->builder;
  254.    struct lp_type type = bld->type;
  255.    LLVMValueRef res;
  256.  
  257.    assert(lp_check_value(type, a));
  258.    assert(lp_check_value(type, b));
  259.  
  260.    if (a == b) {
  261.       return a;
  262.    }
  263.  
  264.    if(type.floating) {
  265.       LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
  266.       a = LLVMBuildBitCast(builder, a, int_vec_type, "");
  267.       b = LLVMBuildBitCast(builder, b, int_vec_type, "");
  268.    }
  269.  
  270.    a = LLVMBuildAnd(builder, a, mask, "");
  271.  
  272.    /* This often gets translated to PANDN, but sometimes the NOT is
  273.     * pre-computed and stored in another constant. The best strategy depends
  274.     * on available registers, so it is not a big deal -- hopefully LLVM does
  275.     * the right decision attending the rest of the program.
  276.     */
  277.    b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
  278.  
  279.    res = LLVMBuildOr(builder, a, b, "");
  280.  
  281.    if(type.floating) {
  282.       LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
  283.       res = LLVMBuildBitCast(builder, res, vec_type, "");
  284.    }
  285.  
  286.    return res;
  287. }
  288.  
  289.  
  290. /**
  291.  * Return mask ? a : b;
  292.  *
  293.  * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
  294.  * will yield unpredictable results.
  295.  */
  296. LLVMValueRef
  297. lp_build_select(struct lp_build_context *bld,
  298.                 LLVMValueRef mask,
  299.                 LLVMValueRef a,
  300.                 LLVMValueRef b)
  301. {
  302.    LLVMBuilderRef builder = bld->gallivm->builder;
  303.    LLVMContextRef lc = bld->gallivm->context;
  304.    struct lp_type type = bld->type;
  305.    LLVMValueRef res;
  306.  
  307.    assert(lp_check_value(type, a));
  308.    assert(lp_check_value(type, b));
  309.  
  310.    if(a == b)
  311.       return a;
  312.  
  313.    if (type.length == 1) {
  314.       mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
  315.       res = LLVMBuildSelect(builder, mask, a, b, "");
  316.    }
  317.    else if (0) {
  318.       /* Generate a vector select.
  319.        *
  320.        * XXX: Using vector selects would avoid emitting intrinsics, but they aren't
  321.        * properly supported yet.
  322.        *
  323.        * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
  324.        *
  325.        * LLVM 3.0 includes experimental support provided the -promote-elements
  326.        * options is passed to LLVM's command line (e.g., via
  327.        * llvm::cl::ParseCommandLineOptions), but resulting code quality is much
  328.        * worse, probably because some optimization passes don't know how to
  329.        * handle vector selects.
  330.        *
  331.        * See also:
  332.        * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
  333.        */
  334.  
  335.       /* Convert the mask to a vector of booleans.
  336.        * XXX: There are two ways to do this. Decide what's best.
  337.        */
  338.       if (1) {
  339.          LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
  340.          mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
  341.       } else {
  342.          mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
  343.       }
  344.       res = LLVMBuildSelect(builder, mask, a, b, "");
  345.    }
  346.    else if (((util_cpu_caps.has_sse4_1 &&
  347.               type.width * type.length == 128) ||
  348.              (util_cpu_caps.has_avx &&
  349.               type.width * type.length == 256 && type.width >= 32)) &&
  350.             !LLVMIsConstant(a) &&
  351.             !LLVMIsConstant(b) &&
  352.             !LLVMIsConstant(mask)) {
  353.       const char *intrinsic;
  354.       LLVMTypeRef arg_type;
  355.       LLVMValueRef args[3];
  356.  
  357.       /*
  358.        *  There's only float blend in AVX but can just cast i32/i64
  359.        *  to float.
  360.        */
  361.       if (type.width * type.length == 256) {
  362.          if (type.width == 64) {
  363.            intrinsic = "llvm.x86.avx.blendv.pd.256";
  364.            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
  365.          }
  366.          else {
  367.             intrinsic = "llvm.x86.avx.blendv.ps.256";
  368.             arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
  369.          }
  370.       }
  371.       else if (type.floating &&
  372.                type.width == 64) {
  373.          intrinsic = "llvm.x86.sse41.blendvpd";
  374.          arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
  375.       } else if (type.floating &&
  376.                  type.width == 32) {
  377.          intrinsic = "llvm.x86.sse41.blendvps";
  378.          arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
  379.       } else {
  380.          intrinsic = "llvm.x86.sse41.pblendvb";
  381.          arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
  382.       }
  383.  
  384.       if (arg_type != bld->int_vec_type) {
  385.          mask = LLVMBuildBitCast(builder, mask, arg_type, "");
  386.       }
  387.  
  388.       if (arg_type != bld->vec_type) {
  389.          a = LLVMBuildBitCast(builder, a, arg_type, "");
  390.          b = LLVMBuildBitCast(builder, b, arg_type, "");
  391.       }
  392.  
  393.       args[0] = b;
  394.       args[1] = a;
  395.       args[2] = mask;
  396.  
  397.       res = lp_build_intrinsic(builder, intrinsic,
  398.                                arg_type, args, Elements(args));
  399.  
  400.       if (arg_type != bld->vec_type) {
  401.          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
  402.       }
  403.    }
  404.    else {
  405.       res = lp_build_select_bitwise(bld, mask, a, b);
  406.    }
  407.  
  408.    return res;
  409. }
  410.  
  411.  
  412. /**
  413.  * Return mask ? a : b;
  414.  *
  415.  * mask is a TGSI_WRITEMASK_xxx.
  416.  */
  417. LLVMValueRef
  418. lp_build_select_aos(struct lp_build_context *bld,
  419.                     unsigned mask,
  420.                     LLVMValueRef a,
  421.                     LLVMValueRef b,
  422.                     unsigned num_channels)
  423. {
  424.    LLVMBuilderRef builder = bld->gallivm->builder;
  425.    const struct lp_type type = bld->type;
  426.    const unsigned n = type.length;
  427.    unsigned i, j;
  428.  
  429.    assert((mask & ~0xf) == 0);
  430.    assert(lp_check_value(type, a));
  431.    assert(lp_check_value(type, b));
  432.  
  433.    if(a == b)
  434.       return a;
  435.    if((mask & 0xf) == 0xf)
  436.       return a;
  437.    if((mask & 0xf) == 0x0)
  438.       return b;
  439.    if(a == bld->undef || b == bld->undef)
  440.       return bld->undef;
  441.  
  442.    /*
  443.     * There are two major ways of accomplishing this:
  444.     * - with a shuffle
  445.     * - with a select
  446.     *
  447.     * The flip between these is empirical and might need to be adjusted.
  448.     */
  449.    if (n <= 4) {
  450.       /*
  451.        * Shuffle.
  452.        */
  453.       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
  454.       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  455.  
  456.       for(j = 0; j < n; j += num_channels)
  457.          for(i = 0; i < num_channels; ++i)
  458.             shuffles[j + i] = LLVMConstInt(elem_type,
  459.                                            (mask & (1 << i) ? 0 : n) + j + i,
  460.                                            0);
  461.  
  462.       return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
  463.    }
  464.    else {
  465.       LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
  466.       return lp_build_select(bld, mask_vec, a, b);
  467.    }
  468. }
  469.  
  470.  
  471. /**
  472.  * Return (scalar-cast)val ? true : false;
  473.  */
  474. LLVMValueRef
  475. lp_build_any_true_range(struct lp_build_context *bld,
  476.                         unsigned real_length,
  477.                         LLVMValueRef val)
  478. {
  479.    LLVMBuilderRef builder = bld->gallivm->builder;
  480.    LLVMTypeRef scalar_type;
  481.    LLVMTypeRef true_type;
  482.  
  483.    assert(real_length <= bld->type.length);
  484.  
  485.    true_type = LLVMIntTypeInContext(bld->gallivm->context,
  486.                                     bld->type.width * real_length);
  487.    scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
  488.                                       bld->type.width * bld->type.length);
  489.    val = LLVMBuildBitCast(builder, val, scalar_type, "");
  490.    /*
  491.     * We're using always native types so we can use intrinsics.
  492.     * However, if we don't do per-element calculations, we must ensure
  493.     * the excess elements aren't used since they may contain garbage.
  494.     */
  495.    if (real_length < bld->type.length) {
  496.       val = LLVMBuildTrunc(builder, val, true_type, "");
  497.    }
  498.    return LLVMBuildICmp(builder, LLVMIntNE,
  499.                         val, LLVMConstNull(true_type), "");
  500. }
  501.