Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file opt_minmax.cpp
  26.  *
  27.  * Drop operands from an expression tree of only min/max operations if they
  28.  * can be proven to not contribute to the final result.
  29.  *
  30.  * The algorithm is similar to alpha-beta pruning on a minmax search.
  31.  */
  32.  
  33. #include "ir.h"
  34. #include "ir_visitor.h"
  35. #include "ir_rvalue_visitor.h"
  36. #include "ir_optimization.h"
  37. #include "ir_builder.h"
  38. #include "program/prog_instruction.h"
  39. #include "glsl_types.h"
  40. #include "main/macros.h"
  41.  
  42. using namespace ir_builder;
  43.  
  44. namespace {
  45.  
  46. enum compare_components_result {
  47.    LESS,
  48.    LESS_OR_EQUAL,
  49.    EQUAL,
  50.    GREATER_OR_EQUAL,
  51.    GREATER,
  52.    MIXED
  53. };
  54.  
  55. class minmax_range {
  56. public:
  57.    minmax_range(ir_constant *low = NULL, ir_constant *high = NULL)
  58.    {
  59.       this->low = low;
  60.       this->high = high;
  61.    }
  62.  
  63.    /* low is the lower limit of the range, high is the higher limit. NULL on
  64.     * low means negative infinity (unlimited) and on high positive infinity
  65.     * (unlimited). Because of the two interpretations of the value NULL,
  66.     * arbitrary comparison between ir_constants is impossible.
  67.     */
  68.    ir_constant *low;
  69.    ir_constant *high;
  70. };
  71.  
  72. class ir_minmax_visitor : public ir_rvalue_enter_visitor {
  73. public:
  74.    ir_minmax_visitor()
  75.       : progress(false)
  76.    {
  77.    }
  78.  
  79.    ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange);
  80.  
  81.    void handle_rvalue(ir_rvalue **rvalue);
  82.  
  83.    bool progress;
  84. };
  85.  
  86. /*
  87.  * Returns LESS if all vector components of `a' are strictly lower than of `b',
  88.  * GREATER if all vector components of `a' are strictly greater than of `b',
  89.  * MIXED if some vector components of `a' are strictly lower than of `b' while
  90.  * others are strictly greater, or EQUAL otherwise.
  91.  */
  92. static enum compare_components_result
  93. compare_components(ir_constant *a, ir_constant *b)
  94. {
  95.    assert(a != NULL);
  96.    assert(b != NULL);
  97.  
  98.    assert(a->type->base_type == b->type->base_type);
  99.  
  100.    unsigned a_inc = a->type->is_scalar() ? 0 : 1;
  101.    unsigned b_inc = b->type->is_scalar() ? 0 : 1;
  102.    unsigned components = MAX2(a->type->components(), b->type->components());
  103.  
  104.    bool foundless = false;
  105.    bool foundgreater = false;
  106.    bool foundequal = false;
  107.  
  108.    for (unsigned i = 0, c0 = 0, c1 = 0;
  109.         i < components;
  110.         c0 += a_inc, c1 += b_inc, ++i) {
  111.       switch (a->type->base_type) {
  112.       case GLSL_TYPE_UINT:
  113.          if (a->value.u[c0] < b->value.u[c1])
  114.             foundless = true;
  115.          else if (a->value.u[c0] > b->value.u[c1])
  116.             foundgreater = true;
  117.          else
  118.             foundequal = true;
  119.          break;
  120.       case GLSL_TYPE_INT:
  121.          if (a->value.i[c0] < b->value.i[c1])
  122.             foundless = true;
  123.          else if (a->value.i[c0] > b->value.i[c1])
  124.             foundgreater = true;
  125.          else
  126.             foundequal = true;
  127.          break;
  128.       case GLSL_TYPE_FLOAT:
  129.          if (a->value.f[c0] < b->value.f[c1])
  130.             foundless = true;
  131.          else if (a->value.f[c0] > b->value.f[c1])
  132.             foundgreater = true;
  133.          else
  134.             foundequal = true;
  135.          break;
  136.       case GLSL_TYPE_DOUBLE:
  137.          if (a->value.d[c0] < b->value.d[c1])
  138.             foundless = true;
  139.          else if (a->value.d[c0] > b->value.d[c1])
  140.             foundgreater = true;
  141.          else
  142.             foundequal = true;
  143.          break;
  144.       default:
  145.          unreachable("not reached");
  146.       }
  147.    }
  148.  
  149.    if (foundless && foundgreater) {
  150.       /* Some components are strictly lower, others are strictly greater */
  151.       return MIXED;
  152.    }
  153.  
  154.    if (foundequal) {
  155.        /* It is not mixed, but it is not strictly lower or greater */
  156.       if (foundless)
  157.          return LESS_OR_EQUAL;
  158.       if (foundgreater)
  159.          return GREATER_OR_EQUAL;
  160.       return EQUAL;
  161.    }
  162.  
  163.    /* All components are strictly lower or strictly greater */
  164.    return foundless ? LESS : GREATER;
  165. }
  166.  
  167. static ir_constant *
  168. combine_constant(bool ismin, ir_constant *a, ir_constant *b)
  169. {
  170.    void *mem_ctx = ralloc_parent(a);
  171.    ir_constant *c = a->clone(mem_ctx, NULL);
  172.    for (unsigned i = 0; i < c->type->components(); i++) {
  173.       switch (c->type->base_type) {
  174.       case GLSL_TYPE_UINT:
  175.          if ((ismin && b->value.u[i] < c->value.u[i]) ||
  176.              (!ismin && b->value.u[i] > c->value.u[i]))
  177.             c->value.u[i] = b->value.u[i];
  178.          break;
  179.       case GLSL_TYPE_INT:
  180.          if ((ismin && b->value.i[i] < c->value.i[i]) ||
  181.              (!ismin && b->value.i[i] > c->value.i[i]))
  182.             c->value.i[i] = b->value.i[i];
  183.          break;
  184.       case GLSL_TYPE_FLOAT:
  185.          if ((ismin && b->value.f[i] < c->value.f[i]) ||
  186.              (!ismin && b->value.f[i] > c->value.f[i]))
  187.             c->value.f[i] = b->value.f[i];
  188.          break;
  189.       case GLSL_TYPE_DOUBLE:
  190.          if ((ismin && b->value.d[i] < c->value.d[i]) ||
  191.              (!ismin && b->value.d[i] > c->value.d[i]))
  192.             c->value.d[i] = b->value.d[i];
  193.          break;
  194.       default:
  195.          assert(!"not reached");
  196.       }
  197.    }
  198.    return c;
  199. }
  200.  
  201. static ir_constant *
  202. smaller_constant(ir_constant *a, ir_constant *b)
  203. {
  204.    assert(a != NULL);
  205.    assert(b != NULL);
  206.  
  207.    enum compare_components_result ret = compare_components(a, b);
  208.    if (ret == MIXED)
  209.       return combine_constant(true, a, b);
  210.    else if (ret < EQUAL)
  211.       return a;
  212.    else
  213.       return b;
  214. }
  215.  
  216. static ir_constant *
  217. larger_constant(ir_constant *a, ir_constant *b)
  218. {
  219.    assert(a != NULL);
  220.    assert(b != NULL);
  221.  
  222.    enum compare_components_result ret = compare_components(a, b);
  223.    if (ret == MIXED)
  224.       return combine_constant(false, a, b);
  225.    else if (ret < EQUAL)
  226.       return b;
  227.    else
  228.       return a;
  229. }
  230.  
  231. /* Combines two ranges by doing an element-wise min() / max() depending on the
  232.  * operation.
  233.  */
  234. static minmax_range
  235. combine_range(minmax_range r0, minmax_range r1, bool ismin)
  236. {
  237.    minmax_range ret;
  238.  
  239.    if (!r0.low) {
  240.       ret.low = ismin ? r0.low : r1.low;
  241.    } else if (!r1.low) {
  242.       ret.low = ismin ? r1.low : r0.low;
  243.    } else {
  244.       ret.low = ismin ? smaller_constant(r0.low, r1.low) :
  245.          larger_constant(r0.low, r1.low);
  246.    }
  247.  
  248.    if (!r0.high) {
  249.       ret.high = ismin ? r1.high : r0.high;
  250.    } else if (!r1.high) {
  251.       ret.high = ismin ? r0.high : r1.high;
  252.    } else {
  253.       ret.high = ismin ? smaller_constant(r0.high, r1.high) :
  254.          larger_constant(r0.high, r1.high);
  255.    }
  256.  
  257.    return ret;
  258. }
  259.  
  260. /* Returns a range so that lower limit is the larger of the two lower limits,
  261.  * and higher limit is the smaller of the two higher limits.
  262.  */
  263. static minmax_range
  264. range_intersection(minmax_range r0, minmax_range r1)
  265. {
  266.    minmax_range ret;
  267.  
  268.    if (!r0.low)
  269.       ret.low = r1.low;
  270.    else if (!r1.low)
  271.       ret.low = r0.low;
  272.    else
  273.       ret.low = larger_constant(r0.low, r1.low);
  274.  
  275.    if (!r0.high)
  276.       ret.high = r1.high;
  277.    else if (!r1.high)
  278.       ret.high = r0.high;
  279.    else
  280.       ret.high = smaller_constant(r0.high, r1.high);
  281.  
  282.    return ret;
  283. }
  284.  
  285. static minmax_range
  286. get_range(ir_rvalue *rval)
  287. {
  288.    ir_expression *expr = rval->as_expression();
  289.    if (expr && (expr->operation == ir_binop_min ||
  290.                 expr->operation == ir_binop_max)) {
  291.       minmax_range r0 = get_range(expr->operands[0]);
  292.       minmax_range r1 = get_range(expr->operands[1]);
  293.       return combine_range(r0, r1, expr->operation == ir_binop_min);
  294.    }
  295.  
  296.    ir_constant *c = rval->as_constant();
  297.    if (c) {
  298.       return minmax_range(c, c);
  299.    }
  300.  
  301.    return minmax_range();
  302. }
  303.  
  304. /**
  305.  * Prunes a min/max expression considering the base range of the parent
  306.  * min/max expression.
  307.  *
  308.  * @param baserange the range that the parents of this min/max expression
  309.  * in the min/max tree will clamp its value to.
  310.  */
  311. ir_rvalue *
  312. ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
  313. {
  314.    assert(expr->operation == ir_binop_min ||
  315.           expr->operation == ir_binop_max);
  316.  
  317.    bool ismin = expr->operation == ir_binop_min;
  318.    minmax_range limits[2];
  319.  
  320.    /* Recurse to get the ranges for each of the subtrees of this
  321.     * expression. We need to do this as a separate step because we need to
  322.     * know the ranges of each of the subtrees before we prune either one.
  323.     * Consider something like this:
  324.     *
  325.     *        max
  326.     *     /       \
  327.     *    max     max
  328.     *   /   \   /   \
  329.     *  3    a   b    2
  330.     *
  331.     * We would like to prune away the max on the bottom-right, but to do so
  332.     * we need to know the range of the expression on the left beforehand,
  333.     * and there's no guarantee that we will visit either subtree in a
  334.     * particular order.
  335.     */
  336.    for (unsigned i = 0; i < 2; ++i)
  337.       limits[i] = get_range(expr->operands[i]);
  338.  
  339.    for (unsigned i = 0; i < 2; ++i) {
  340.       bool is_redundant = false;
  341.  
  342.       enum compare_components_result cr = LESS;
  343.       if (ismin) {
  344.          /* If this operand will always be greater than the other one, it's
  345.           * redundant.
  346.           */
  347.          if (limits[i].low && limits[1 - i].high) {
  348.                cr = compare_components(limits[i].low, limits[1 - i].high);
  349.             if (cr >= EQUAL && cr != MIXED)
  350.                is_redundant = true;
  351.          }
  352.          /* If this operand is always greater than baserange, then even if
  353.           * it's smaller than the other one it'll get clamped, so it's
  354.           * redundant.
  355.           */
  356.          if (!is_redundant && limits[i].low && baserange.high) {
  357.             cr = compare_components(limits[i].low, baserange.high);
  358.             if (cr >= EQUAL && cr != MIXED)
  359.                is_redundant = true;
  360.          }
  361.       } else {
  362.          /* If this operand will always be lower than the other one, it's
  363.           * redundant.
  364.           */
  365.          if (limits[i].high && limits[1 - i].low) {
  366.             cr = compare_components(limits[i].high, limits[1 - i].low);
  367.             if (cr <= EQUAL)
  368.                is_redundant = true;
  369.          }
  370.          /* If this operand is always lower than baserange, then even if
  371.           * it's greater than the other one it'll get clamped, so it's
  372.           * redundant.
  373.           */
  374.          if (!is_redundant && limits[i].high && baserange.low) {
  375.             cr = compare_components(limits[i].high, baserange.low);
  376.             if (cr <= EQUAL)
  377.                is_redundant = true;
  378.          }
  379.       }
  380.  
  381.       if (is_redundant) {
  382.          progress = true;
  383.  
  384.          /* Recurse if necessary. */
  385.          ir_expression *op_expr = expr->operands[1 - i]->as_expression();
  386.          if (op_expr && (op_expr->operation == ir_binop_min ||
  387.                          op_expr->operation == ir_binop_max)) {
  388.             return prune_expression(op_expr, baserange);
  389.          }
  390.  
  391.          return expr->operands[1 - i];
  392.       } else if (cr == MIXED) {
  393.          /* If we have mixed vector operands, we can try to resolve the minmax
  394.           * expression by doing a component-wise minmax:
  395.           *
  396.           *             min                          min
  397.           *           /    \                       /    \
  398.           *         min     a       ===>        [1,1]    a
  399.           *       /    \
  400.           *    [1,3]   [3,1]
  401.           *
  402.           */
  403.          ir_constant *a = expr->operands[0]->as_constant();
  404.          ir_constant *b = expr->operands[1]->as_constant();
  405.          if (a && b)
  406.             return combine_constant(ismin, a, b);
  407.       }
  408.    }
  409.  
  410.    /* Now recurse to operands giving them the proper baserange. The baserange
  411.     * to pass is the intersection of our baserange and the other operand's
  412.     * limit with one of the ranges unlimited. If we can't compute a valid
  413.     * intersection, we use the current baserange.
  414.     */
  415.    for (unsigned i = 0; i < 2; ++i) {
  416.       ir_expression *op_expr = expr->operands[i]->as_expression();
  417.       if (op_expr && (op_expr->operation == ir_binop_min ||
  418.                       op_expr->operation == ir_binop_max)) {
  419.          /* We can only compute a new baserange for this operand if we managed
  420.           * to compute a valid range for the other operand.
  421.           */
  422.          if (ismin)
  423.             limits[1 - i].low = NULL;
  424.          else
  425.             limits[1 - i].high = NULL;
  426.          minmax_range base = range_intersection(limits[1 - i], baserange);
  427.          expr->operands[i] = prune_expression(op_expr, base);
  428.       }
  429.    }
  430.  
  431.    /* If we got here we could not discard any of the operands of the minmax
  432.     * expression, but we can still try to resolve the expression if both
  433.     * operands are constant. We do this after the loop above, to make sure
  434.     * that if our operands are minmax expressions we have tried to prune them
  435.     * first (hopefully reducing them to constants).
  436.     */
  437.    ir_constant *a = expr->operands[0]->as_constant();
  438.    ir_constant *b = expr->operands[1]->as_constant();
  439.    if (a && b)
  440.       return combine_constant(ismin, a, b);
  441.  
  442.    return expr;
  443. }
  444.  
  445. static ir_rvalue *
  446. swizzle_if_required(ir_expression *expr, ir_rvalue *rval)
  447. {
  448.    if (expr->type->is_vector() && rval->type->is_scalar()) {
  449.       return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements);
  450.    } else {
  451.       return rval;
  452.    }
  453. }
  454.  
  455. void
  456. ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue)
  457. {
  458.    if (!*rvalue)
  459.       return;
  460.  
  461.    ir_expression *expr = (*rvalue)->as_expression();
  462.    if (!expr || (expr->operation != ir_binop_min &&
  463.                  expr->operation != ir_binop_max))
  464.       return;
  465.  
  466.    ir_rvalue *new_rvalue = prune_expression(expr, minmax_range());
  467.    if (new_rvalue == *rvalue)
  468.       return;
  469.  
  470.    /* If the expression type is a vector and the optimization leaves a scalar
  471.     * as the result, we need to turn it into a vector.
  472.     */
  473.    *rvalue = swizzle_if_required(expr, new_rvalue);
  474.  
  475.    progress = true;
  476. }
  477.  
  478. }
  479.  
  480. bool
  481. do_minmax_prune(exec_list *instructions)
  482. {
  483.    ir_minmax_visitor v;
  484.  
  485.    visit_list_elements(&v, instructions);
  486.  
  487.    return v.progress;
  488. }
  489.