Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file opt_rebalance_tree.cpp
  26.  *
  27.  * Rebalances a reduction expression tree.
  28.  *
  29.  * For reduction operations (e.g., x + y + z + w) we generate an expression
  30.  * tree like
  31.  *
  32.  *        +
  33.  *       / \
  34.  *      +   w
  35.  *     / \
  36.  *    +   z
  37.  *   / \
  38.  *  x   y
  39.  *
  40.  * which we can rebalance into
  41.  *
  42.  *       +
  43.  *      / \
  44.  *     /   \
  45.  *    +     +
  46.  *   / \   / \
  47.  *  x   y z   w
  48.  *
  49.  * to get a better instruction scheduling.
  50.  *
  51.  * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
  52.  * and Bette L. Warren.
  53.  *
  54.  * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
  55.  * explanation of the of the tree_to_vine() (rightward rotation) and
  56.  * vine_to_tree() (leftward rotation) algorithms.
  57.  */
  58.  
  59. #include "ir.h"
  60. #include "ir_visitor.h"
  61. #include "ir_rvalue_visitor.h"
  62. #include "ir_optimization.h"
  63. #include "main/macros.h" /* for MAX2 */
  64.  
  65. /* The DSW algorithm generates a degenerate tree (really, a linked list) in
  66.  * tree_to_vine(). We'd rather not leave a binary expression with only one
  67.  * operand, so trivial modifications (the ternary operators below) are needed
  68.  * to ensure that we only rotate around the ir_expression nodes of the tree.
  69.  */
  70. static unsigned
  71. tree_to_vine(ir_expression *root)
  72. {
  73.    unsigned size = 0;
  74.    ir_rvalue *vine_tail = root;
  75.    ir_rvalue *remainder = root->operands[1];
  76.  
  77.    while (remainder != NULL) {
  78.       ir_expression *remainder_temp = remainder->as_expression();
  79.       ir_expression *remainder_left = remainder_temp ?
  80.          remainder_temp->operands[0]->as_expression() : NULL;
  81.  
  82.       if (remainder_left == NULL) {
  83.          /* move vine_tail down one */
  84.          vine_tail = remainder;
  85.          remainder = remainder->as_expression() ?
  86.             ((ir_expression *)remainder)->operands[1] : NULL;
  87.          size++;
  88.       } else {
  89.          /* rotate */
  90.          ir_expression *tempptr = remainder_left;
  91.          ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
  92.          tempptr->operands[1] = remainder;
  93.          remainder = tempptr;
  94.          ((ir_expression *)vine_tail)->operands[1] = tempptr;
  95.       }
  96.    }
  97.  
  98.    return size;
  99. }
  100.  
  101. static void
  102. compression(ir_expression *root, unsigned count)
  103. {
  104.    ir_expression *scanner = root;
  105.  
  106.    for (unsigned i = 0; i < count; i++) {
  107.       ir_expression *child = (ir_expression *)scanner->operands[1];
  108.       scanner->operands[1] = child->operands[1];
  109.       scanner = (ir_expression *)scanner->operands[1];
  110.       child->operands[1] = scanner->operands[0];
  111.       scanner->operands[0] = child;
  112.    }
  113. }
  114.  
  115. static void
  116. vine_to_tree(ir_expression *root, unsigned size)
  117. {
  118.    int n = size - 1;
  119.    for (int m = n / 2; m > 0; m = n / 2) {
  120.       compression(root, m);
  121.       n -= m + 1;
  122.    }
  123. }
  124.  
  125. namespace {
  126.  
  127. class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
  128. public:
  129.    ir_rebalance_visitor()
  130.    {
  131.       progress = false;
  132.    }
  133.  
  134.    void handle_rvalue(ir_rvalue **rvalue);
  135.  
  136.    bool progress;
  137. };
  138.  
  139. struct is_reduction_data {
  140.    ir_expression_operation operation;
  141.    const glsl_type *type;
  142.    unsigned num_expr;
  143.    bool is_reduction;
  144.    bool contains_constant;
  145. };
  146.  
  147. } /* anonymous namespace */
  148.  
  149. static bool
  150. is_reduction_operation(ir_expression_operation operation)
  151. {
  152.    switch (operation) {
  153.    case ir_binop_add:
  154.    case ir_binop_mul:
  155.    case ir_binop_bit_and:
  156.    case ir_binop_bit_xor:
  157.    case ir_binop_bit_or:
  158.    case ir_binop_logic_and:
  159.    case ir_binop_logic_xor:
  160.    case ir_binop_logic_or:
  161.    case ir_binop_min:
  162.    case ir_binop_max:
  163.       return true;
  164.    default:
  165.       return false;
  166.    }
  167. }
  168.  
  169. /* Note that this function does not attempt to recognize that reduction trees
  170.  * are already balanced.
  171.  *
  172.  * We return false from this function for a number of reasons other than an
  173.  * expression tree not being a mathematical reduction. Namely,
  174.  *
  175.  *    - if the tree contains multiple constants that we may be able to combine.
  176.  *    - if the tree contains matrices:
  177.  *       - they might contain vec4's with many constant components that we can
  178.  *         simplify after splitting.
  179.  *       - applying the matrix chain ordering optimization is more than just
  180.  *         balancing an expression tree.
  181.  *    - if the tree contains operations on multiple types.
  182.  *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
  183.  *      would trick the visiting pass.
  184.  */
  185. static void
  186. is_reduction(ir_instruction *ir, void *data)
  187. {
  188.    struct is_reduction_data *ird = (struct is_reduction_data *)data;
  189.    if (!ird->is_reduction)
  190.       return;
  191.  
  192.    /* We don't want to balance a tree that contains multiple constants, since
  193.     * we'll be able to constant fold them if they're not in separate subtrees.
  194.     */
  195.    if (ir->as_constant()) {
  196.       if (ird->contains_constant) {
  197.          ird->is_reduction = false;
  198.       }
  199.       ird->contains_constant = true;
  200.       return;
  201.    }
  202.  
  203.    /* Array/record dereferences have subtrees that are not part of the expr
  204.     * tree we're balancing. Skip trees containing them.
  205.     */
  206.    if (ir->ir_type == ir_type_dereference_array ||
  207.        ir->ir_type == ir_type_dereference_record) {
  208.       ird->is_reduction = false;
  209.       return;
  210.    }
  211.  
  212.    ir_expression *expr = ir->as_expression();
  213.    if (!expr)
  214.       return;
  215.  
  216.    /* Non-constant matrices might still contain constant vec4 that we can
  217.     * constant fold once split up. Handling matrices will need some more
  218.     * work.
  219.     */
  220.    if (expr->type->is_matrix() ||
  221.        expr->operands[0]->type->is_matrix() ||
  222.        (expr->operands[1] && expr->operands[1]->type->is_matrix())) {
  223.       ird->is_reduction = false;
  224.       return;
  225.    }
  226.  
  227.    if (ird->type != NULL && ird->type != expr->type) {
  228.       ird->is_reduction = false;
  229.       return;
  230.    }
  231.    ird->type = expr->type;
  232.  
  233.    ird->num_expr++;
  234.    if (is_reduction_operation(expr->operation)) {
  235.       if (ird->operation != 0 && ird->operation != expr->operation)
  236.          ird->is_reduction = false;
  237.       ird->operation = expr->operation;
  238.    } else {
  239.       ird->is_reduction = false;
  240.    }
  241. }
  242.  
  243. static ir_rvalue *
  244. handle_expression(ir_expression *expr)
  245. {
  246.    struct is_reduction_data ird;
  247.    ird.operation = (ir_expression_operation)0;
  248.    ird.type = NULL;
  249.    ird.num_expr = 0;
  250.    ird.is_reduction = true;
  251.    ird.contains_constant = false;
  252.  
  253.    visit_tree(expr, is_reduction, (void *)&ird);
  254.  
  255.    if (ird.is_reduction && ird.num_expr > 2) {
  256.       ir_constant z = ir_constant(0.0f);
  257.       ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
  258.  
  259.       unsigned size = tree_to_vine(&pseudo_root);
  260.       vine_to_tree(&pseudo_root, size);
  261.  
  262.       expr = (ir_expression *)pseudo_root.operands[1];
  263.    }
  264.    return expr;
  265. }
  266.  
  267. static void
  268. update_types(ir_instruction *ir, void *)
  269. {
  270.    ir_expression *expr = ir->as_expression();
  271.    if (!expr)
  272.       return;
  273.  
  274.    const glsl_type *const new_type =
  275.       glsl_type::get_instance(expr->type->base_type,
  276.                               MAX2(expr->operands[0]->type->vector_elements,
  277.                                    expr->operands[1]->type->vector_elements),
  278.                               1);
  279.    assert(new_type != glsl_type::error_type);
  280.    expr->type = new_type;
  281. }
  282.  
  283. void
  284. ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
  285. {
  286.    if (!*rvalue)
  287.       return;
  288.  
  289.    ir_expression *expr = (*rvalue)->as_expression();
  290.    if (!expr || !is_reduction_operation(expr->operation))
  291.       return;
  292.  
  293.    ir_rvalue *new_rvalue = handle_expression(expr);
  294.  
  295.    /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
  296.     * or some other set of cases) new_rvalue will point to the same root as
  297.     * before.
  298.     *
  299.     * Similarly, if the tree rooted at *rvalue was a reduction and was already
  300.     * balanced, the algorithm will rearrange the tree but will ultimately
  301.     * return an identical tree, so this check will handle that as well and
  302.     * will not set progress = true.
  303.     */
  304.    if (new_rvalue == *rvalue)
  305.       return;
  306.  
  307.    visit_tree(new_rvalue, NULL, NULL, update_types);
  308.  
  309.    *rvalue = new_rvalue;
  310.    this->progress = true;
  311. }
  312.  
  313. bool
  314. do_rebalance_tree(exec_list *instructions)
  315. {
  316.    ir_rebalance_visitor v;
  317.  
  318.    v.run(instructions);
  319.  
  320.    return v.progress;
  321. }
  322.