Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2010 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. #include "glsl_types.h"
  25. #include "loop_analysis.h"
  26. #include "ir_hierarchical_visitor.h"
  27.  
  28. #include "main/mtypes.h"
  29.  
  30. namespace {
  31.  
  32. class loop_unroll_visitor : public ir_hierarchical_visitor {
  33. public:
  34.    loop_unroll_visitor(loop_state *state,
  35.                        const struct gl_shader_compiler_options *options)
  36.    {
  37.       this->state = state;
  38.       this->progress = false;
  39.       this->options = options;
  40.    }
  41.  
  42.    virtual ir_visitor_status visit_leave(ir_loop *ir);
  43.    void simple_unroll(ir_loop *ir, int iterations);
  44.    void complex_unroll(ir_loop *ir, int iterations,
  45.                        bool continue_from_then_branch);
  46.    void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
  47.  
  48.    loop_state *state;
  49.  
  50.    bool progress;
  51.    const struct gl_shader_compiler_options *options;
  52. };
  53.  
  54. } /* anonymous namespace */
  55.  
  56. static bool
  57. is_break(ir_instruction *ir)
  58. {
  59.    return ir != NULL && ir->ir_type == ir_type_loop_jump
  60.                      && ((ir_loop_jump *) ir)->is_break();
  61. }
  62.  
  63. class loop_unroll_count : public ir_hierarchical_visitor {
  64. public:
  65.    int nodes;
  66.    bool unsupported_variable_indexing;
  67.    bool array_indexed_by_induction_var_with_exact_iterations;
  68.    /* If there are nested loops, the node count will be inaccurate. */
  69.    bool nested_loop;
  70.  
  71.    loop_unroll_count(exec_list *list, loop_variable_state *ls,
  72.                      const struct gl_shader_compiler_options *options)
  73.       : ls(ls), options(options)
  74.    {
  75.       nodes = 0;
  76.       nested_loop = false;
  77.       unsupported_variable_indexing = false;
  78.       array_indexed_by_induction_var_with_exact_iterations = false;
  79.  
  80.       run(list);
  81.    }
  82.  
  83.    virtual ir_visitor_status visit_enter(ir_assignment *)
  84.    {
  85.       nodes++;
  86.       return visit_continue;
  87.    }
  88.  
  89.    virtual ir_visitor_status visit_enter(ir_expression *)
  90.    {
  91.       nodes++;
  92.       return visit_continue;
  93.    }
  94.  
  95.    virtual ir_visitor_status visit_enter(ir_loop *)
  96.    {
  97.       nested_loop = true;
  98.       return visit_continue;
  99.    }
  100.  
  101.    virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
  102.    {
  103.       /* Check for arrays variably-indexed by a loop induction variable.
  104.        * Unrolling the loop may convert that access into constant-indexing.
  105.        *
  106.        * Many drivers don't support particular kinds of variable indexing,
  107.        * and have to resort to using lower_variable_index_to_cond_assign to
  108.        * handle it.  This results in huge amounts of horrible code, so we'd
  109.        * like to avoid that if possible.  Here, we just note that it will
  110.        * happen.
  111.        */
  112.       if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
  113.           !ir->array_index->as_constant()) {
  114.          ir_variable *array = ir->array->variable_referenced();
  115.          loop_variable *lv = ls->get(ir->array_index->variable_referenced());
  116.          if (array && lv && lv->is_induction_var()) {
  117.             /* If an array is indexed by a loop induction variable, and the
  118.              * array size is exactly the number of loop iterations, this is
  119.              * probably a simple for-loop trying to access each element in
  120.              * turn; the application may expect it to be unrolled.
  121.              */
  122.             if (int(array->type->length) == ls->limiting_terminator->iterations)
  123.                array_indexed_by_induction_var_with_exact_iterations = true;
  124.  
  125.             switch (array->data.mode) {
  126.             case ir_var_auto:
  127.             case ir_var_temporary:
  128.             case ir_var_const_in:
  129.             case ir_var_function_in:
  130.             case ir_var_function_out:
  131.             case ir_var_function_inout:
  132.                if (options->EmitNoIndirectTemp)
  133.                   unsupported_variable_indexing = true;
  134.                break;
  135.             case ir_var_uniform:
  136.                if (options->EmitNoIndirectUniform)
  137.                   unsupported_variable_indexing = true;
  138.                break;
  139.             case ir_var_shader_in:
  140.                if (options->EmitNoIndirectInput)
  141.                   unsupported_variable_indexing = true;
  142.                break;
  143.             case ir_var_shader_out:
  144.                if (options->EmitNoIndirectOutput)
  145.                   unsupported_variable_indexing = true;
  146.                break;
  147.             }
  148.          }
  149.       }
  150.       return visit_continue;
  151.    }
  152.  
  153. private:
  154.    loop_variable_state *ls;
  155.    const struct gl_shader_compiler_options *options;
  156. };
  157.  
  158.  
  159. /**
  160.  * Unroll a loop which does not contain any jumps.  For example, if the input
  161.  * is:
  162.  *
  163.  *     (loop (...) ...instrs...)
  164.  *
  165.  * And the iteration count is 3, the output will be:
  166.  *
  167.  *     ...instrs... ...instrs... ...instrs...
  168.  */
  169. void
  170. loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
  171. {
  172.    void *const mem_ctx = ralloc_parent(ir);
  173.  
  174.    for (int i = 0; i < iterations; i++) {
  175.       exec_list copy_list;
  176.  
  177.       copy_list.make_empty();
  178.       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
  179.  
  180.       ir->insert_before(&copy_list);
  181.    }
  182.  
  183.    /* The loop has been replaced by the unrolled copies.  Remove the original
  184.     * loop from the IR sequence.
  185.     */
  186.    ir->remove();
  187.  
  188.    this->progress = true;
  189. }
  190.  
  191.  
  192. /**
  193.  * Unroll a loop whose last statement is an ir_if.  If \c
  194.  * continue_from_then_branch is true, the loop is repeated only when the
  195.  * "then" branch of the if is taken; otherwise it is repeated only when the
  196.  * "else" branch of the if is taken.
  197.  *
  198.  * For example, if the input is:
  199.  *
  200.  *     (loop (...)
  201.  *      ...body...
  202.  *      (if (cond)
  203.  *          (...then_instrs...)
  204.  *        (...else_instrs...)))
  205.  *
  206.  * And the iteration count is 3, and \c continue_from_then_branch is true,
  207.  * then the output will be:
  208.  *
  209.  *     ...body...
  210.  *     (if (cond)
  211.  *         (...then_instrs...
  212.  *          ...body...
  213.  *          (if (cond)
  214.  *              (...then_instrs...
  215.  *               ...body...
  216.  *               (if (cond)
  217.  *                   (...then_instrs...)
  218.  *                 (...else_instrs...)))
  219.  *            (...else_instrs...)))
  220.  *       (...else_instrs))
  221.  */
  222. void
  223. loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
  224.                                     bool continue_from_then_branch)
  225. {
  226.    void *const mem_ctx = ralloc_parent(ir);
  227.    ir_instruction *ir_to_replace = ir;
  228.  
  229.    for (int i = 0; i < iterations; i++) {
  230.       exec_list copy_list;
  231.  
  232.       copy_list.make_empty();
  233.       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
  234.  
  235.       ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
  236.       assert(ir_if != NULL);
  237.  
  238.       ir_to_replace->insert_before(&copy_list);
  239.       ir_to_replace->remove();
  240.  
  241.       /* placeholder that will be removed in the next iteration */
  242.       ir_to_replace =
  243.          new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
  244.  
  245.       exec_list *const list = (continue_from_then_branch)
  246.          ? &ir_if->then_instructions : &ir_if->else_instructions;
  247.  
  248.       list->push_tail(ir_to_replace);
  249.    }
  250.  
  251.    ir_to_replace->remove();
  252.  
  253.    this->progress = true;
  254. }
  255.  
  256.  
  257. /**
  258.  * Move all of the instructions which follow \c ir_if to the end of
  259.  * \c splice_dest.
  260.  *
  261.  * For example, in the code snippet:
  262.  *
  263.  *     (if (cond)
  264.  *         (...then_instructions...
  265.  *          break)
  266.  *       (...else_instructions...))
  267.  *     ...post_if_instructions...
  268.  *
  269.  * If \c ir_if points to the "if" instruction, and \c splice_dest points to
  270.  * (...else_instructions...), the code snippet is transformed into:
  271.  *
  272.  *     (if (cond)
  273.  *         (...then_instructions...
  274.  *          break)
  275.  *       (...else_instructions...
  276.  *        ...post_if_instructions...))
  277.  */
  278. void
  279. loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if,
  280.                                                  exec_list *splice_dest)
  281. {
  282.    while (!ir_if->get_next()->is_tail_sentinel()) {
  283.       ir_instruction *move_ir = (ir_instruction *) ir_if->get_next();
  284.  
  285.       move_ir->remove();
  286.       splice_dest->push_tail(move_ir);
  287.    }
  288. }
  289.  
  290.  
  291. ir_visitor_status
  292. loop_unroll_visitor::visit_leave(ir_loop *ir)
  293. {
  294.    loop_variable_state *const ls = this->state->get(ir);
  295.    int iterations;
  296.  
  297.    /* If we've entered a loop that hasn't been analyzed, something really,
  298.     * really bad has happened.
  299.     */
  300.    if (ls == NULL) {
  301.       assert(ls != NULL);
  302.       return visit_continue;
  303.    }
  304.  
  305.    /* Don't try to unroll loops where the number of iterations is not known
  306.     * at compile-time.
  307.     */
  308.    if (ls->limiting_terminator == NULL)
  309.       return visit_continue;
  310.  
  311.    iterations = ls->limiting_terminator->iterations;
  312.  
  313.    const int max_iterations = options->MaxUnrollIterations;
  314.  
  315.    /* Don't try to unroll loops that have zillions of iterations either.
  316.     */
  317.    if (iterations > max_iterations)
  318.       return visit_continue;
  319.  
  320.    /* Don't try to unroll nested loops and loops with a huge body.
  321.     */
  322.    loop_unroll_count count(&ir->body_instructions, ls, options);
  323.  
  324.    bool loop_too_large =
  325.       count.nested_loop || count.nodes * iterations > max_iterations * 5;
  326.  
  327.    if (loop_too_large && !count.unsupported_variable_indexing &&
  328.        !count.array_indexed_by_induction_var_with_exact_iterations)
  329.       return visit_continue;
  330.  
  331.    /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
  332.     * We'll be removing the limiting terminator before we unroll.
  333.     */
  334.    assert(ls->num_loop_jumps > 0);
  335.    unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
  336.  
  337.    if (predicted_num_loop_jumps > 1)
  338.       return visit_continue;
  339.  
  340.    if (predicted_num_loop_jumps == 0) {
  341.       ls->limiting_terminator->ir->remove();
  342.       simple_unroll(ir, iterations);
  343.       return visit_continue;
  344.    }
  345.  
  346.    ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail();
  347.    assert(last_ir != NULL);
  348.  
  349.    if (is_break(last_ir)) {
  350.       /* If the only loop-jump is a break at the end of the loop, the loop
  351.        * will execute exactly once.  Remove the break and use the simple
  352.        * unroller with an iteration count of 1.
  353.        */
  354.       last_ir->remove();
  355.  
  356.       ls->limiting_terminator->ir->remove();
  357.       simple_unroll(ir, 1);
  358.       return visit_continue;
  359.    }
  360.  
  361.    /* recognize loops in the form produced by ir_lower_jumps */
  362.    foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) {
  363.       /* Skip the limiting terminator, since it will go away when we
  364.        * unroll.
  365.        */
  366.       if (cur_ir == ls->limiting_terminator->ir)
  367.          continue;
  368.  
  369.       ir_if *ir_if = cur_ir->as_if();
  370.       if (ir_if != NULL) {
  371.          /* Determine which if-statement branch, if any, ends with a
  372.           * break.  The branch that did *not* have the break will get a
  373.           * temporary continue inserted in each iteration of the loop
  374.           * unroll.
  375.           *
  376.           * Note that since ls->num_loop_jumps is <= 1, it is impossible
  377.           * for both branches to end with a break.
  378.           */
  379.          ir_instruction *ir_if_last =
  380.             (ir_instruction *) ir_if->then_instructions.get_tail();
  381.  
  382.          if (is_break(ir_if_last)) {
  383.             ls->limiting_terminator->ir->remove();
  384.             splice_post_if_instructions(ir_if, &ir_if->else_instructions);
  385.             ir_if_last->remove();
  386.             complex_unroll(ir, iterations, false);
  387.             return visit_continue;
  388.          } else {
  389.             ir_if_last =
  390.                (ir_instruction *) ir_if->else_instructions.get_tail();
  391.  
  392.             if (is_break(ir_if_last)) {
  393.                ls->limiting_terminator->ir->remove();
  394.                splice_post_if_instructions(ir_if, &ir_if->then_instructions);
  395.                ir_if_last->remove();
  396.                complex_unroll(ir, iterations, true);
  397.                return visit_continue;
  398.             }
  399.          }
  400.       }
  401.    }
  402.  
  403.    /* Did not find the break statement.  It must be in a complex if-nesting,
  404.     * so don't try to unroll.
  405.     */
  406.    return visit_continue;
  407. }
  408.  
  409.  
  410. bool
  411. unroll_loops(exec_list *instructions, loop_state *ls,
  412.              const struct gl_shader_compiler_options *options)
  413. {
  414.    loop_unroll_visitor v(ls, options);
  415.  
  416.    v.run(instructions);
  417.  
  418.    return v.progress;
  419. }
  420.