Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright © 2010 Luca Barbieri
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file lower_jumps.cpp
  26.  *
  27.  * This pass lowers jumps (break, continue, and return) to if/else structures.
  28.  *
  29.  * It can be asked to:
  30.  * 1. Pull jumps out of ifs where possible
  31.  * 2. Remove all "continue"s, replacing them with an "execute flag"
  32.  * 3. Replace all "break" with a single conditional one at the end of the loop
  33.  * 4. Replace all "return"s with a single return at the end of the function,
  34.  *    for the main function and/or other functions
  35.  *
  36.  * Applying this pass gives several benefits:
  37.  * 1. All functions can be inlined.
  38.  * 2. nv40 and other pre-DX10 chips without "continue" can be supported
  39.  * 3. nv30 and other pre-DX10 chips with no control flow at all are better
  40.  *    supported
  41.  *
  42.  * Continues are lowered by adding a per-loop "execute flag", initialized to
  43.  * true, that when cleared inhibits all execution until the end of the loop.
  44.  *
  45.  * Breaks are lowered to continues, plus setting a "break flag" that is checked
  46.  * at the end of the loop, and trigger the unique "break".
  47.  *
  48.  * Returns are lowered to breaks/continues, plus adding a "return flag" that
  49.  * causes loops to break again out of their enclosing loops until all the
  50.  * loops are exited: then the "execute flag" logic will ignore everything
  51.  * until the end of the function.
  52.  *
  53.  * Note that "continue" and "return" can also be implemented by adding
  54.  * a dummy loop and using break.
  55.  * However, this is bad for hardware with limited nesting depth, and
  56.  * prevents further optimization, and thus is not currently performed.
  57.  */
  58.  
  59. #include "glsl_types.h"
  60. #include <string.h>
  61. #include "ir.h"
  62.  
  63. enum jump_strength
  64. {
  65.    strength_none,
  66.    strength_always_clears_execute_flag,
  67.    strength_continue,
  68.    strength_break,
  69.    strength_return,
  70. };
  71.  
  72. struct block_record
  73. {
  74.    /* minimum jump strength (of lowered IR, not pre-lowering IR)
  75.     *
  76.     * If the block ends with a jump, must be the strength of the jump.
  77.     * Otherwise, the jump would be dead and have been deleted before)
  78.     *
  79.     * If the block doesn't end with a jump, it can be different than strength_none if all paths before it lead to some jump
  80.     * (e.g. an if with a return in one branch, and a break in the other, while not lowering them)
  81.     * Note that identical jumps are usually unified though.
  82.     */
  83.    jump_strength min_strength;
  84.  
  85.    /* can anything clear the execute flag? */
  86.    bool may_clear_execute_flag;
  87.  
  88.    block_record()
  89.    {
  90.       this->min_strength = strength_none;
  91.       this->may_clear_execute_flag = false;
  92.    }
  93. };
  94.  
  95. struct loop_record
  96. {
  97.    ir_function_signature* signature;
  98.    ir_loop* loop;
  99.  
  100.    /* used to avoid lowering the break used to represent lowered breaks */
  101.    unsigned nesting_depth;
  102.    bool in_if_at_the_end_of_the_loop;
  103.  
  104.    bool may_set_return_flag;
  105.  
  106.    ir_variable* break_flag;
  107.    ir_variable* execute_flag; /* cleared to emulate continue */
  108.  
  109.    loop_record(ir_function_signature* p_signature = 0, ir_loop* p_loop = 0)
  110.    {
  111.       this->signature = p_signature;
  112.       this->loop = p_loop;
  113.       this->nesting_depth = 0;
  114.       this->in_if_at_the_end_of_the_loop = false;
  115.       this->may_set_return_flag = false;
  116.       this->break_flag = 0;
  117.       this->execute_flag = 0;
  118.    }
  119.  
  120.    ir_variable* get_execute_flag()
  121.    {
  122.       /* also supported for the "function loop" */
  123.       if(!this->execute_flag) {
  124.          exec_list& list = this->loop ? this->loop->body_instructions : signature->body;
  125.          this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary);
  126.          list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true), 0));
  127.          list.push_head(this->execute_flag);
  128.       }
  129.       return this->execute_flag;
  130.    }
  131.  
  132.    ir_variable* get_break_flag()
  133.    {
  134.       assert(this->loop);
  135.       if(!this->break_flag) {
  136.          this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary);
  137.          this->loop->insert_before(this->break_flag);
  138.          this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false), 0));
  139.       }
  140.       return this->break_flag;
  141.    }
  142. };
  143.  
  144. struct function_record
  145. {
  146.    ir_function_signature* signature;
  147.    ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */
  148.    ir_variable* return_value;
  149.    bool is_main;
  150.    unsigned nesting_depth;
  151.  
  152.    function_record(ir_function_signature* p_signature = 0)
  153.    {
  154.       this->signature = p_signature;
  155.       this->return_flag = 0;
  156.       this->return_value = 0;
  157.       this->nesting_depth = 0;
  158.       this->is_main = this->signature && (strcmp(this->signature->function_name(), "main") == 0);
  159.    }
  160.  
  161.    ir_variable* get_return_flag()
  162.    {
  163.       if(!this->return_flag) {
  164.          this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary);
  165.          this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false), 0));
  166.          this->signature->body.push_head(this->return_flag);
  167.       }
  168.       return this->return_flag;
  169.    }
  170.  
  171.    ir_variable* get_return_value()
  172.    {
  173.       if(!this->return_value) {
  174.          assert(!this->signature->return_type->is_void());
  175.          return_value = new(this->signature) ir_variable(this->signature->return_type, "return_value", ir_var_temporary);
  176.          this->signature->body.push_head(this->return_value);
  177.       }
  178.       return this->return_value;
  179.    }
  180. };
  181.  
  182. struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
  183.    bool progress;
  184.  
  185.    struct function_record function;
  186.    struct loop_record loop;
  187.    struct block_record block;
  188.  
  189.    bool pull_out_jumps;
  190.    bool lower_continue;
  191.    bool lower_break;
  192.    bool lower_sub_return;
  193.    bool lower_main_return;
  194.  
  195.    ir_lower_jumps_visitor()
  196.    {
  197.       this->progress = false;
  198.    }
  199.  
  200.    void truncate_after_instruction(exec_node *ir)
  201.    {
  202.       if (!ir)
  203.          return;
  204.  
  205.       while (!ir->get_next()->is_tail_sentinel()) {
  206.          ((ir_instruction *)ir->get_next())->remove();
  207.          this->progress = true;
  208.       }
  209.    }
  210.  
  211.    void move_outer_block_inside(ir_instruction *ir, exec_list *inner_block)
  212.    {
  213.       while (!ir->get_next()->is_tail_sentinel()) {
  214.          ir_instruction *move_ir = (ir_instruction *)ir->get_next();
  215.  
  216.          move_ir->remove();
  217.          inner_block->push_tail(move_ir);
  218.       }
  219.    }
  220.  
  221.    virtual void visit(class ir_loop_jump * ir)
  222.    {
  223.       truncate_after_instruction(ir);
  224.       this->block.min_strength = ir->is_break() ? strength_break : strength_continue;
  225.    }
  226.  
  227.    virtual void visit(class ir_return * ir)
  228.    {
  229.       truncate_after_instruction(ir);
  230.       this->block.min_strength = strength_return;
  231.    }
  232.  
  233.    virtual void visit(class ir_discard * ir)
  234.    {
  235.    }
  236.  
  237.    enum jump_strength get_jump_strength(ir_instruction* ir)
  238.    {
  239.       if(!ir)
  240.          return strength_none;
  241.       else if(ir->ir_type == ir_type_loop_jump) {
  242.          if(((ir_loop_jump*)ir)->is_break())
  243.             return strength_break;
  244.          else
  245.             return strength_continue;
  246.       } else if(ir->ir_type == ir_type_return)
  247.          return strength_return;
  248.       else
  249.          return strength_none;
  250.    }
  251.  
  252.    bool should_lower_jump(ir_jump* ir)
  253.    {
  254.       unsigned strength = get_jump_strength(ir);
  255.       bool lower;
  256.       switch(strength)
  257.       {
  258.       case strength_none:
  259.          lower = false; /* don't change this, code relies on it */
  260.          break;
  261.       case strength_continue:
  262.          lower = lower_continue;
  263.          break;
  264.       case strength_break:
  265.          assert(this->loop.loop);
  266.          /* never lower "canonical break" */
  267.          if(ir->get_next()->is_tail_sentinel() && (this->loop.nesting_depth == 0
  268.                || (this->loop.nesting_depth == 1 && this->loop.in_if_at_the_end_of_the_loop)))
  269.             lower = false;
  270.          else
  271.             lower = lower_break;
  272.          break;
  273.       case strength_return:
  274.          /* never lower return at the end of a this->function */
  275.          if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
  276.             lower = false;
  277.          else if (this->function.is_main)
  278.             lower = lower_main_return;
  279.          else
  280.             lower = lower_sub_return;
  281.          break;
  282.       }
  283.       return lower;
  284.    }
  285.  
  286.    block_record visit_block(exec_list* list)
  287.    {
  288.       block_record saved_block = this->block;
  289.       this->block = block_record();
  290.       visit_exec_list(list, this);
  291.       block_record ret = this->block;
  292.       this->block = saved_block;
  293.       return ret;
  294.    }
  295.  
  296.    virtual void visit(ir_if *ir)
  297.    {
  298.       if(this->loop.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
  299.          this->loop.in_if_at_the_end_of_the_loop = true;
  300.  
  301.       ++this->function.nesting_depth;
  302.       ++this->loop.nesting_depth;
  303.  
  304.       block_record block_records[2];
  305.       ir_jump* jumps[2];
  306.  
  307.       block_records[0] = visit_block(&ir->then_instructions);
  308.       block_records[1] = visit_block(&ir->else_instructions);
  309.  
  310. retry: /* we get here if we put code after the if inside a branch */
  311.    for(unsigned i = 0; i < 2; ++i) {
  312.       exec_list& list = i ? ir->else_instructions : ir->then_instructions;
  313.       jumps[i] = 0;
  314.       if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail()))
  315.          jumps[i] = (ir_jump*)list.get_tail();
  316.    }
  317.  
  318.       for(;;) {
  319.          jump_strength jump_strengths[2];
  320.  
  321.          for(unsigned i = 0; i < 2; ++i) {
  322.             if(jumps[i]) {
  323.                jump_strengths[i] = block_records[i].min_strength;
  324.                assert(jump_strengths[i] == get_jump_strength(jumps[i]));
  325.             } else
  326.                jump_strengths[i] = strength_none;
  327.          }
  328.  
  329.          /* move both jumps out if possible */
  330.          if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) {
  331.             bool unify = true;
  332.             if(jump_strengths[0] == strength_continue)
  333.                ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_continue));
  334.             else if(jump_strengths[0] == strength_break)
  335.                ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
  336.             /* FINISHME: unify returns with identical expressions */
  337.             else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void())
  338.                ir->insert_after(new(ir) ir_return(NULL));
  339.             else
  340.                unify = false;
  341.  
  342.             if(unify) {
  343.                jumps[0]->remove();
  344.                jumps[1]->remove();
  345.                this->progress = true;
  346.  
  347.                jumps[0] = 0;
  348.                jumps[1] = 0;
  349.                block_records[0].min_strength = strength_none;
  350.                block_records[1].min_strength = strength_none;
  351.                break;
  352.             }
  353.          }
  354.  
  355.          /* lower a jump: if both need to lowered, start with the strongest one, so that
  356.           * we might later unify the lowered version with the other one
  357.           */
  358.          bool should_lower[2];
  359.          for(unsigned i = 0; i < 2; ++i)
  360.             should_lower[i] = should_lower_jump(jumps[i]);
  361.  
  362.          int lower;
  363.          if(should_lower[1] && should_lower[0])
  364.             lower = jump_strengths[1] > jump_strengths[0];
  365.          else if(should_lower[0])
  366.             lower = 0;
  367.          else if(should_lower[1])
  368.             lower = 1;
  369.          else
  370.             break;
  371.  
  372.          if(jump_strengths[lower] == strength_return) {
  373.             ir_variable* return_flag = this->function.get_return_flag();
  374.             if(!this->function.signature->return_type->is_void()) {
  375.                ir_variable* return_value = this->function.get_return_value();
  376.                jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_value), ((ir_return*)jumps[lower])->value, NULL));
  377.             }
  378.             jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_flag), new (ir) ir_constant(true), NULL));
  379.             this->loop.may_set_return_flag = true;
  380.             if(this->loop.loop) {
  381.                ir_loop_jump* lowered = 0;
  382.                lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
  383.                block_records[lower].min_strength = strength_break;
  384.                jumps[lower]->replace_with(lowered);
  385.                jumps[lower] = lowered;
  386.             } else
  387.                goto lower_continue;
  388.             this->progress = true;
  389.          } else if(jump_strengths[lower] == strength_break) {
  390.             /* We can't lower to an actual continue because that would execute the increment.
  391.              *
  392.              * In the lowered code, we instead put the break check between the this->loop body and the increment,
  393.              * which is impossible with a real continue as defined by the GLSL IR currently.
  394.              *
  395.              * Smarter options (such as undoing the increment) are possible but it's not worth implementing them,
  396.              * because if break is lowered, continue is almost surely lowered too.
  397.              */
  398.             jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(this->loop.get_break_flag()), new (ir) ir_constant(true), 0));
  399.             goto lower_continue;
  400.          } else if(jump_strengths[lower] == strength_continue) {
  401. lower_continue:
  402.             ir_variable* execute_flag = this->loop.get_execute_flag();
  403.             jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0));
  404.             jumps[lower] = 0;
  405.             block_records[lower].min_strength = strength_always_clears_execute_flag;
  406.             block_records[lower].may_clear_execute_flag = true;
  407.             this->progress = true;
  408.             break;
  409.          }
  410.       }
  411.  
  412.       /* move out a jump out if possible */
  413.       if(pull_out_jumps) {
  414.          int move_out = -1;
  415.          if(jumps[0] && block_records[1].min_strength >= strength_continue)
  416.             move_out = 0;
  417.          else if(jumps[1] && block_records[0].min_strength >= strength_continue)
  418.             move_out = 1;
  419.  
  420.          if(move_out >= 0)
  421.          {
  422.             jumps[move_out]->remove();
  423.             ir->insert_after(jumps[move_out]);
  424.             jumps[move_out] = 0;
  425.             block_records[move_out].min_strength = strength_none;
  426.             this->progress = true;
  427.          }
  428.       }
  429.  
  430.       if(block_records[0].min_strength < block_records[1].min_strength)
  431.          this->block.min_strength = block_records[0].min_strength;
  432.       else
  433.          this->block.min_strength = block_records[1].min_strength;
  434.       this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag;
  435.  
  436.       if(this->block.min_strength)
  437.          truncate_after_instruction(ir);
  438.       else if(this->block.may_clear_execute_flag)
  439.       {
  440.          int move_into = -1;
  441.          if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag)
  442.             move_into = 1;
  443.          else if(block_records[1].min_strength && !block_records[0].may_clear_execute_flag)
  444.             move_into = 0;
  445.  
  446.          if(move_into >= 0) {
  447.             assert(!block_records[move_into].min_strength && !block_records[move_into].may_clear_execute_flag); /* otherwise, we just truncated */
  448.  
  449.             exec_list* list = move_into ? &ir->else_instructions : &ir->then_instructions;
  450.             exec_node* next = ir->get_next();
  451.             if(!next->is_tail_sentinel()) {
  452.                move_outer_block_inside(ir, list);
  453.  
  454.                exec_list list;
  455.                list.head = next;
  456.                block_records[move_into] = visit_block(&list);
  457.  
  458.                this->progress = true;
  459.                goto retry;
  460.             }
  461.          } else {
  462.             ir_instruction* ir_after;
  463.             for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();)
  464.             {
  465.                ir_if* ir_if = ir_after->as_if();
  466.                if(ir_if && ir_if->else_instructions.is_empty()) {
  467.                   ir_dereference_variable* ir_if_cond_deref = ir_if->condition->as_dereference_variable();
  468.                   if(ir_if_cond_deref && ir_if_cond_deref->var == this->loop.execute_flag) {
  469.                      ir_instruction* ir_next = (ir_instruction*)ir_after->get_next();
  470.                      ir_after->insert_before(&ir_if->then_instructions);
  471.                      ir_after->remove();
  472.                      ir_after = ir_next;
  473.                      continue;
  474.                   }
  475.                }
  476.                ir_after = (ir_instruction*)ir_after->get_next();
  477.  
  478.                /* only set this if we find any unprotected instruction */
  479.                this->progress = true;
  480.             }
  481.  
  482.             if(!ir->get_next()->is_tail_sentinel()) {
  483.                assert(this->loop.execute_flag);
  484.                ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag));
  485.                move_outer_block_inside(ir, &if_execute->then_instructions);
  486.                ir->insert_after(if_execute);
  487.             }
  488.          }
  489.       }
  490.       --this->loop.nesting_depth;
  491.       --this->function.nesting_depth;
  492.    }
  493.  
  494.    virtual void visit(ir_loop *ir)
  495.    {
  496.       ++this->function.nesting_depth;
  497.       loop_record saved_loop = this->loop;
  498.       this->loop = loop_record(this->function.signature, ir);
  499.  
  500.       block_record body = visit_block(&ir->body_instructions);
  501.  
  502.       if(body.min_strength >= strength_break) {
  503.          /* FINISHME: turn the this->loop into an if, or replace it with its body */
  504.       }
  505.  
  506.       if(this->loop.break_flag) {
  507.          ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag));
  508.          break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
  509.          ir->body_instructions.push_tail(break_if);
  510.       }
  511.  
  512.       if(this->loop.may_set_return_flag) {
  513.          assert(this->function.return_flag);
  514.          ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag));
  515.          saved_loop.may_set_return_flag = true;
  516.          if(saved_loop.loop)
  517.             return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
  518.          else
  519.             move_outer_block_inside(ir, &return_if->else_instructions);
  520.          ir->insert_after(return_if);
  521.       }
  522.  
  523.       this->loop = saved_loop;
  524.       --this->function.nesting_depth;
  525.    }
  526.  
  527.    virtual void visit(ir_function_signature *ir)
  528.    {
  529.       /* these are not strictly necessary */
  530.       assert(!this->function.signature);
  531.       assert(!this->loop.loop);
  532.  
  533.       function_record saved_function = this->function;
  534.       loop_record saved_loop = this->loop;
  535.       this->function = function_record(ir);
  536.       this->loop = loop_record(ir);
  537.  
  538.       assert(!this->loop.loop);
  539.       visit_block(&ir->body);
  540.  
  541.       if(this->function.return_value)
  542.          ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value)));
  543.  
  544.       this->loop = saved_loop;
  545.       this->function = saved_function;
  546.    }
  547.  
  548.    virtual void visit(class ir_function * ir)
  549.    {
  550.       visit_block(&ir->signatures);
  551.    }
  552. };
  553.  
  554. bool
  555. do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_return, bool lower_main_return, bool lower_continue, bool lower_break)
  556. {
  557.    ir_lower_jumps_visitor v;
  558.    v.pull_out_jumps = pull_out_jumps;
  559.    v.lower_continue = lower_continue;
  560.    v.lower_break = lower_break;
  561.    v.lower_sub_return = lower_sub_return;
  562.    v.lower_main_return = lower_main_return;
  563.  
  564.    do {
  565.       v.progress = false;
  566.       visit_exec_list(instructions, &v);
  567.    } while (v.progress);
  568.  
  569.    return v.progress;
  570. }
  571.