Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2010 Luca Barbieri
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21.  * DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * \file lower_jumps.cpp
  26.  *
  27.  * This pass lowers jumps (break, continue, and return) to if/else structures.
  28.  *
  29.  * It can be asked to:
  30.  * 1. Pull jumps out of ifs where possible
  31.  * 2. Remove all "continue"s, replacing them with an "execute flag"
  32.  * 3. Replace all "break" with a single conditional one at the end of the loop
  33.  * 4. Replace all "return"s with a single return at the end of the function,
  34.  *    for the main function and/or other functions
  35.  *
  36.  * Applying this pass gives several benefits:
  37.  * 1. All functions can be inlined.
  38.  * 2. nv40 and other pre-DX10 chips without "continue" can be supported
  39.  * 3. nv30 and other pre-DX10 chips with no control flow at all are better
  40.  *    supported
  41.  *
  42.  * Continues are lowered by adding a per-loop "execute flag", initialized to
  43.  * true, that when cleared inhibits all execution until the end of the loop.
  44.  *
  45.  * Breaks are lowered to continues, plus setting a "break flag" that is checked
  46.  * at the end of the loop, and trigger the unique "break".
  47.  *
  48.  * Returns are lowered to breaks/continues, plus adding a "return flag" that
  49.  * causes loops to break again out of their enclosing loops until all the
  50.  * loops are exited: then the "execute flag" logic will ignore everything
  51.  * until the end of the function.
  52.  *
  53.  * Note that "continue" and "return" can also be implemented by adding
  54.  * a dummy loop and using break.
  55.  * However, this is bad for hardware with limited nesting depth, and
  56.  * prevents further optimization, and thus is not currently performed.
  57.  */
  58.  
  59. #include "glsl_types.h"
  60. #include <string.h>
  61. #include "ir.h"
  62.  
  63. /**
  64.  * Enum recording the result of analyzing how control flow might exit
  65.  * an IR node.
  66.  *
  67.  * Each possible value of jump_strength indicates a strictly stronger
  68.  * guarantee on control flow than the previous value.
  69.  *
  70.  * The ordering of strengths roughly reflects the way jumps are
  71.  * lowered: jumps with higher strength tend to be lowered to jumps of
  72.  * lower strength.  Accordingly, strength is used as a heuristic to
  73.  * determine which lowering to perform first.
  74.  *
  75.  * This enum is also used by get_jump_strength() to categorize
  76.  * instructions as either break, continue, return, or other.  When
  77.  * used in this fashion, strength_always_clears_execute_flag is not
  78.  * used.
  79.  *
  80.  * The control flow analysis made by this optimization pass makes two
  81.  * simplifying assumptions:
  82.  *
  83.  * - It ignores discard instructions, since they are lowered by a
  84.  *   separate pass (lower_discard.cpp).
  85.  *
  86.  * - It assumes it is always possible for control to flow from a loop
  87.  *   to the instruction immediately following it.  Technically, this
  88.  *   is not true (since all execution paths through the loop might
  89.  *   jump back to the top, or return from the function).
  90.  *
  91.  * Both of these simplifying assumtions are safe, since they can never
  92.  * cause reachable code to be incorrectly classified as unreachable;
  93.  * they can only do the opposite.
  94.  */
  95. enum jump_strength
  96. {
  97.    /**
  98.     * Analysis has produced no guarantee on how control flow might
  99.     * exit this IR node.  It might fall out the bottom (with or
  100.     * without clearing the execute flag, if present), or it might
  101.     * continue to the top of the innermost enclosing loop, break out
  102.     * of it, or return from the function.
  103.     */
  104.    strength_none,
  105.  
  106.    /**
  107.     * The only way control can fall out the bottom of this node is
  108.     * through a code path that clears the execute flag.  It might also
  109.     * continue to the top of the innermost enclosing loop, break out
  110.     * of it, or return from the function.
  111.     */
  112.    strength_always_clears_execute_flag,
  113.  
  114.    /**
  115.     * Control cannot fall out the bottom of this node.  It might
  116.     * continue to the top of the innermost enclosing loop, break out
  117.     * of it, or return from the function.
  118.     */
  119.    strength_continue,
  120.  
  121.    /**
  122.     * Control cannot fall out the bottom of this node, or continue the
  123.     * top of the innermost enclosing loop.  It can only break out of
  124.     * it or return from the function.
  125.     */
  126.    strength_break,
  127.  
  128.    /**
  129.     * Control cannot fall out the bottom of this node, continue to the
  130.     * top of the innermost enclosing loop, or break out of it.  It can
  131.     * only return from the function.
  132.     */
  133.    strength_return
  134. };
  135.  
  136. struct block_record
  137. {
  138.    /* minimum jump strength (of lowered IR, not pre-lowering IR)
  139.     *
  140.     * If the block ends with a jump, must be the strength of the jump.
  141.     * Otherwise, the jump would be dead and have been deleted before)
  142.     *
  143.     * If the block doesn't end with a jump, it can be different than strength_none if all paths before it lead to some jump
  144.     * (e.g. an if with a return in one branch, and a break in the other, while not lowering them)
  145.     * Note that identical jumps are usually unified though.
  146.     */
  147.    jump_strength min_strength;
  148.  
  149.    /* can anything clear the execute flag? */
  150.    bool may_clear_execute_flag;
  151.  
  152.    block_record()
  153.    {
  154.       this->min_strength = strength_none;
  155.       this->may_clear_execute_flag = false;
  156.    }
  157. };
  158.  
  159. struct loop_record
  160. {
  161.    ir_function_signature* signature;
  162.    ir_loop* loop;
  163.  
  164.    /* used to avoid lowering the break used to represent lowered breaks */
  165.    unsigned nesting_depth;
  166.    bool in_if_at_the_end_of_the_loop;
  167.  
  168.    bool may_set_return_flag;
  169.  
  170.    ir_variable* break_flag;
  171.    ir_variable* execute_flag; /* cleared to emulate continue */
  172.  
  173.    loop_record(ir_function_signature* p_signature = 0, ir_loop* p_loop = 0)
  174.    {
  175.       this->signature = p_signature;
  176.       this->loop = p_loop;
  177.       this->nesting_depth = 0;
  178.       this->in_if_at_the_end_of_the_loop = false;
  179.       this->may_set_return_flag = false;
  180.       this->break_flag = 0;
  181.       this->execute_flag = 0;
  182.    }
  183.  
  184.    ir_variable* get_execute_flag()
  185.    {
  186.       /* also supported for the "function loop" */
  187.       if(!this->execute_flag) {
  188.          exec_list& list = this->loop ? this->loop->body_instructions : signature->body;
  189.          this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary);
  190.          list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true), 0));
  191.          list.push_head(this->execute_flag);
  192.       }
  193.       return this->execute_flag;
  194.    }
  195.  
  196.    ir_variable* get_break_flag()
  197.    {
  198.       assert(this->loop);
  199.       if(!this->break_flag) {
  200.          this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary);
  201.          this->loop->insert_before(this->break_flag);
  202.          this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false), 0));
  203.       }
  204.       return this->break_flag;
  205.    }
  206. };
  207.  
  208. struct function_record
  209. {
  210.    ir_function_signature* signature;
  211.    ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */
  212.    ir_variable* return_value;
  213.    bool lower_return;
  214.    unsigned nesting_depth;
  215.  
  216.    function_record(ir_function_signature* p_signature = 0,
  217.                    bool lower_return = false)
  218.    {
  219.       this->signature = p_signature;
  220.       this->return_flag = 0;
  221.       this->return_value = 0;
  222.       this->nesting_depth = 0;
  223.       this->lower_return = lower_return;
  224.    }
  225.  
  226.    ir_variable* get_return_flag()
  227.    {
  228.       if(!this->return_flag) {
  229.          this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary);
  230.          this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false), 0));
  231.          this->signature->body.push_head(this->return_flag);
  232.       }
  233.       return this->return_flag;
  234.    }
  235.  
  236.    ir_variable* get_return_value()
  237.    {
  238.       if(!this->return_value) {
  239.          assert(!this->signature->return_type->is_void());
  240.          return_value = new(this->signature) ir_variable(this->signature->return_type, "return_value", ir_var_temporary);
  241.          this->signature->body.push_head(this->return_value);
  242.       }
  243.       return this->return_value;
  244.    }
  245. };
  246.  
  247. struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
  248.    /* Postconditions: on exit of any visit() function:
  249.     *
  250.     * ANALYSIS: this->block.min_strength,
  251.     * this->block.may_clear_execute_flag, and
  252.     * this->loop.may_set_return_flag are updated to reflect the
  253.     * characteristics of the visited statement.
  254.     *
  255.     * DEAD_CODE_ELIMINATION: If this->block.min_strength is not
  256.     * strength_none, the visited node is at the end of its exec_list.
  257.     * In other words, any unreachable statements that follow the
  258.     * visited statement in its exec_list have been removed.
  259.     *
  260.     * CONTAINED_JUMPS_LOWERED: If the visited statement contains other
  261.     * statements, then should_lower_jump() is false for all of the
  262.     * return, break, or continue statements it contains.
  263.     *
  264.     * Note that visiting a jump does not lower it.  That is the
  265.     * responsibility of the statement (or function signature) that
  266.     * contains the jump.
  267.     */
  268.  
  269.    bool progress;
  270.  
  271.    struct function_record function;
  272.    struct loop_record loop;
  273.    struct block_record block;
  274.  
  275.    bool pull_out_jumps;
  276.    bool lower_continue;
  277.    bool lower_break;
  278.    bool lower_sub_return;
  279.    bool lower_main_return;
  280.  
  281.    ir_lower_jumps_visitor()
  282.    {
  283.       this->progress = false;
  284.    }
  285.  
  286.    void truncate_after_instruction(exec_node *ir)
  287.    {
  288.       if (!ir)
  289.          return;
  290.  
  291.       while (!ir->get_next()->is_tail_sentinel()) {
  292.          ((ir_instruction *)ir->get_next())->remove();
  293.          this->progress = true;
  294.       }
  295.    }
  296.  
  297.    void move_outer_block_inside(ir_instruction *ir, exec_list *inner_block)
  298.    {
  299.       while (!ir->get_next()->is_tail_sentinel()) {
  300.          ir_instruction *move_ir = (ir_instruction *)ir->get_next();
  301.  
  302.          move_ir->remove();
  303.          inner_block->push_tail(move_ir);
  304.       }
  305.    }
  306.  
  307.    /**
  308.     * Insert the instructions necessary to lower a return statement,
  309.     * before the given return instruction.
  310.     */
  311.    void insert_lowered_return(ir_return *ir)
  312.    {
  313.       ir_variable* return_flag = this->function.get_return_flag();
  314.       if(!this->function.signature->return_type->is_void()) {
  315.          ir_variable* return_value = this->function.get_return_value();
  316.          ir->insert_before(
  317.             new(ir) ir_assignment(
  318.                new (ir) ir_dereference_variable(return_value),
  319.                ir->value));
  320.       }
  321.       ir->insert_before(
  322.          new(ir) ir_assignment(
  323.             new (ir) ir_dereference_variable(return_flag),
  324.             new (ir) ir_constant(true)));
  325.       this->loop.may_set_return_flag = true;
  326.    }
  327.  
  328.    /**
  329.     * If the given instruction is a return, lower it to instructions
  330.     * that store the return value (if there is one), set the return
  331.     * flag, and then break.
  332.     *
  333.     * It is safe to pass NULL to this function.
  334.     */
  335.    void lower_return_unconditionally(ir_instruction *ir)
  336.    {
  337.       if (get_jump_strength(ir) != strength_return) {
  338.          return;
  339.       }
  340.       insert_lowered_return((ir_return*)ir);
  341.       ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
  342.    }
  343.  
  344.    /**
  345.     * Create the necessary instruction to replace a break instruction.
  346.     */
  347.    ir_instruction *create_lowered_break()
  348.    {
  349.       void *ctx = this->function.signature;
  350.       return new(ctx) ir_assignment(
  351.           new(ctx) ir_dereference_variable(this->loop.get_break_flag()),
  352.           new(ctx) ir_constant(true),
  353.           0);
  354.    }
  355.  
  356.    /**
  357.     * If the given instruction is a break, lower it to an instruction
  358.     * that sets the break flag, without consulting
  359.     * should_lower_jump().
  360.     *
  361.     * It is safe to pass NULL to this function.
  362.     */
  363.    void lower_break_unconditionally(ir_instruction *ir)
  364.    {
  365.       if (get_jump_strength(ir) != strength_break) {
  366.          return;
  367.       }
  368.       ir->replace_with(create_lowered_break());
  369.    }
  370.  
  371.    /**
  372.     * If the block ends in a conditional or unconditional break, lower
  373.     * it, even though should_lower_jump() says it needn't be lowered.
  374.     */
  375.    void lower_final_breaks(exec_list *block)
  376.    {
  377.       ir_instruction *ir = (ir_instruction *) block->get_tail();
  378.       lower_break_unconditionally(ir);
  379.       ir_if *ir_if = ir->as_if();
  380.       if (ir_if) {
  381.           lower_break_unconditionally(
  382.               (ir_instruction *) ir_if->then_instructions.get_tail());
  383.           lower_break_unconditionally(
  384.               (ir_instruction *) ir_if->else_instructions.get_tail());
  385.       }
  386.    }
  387.  
  388.    virtual void visit(class ir_loop_jump * ir)
  389.    {
  390.       /* Eliminate all instructions after each one, since they are
  391.        * unreachable.  This satisfies the DEAD_CODE_ELIMINATION
  392.        * postcondition.
  393.        */
  394.       truncate_after_instruction(ir);
  395.  
  396.       /* Set this->block.min_strength based on this instruction.  This
  397.        * satisfies the ANALYSIS postcondition.  It is not necessary to
  398.        * update this->block.may_clear_execute_flag or
  399.        * this->loop.may_set_return_flag, because an unlowered jump
  400.        * instruction can't change any flags.
  401.        */
  402.       this->block.min_strength = ir->is_break() ? strength_break : strength_continue;
  403.  
  404.       /* The CONTAINED_JUMPS_LOWERED postcondition is already
  405.        * satisfied, because jump statements can't contain other
  406.        * statements.
  407.        */
  408.    }
  409.  
  410.    virtual void visit(class ir_return * ir)
  411.    {
  412.       /* Eliminate all instructions after each one, since they are
  413.        * unreachable.  This satisfies the DEAD_CODE_ELIMINATION
  414.        * postcondition.
  415.        */
  416.       truncate_after_instruction(ir);
  417.  
  418.       /* Set this->block.min_strength based on this instruction.  This
  419.        * satisfies the ANALYSIS postcondition.  It is not necessary to
  420.        * update this->block.may_clear_execute_flag or
  421.        * this->loop.may_set_return_flag, because an unlowered return
  422.        * instruction can't change any flags.
  423.        */
  424.       this->block.min_strength = strength_return;
  425.  
  426.       /* The CONTAINED_JUMPS_LOWERED postcondition is already
  427.        * satisfied, because jump statements can't contain other
  428.        * statements.
  429.        */
  430.    }
  431.  
  432.    virtual void visit(class ir_discard * ir)
  433.    {
  434.       /* Nothing needs to be done.  The ANALYSIS and
  435.        * DEAD_CODE_ELIMINATION postconditions are already satisfied,
  436.        * because discard statements are ignored by this optimization
  437.        * pass.  The CONTAINED_JUMPS_LOWERED postcondition is already
  438.        * satisfied, because discard statements can't contain other
  439.        * statements.
  440.        */
  441.       (void) ir;
  442.    }
  443.  
  444.    enum jump_strength get_jump_strength(ir_instruction* ir)
  445.    {
  446.       if(!ir)
  447.          return strength_none;
  448.       else if(ir->ir_type == ir_type_loop_jump) {
  449.          if(((ir_loop_jump*)ir)->is_break())
  450.             return strength_break;
  451.          else
  452.             return strength_continue;
  453.       } else if(ir->ir_type == ir_type_return)
  454.          return strength_return;
  455.       else
  456.          return strength_none;
  457.    }
  458.  
  459.    bool should_lower_jump(ir_jump* ir)
  460.    {
  461.       unsigned strength = get_jump_strength(ir);
  462.       bool lower;
  463.       switch(strength)
  464.       {
  465.       case strength_none:
  466.          lower = false; /* don't change this, code relies on it */
  467.          break;
  468.       case strength_continue:
  469.          lower = lower_continue;
  470.          break;
  471.       case strength_break:
  472.          assert(this->loop.loop);
  473.          /* never lower "canonical break" */
  474.          if(ir->get_next()->is_tail_sentinel() && (this->loop.nesting_depth == 0
  475.                || (this->loop.nesting_depth == 1 && this->loop.in_if_at_the_end_of_the_loop)))
  476.             lower = false;
  477.          else
  478.             lower = lower_break;
  479.          break;
  480.       case strength_return:
  481.          /* never lower return at the end of a this->function */
  482.          if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
  483.             lower = false;
  484.          else
  485.             lower = this->function.lower_return;
  486.          break;
  487.       }
  488.       return lower;
  489.    }
  490.  
  491.    block_record visit_block(exec_list* list)
  492.    {
  493.       /* Note: since visiting a node may change that node's next
  494.        * pointer, we can't use visit_exec_list(), because
  495.        * visit_exec_list() caches the node's next pointer before
  496.        * visiting it.  So we use foreach_list() instead.
  497.        *
  498.        * foreach_list() isn't safe if the node being visited gets
  499.        * removed, but fortunately this visitor doesn't do that.
  500.        */
  501.  
  502.       block_record saved_block = this->block;
  503.       this->block = block_record();
  504.       foreach_list(node, list) {
  505.          ((ir_instruction *) node)->accept(this);
  506.       }
  507.       block_record ret = this->block;
  508.       this->block = saved_block;
  509.       return ret;
  510.    }
  511.  
  512.    virtual void visit(ir_if *ir)
  513.    {
  514.       if(this->loop.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
  515.          this->loop.in_if_at_the_end_of_the_loop = true;
  516.  
  517.       ++this->function.nesting_depth;
  518.       ++this->loop.nesting_depth;
  519.  
  520.       block_record block_records[2];
  521.       ir_jump* jumps[2];
  522.  
  523.       /* Recursively lower nested jumps.  This satisfies the
  524.        * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
  525.        * unconditional jumps at the end of ir->then_instructions and
  526.        * ir->else_instructions, which are handled below.
  527.        */
  528.       block_records[0] = visit_block(&ir->then_instructions);
  529.       block_records[1] = visit_block(&ir->else_instructions);
  530.  
  531. retry: /* we get here if we put code after the if inside a branch */
  532.  
  533.       /* Determine which of ir->then_instructions and
  534.        * ir->else_instructions end with an unconditional jump.
  535.        */
  536.       for(unsigned i = 0; i < 2; ++i) {
  537.          exec_list& list = i ? ir->else_instructions : ir->then_instructions;
  538.          jumps[i] = 0;
  539.          if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail()))
  540.             jumps[i] = (ir_jump*)list.get_tail();
  541.       }
  542.  
  543.       /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED
  544.        * postcondition by lowering jumps in both then_instructions and
  545.        * else_instructions.
  546.        */
  547.       for(;;) {
  548.          /* Determine the types of the jumps that terminate
  549.           * ir->then_instructions and ir->else_instructions.
  550.           */
  551.          jump_strength jump_strengths[2];
  552.  
  553.          for(unsigned i = 0; i < 2; ++i) {
  554.             if(jumps[i]) {
  555.                jump_strengths[i] = block_records[i].min_strength;
  556.                assert(jump_strengths[i] == get_jump_strength(jumps[i]));
  557.             } else
  558.                jump_strengths[i] = strength_none;
  559.          }
  560.  
  561.          /* If both code paths end in a jump, and the jumps are the
  562.           * same, and we are pulling out jumps, replace them with a
  563.           * single jump that comes after the if instruction.  The new
  564.           * jump will be visited next, and it will be lowered if
  565.           * necessary by the loop or conditional that encloses it.
  566.           */
  567.          if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) {
  568.             bool unify = true;
  569.             if(jump_strengths[0] == strength_continue)
  570.                ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_continue));
  571.             else if(jump_strengths[0] == strength_break)
  572.                ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
  573.             /* FINISHME: unify returns with identical expressions */
  574.             else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void())
  575.                ir->insert_after(new(ir) ir_return(NULL));
  576.             else
  577.                unify = false;
  578.  
  579.             if(unify) {
  580.                jumps[0]->remove();
  581.                jumps[1]->remove();
  582.                this->progress = true;
  583.  
  584.                /* Update jumps[] to reflect the fact that the jumps
  585.                 * are gone, and update block_records[] to reflect the
  586.                 * fact that control can now flow to the next
  587.                 * instruction.
  588.                 */
  589.                jumps[0] = 0;
  590.                jumps[1] = 0;
  591.                block_records[0].min_strength = strength_none;
  592.                block_records[1].min_strength = strength_none;
  593.  
  594.                /* The CONTAINED_JUMPS_LOWERED postcondition is now
  595.                 * satisfied, so we can break out of the loop.
  596.                 */
  597.                break;
  598.             }
  599.          }
  600.  
  601.          /* lower a jump: if both need to lowered, start with the strongest one, so that
  602.           * we might later unify the lowered version with the other one
  603.           */
  604.          bool should_lower[2];
  605.          for(unsigned i = 0; i < 2; ++i)
  606.             should_lower[i] = should_lower_jump(jumps[i]);
  607.  
  608.          int lower;
  609.          if(should_lower[1] && should_lower[0])
  610.             lower = jump_strengths[1] > jump_strengths[0];
  611.          else if(should_lower[0])
  612.             lower = 0;
  613.          else if(should_lower[1])
  614.             lower = 1;
  615.          else
  616.             /* Neither code path ends in a jump that needs to be
  617.              * lowered, so the CONTAINED_JUMPS_LOWERED postcondition
  618.              * is satisfied and we can break out of the loop.
  619.              */
  620.             break;
  621.  
  622.          if(jump_strengths[lower] == strength_return) {
  623.             /* To lower a return, we create a return flag (if the
  624.              * function doesn't have one already) and add instructions
  625.              * that: 1. store the return value (if this function has a
  626.              * non-void return) and 2. set the return flag
  627.              */
  628.             insert_lowered_return((ir_return*)jumps[lower]);
  629.             if(this->loop.loop) {
  630.                /* If we are in a loop, replace the return instruction
  631.                 * with a break instruction, and then loop so that the
  632.                 * break instruction can be lowered if necessary.
  633.                 */
  634.                ir_loop_jump* lowered = 0;
  635.                lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
  636.                /* Note: we must update block_records and jumps to
  637.                 * reflect the fact that the control path has been
  638.                 * altered from a return to a break.
  639.                 */
  640.                block_records[lower].min_strength = strength_break;
  641.                jumps[lower]->replace_with(lowered);
  642.                jumps[lower] = lowered;
  643.             } else {
  644.                /* If we are not in a loop, we then proceed as we would
  645.                 * for a continue statement (set the execute flag to
  646.                 * false to prevent the rest of the function from
  647.                 * executing).
  648.                 */
  649.                goto lower_continue;
  650.             }
  651.             this->progress = true;
  652.          } else if(jump_strengths[lower] == strength_break) {
  653.             /* To lower a break, we create a break flag (if the loop
  654.              * doesn't have one already) and add an instruction that
  655.              * sets it.
  656.              *
  657.              * Then we proceed as we would for a continue statement
  658.              * (set the execute flag to false to prevent the rest of
  659.              * the loop body from executing).
  660.              *
  661.              * The visit() function for the loop will ensure that the
  662.              * break flag is checked after executing the loop body.
  663.              */
  664.             jumps[lower]->insert_before(create_lowered_break());
  665.             goto lower_continue;
  666.          } else if(jump_strengths[lower] == strength_continue) {
  667. lower_continue:
  668.             /* To lower a continue, we create an execute flag (if the
  669.              * loop doesn't have one already) and replace the continue
  670.              * with an instruction that clears it.
  671.              *
  672.              * Note that this code path gets exercised when lowering
  673.              * return statements that are not inside a loop, so
  674.              * this->loop must be initialized even outside of loops.
  675.              */
  676.             ir_variable* execute_flag = this->loop.get_execute_flag();
  677.             jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0));
  678.             /* Note: we must update block_records and jumps to reflect
  679.              * the fact that the control path has been altered to an
  680.              * instruction that clears the execute flag.
  681.              */
  682.             jumps[lower] = 0;
  683.             block_records[lower].min_strength = strength_always_clears_execute_flag;
  684.             block_records[lower].may_clear_execute_flag = true;
  685.             this->progress = true;
  686.  
  687.             /* Let the loop run again, in case the other branch of the
  688.              * if needs to be lowered too.
  689.              */
  690.          }
  691.       }
  692.  
  693.       /* move out a jump out if possible */
  694.       if(pull_out_jumps) {
  695.          /* If one of the branches ends in a jump, and control cannot
  696.           * fall out the bottom of the other branch, then we can move
  697.           * the jump after the if.
  698.           *
  699.           * Set move_out to the branch we are moving a jump out of.
  700.           */
  701.          int move_out = -1;
  702.          if(jumps[0] && block_records[1].min_strength >= strength_continue)
  703.             move_out = 0;
  704.          else if(jumps[1] && block_records[0].min_strength >= strength_continue)
  705.             move_out = 1;
  706.  
  707.          if(move_out >= 0)
  708.          {
  709.             jumps[move_out]->remove();
  710.             ir->insert_after(jumps[move_out]);
  711.             /* Note: we must update block_records and jumps to reflect
  712.              * the fact that the jump has been moved out of the if.
  713.              */
  714.             jumps[move_out] = 0;
  715.             block_records[move_out].min_strength = strength_none;
  716.             this->progress = true;
  717.          }
  718.       }
  719.  
  720.       /* Now satisfy the ANALYSIS postcondition by setting
  721.        * this->block.min_strength and
  722.        * this->block.may_clear_execute_flag based on the
  723.        * characteristics of the two branches.
  724.        */
  725.       if(block_records[0].min_strength < block_records[1].min_strength)
  726.          this->block.min_strength = block_records[0].min_strength;
  727.       else
  728.          this->block.min_strength = block_records[1].min_strength;
  729.       this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag;
  730.  
  731.       /* Now we need to clean up the instructions that follow the
  732.        * if.
  733.        *
  734.        * If those instructions are unreachable, then satisfy the
  735.        * DEAD_CODE_ELIMINATION postcondition by eliminating them.
  736.        * Otherwise that postcondition is already satisfied.
  737.        */
  738.       if(this->block.min_strength)
  739.          truncate_after_instruction(ir);
  740.       else if(this->block.may_clear_execute_flag)
  741.       {
  742.          /* If the "if" instruction might clear the execute flag, then
  743.           * we need to guard any instructions that follow so that they
  744.           * are only executed if the execute flag is set.
  745.           *
  746.           * If one of the branches of the "if" always clears the
  747.           * execute flag, and the other branch never clears it, then
  748.           * this is easy: just move all the instructions following the
  749.           * "if" into the branch that never clears it.
  750.           */
  751.          int move_into = -1;
  752.          if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag)
  753.             move_into = 1;
  754.          else if(block_records[1].min_strength && !block_records[0].may_clear_execute_flag)
  755.             move_into = 0;
  756.  
  757.          if(move_into >= 0) {
  758.             assert(!block_records[move_into].min_strength && !block_records[move_into].may_clear_execute_flag); /* otherwise, we just truncated */
  759.  
  760.             exec_list* list = move_into ? &ir->else_instructions : &ir->then_instructions;
  761.             exec_node* next = ir->get_next();
  762.             if(!next->is_tail_sentinel()) {
  763.                move_outer_block_inside(ir, list);
  764.  
  765.                /* If any instructions moved, then we need to visit
  766.                 * them (since they are now inside the "if").  Since
  767.                 * block_records[move_into] is in its default state
  768.                 * (see assertion above), we can safely replace
  769.                 * block_records[move_into] with the result of this
  770.                 * analysis.
  771.                 */
  772.                exec_list list;
  773.                list.head = next;
  774.                block_records[move_into] = visit_block(&list);
  775.  
  776.                /*
  777.                 * Then we need to re-start our jump lowering, since one
  778.                 * of the instructions we moved might be a jump that
  779.                 * needs to be lowered.
  780.                 */
  781.                this->progress = true;
  782.                goto retry;
  783.             }
  784.          } else {
  785.             /* If we get here, then the simple case didn't apply; we
  786.              * need to actually guard the instructions that follow.
  787.              *
  788.              * To avoid creating unnecessarily-deep nesting, first
  789.              * look through the instructions that follow and unwrap
  790.              * any instructions that that are already wrapped in the
  791.              * appropriate guard.
  792.              */
  793.             ir_instruction* ir_after;
  794.             for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();)
  795.             {
  796.                ir_if* ir_if = ir_after->as_if();
  797.                if(ir_if && ir_if->else_instructions.is_empty()) {
  798.                   ir_dereference_variable* ir_if_cond_deref = ir_if->condition->as_dereference_variable();
  799.                   if(ir_if_cond_deref && ir_if_cond_deref->var == this->loop.execute_flag) {
  800.                      ir_instruction* ir_next = (ir_instruction*)ir_after->get_next();
  801.                      ir_after->insert_before(&ir_if->then_instructions);
  802.                      ir_after->remove();
  803.                      ir_after = ir_next;
  804.                      continue;
  805.                   }
  806.                }
  807.                ir_after = (ir_instruction*)ir_after->get_next();
  808.  
  809.                /* only set this if we find any unprotected instruction */
  810.                this->progress = true;
  811.             }
  812.  
  813.             /* Then, wrap all the instructions that follow in a single
  814.              * guard.
  815.              */
  816.             if(!ir->get_next()->is_tail_sentinel()) {
  817.                assert(this->loop.execute_flag);
  818.                ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag));
  819.                move_outer_block_inside(ir, &if_execute->then_instructions);
  820.                ir->insert_after(if_execute);
  821.             }
  822.          }
  823.       }
  824.       --this->loop.nesting_depth;
  825.       --this->function.nesting_depth;
  826.    }
  827.  
  828.    virtual void visit(ir_loop *ir)
  829.    {
  830.       /* Visit the body of the loop, with a fresh data structure in
  831.        * this->loop so that the analysis we do here won't bleed into
  832.        * enclosing loops.
  833.        *
  834.        * We assume that all code after a loop is reachable from the
  835.        * loop (see comments on enum jump_strength), so the
  836.        * DEAD_CODE_ELIMINATION postcondition is automatically
  837.        * satisfied, as is the block.min_strength portion of the
  838.        * ANALYSIS postcondition.
  839.        *
  840.        * The block.may_clear_execute_flag portion of the ANALYSIS
  841.        * postcondition is automatically satisfied because execute
  842.        * flags do not propagate outside of loops.
  843.        *
  844.        * The loop.may_set_return_flag portion of the ANALYSIS
  845.        * postcondition is handled below.
  846.        */
  847.       ++this->function.nesting_depth;
  848.       loop_record saved_loop = this->loop;
  849.       this->loop = loop_record(this->function.signature, ir);
  850.  
  851.       /* Recursively lower nested jumps.  This satisfies the
  852.        * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
  853.        * an unconditional continue or return at the bottom of the
  854.        * loop, which are handled below.
  855.        */
  856.       block_record body = visit_block(&ir->body_instructions);
  857.  
  858.       /* If the loop ends in an unconditional continue, eliminate it
  859.        * because it is redundant.
  860.        */
  861.       ir_instruction *ir_last
  862.          = (ir_instruction *) ir->body_instructions.get_tail();
  863.       if (get_jump_strength(ir_last) == strength_continue) {
  864.          ir_last->remove();
  865.       }
  866.  
  867.       /* If the loop ends in an unconditional return, and we are
  868.        * lowering returns, lower it.
  869.        */
  870.       if (this->function.lower_return)
  871.          lower_return_unconditionally(ir_last);
  872.  
  873.       if(body.min_strength >= strength_break) {
  874.          /* FINISHME: If the min_strength of the loop body is
  875.           * strength_break or strength_return, that means that it
  876.           * isn't a loop at all, since control flow always leaves the
  877.           * body of the loop via break or return.  In principle the
  878.           * loop could be eliminated in this case.  This optimization
  879.           * is not implemented yet.
  880.           */
  881.       }
  882.  
  883.       if(this->loop.break_flag) {
  884.          /* We only get here if we are lowering breaks */
  885.          assert (lower_break);
  886.  
  887.          /* If a break flag was generated while visiting the body of
  888.           * the loop, then at least one break was lowered, so we need
  889.           * to generate an if statement at the end of the loop that
  890.           * does a "break" if the break flag is set.  The break we
  891.           * generate won't violate the CONTAINED_JUMPS_LOWERED
  892.           * postcondition, because should_lower_jump() always returns
  893.           * false for a break that happens at the end of a loop.
  894.           *
  895.           * However, if the loop already ends in a conditional or
  896.           * unconditional break, then we need to lower that break,
  897.           * because it won't be at the end of the loop anymore.
  898.           */
  899.          lower_final_breaks(&ir->body_instructions);
  900.  
  901.          ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag));
  902.          break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
  903.          ir->body_instructions.push_tail(break_if);
  904.       }
  905.  
  906.       /* If the body of the loop may set the return flag, then at
  907.        * least one return was lowered to a break, so we need to ensure
  908.        * that the return flag is checked after the body of the loop is
  909.        * executed.
  910.        */
  911.       if(this->loop.may_set_return_flag) {
  912.          assert(this->function.return_flag);
  913.          /* Generate the if statement to check the return flag */
  914.          ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag));
  915.          /* Note: we also need to propagate the knowledge that the
  916.           * return flag may get set to the outer context.  This
  917.           * satisfies the loop.may_set_return_flag part of the
  918.           * ANALYSIS postcondition.
  919.           */
  920.          saved_loop.may_set_return_flag = true;
  921.          if(saved_loop.loop)
  922.             /* If this loop is nested inside another one, then the if
  923.              * statement that we generated should break out of that
  924.              * loop if the return flag is set.  Caller will lower that
  925.              * break statement if necessary.
  926.              */
  927.             return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
  928.          else
  929.             /* Otherwise, all we need to do is ensure that the
  930.              * instructions that follow are only executed if the
  931.              * return flag is clear.  We can do that by moving those
  932.              * instructions into the else clause of the generated if
  933.              * statement.
  934.              */
  935.             move_outer_block_inside(ir, &return_if->else_instructions);
  936.          ir->insert_after(return_if);
  937.       }
  938.  
  939.       this->loop = saved_loop;
  940.       --this->function.nesting_depth;
  941.    }
  942.  
  943.    virtual void visit(ir_function_signature *ir)
  944.    {
  945.       /* these are not strictly necessary */
  946.       assert(!this->function.signature);
  947.       assert(!this->loop.loop);
  948.  
  949.       bool lower_return;
  950.       if (strcmp(ir->function_name(), "main") == 0)
  951.          lower_return = lower_main_return;
  952.       else
  953.          lower_return = lower_sub_return;
  954.  
  955.       function_record saved_function = this->function;
  956.       loop_record saved_loop = this->loop;
  957.       this->function = function_record(ir, lower_return);
  958.       this->loop = loop_record(ir);
  959.  
  960.       assert(!this->loop.loop);
  961.  
  962.       /* Visit the body of the function to lower any jumps that occur
  963.        * in it, except possibly an unconditional return statement at
  964.        * the end of it.
  965.        */
  966.       visit_block(&ir->body);
  967.  
  968.       /* If the body ended in an unconditional return of non-void,
  969.        * then we don't need to lower it because it's the one canonical
  970.        * return.
  971.        *
  972.        * If the body ended in a return of void, eliminate it because
  973.        * it is redundant.
  974.        */
  975.       if (ir->return_type->is_void() &&
  976.           get_jump_strength((ir_instruction *) ir->body.get_tail())) {
  977.          ir_jump *jump = (ir_jump *) ir->body.get_tail();
  978.          assert (jump->ir_type == ir_type_return);
  979.          jump->remove();
  980.       }
  981.  
  982.       if(this->function.return_value)
  983.          ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value)));
  984.  
  985.       this->loop = saved_loop;
  986.       this->function = saved_function;
  987.    }
  988.  
  989.    virtual void visit(class ir_function * ir)
  990.    {
  991.       visit_block(&ir->signatures);
  992.    }
  993. };
  994.  
  995. bool
  996. do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_return, bool lower_main_return, bool lower_continue, bool lower_break)
  997. {
  998.    ir_lower_jumps_visitor v;
  999.    v.pull_out_jumps = pull_out_jumps;
  1000.    v.lower_continue = lower_continue;
  1001.    v.lower_break = lower_break;
  1002.    v.lower_sub_return = lower_sub_return;
  1003.    v.lower_main_return = lower_main_return;
  1004.  
  1005.    bool progress_ever = false;
  1006.    do {
  1007.       v.progress = false;
  1008.       visit_exec_list(instructions, &v);
  1009.       progress_ever = v.progress || progress_ever;
  1010.    } while (v.progress);
  1011.  
  1012.    return progress_ever;
  1013. }
  1014.