/*
 * Copyright © 2010 Luca Barbieri
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_jumps.cpp
 *
 * This pass lowers jumps (break, continue, and return) to if/else structures.
 *
 * It can be asked to:
 * 1. Pull jumps out of ifs where possible
 * 2. Remove all "continue"s, replacing them with an "execute flag"
 * 3. Replace all "break"s with a single conditional one at the end of the loop
 * 4. Replace all "return"s with a single return at the end of the function,
 *    for the main function and/or other functions
 *
 * Applying this pass gives several benefits:
 * 1. All functions can be inlined.
 * 2. nv40 and other pre-DX10 chips without "continue" can be supported.
 * 3. nv30 and other pre-DX10 chips with no control flow at all are better
 *    supported.
 *
 * Continues are lowered by adding a per-loop "execute flag", initialized to
 * true, that when cleared inhibits all execution until the end of the loop.
 *
 * Breaks are lowered to continues, plus setting a "break flag" that is
 * checked at the end of the loop and, when set, triggers the single
 * remaining "break".
 *
 * Returns are lowered to breaks/continues, plus adding a "return flag" that
 * causes each enclosing loop to break in turn until all the loops are
 * exited: then the "execute flag" logic will ignore everything until the
 * end of the function.
 *
 * Note that "continue" and "return" can also be implemented by adding
 * a dummy loop and using break.
 * However, this is bad for hardware with limited nesting depth and prevents
 * further optimization, and thus is not currently performed.
 */
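
/*
 * Illustrative sketch only (not produced verbatim by this file): with
 * lower_continue and lower_break enabled, a GLSL-like loop such as
 *
 *    while (...) {
 *       if (a) continue;
 *       if (b) break;
 *       body();
 *    }
 *
 * is conceptually rewritten to
 *
 *    bool break_flag = false;
 *    while (...) {
 *       bool execute_flag = true;
 *       if (a) execute_flag = false;
 *       if (execute_flag) {
 *          if (b) { break_flag = true; execute_flag = false; }
 *          if (execute_flag) body();
 *       }
 *       if (break_flag) break;
 *    }
 *
 * The temporaries correspond to the "execute_flag" and "break_flag"
 * variables created by loop_record below; the exact IR emitted by the pass
 * may nest and order things differently.
 */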

#include "glsl_types.h"
#include <string.h>
#include "ir.h"

/**
 * Enum recording the result of analyzing how control flow might exit
 * an IR node.
 *
 * Each possible value of jump_strength indicates a strictly stronger
 * guarantee on control flow than the previous value.
 *
 * The ordering of strengths roughly reflects the way jumps are
 * lowered: jumps with higher strength tend to be lowered to jumps of
 * lower strength.  Accordingly, strength is used as a heuristic to
 * determine which lowering to perform first.
 *
 * This enum is also used by get_jump_strength() to categorize
 * instructions as either break, continue, return, or other.  When
 * used in this fashion, strength_always_clears_execute_flag is not
 * used.
 *
 * The control flow analysis made by this optimization pass makes two
 * simplifying assumptions:
 *
 * - It ignores discard instructions, since they are lowered by a
 *   separate pass (lower_discard.cpp).
 *
 * - It assumes it is always possible for control to flow from a loop
 *   to the instruction immediately following it.  Technically, this
 *   is not true (since all execution paths through the loop might
 *   jump back to the top, or return from the function).
 *
 * Both of these simplifying assumptions are safe, since they can never
 * cause reachable code to be incorrectly classified as unreachable;
 * they can only do the opposite.
 */
enum jump_strength
{
   /**
    * Analysis has produced no guarantee on how control flow might
    * exit this IR node.  It might fall out the bottom (with or
    * without clearing the execute flag, if present), or it might
    * continue to the top of the innermost enclosing loop, break out
    * of it, or return from the function.
    */
   strength_none,

   /**
    * The only way control can fall out the bottom of this node is
    * through a code path that clears the execute flag.  It might also
    * continue to the top of the innermost enclosing loop, break out
    * of it, or return from the function.
    */
   strength_always_clears_execute_flag,

   /**
    * Control cannot fall out the bottom of this node.  It might
    * continue to the top of the innermost enclosing loop, break out
    * of it, or return from the function.
    */
   strength_continue,

   /**
    * Control cannot fall out the bottom of this node, or continue to the
    * top of the innermost enclosing loop.  It can only break out of
    * it or return from the function.
    */
   strength_break,

   /**
    * Control cannot fall out the bottom of this node, continue to the
    * top of the innermost enclosing loop, or break out of it.  It can
    * only return from the function.
    */
   strength_return
};

namespace {

struct block_record
{
   /* Minimum jump strength (of lowered IR, not pre-lowering IR).
    *
    * If the block ends with a jump, this must be the strength of that jump
    * (otherwise the jump would be dead and would already have been deleted).
    *
    * If the block doesn't end with a jump, it can still differ from
    * strength_none if all paths through the block lead to some jump
    * (e.g. an if with a return in one branch and a break in the other,
    * while neither is being lowered).
    * Note that identical jumps are usually unified, though.
    */
   jump_strength min_strength;

   /* can anything clear the execute flag? */
   bool may_clear_execute_flag;

   block_record()
   {
      this->min_strength = strength_none;
      this->may_clear_execute_flag = false;
   }
};

struct loop_record
{
   ir_function_signature* signature;
   ir_loop* loop;

   /* used to avoid lowering the break used to represent lowered breaks */
   unsigned nesting_depth;
   bool in_if_at_the_end_of_the_loop;

   bool may_set_return_flag;

   ir_variable* break_flag;
   ir_variable* execute_flag; /* cleared to emulate continue */

   loop_record(ir_function_signature* p_signature = 0, ir_loop* p_loop = 0)
   {
      this->signature = p_signature;
      this->loop = p_loop;
      this->nesting_depth = 0;
      this->in_if_at_the_end_of_the_loop = false;
      this->may_set_return_flag = false;
      this->break_flag = 0;
      this->execute_flag = 0;
   }

   ir_variable* get_execute_flag()
   {
      /* also supported for the "function loop" */
      if(!this->execute_flag) {
         exec_list& list = this->loop ? this->loop->body_instructions : signature->body;
         this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary);
         list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true), 0));
         list.push_head(this->execute_flag);
      }
      return this->execute_flag;
   }

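   /* Create the per-loop break flag on demand; it is declared and cleared
    * immediately before the loop.
    */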
   ir_variable* get_break_flag()
   {
      assert(this->loop);
      if(!this->break_flag) {
         this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary);
         this->loop->insert_before(this->break_flag);
         this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false), 0));
      }
      return this->break_flag;
   }
};

struct function_record
{
   ir_function_signature* signature;
   ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */
   ir_variable* return_value;
   bool lower_return;
   unsigned nesting_depth;

   function_record(ir_function_signature* p_signature = 0,
                   bool lower_return = false)
   {
      this->signature = p_signature;
      this->return_flag = 0;
      this->return_value = 0;
      this->nesting_depth = 0;
      this->lower_return = lower_return;
   }

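   /* Create the per-function return flag on demand; it is declared and
    * cleared at the top of the function body.
    */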
   ir_variable* get_return_flag()
   {
      if(!this->return_flag) {
         this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary);
         this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false), 0));
         this->signature->body.push_head(this->return_flag);
      }
      return this->return_flag;
   }

   ir_variable* get_return_value()
   {
      if(!this->return_value) {
         assert(!this->signature->return_type->is_void());
         return_value = new(this->signature) ir_variable(this->signature->return_type, "return_value", ir_var_temporary);
         this->signature->body.push_head(this->return_value);
      }
      return this->return_value;
   }
};

struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
   /* Postconditions: on exit of any visit() function:
    *
    * ANALYSIS: this->block.min_strength,
    * this->block.may_clear_execute_flag, and
    * this->loop.may_set_return_flag are updated to reflect the
    * characteristics of the visited statement.
    *
    * DEAD_CODE_ELIMINATION: If this->block.min_strength is not
    * strength_none, the visited node is at the end of its exec_list.
    * In other words, any unreachable statements that follow the
    * visited statement in its exec_list have been removed.
    *
    * CONTAINED_JUMPS_LOWERED: If the visited statement contains other
    * statements, then should_lower_jump() is false for all of the
    * return, break, or continue statements it contains.
    *
    * Note that visiting a jump does not lower it.  That is the
    * responsibility of the statement (or function signature) that
    * contains the jump.
    */

   bool progress;

   struct function_record function;
   struct loop_record loop;
   struct block_record block;

   bool pull_out_jumps;
   bool lower_continue;
   bool lower_break;
   bool lower_sub_return;
   bool lower_main_return;

   ir_lower_jumps_visitor()
      : progress(false),
        pull_out_jumps(false),
        lower_continue(false),
        lower_break(false),
        lower_sub_return(false),
        lower_main_return(false)
   {
   }

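   /**
    * Remove all instructions that follow the given one in its exec_list;
    * they are unreachable (e.g. because the given instruction is a jump).
    */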
   void truncate_after_instruction(exec_node *ir)
   {
      if (!ir)
         return;

      while (!ir->get_next()->is_tail_sentinel()) {
         ((ir_instruction *)ir->get_next())->remove();
         this->progress = true;
      }
   }

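   /**
    * Move all instructions that follow the given one out of their current
    * exec_list and append them to inner_block (e.g. into one branch of an
    * "if" statement).
    */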
   void move_outer_block_inside(ir_instruction *ir, exec_list *inner_block)
   {
      while (!ir->get_next()->is_tail_sentinel()) {
         ir_instruction *move_ir = (ir_instruction *)ir->get_next();

         move_ir->remove();
         inner_block->push_tail(move_ir);
      }
   }

   /**
    * Insert the instructions necessary to lower a return statement,
    * before the given return instruction.
    */
   void insert_lowered_return(ir_return *ir)
   {
      ir_variable* return_flag = this->function.get_return_flag();
      if(!this->function.signature->return_type->is_void()) {
         ir_variable* return_value = this->function.get_return_value();
         ir->insert_before(
            new(ir) ir_assignment(
               new (ir) ir_dereference_variable(return_value),
               ir->value));
      }
      ir->insert_before(
         new(ir) ir_assignment(
            new (ir) ir_dereference_variable(return_flag),
            new (ir) ir_constant(true)));
      this->loop.may_set_return_flag = true;
   }

   /**
    * If the given instruction is a return, lower it to instructions
    * that store the return value (if there is one), set the return
    * flag, and then break.
    *
    * It is safe to pass NULL to this function.
    */
   void lower_return_unconditionally(ir_instruction *ir)
   {
      if (get_jump_strength(ir) != strength_return) {
         return;
      }
      insert_lowered_return((ir_return*)ir);
      ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
   }

   /**
    * Create the necessary instruction to replace a break instruction.
    */
   ir_instruction *create_lowered_break()
   {
      void *ctx = this->function.signature;
      return new(ctx) ir_assignment(
          new(ctx) ir_dereference_variable(this->loop.get_break_flag()),
          new(ctx) ir_constant(true),
          0);
   }

   /**
    * If the given instruction is a break, lower it to an instruction
    * that sets the break flag, without consulting
    * should_lower_jump().
    *
    * It is safe to pass NULL to this function.
    */
   void lower_break_unconditionally(ir_instruction *ir)
   {
      if (get_jump_strength(ir) != strength_break) {
         return;
      }
      ir->replace_with(create_lowered_break());
   }

   /**
    * If the block ends in a conditional or unconditional break, lower
    * it, even though should_lower_jump() says it needn't be lowered.
    */
   void lower_final_breaks(exec_list *block)
   {
      ir_instruction *ir = (ir_instruction *) block->get_tail();
      lower_break_unconditionally(ir);
      ir_if *ir_if = ir->as_if();
      if (ir_if) {
          lower_break_unconditionally(
              (ir_instruction *) ir_if->then_instructions.get_tail());
          lower_break_unconditionally(
              (ir_instruction *) ir_if->else_instructions.get_tail());
      }
   }

   virtual void visit(class ir_loop_jump * ir)
   {
      /* Eliminate all instructions after each one, since they are
       * unreachable.  This satisfies the DEAD_CODE_ELIMINATION
       * postcondition.
       */
      truncate_after_instruction(ir);

      /* Set this->block.min_strength based on this instruction.  This
       * satisfies the ANALYSIS postcondition.  It is not necessary to
       * update this->block.may_clear_execute_flag or
       * this->loop.may_set_return_flag, because an unlowered jump
       * instruction can't change any flags.
       */
      this->block.min_strength = ir->is_break() ? strength_break : strength_continue;

      /* The CONTAINED_JUMPS_LOWERED postcondition is already
       * satisfied, because jump statements can't contain other
       * statements.
       */
   }

   virtual void visit(class ir_return * ir)
   {
      /* Eliminate all instructions after each one, since they are
       * unreachable.  This satisfies the DEAD_CODE_ELIMINATION
       * postcondition.
       */
      truncate_after_instruction(ir);

      /* Set this->block.min_strength based on this instruction.  This
       * satisfies the ANALYSIS postcondition.  It is not necessary to
       * update this->block.may_clear_execute_flag or
       * this->loop.may_set_return_flag, because an unlowered return
       * instruction can't change any flags.
       */
      this->block.min_strength = strength_return;

      /* The CONTAINED_JUMPS_LOWERED postcondition is already
       * satisfied, because jump statements can't contain other
       * statements.
       */
   }

   virtual void visit(class ir_discard * ir)
   {
      /* Nothing needs to be done.  The ANALYSIS and
       * DEAD_CODE_ELIMINATION postconditions are already satisfied,
       * because discard statements are ignored by this optimization
       * pass.  The CONTAINED_JUMPS_LOWERED postcondition is already
       * satisfied, because discard statements can't contain other
       * statements.
       */
      (void) ir;
   }

   enum jump_strength get_jump_strength(ir_instruction* ir)
   {
      if(!ir)
         return strength_none;
      else if(ir->ir_type == ir_type_loop_jump) {
         if(((ir_loop_jump*)ir)->is_break())
            return strength_break;
         else
            return strength_continue;
      } else if(ir->ir_type == ir_type_return)
         return strength_return;
      else
         return strength_none;
   }

   bool should_lower_jump(ir_jump* ir)
   {
      unsigned strength = get_jump_strength(ir);
      bool lower;
      switch(strength)
      {
      case strength_none:
         lower = false; /* don't change this, code relies on it */
         break;
      case strength_continue:
         lower = lower_continue;
         break;
      case strength_break:
         assert(this->loop.loop);
         /* never lower "canonical break" */
         if(ir->get_next()->is_tail_sentinel() && (this->loop.nesting_depth == 0
               || (this->loop.nesting_depth == 1 && this->loop.in_if_at_the_end_of_the_loop)))
            lower = false;
         else
            lower = lower_break;
         break;
      case strength_return:
         /* never lower a return at the end of this->function */
         if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
            lower = false;
         else
            lower = this->function.lower_return;
         break;
      }
      return lower;
   }

   block_record visit_block(exec_list* list)
   {
      /* Note: since visiting a node may change that node's next
       * pointer, we can't use visit_exec_list(), because
       * visit_exec_list() caches the node's next pointer before
       * visiting it.  So we use foreach_in_list() instead.
       *
       * foreach_in_list() isn't safe if the node being visited gets
       * removed, but fortunately this visitor doesn't do that.
       */

      block_record saved_block = this->block;
      this->block = block_record();
      foreach_in_list(ir_instruction, node, list) {
         node->accept(this);
      }
      block_record ret = this->block;
      this->block = saved_block;
      return ret;
   }

   virtual void visit(ir_if *ir)
   {
      if(this->loop.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
         this->loop.in_if_at_the_end_of_the_loop = true;

      ++this->function.nesting_depth;
      ++this->loop.nesting_depth;

      block_record block_records[2];
      ir_jump* jumps[2];

      /* Recursively lower nested jumps.  This satisfies the
       * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
       * unconditional jumps at the end of ir->then_instructions and
       * ir->else_instructions, which are handled below.
       */
      block_records[0] = visit_block(&ir->then_instructions);
      block_records[1] = visit_block(&ir->else_instructions);

retry: /* we get here if we put code after the if inside a branch */

      /* Determine which of ir->then_instructions and
       * ir->else_instructions end with an unconditional jump.
       */
      for(unsigned i = 0; i < 2; ++i) {
         exec_list& list = i ? ir->else_instructions : ir->then_instructions;
         jumps[i] = 0;
         if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail()))
            jumps[i] = (ir_jump*)list.get_tail();
      }

      /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED
       * postcondition by lowering jumps in both then_instructions and
       * else_instructions.
       */
      for(;;) {
         /* Determine the types of the jumps that terminate
          * ir->then_instructions and ir->else_instructions.
          */
         jump_strength jump_strengths[2];

         for(unsigned i = 0; i < 2; ++i) {
            if(jumps[i]) {
               jump_strengths[i] = block_records[i].min_strength;
               assert(jump_strengths[i] == get_jump_strength(jumps[i]));
            } else
               jump_strengths[i] = strength_none;
         }

         /* If both code paths end in a jump, and the jumps are the
          * same, and we are pulling out jumps, replace them with a
          * single jump that comes after the if instruction.  The new
          * jump will be visited next, and it will be lowered if
          * necessary by the loop or conditional that encloses it.
          */
         if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) {
            bool unify = true;
            if(jump_strengths[0] == strength_continue)
               ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_continue));
            else if(jump_strengths[0] == strength_break)
               ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
            /* FINISHME: unify returns with identical expressions */
            else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void())
               ir->insert_after(new(ir) ir_return(NULL));
            else
               unify = false;

            if(unify) {
               jumps[0]->remove();
               jumps[1]->remove();
               this->progress = true;

               /* Update jumps[] to reflect the fact that the jumps
                * are gone, and update block_records[] to reflect the
                * fact that control can now flow to the next
                * instruction.
                */
               jumps[0] = 0;
               jumps[1] = 0;
               block_records[0].min_strength = strength_none;
               block_records[1].min_strength = strength_none;

               /* The CONTAINED_JUMPS_LOWERED postcondition is now
                * satisfied, so we can break out of the loop.
                */
               break;
            }
         }

         /* lower a jump: if both need to be lowered, start with the strongest one, so that
          * we might later unify the lowered version with the other one
          */
         bool should_lower[2];
         for(unsigned i = 0; i < 2; ++i)
            should_lower[i] = should_lower_jump(jumps[i]);

         int lower;
         if(should_lower[1] && should_lower[0])
            lower = jump_strengths[1] > jump_strengths[0];
         else if(should_lower[0])
            lower = 0;
         else if(should_lower[1])
            lower = 1;
         else
            /* Neither code path ends in a jump that needs to be
             * lowered, so the CONTAINED_JUMPS_LOWERED postcondition
             * is satisfied and we can break out of the loop.
             */
            break;

         if(jump_strengths[lower] == strength_return) {
            /* To lower a return, we create a return flag (if the
             * function doesn't have one already) and add instructions
             * that: 1. store the return value (if this function has a
             * non-void return) and 2. set the return flag
             */
            insert_lowered_return((ir_return*)jumps[lower]);
            if(this->loop.loop) {
               /* If we are in a loop, replace the return instruction
                * with a break instruction, and then loop so that the
                * break instruction can be lowered if necessary.
                */
               ir_loop_jump* lowered = 0;
               lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
               /* Note: we must update block_records and jumps to
                * reflect the fact that the control path has been
                * altered from a return to a break.
                */
               block_records[lower].min_strength = strength_break;
               jumps[lower]->replace_with(lowered);
               jumps[lower] = lowered;
            } else {
               /* If we are not in a loop, we then proceed as we would
                * for a continue statement (set the execute flag to
                * false to prevent the rest of the function from
                * executing).
                */
               goto lower_continue;
            }
            this->progress = true;
         } else if(jump_strengths[lower] == strength_break) {
            /* To lower a break, we create a break flag (if the loop
             * doesn't have one already) and add an instruction that
             * sets it.
             *
             * Then we proceed as we would for a continue statement
             * (set the execute flag to false to prevent the rest of
             * the loop body from executing).
             *
             * The visit() function for the loop will ensure that the
             * break flag is checked after executing the loop body.
             */
            jumps[lower]->insert_before(create_lowered_break());
            goto lower_continue;
         } else if(jump_strengths[lower] == strength_continue) {
lower_continue:
            /* To lower a continue, we create an execute flag (if the
             * loop doesn't have one already) and replace the continue
             * with an instruction that clears it.
             *
             * Note that this code path gets exercised when lowering
             * return statements that are not inside a loop, so
             * this->loop must be initialized even outside of loops.
             */
            ir_variable* execute_flag = this->loop.get_execute_flag();
            jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0));
            /* Note: we must update block_records and jumps to reflect
             * the fact that the control path has been altered to an
             * instruction that clears the execute flag.
             */
            jumps[lower] = 0;
            block_records[lower].min_strength = strength_always_clears_execute_flag;
            block_records[lower].may_clear_execute_flag = true;
            this->progress = true;

            /* Let the loop run again, in case the other branch of the
             * if needs to be lowered too.
             */
         }
      }

      /* move a jump out if possible */
      if(pull_out_jumps) {
         /* If one of the branches ends in a jump, and control cannot
          * fall out the bottom of the other branch, then we can move
          * the jump after the if.
          *
          * Set move_out to the branch we are moving a jump out of.
          */
         int move_out = -1;
         if(jumps[0] && block_records[1].min_strength >= strength_continue)
            move_out = 0;
         else if(jumps[1] && block_records[0].min_strength >= strength_continue)
            move_out = 1;

         if(move_out >= 0)
         {
            jumps[move_out]->remove();
            ir->insert_after(jumps[move_out]);
            /* Note: we must update block_records and jumps to reflect
             * the fact that the jump has been moved out of the if.
             */
            jumps[move_out] = 0;
            block_records[move_out].min_strength = strength_none;
            this->progress = true;
         }
      }

      /* Now satisfy the ANALYSIS postcondition by setting
       * this->block.min_strength and
       * this->block.may_clear_execute_flag based on the
       * characteristics of the two branches.
       */
      if(block_records[0].min_strength < block_records[1].min_strength)
         this->block.min_strength = block_records[0].min_strength;
      else
         this->block.min_strength = block_records[1].min_strength;
      this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag;

      /* Now we need to clean up the instructions that follow the
       * if.
       *
       * If those instructions are unreachable, then satisfy the
       * DEAD_CODE_ELIMINATION postcondition by eliminating them.
       * Otherwise that postcondition is already satisfied.
       */
      if(this->block.min_strength)
         truncate_after_instruction(ir);
      else if(this->block.may_clear_execute_flag)
      {
         /* If the "if" instruction might clear the execute flag, then
          * we need to guard any instructions that follow so that they
          * are only executed if the execute flag is set.
          *
          * If one of the branches of the "if" always clears the
          * execute flag, and the other branch never clears it, then
          * this is easy: just move all the instructions following the
          * "if" into the branch that never clears it.
          */
         int move_into = -1;
         if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag)
            move_into = 1;
         else if(block_records[1].min_strength && !block_records[0].may_clear_execute_flag)
            move_into = 0;

         if(move_into >= 0) {
            assert(!block_records[move_into].min_strength && !block_records[move_into].may_clear_execute_flag); /* otherwise, we just truncated */

            exec_list* list = move_into ? &ir->else_instructions : &ir->then_instructions;
            exec_node* next = ir->get_next();
            if(!next->is_tail_sentinel()) {
               move_outer_block_inside(ir, list);

               /* If any instructions moved, then we need to visit
                * them (since they are now inside the "if").  Since
                * block_records[move_into] is in its default state
                * (see assertion above), we can safely replace
                * block_records[move_into] with the result of this
                * analysis.
                */
               exec_list list;
               list.head = next;
               block_records[move_into] = visit_block(&list);

               /*
                * Then we need to re-start our jump lowering, since one
                * of the instructions we moved might be a jump that
                * needs to be lowered.
                */
               this->progress = true;
               goto retry;
            }
         } else {
            /* If we get here, then the simple case didn't apply; we
             * need to actually guard the instructions that follow.
             *
             * To avoid creating unnecessarily-deep nesting, first
             * look through the instructions that follow and unwrap
             * any instructions that are already wrapped in the
             * appropriate guard.
             */
            ir_instruction* ir_after;
            for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();)
            {
               ir_if* ir_if = ir_after->as_if();
               if(ir_if && ir_if->else_instructions.is_empty()) {
                  ir_dereference_variable* ir_if_cond_deref = ir_if->condition->as_dereference_variable();
                  if(ir_if_cond_deref && ir_if_cond_deref->var == this->loop.execute_flag) {
                     ir_instruction* ir_next = (ir_instruction*)ir_after->get_next();
                     ir_after->insert_before(&ir_if->then_instructions);
                     ir_after->remove();
                     ir_after = ir_next;
                     continue;
                  }
               }
               ir_after = (ir_instruction*)ir_after->get_next();

               /* only set this if we find any unprotected instruction */
               this->progress = true;
            }

            /* Then, wrap all the instructions that follow in a single
             * guard.
             */
            if(!ir->get_next()->is_tail_sentinel()) {
               assert(this->loop.execute_flag);
               ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag));
               move_outer_block_inside(ir, &if_execute->then_instructions);
               ir->insert_after(if_execute);
            }
         }
      }
      --this->loop.nesting_depth;
      --this->function.nesting_depth;
   }

   virtual void visit(ir_loop *ir)
   {
      /* Visit the body of the loop, with a fresh data structure in
       * this->loop so that the analysis we do here won't bleed into
       * enclosing loops.
       *
       * We assume that all code after a loop is reachable from the
       * loop (see comments on enum jump_strength), so the
       * DEAD_CODE_ELIMINATION postcondition is automatically
       * satisfied, as is the block.min_strength portion of the
       * ANALYSIS postcondition.
       *
       * The block.may_clear_execute_flag portion of the ANALYSIS
       * postcondition is automatically satisfied because execute
       * flags do not propagate outside of loops.
       *
       * The loop.may_set_return_flag portion of the ANALYSIS
       * postcondition is handled below.
       */
      ++this->function.nesting_depth;
      loop_record saved_loop = this->loop;
      this->loop = loop_record(this->function.signature, ir);

      /* Recursively lower nested jumps.  This satisfies the
       * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
       * an unconditional continue or return at the bottom of the
       * loop, which are handled below.
       */
      block_record body = visit_block(&ir->body_instructions);

      /* If the loop ends in an unconditional continue, eliminate it
       * because it is redundant.
       */
      ir_instruction *ir_last
         = (ir_instruction *) ir->body_instructions.get_tail();
      if (get_jump_strength(ir_last) == strength_continue) {
         ir_last->remove();
      }

      /* If the loop ends in an unconditional return, and we are
       * lowering returns, lower it.
       */
      if (this->function.lower_return)
         lower_return_unconditionally(ir_last);

      if(body.min_strength >= strength_break) {
         /* FINISHME: If the min_strength of the loop body is
          * strength_break or strength_return, that means that it
          * isn't a loop at all, since control flow always leaves the
          * body of the loop via break or return.  In principle the
          * loop could be eliminated in this case.  This optimization
          * is not implemented yet.
          */
      }

      if(this->loop.break_flag) {
         /* We only get here if we are lowering breaks */
         assert (lower_break);

         /* If a break flag was generated while visiting the body of
          * the loop, then at least one break was lowered, so we need
          * to generate an if statement at the end of the loop that
          * does a "break" if the break flag is set.  The break we
          * generate won't violate the CONTAINED_JUMPS_LOWERED
          * postcondition, because should_lower_jump() always returns
          * false for a break that happens at the end of a loop.
          *
          * However, if the loop already ends in a conditional or
          * unconditional break, then we need to lower that break,
          * because it won't be at the end of the loop anymore.
          */
         lower_final_breaks(&ir->body_instructions);

         ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag));
         break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
         ir->body_instructions.push_tail(break_if);
      }

      /* If the body of the loop may set the return flag, then at
       * least one return was lowered to a break, so we need to ensure
       * that the return flag is checked after the body of the loop is
       * executed.
       */
      if(this->loop.may_set_return_flag) {
         assert(this->function.return_flag);
         /* Generate the if statement to check the return flag */
         ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag));
         /* Note: we also need to propagate the knowledge that the
          * return flag may get set to the outer context.  This
          * satisfies the loop.may_set_return_flag part of the
          * ANALYSIS postcondition.
          */
         saved_loop.may_set_return_flag = true;
         if(saved_loop.loop)
            /* If this loop is nested inside another one, then the if
             * statement that we generated should break out of that
             * loop if the return flag is set.  Caller will lower that
             * break statement if necessary.
             */
            return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
         else
            /* Otherwise, all we need to do is ensure that the
             * instructions that follow are only executed if the
             * return flag is clear.  We can do that by moving those
             * instructions into the else clause of the generated if
             * statement.
             */
            move_outer_block_inside(ir, &return_if->else_instructions);
         ir->insert_after(return_if);
      }

      this->loop = saved_loop;
      --this->function.nesting_depth;
   }

   virtual void visit(ir_function_signature *ir)
   {
      /* these are not strictly necessary */
      assert(!this->function.signature);
      assert(!this->loop.loop);

      bool lower_return;
      if (strcmp(ir->function_name(), "main") == 0)
         lower_return = lower_main_return;
      else
         lower_return = lower_sub_return;

      function_record saved_function = this->function;
      loop_record saved_loop = this->loop;
      this->function = function_record(ir, lower_return);
      this->loop = loop_record(ir);

      assert(!this->loop.loop);

      /* Visit the body of the function to lower any jumps that occur
       * in it, except possibly an unconditional return statement at
       * the end of it.
       */
      visit_block(&ir->body);

      /* If the body ended in an unconditional return of non-void,
       * then we don't need to lower it because it's the one canonical
       * return.
       *
       * If the body ended in a return of void, eliminate it because
       * it is redundant.
       */
      if (ir->return_type->is_void() &&
          get_jump_strength((ir_instruction *) ir->body.get_tail())) {
         ir_jump *jump = (ir_jump *) ir->body.get_tail();
         assert (jump->ir_type == ir_type_return);
         jump->remove();
      }

      if(this->function.return_value)
         ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value)));

      this->loop = saved_loop;
      this->function = saved_function;
   }

   virtual void visit(class ir_function * ir)
   {
      visit_block(&ir->signatures);
   }
};

} /* anonymous namespace */

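/**
 * Entry point for the pass.  The flag combination is normally chosen by the
 * caller based on hardware capabilities; a hypothetical invocation that
 * pulls jumps out of ifs, lowers sub-function and main returns plus breaks,
 * and leaves continues alone might look like:
 *
 *    do_lower_jumps(ir, true, true, true, false, true);
 *
 * The visitor is run repeatedly until it stops making progress.
 */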
bool
do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_return, bool lower_main_return, bool lower_continue, bool lower_break)
{
   ir_lower_jumps_visitor v;
   v.pull_out_jumps = pull_out_jumps;
   v.lower_continue = lower_continue;
   v.lower_break = lower_break;
   v.lower_sub_return = lower_sub_return;
   v.lower_main_return = lower_main_return;

   bool progress_ever = false;
   do {
      v.progress = false;
      visit_exec_list(instructions, &v);
      progress_ever = v.progress || progress_ever;
   } while (v.progress);

   return progress_ever;
}