Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Jason Ekstrand (jason@jlekstrand.net)
  25.  *
  26.  */
  27.  
  28. #include "nir.h"
  29.  
  30. /*
  31.  * Implements a small peephole optimization that looks for
  32.  *
  33.  * if (cond) {
  34.  *    <empty>
  35.  * } else {
  36.  *    <empty>
  37.  * }
  38.  * phi
  39.  * ...
  40.  * phi
  41.  *
  42.  * and replaces it with a series of selects.  It can also handle the case
  43.  * where, instead of being empty, the if may contain some move operations
  44.  * whose only use is one of the following phi nodes.  This happens all the
  45.  * time when the SSA form comes from a conditional assignment with a
  46.  * swizzle.
  47.  */
  48.  
  49. struct peephole_select_state {
  50.    void *mem_ctx;
  51.    bool progress;
  52. };
  53.  
  54. static bool
  55. block_check_for_allowed_instrs(nir_block *block)
  56. {
  57.    nir_foreach_instr(block, instr) {
  58.       switch (instr->type) {
  59.       case nir_instr_type_intrinsic: {
  60.          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
  61.  
  62.          switch (intrin->intrinsic) {
  63.          case nir_intrinsic_load_var:
  64.             switch (intrin->variables[0]->var->data.mode) {
  65.             case nir_var_shader_in:
  66.             case nir_var_uniform:
  67.                break;
  68.  
  69.             default:
  70.                return false;
  71.             }
  72.             break;
  73.  
  74.          default:
  75.             return false;
  76.          }
  77.  
  78.          break;
  79.       }
  80.  
  81.       case nir_instr_type_load_const:
  82.          break;
  83.  
  84.       case nir_instr_type_alu: {
  85.          /* It must be a move operation */
  86.          nir_alu_instr *mov = nir_instr_as_alu(instr);
  87.          if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
  88.              mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
  89.              mov->op != nir_op_fabs && mov->op != nir_op_iabs)
  90.             return false;
  91.  
  92.          /* Can't handle saturate */
  93.          if (mov->dest.saturate)
  94.             return false;
  95.  
  96.          /* It must be SSA */
  97.          if (!mov->dest.dest.is_ssa)
  98.             return false;
  99.  
  100.          /* It cannot have any if-uses */
  101.          if (!list_empty(&mov->dest.dest.ssa.if_uses))
  102.             return false;
  103.  
  104.          /* The only uses of this definition must be phi's in the successor */
  105.          nir_foreach_use(&mov->dest.dest.ssa, use) {
  106.             if (use->parent_instr->type != nir_instr_type_phi ||
  107.                 use->parent_instr->block != block->successors[0])
  108.                return false;
  109.          }
  110.          break;
  111.       }
  112.  
  113.       default:
  114.          return false;
  115.       }
  116.    }
  117.  
  118.    return true;
  119. }
  120.  
  121. static bool
  122. nir_opt_peephole_select_block(nir_block *block, void *void_state)
  123. {
  124.    struct peephole_select_state *state = void_state;
  125.  
  126.    /* If the block is empty, then it certainly doesn't have any phi nodes,
  127.     * so we can skip it.  This also ensures that we do an early skip on the
  128.     * end block of the function which isn't actually attached to the CFG.
  129.     */
  130.    if (exec_list_is_empty(&block->instr_list))
  131.       return true;
  132.  
  133.    if (nir_cf_node_is_first(&block->cf_node))
  134.       return true;
  135.  
  136.    nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
  137.    if (prev_node->type != nir_cf_node_if)
  138.       return true;
  139.  
  140.    nir_if *if_stmt = nir_cf_node_as_if(prev_node);
  141.    nir_cf_node *then_node = nir_if_first_then_node(if_stmt);
  142.    nir_cf_node *else_node = nir_if_first_else_node(if_stmt);
  143.  
  144.    /* We can only have one block in each side ... */
  145.    if (nir_if_last_then_node(if_stmt) != then_node ||
  146.        nir_if_last_else_node(if_stmt) != else_node)
  147.       return true;
  148.  
  149.    nir_block *then_block = nir_cf_node_as_block(then_node);
  150.    nir_block *else_block = nir_cf_node_as_block(else_node);
  151.  
  152.    /* ... and those blocks must only contain "allowed" instructions. */
  153.    if (!block_check_for_allowed_instrs(then_block) ||
  154.        !block_check_for_allowed_instrs(else_block))
  155.       return true;
  156.  
  157.    /* At this point, we know that the previous CFG node is an if-then
  158.     * statement containing only moves to phi nodes in this block.  We can
  159.     * just remove that entire CF node and replace all of the phi nodes with
  160.     * selects.
  161.     */
  162.  
  163.    nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));
  164.    assert(prev_block->cf_node.type == nir_cf_node_block);
  165.  
  166.    /* First, we move the remaining instructions from the blocks to the
  167.     * block before.  We have already guaranteed that this is safe by
  168.     * calling block_check_for_allowed_instrs()
  169.     */
  170.    nir_foreach_instr_safe(then_block, instr) {
  171.       exec_node_remove(&instr->node);
  172.       instr->block = prev_block;
  173.       exec_list_push_tail(&prev_block->instr_list, &instr->node);
  174.    }
  175.  
  176.    nir_foreach_instr_safe(else_block, instr) {
  177.       exec_node_remove(&instr->node);
  178.       instr->block = prev_block;
  179.       exec_list_push_tail(&prev_block->instr_list, &instr->node);
  180.    }
  181.  
  182.    nir_foreach_instr_safe(block, instr) {
  183.       if (instr->type != nir_instr_type_phi)
  184.          break;
  185.  
  186.       nir_phi_instr *phi = nir_instr_as_phi(instr);
  187.       nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
  188.       nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
  189.       /* Splat the condition to all channels */
  190.       memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
  191.  
  192.       assert(exec_list_length(&phi->srcs) == 2);
  193.       nir_foreach_phi_src(phi, src) {
  194.          assert(src->pred == then_block || src->pred == else_block);
  195.          assert(src->src.is_ssa);
  196.  
  197.          unsigned idx = src->pred == then_block ? 1 : 2;
  198.          nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
  199.       }
  200.  
  201.       nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
  202.                         phi->dest.ssa.num_components, phi->dest.ssa.name);
  203.       sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
  204.  
  205.       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
  206.                                nir_src_for_ssa(&sel->dest.dest.ssa),
  207.                                state->mem_ctx);
  208.  
  209.       nir_instr_insert_before(&phi->instr, &sel->instr);
  210.       nir_instr_remove(&phi->instr);
  211.    }
  212.  
  213.    nir_cf_node_remove(&if_stmt->cf_node);
  214.    state->progress = true;
  215.  
  216.    return true;
  217. }
  218.  
  219. static bool
  220. nir_opt_peephole_select_impl(nir_function_impl *impl)
  221. {
  222.    struct peephole_select_state state;
  223.  
  224.    state.mem_ctx = ralloc_parent(impl);
  225.    state.progress = false;
  226.  
  227.    nir_foreach_block(impl, nir_opt_peephole_select_block, &state);
  228.  
  229.    if (state.progress)
  230.       nir_metadata_preserve(impl, nir_metadata_none);
  231.  
  232.    return state.progress;
  233. }
  234.  
  235. bool
  236. nir_opt_peephole_select(nir_shader *shader)
  237. {
  238.    bool progress = false;
  239.  
  240.    nir_foreach_overload(shader, overload) {
  241.       if (overload->impl)
  242.          progress |= nir_opt_peephole_select_impl(overload->impl);
  243.    }
  244.  
  245.    return progress;
  246. }
  247.