Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Jason Ekstrand (jason@jlekstrand.net)
  25.  *
  26.  */
  27.  
  28. #include "nir.h"
  29.  
  30. /*
  31.  * Implements common subexpression elimination
  32.  */
  33.  
  34. struct cse_state {
  35.    void *mem_ctx;
  36.    bool progress;
  37. };
  38.  
  39. static bool
  40. nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1,
  41.                    unsigned src2)
  42. {
  43.    if (alu1->src[src1].abs != alu2->src[src2].abs ||
  44.        alu1->src[src1].negate != alu2->src[src2].negate)
  45.       return false;
  46.  
  47.    for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
  48.       if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
  49.          return false;
  50.    }
  51.  
  52.    return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
  53. }
  54.  
  55. static bool
  56. nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
  57. {
  58.    if (instr1->type != instr2->type)
  59.       return false;
  60.  
  61.    switch (instr1->type) {
  62.    case nir_instr_type_alu: {
  63.       nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
  64.       nir_alu_instr *alu2 = nir_instr_as_alu(instr2);
  65.  
  66.       if (alu1->op != alu2->op)
  67.          return false;
  68.  
  69.       /* TODO: We can probably acutally do something more inteligent such
  70.        * as allowing different numbers and taking a maximum or something
  71.        * here */
  72.       if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
  73.          return false;
  74.  
  75.       if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
  76.          assert(nir_op_infos[alu1->op].num_inputs == 2);
  77.          return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
  78.                  nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
  79.                 (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
  80.                  nir_alu_srcs_equal(alu1, alu2, 1, 0));
  81.       } else {
  82.          for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
  83.             if (!nir_alu_srcs_equal(alu1, alu2, i, i))
  84.                return false;
  85.          }
  86.       }
  87.       return true;
  88.    }
  89.    case nir_instr_type_tex:
  90.       return false;
  91.    case nir_instr_type_load_const: {
  92.       nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
  93.       nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
  94.  
  95.       if (load1->def.num_components != load2->def.num_components)
  96.          return false;
  97.  
  98.       return memcmp(load1->value.f, load2->value.f,
  99.                     load1->def.num_components * sizeof(*load2->value.f)) == 0;
  100.    }
  101.    case nir_instr_type_phi: {
  102.       nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
  103.       nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
  104.  
  105.       if (phi1->instr.block != phi2->instr.block)
  106.          return false;
  107.  
  108.       nir_foreach_phi_src(phi1, src1) {
  109.          nir_foreach_phi_src(phi2, src2) {
  110.             if (src1->pred == src2->pred) {
  111.                if (!nir_srcs_equal(src1->src, src2->src))
  112.                   return false;
  113.  
  114.                break;
  115.             }
  116.          }
  117.       }
  118.  
  119.       return true;
  120.    }
  121.    case nir_instr_type_intrinsic: {
  122.       nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
  123.       nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
  124.       const nir_intrinsic_info *info =
  125.          &nir_intrinsic_infos[intrinsic1->intrinsic];
  126.  
  127.       if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
  128.           intrinsic1->num_components != intrinsic2->num_components)
  129.          return false;
  130.  
  131.       if (info->has_dest && intrinsic1->dest.ssa.num_components !=
  132.                             intrinsic2->dest.ssa.num_components)
  133.          return false;
  134.  
  135.       for (unsigned i = 0; i < info->num_srcs; i++) {
  136.          if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
  137.             return false;
  138.       }
  139.  
  140.       assert(info->num_variables == 0);
  141.  
  142.       for (unsigned i = 0; i < info->num_indices; i++) {
  143.          if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
  144.             return false;
  145.       }
  146.  
  147.       return true;
  148.    }
  149.    case nir_instr_type_call:
  150.    case nir_instr_type_jump:
  151.    case nir_instr_type_ssa_undef:
  152.    case nir_instr_type_parallel_copy:
  153.    default:
  154.       unreachable("Invalid instruction type");
  155.    }
  156.  
  157.    return false;
  158. }
  159.  
  160. static bool
  161. src_is_ssa(nir_src *src, void *data)
  162. {
  163.    (void) data;
  164.    return src->is_ssa;
  165. }
  166.  
  167. static bool
  168. dest_is_ssa(nir_dest *dest, void *data)
  169. {
  170.    (void) data;
  171.    return dest->is_ssa;
  172. }
  173.  
  174. static bool
  175. nir_instr_can_cse(nir_instr *instr)
  176. {
  177.    /* We only handle SSA. */
  178.    if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
  179.        !nir_foreach_src(instr, src_is_ssa, NULL))
  180.       return false;
  181.  
  182.    switch (instr->type) {
  183.    case nir_instr_type_alu:
  184.    case nir_instr_type_load_const:
  185.    case nir_instr_type_phi:
  186.       return true;
  187.    case nir_instr_type_tex:
  188.       return false; /* TODO */
  189.    case nir_instr_type_intrinsic: {
  190.       const nir_intrinsic_info *info =
  191.          &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
  192.       return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
  193.              (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
  194.              info->num_variables == 0; /* not implemented yet */
  195.    }
  196.    case nir_instr_type_call:
  197.    case nir_instr_type_jump:
  198.    case nir_instr_type_ssa_undef:
  199.       return false;
  200.    case nir_instr_type_parallel_copy:
  201.    default:
  202.       unreachable("Invalid instruction type");
  203.    }
  204.  
  205.    return false;
  206. }
  207.  
  208. static nir_ssa_def *
  209. nir_instr_get_dest_ssa_def(nir_instr *instr)
  210. {
  211.    switch (instr->type) {
  212.    case nir_instr_type_alu:
  213.       assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
  214.       return &nir_instr_as_alu(instr)->dest.dest.ssa;
  215.    case nir_instr_type_load_const:
  216.       return &nir_instr_as_load_const(instr)->def;
  217.    case nir_instr_type_phi:
  218.       assert(nir_instr_as_phi(instr)->dest.is_ssa);
  219.       return &nir_instr_as_phi(instr)->dest.ssa;
  220.    case nir_instr_type_intrinsic:
  221.       assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
  222.       return &nir_instr_as_intrinsic(instr)->dest.ssa;
  223.    default:
  224.       unreachable("We never ask for any of these");
  225.    }
  226. }
  227.  
  228. static void
  229. nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
  230. {
  231.    if (!nir_instr_can_cse(instr))
  232.       return;
  233.  
  234.    for (struct exec_node *node = instr->node.prev;
  235.         !exec_node_is_head_sentinel(node); node = node->prev) {
  236.       nir_instr *other = exec_node_data(nir_instr, node, node);
  237.       if (nir_instrs_equal(instr, other)) {
  238.          nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
  239.          nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
  240.                                   nir_src_for_ssa(other_def),
  241.                                   state->mem_ctx);
  242.          nir_instr_remove(instr);
  243.          state->progress = true;
  244.          return;
  245.       }
  246.    }
  247.  
  248.    for (nir_block *block = instr->block->imm_dom;
  249.         block != NULL; block = block->imm_dom) {
  250.       nir_foreach_instr_reverse(block, other) {
  251.          if (nir_instrs_equal(instr, other)) {
  252.             nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
  253.             nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
  254.                                      nir_src_for_ssa(other_def),
  255.                                      state->mem_ctx);
  256.             nir_instr_remove(instr);
  257.             state->progress = true;
  258.             return;
  259.          }
  260.       }
  261.    }
  262. }
  263.  
  264. static bool
  265. nir_opt_cse_block(nir_block *block, void *void_state)
  266. {
  267.    struct cse_state *state = void_state;
  268.  
  269.    nir_foreach_instr_safe(block, instr)
  270.       nir_opt_cse_instr(instr, state);
  271.  
  272.    return true;
  273. }
  274.  
  275. static bool
  276. nir_opt_cse_impl(nir_function_impl *impl)
  277. {
  278.    struct cse_state state;
  279.  
  280.    state.mem_ctx = ralloc_parent(impl);
  281.    state.progress = false;
  282.  
  283.    nir_metadata_require(impl, nir_metadata_dominance);
  284.  
  285.    nir_foreach_block(impl, nir_opt_cse_block, &state);
  286.  
  287.    if (state.progress)
  288.       nir_metadata_preserve(impl, nir_metadata_block_index |
  289.                                   nir_metadata_dominance);
  290.  
  291.    return state.progress;
  292. }
  293.  
  294. bool
  295. nir_opt_cse(nir_shader *shader)
  296. {
  297.    bool progress = false;
  298.  
  299.    nir_foreach_overload(shader, overload) {
  300.       if (overload->impl)
  301.          progress |= nir_opt_cse_impl(overload->impl);
  302.    }
  303.  
  304.    return progress;
  305. }
  306.