Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Jason Ekstrand (jason@jlekstrand.net)
  25.  *
  26.  */
  27.  
  28. #include "nir.h"
  29.  
  30. /*
  31.  * This pass lowers the neg, abs, and sat operations to source modifiers on
  32.  * ALU operations to make things nicer for the backend.  It's just much
  33.  * easier to not have them when we're doing optimizations.
  34.  */
  35.  
  36. static bool
  37. nir_lower_to_source_mods_block(nir_block *block, void *state)
  38. {
  39.    nir_foreach_instr(block, instr) {
  40.       if (instr->type != nir_instr_type_alu)
  41.          continue;
  42.  
  43.       nir_alu_instr *alu = nir_instr_as_alu(instr);
  44.  
  45.       for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
  46.          if (!alu->src[i].src.is_ssa)
  47.             continue;
  48.  
  49.          if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu)
  50.             continue;
  51.  
  52.          nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr);
  53.  
  54.          if (parent->dest.saturate)
  55.             continue;
  56.  
  57.          switch (nir_op_infos[alu->op].input_types[i]) {
  58.          case nir_type_float:
  59.             if (parent->op != nir_op_fmov)
  60.                continue;
  61.             break;
  62.          case nir_type_int:
  63.             if (parent->op != nir_op_imov)
  64.                continue;
  65.             break;
  66.          default:
  67.             continue;
  68.          }
  69.  
  70.          /* We can only do a rewrite if the source we are copying is SSA.
  71.           * Otherwise, moving the read might invalidly reorder reads/writes
  72.           * on a register.
  73.           */
  74.          if (!parent->src[0].src.is_ssa)
  75.             continue;
  76.  
  77.          nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src);
  78.          if (alu->src[i].abs) {
  79.             /* abs trumps both neg and abs, do nothing */
  80.          } else {
  81.             alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate);
  82.             alu->src[i].abs |= parent->src[0].abs;
  83.          }
  84.  
  85.          for (int j = 0; j < 4; ++j) {
  86.             if (!nir_alu_instr_channel_used(alu, i, j))
  87.                continue;
  88.             alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]];
  89.          }
  90.  
  91.          if (list_empty(&parent->dest.dest.ssa.uses) &&
  92.              list_empty(&parent->dest.dest.ssa.if_uses))
  93.             nir_instr_remove(&parent->instr);
  94.       }
  95.  
  96.       switch (alu->op) {
  97.       case nir_op_fsat:
  98.          alu->op = nir_op_fmov;
  99.          alu->dest.saturate = true;
  100.          break;
  101.       case nir_op_ineg:
  102.          alu->op = nir_op_imov;
  103.          alu->src[0].negate = !alu->src[0].negate;
  104.          break;
  105.       case nir_op_fneg:
  106.          alu->op = nir_op_fmov;
  107.          alu->src[0].negate = !alu->src[0].negate;
  108.          break;
  109.       case nir_op_iabs:
  110.          alu->op = nir_op_imov;
  111.          alu->src[0].abs = true;
  112.          alu->src[0].negate = false;
  113.          break;
  114.       case nir_op_fabs:
  115.          alu->op = nir_op_fmov;
  116.          alu->src[0].abs = true;
  117.          alu->src[0].negate = false;
  118.          break;
  119.       default:
  120.          break;
  121.       }
  122.  
  123.       /* We've covered sources.  Now we're going to try and saturate the
  124.        * destination if we can.
  125.        */
  126.  
  127.       if (!alu->dest.dest.is_ssa)
  128.          continue;
  129.  
  130.       /* We can only saturate float destinations */
  131.       if (nir_op_infos[alu->op].output_type != nir_type_float)
  132.          continue;
  133.  
  134.       if (!list_empty(&alu->dest.dest.ssa.if_uses))
  135.          continue;
  136.  
  137.       bool all_children_are_sat = true;
  138.       nir_foreach_use(&alu->dest.dest.ssa, child_src) {
  139.          assert(child_src->is_ssa);
  140.          nir_instr *child = child_src->parent_instr;
  141.          if (child->type != nir_instr_type_alu) {
  142.             all_children_are_sat = false;
  143.             continue;
  144.          }
  145.  
  146.          nir_alu_instr *child_alu = nir_instr_as_alu(child);
  147.          if (child_alu->src[0].negate || child_alu->src[0].abs) {
  148.             all_children_are_sat = false;
  149.             continue;
  150.          }
  151.  
  152.          if (child_alu->op != nir_op_fsat &&
  153.              !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) {
  154.             all_children_are_sat = false;
  155.             continue;
  156.          }
  157.       }
  158.  
  159.       if (!all_children_are_sat)
  160.          continue;
  161.  
  162.       alu->dest.saturate = true;
  163.  
  164.       nir_foreach_use(&alu->dest.dest.ssa, child_src) {
  165.          assert(child_src->is_ssa);
  166.          nir_instr *child = child_src->parent_instr;
  167.          assert(child->type == nir_instr_type_alu);
  168.          nir_alu_instr *child_alu = nir_instr_as_alu(child);
  169.  
  170.          child_alu->op = nir_op_fmov;
  171.          child_alu->dest.saturate = false;
  172.          /* We could propagate the dest of our instruction to the
  173.           * destinations of the uses here.  However, one quick round of
  174.           * copy propagation will clean that all up and then we don't have
  175.           * the complexity.
  176.           */
  177.       }
  178.    }
  179.  
  180.    return true;
  181. }
  182.  
  183. static void
  184. nir_lower_to_source_mods_impl(nir_function_impl *impl)
  185. {
  186.    nir_foreach_block(impl, nir_lower_to_source_mods_block, NULL);
  187. }
  188.  
  189. void
  190. nir_lower_to_source_mods(nir_shader *shader)
  191. {
  192.    nir_foreach_overload(shader, overload) {
  193.       if (overload->impl)
  194.          nir_lower_to_source_mods_impl(overload->impl);
  195.    }
  196. }
  197.