Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * @file brw_vec4_copy_propagation.cpp
  26.  *
  27.  * Implements tracking of values copied between registers, and
  28.  * optimizations based on that: copy propagation and constant
  29.  * propagation.
  30.  */
  31.  
  32. #include "brw_vec4.h"
  33. extern "C" {
  34. #include "main/macros.h"
  35. }
  36.  
  37. namespace brw {
  38.  
  39. static bool
  40. is_direct_copy(vec4_instruction *inst)
  41. {
  42.    return (inst->opcode == BRW_OPCODE_MOV &&
  43.            !inst->predicate &&
  44.            inst->dst.file == GRF &&
  45.            !inst->saturate &&
  46.            !inst->dst.reladdr &&
  47.            !inst->src[0].reladdr &&
  48.            inst->dst.type == inst->src[0].type);
  49. }
  50.  
  51. static bool
  52. is_dominated_by_previous_instruction(vec4_instruction *inst)
  53. {
  54.    return (inst->opcode != BRW_OPCODE_DO &&
  55.            inst->opcode != BRW_OPCODE_WHILE &&
  56.            inst->opcode != BRW_OPCODE_ELSE &&
  57.            inst->opcode != BRW_OPCODE_ENDIF);
  58. }
  59.  
  60. static bool
  61. try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
  62. {
  63.    /* For constant propagation, we only handle the same constant
  64.     * across all 4 channels.  Some day, we should handle the 8-bit
  65.     * float vector format, which would let us constant propagate
  66.     * vectors better.
  67.     */
  68.    src_reg value = *values[0];
  69.    for (int i = 1; i < 4; i++) {
  70.       if (!value.equals(values[i]))
  71.          return false;
  72.    }
  73.  
  74.    if (value.file != IMM)
  75.       return false;
  76.  
  77.    if (inst->src[arg].abs) {
  78.       if (value.type == BRW_REGISTER_TYPE_F) {
  79.          value.imm.f = fabs(value.imm.f);
  80.       } else if (value.type == BRW_REGISTER_TYPE_D) {
  81.          if (value.imm.i < 0)
  82.             value.imm.i = -value.imm.i;
  83.       }
  84.    }
  85.  
  86.    if (inst->src[arg].negate) {
  87.       if (value.type == BRW_REGISTER_TYPE_F)
  88.          value.imm.f = -value.imm.f;
  89.       else
  90.          value.imm.u = -value.imm.u;
  91.    }
  92.  
  93.    switch (inst->opcode) {
  94.    case BRW_OPCODE_MOV:
  95.       inst->src[arg] = value;
  96.       return true;
  97.  
  98.    case BRW_OPCODE_MACH:
  99.    case BRW_OPCODE_MUL:
  100.    case BRW_OPCODE_ADD:
  101.       if (arg == 1) {
  102.          inst->src[arg] = value;
  103.          return true;
  104.       } else if (arg == 0 && inst->src[1].file != IMM) {
  105.          /* Fit this constant in by commuting the operands.  Exception: we
  106.           * can't do this for 32-bit integer MUL/MACH because it's asymmetric.
  107.           */
  108.          if ((inst->opcode == BRW_OPCODE_MUL ||
  109.               inst->opcode == BRW_OPCODE_MACH) &&
  110.              (inst->src[1].type == BRW_REGISTER_TYPE_D ||
  111.               inst->src[1].type == BRW_REGISTER_TYPE_UD))
  112.             break;
  113.          inst->src[0] = inst->src[1];
  114.          inst->src[1] = value;
  115.          return true;
  116.       }
  117.       break;
  118.  
  119.    case BRW_OPCODE_CMP:
  120.       if (arg == 1) {
  121.          inst->src[arg] = value;
  122.          return true;
  123.       } else if (arg == 0 && inst->src[1].file != IMM) {
  124.          uint32_t new_cmod;
  125.  
  126.          new_cmod = brw_swap_cmod(inst->conditional_mod);
  127.          if (new_cmod != ~0u) {
  128.             /* Fit this constant in by swapping the operands and
  129.              * flipping the test.
  130.              */
  131.             inst->src[0] = inst->src[1];
  132.             inst->src[1] = value;
  133.             inst->conditional_mod = new_cmod;
  134.             return true;
  135.          }
  136.       }
  137.       break;
  138.  
  139.    case BRW_OPCODE_SEL:
  140.       if (arg == 1) {
  141.          inst->src[arg] = value;
  142.          return true;
  143.       } else if (arg == 0 && inst->src[1].file != IMM) {
  144.          inst->src[0] = inst->src[1];
  145.          inst->src[1] = value;
  146.  
  147.          /* If this was predicated, flipping operands means
  148.           * we also need to flip the predicate.
  149.           */
  150.          if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
  151.             inst->predicate_inverse = !inst->predicate_inverse;
  152.          }
  153.          return true;
  154.       }
  155.       break;
  156.  
  157.    default:
  158.       break;
  159.    }
  160.  
  161.    return false;
  162. }
  163.  
  164. bool
  165. vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
  166.                                    src_reg *values[4])
  167. {
  168.    /* For constant propagation, we only handle the same constant
  169.     * across all 4 channels.  Some day, we should handle the 8-bit
  170.     * float vector format, which would let us constant propagate
  171.     * vectors better.
  172.     */
  173.    src_reg value = *values[0];
  174.    for (int i = 1; i < 4; i++) {
  175.       /* This is equals() except we don't care about the swizzle. */
  176.       if (value.file != values[i]->file ||
  177.           value.reg != values[i]->reg ||
  178.           value.reg_offset != values[i]->reg_offset ||
  179.           value.type != values[i]->type ||
  180.           value.negate != values[i]->negate ||
  181.           value.abs != values[i]->abs) {
  182.          return false;
  183.       }
  184.    }
  185.  
  186.    /* Compute the swizzle of the original register by swizzling the
  187.     * component loaded from each value according to the swizzle of
  188.     * operand we're going to change.
  189.     */
  190.    int s[4];
  191.    for (int i = 0; i < 4; i++) {
  192.       s[i] = BRW_GET_SWZ(values[i]->swizzle,
  193.                          BRW_GET_SWZ(inst->src[arg].swizzle, i));
  194.    }
  195.    value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
  196.  
  197.    if (value.file != UNIFORM &&
  198.        value.file != GRF &&
  199.        value.file != ATTR)
  200.       return false;
  201.  
  202.    if (inst->src[arg].abs) {
  203.       value.negate = false;
  204.       value.abs = true;
  205.    }
  206.    if (inst->src[arg].negate)
  207.       value.negate = !value.negate;
  208.  
  209.    bool has_source_modifiers = value.negate || value.abs;
  210.  
  211.    /* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on
  212.     * instructions.
  213.     */
  214.    if ((has_source_modifiers || value.file == UNIFORM ||
  215.         value.swizzle != BRW_SWIZZLE_XYZW) && !can_do_source_mods(inst))
  216.       return false;
  217.  
  218.    if (has_source_modifiers && value.type != inst->src[arg].type)
  219.       return false;
  220.  
  221.    bool is_3src_inst = (inst->opcode == BRW_OPCODE_LRP ||
  222.                         inst->opcode == BRW_OPCODE_MAD ||
  223.                         inst->opcode == BRW_OPCODE_BFE ||
  224.                         inst->opcode == BRW_OPCODE_BFI2);
  225.    if (is_3src_inst && value.file == UNIFORM)
  226.       return false;
  227.  
  228.    /* We can't copy-propagate a UD negation into a condmod
  229.     * instruction, because the condmod ends up looking at the 33-bit
  230.     * signed accumulator value instead of the 32-bit value we wanted
  231.     */
  232.    if (inst->conditional_mod &&
  233.        value.negate &&
  234.        value.type == BRW_REGISTER_TYPE_UD)
  235.       return false;
  236.  
  237.    /* Don't report progress if this is a noop. */
  238.    if (value.equals(&inst->src[arg]))
  239.       return false;
  240.  
  241.    value.type = inst->src[arg].type;
  242.    inst->src[arg] = value;
  243.    return true;
  244. }
  245.  
  246. bool
  247. vec4_visitor::opt_copy_propagation()
  248. {
  249.    bool progress = false;
  250.    src_reg *cur_value[virtual_grf_reg_count][4];
  251.  
  252.    memset(&cur_value, 0, sizeof(cur_value));
  253.  
  254.    foreach_list(node, &this->instructions) {
  255.       vec4_instruction *inst = (vec4_instruction *)node;
  256.  
  257.       /* This pass only works on basic blocks.  If there's flow
  258.        * control, throw out all our information and start from
  259.        * scratch.
  260.        *
  261.        * This should really be fixed by using a structure like in
  262.        * src/glsl/opt_copy_propagation.cpp to track available copies.
  263.        */
  264.       if (!is_dominated_by_previous_instruction(inst)) {
  265.          memset(cur_value, 0, sizeof(cur_value));
  266.          continue;
  267.       }
  268.  
  269.       /* For each source arg, see if each component comes from a copy
  270.        * from the same type file (IMM, GRF, UNIFORM), and try
  271.        * optimizing out access to the copy result
  272.        */
  273.       for (int i = 2; i >= 0; i--) {
  274.          /* Copied values end up in GRFs, and we don't track reladdr
  275.           * accesses.
  276.           */
  277.          if (inst->src[i].file != GRF ||
  278.              inst->src[i].reladdr)
  279.             continue;
  280.  
  281.          int reg = (virtual_grf_reg_map[inst->src[i].reg] +
  282.                     inst->src[i].reg_offset);
  283.  
  284.          /* Find the regs that each swizzle component came from.
  285.           */
  286.          src_reg *values[4];
  287.          int c;
  288.          for (c = 0; c < 4; c++) {
  289.             values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
  290.  
  291.             /* If there's no available copy for this channel, bail.
  292.              * We could be more aggressive here -- some channels might
  293.              * not get used based on the destination writemask.
  294.              */
  295.             if (!values[c])
  296.                break;
  297.  
  298.             /* We'll only be able to copy propagate if the sources are
  299.              * all from the same file -- there's no ability to swizzle
  300.              * 0 or 1 constants in with source registers like in i915.
  301.              */
  302.             if (c > 0 && values[c - 1]->file != values[c]->file)
  303.                break;
  304.          }
  305.  
  306.          if (c != 4)
  307.             continue;
  308.  
  309.          if (try_constant_propagation(inst, i, values) ||
  310.              try_copy_propagation(inst, i, values))
  311.             progress = true;
  312.       }
  313.  
  314.       /* Track available source registers. */
  315.       if (inst->dst.file == GRF) {
  316.          const int reg =
  317.             virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
  318.  
  319.          /* Update our destination's current channel values.  For a direct copy,
  320.           * the value is the newly propagated source.  Otherwise, we don't know
  321.           * the new value, so clear it.
  322.           */
  323.          bool direct_copy = is_direct_copy(inst);
  324.          for (int i = 0; i < 4; i++) {
  325.             if (inst->dst.writemask & (1 << i)) {
  326.                cur_value[reg][i] = direct_copy ? &inst->src[0] : NULL;
  327.             }
  328.          }
  329.  
  330.          /* Clear the records for any registers whose current value came from
  331.           * our destination's updated channels, as the two are no longer equal.
  332.           */
  333.          if (inst->dst.reladdr)
  334.             memset(cur_value, 0, sizeof(cur_value));
  335.          else {
  336.             for (int i = 0; i < virtual_grf_reg_count; i++) {
  337.                for (int j = 0; j < 4; j++) {
  338.                   if (inst->dst.writemask & (1 << j) &&
  339.                       cur_value[i][j] &&
  340.                       cur_value[i][j]->file == GRF &&
  341.                       cur_value[i][j]->reg == inst->dst.reg &&
  342.                       cur_value[i][j]->reg_offset == inst->dst.reg_offset) {
  343.                      cur_value[i][j] = NULL;
  344.                   }
  345.                }
  346.             }
  347.          }
  348.       }
  349.    }
  350.  
  351.    if (progress)
  352.       live_intervals_valid = false;
  353.  
  354.    return progress;
  355. }
  356.  
  357. } /* namespace brw */
  358.