Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * @file brw_vec4_copy_propagation.cpp
  26.  *
  27.  * Implements tracking of values copied between registers, and
  28.  * optimizations based on that: copy propagation and constant
  29.  * propagation.
  30.  */
  31.  
  32. #include "brw_vec4.h"
  33. #include "brw_cfg.h"
  34. extern "C" {
  35. #include "main/macros.h"
  36. }
  37.  
  38. namespace brw {
  39.  
  40. struct copy_entry {
  41.    src_reg *value[4];
  42.    int saturatemask;
  43. };
  44.  
  45. static bool
  46. is_direct_copy(vec4_instruction *inst)
  47. {
  48.    return (inst->opcode == BRW_OPCODE_MOV &&
  49.            !inst->predicate &&
  50.            inst->dst.file == GRF &&
  51.            !inst->dst.reladdr &&
  52.            !inst->src[0].reladdr &&
  53.            (inst->dst.type == inst->src[0].type ||
  54.             (inst->dst.type == BRW_REGISTER_TYPE_F &&
  55.              inst->src[0].type == BRW_REGISTER_TYPE_VF)));
  56. }
  57.  
  58. static bool
  59. is_dominated_by_previous_instruction(vec4_instruction *inst)
  60. {
  61.    return (inst->opcode != BRW_OPCODE_DO &&
  62.            inst->opcode != BRW_OPCODE_WHILE &&
  63.            inst->opcode != BRW_OPCODE_ELSE &&
  64.            inst->opcode != BRW_OPCODE_ENDIF);
  65. }
  66.  
  67. static bool
  68. is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
  69. {
  70.    const src_reg *src = values[ch];
  71.  
  72.    /* consider GRF only */
  73.    assert(inst->dst.file == GRF);
  74.    if (!src || src->file != GRF)
  75.       return false;
  76.  
  77.    return (src->in_range(inst->dst, inst->regs_written) &&
  78.            inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
  79. }
  80.  
  81. static unsigned
  82. swizzle_vf_imm(unsigned vf4, unsigned swizzle)
  83. {
  84.    union {
  85.       unsigned vf4;
  86.       uint8_t vf[4];
  87.    } v = { vf4 }, ret;
  88.  
  89.    ret.vf[0] = v.vf[BRW_GET_SWZ(swizzle, 0)];
  90.    ret.vf[1] = v.vf[BRW_GET_SWZ(swizzle, 1)];
  91.    ret.vf[2] = v.vf[BRW_GET_SWZ(swizzle, 2)];
  92.    ret.vf[3] = v.vf[BRW_GET_SWZ(swizzle, 3)];
  93.  
  94.    return ret.vf4;
  95. }
  96.  
  97. static bool
  98. is_logic_op(enum opcode opcode)
  99. {
  100.    return (opcode == BRW_OPCODE_AND ||
  101.            opcode == BRW_OPCODE_OR  ||
  102.            opcode == BRW_OPCODE_XOR ||
  103.            opcode == BRW_OPCODE_NOT);
  104. }
  105.  
  106. static bool
  107. try_constant_propagate(const struct brw_device_info *devinfo,
  108.                        vec4_instruction *inst,
  109.                        int arg, struct copy_entry *entry)
  110. {
  111.    /* For constant propagation, we only handle the same constant
  112.     * across all 4 channels.  Some day, we should handle the 8-bit
  113.     * float vector format, which would let us constant propagate
  114.     * vectors better.
  115.     */
  116.    src_reg value = *entry->value[0];
  117.    for (int i = 1; i < 4; i++) {
  118.       if (!value.equals(*entry->value[i]))
  119.          return false;
  120.    }
  121.  
  122.    if (value.file != IMM)
  123.       return false;
  124.  
  125.    if (value.type == BRW_REGISTER_TYPE_VF) {
  126.       /* The result of bit-casting the component values of a vector float
  127.        * cannot in general be represented as an immediate.
  128.        */
  129.       if (inst->src[arg].type != BRW_REGISTER_TYPE_F)
  130.          return false;
  131.    } else {
  132.       value.type = inst->src[arg].type;
  133.    }
  134.  
  135.    if (inst->src[arg].abs) {
  136.       if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
  137.           !brw_abs_immediate(value.type, &value.fixed_hw_reg)) {
  138.          return false;
  139.       }
  140.    }
  141.  
  142.    if (inst->src[arg].negate) {
  143.       if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
  144.           !brw_negate_immediate(value.type, &value.fixed_hw_reg)) {
  145.          return false;
  146.       }
  147.    }
  148.  
  149.    if (value.type == BRW_REGISTER_TYPE_VF)
  150.       value.fixed_hw_reg.dw1.ud = swizzle_vf_imm(value.fixed_hw_reg.dw1.ud,
  151.                                                  inst->src[arg].swizzle);
  152.  
  153.    switch (inst->opcode) {
  154.    case BRW_OPCODE_MOV:
  155.    case SHADER_OPCODE_BROADCAST:
  156.       inst->src[arg] = value;
  157.       return true;
  158.  
  159.    case SHADER_OPCODE_POW:
  160.    case SHADER_OPCODE_INT_QUOTIENT:
  161.    case SHADER_OPCODE_INT_REMAINDER:
  162.       if (devinfo->gen < 8)
  163.          break;
  164.       /* fallthrough */
  165.    case BRW_OPCODE_DP2:
  166.    case BRW_OPCODE_DP3:
  167.    case BRW_OPCODE_DP4:
  168.    case BRW_OPCODE_DPH:
  169.    case BRW_OPCODE_BFI1:
  170.    case BRW_OPCODE_ASR:
  171.    case BRW_OPCODE_SHL:
  172.    case BRW_OPCODE_SHR:
  173.    case BRW_OPCODE_SUBB:
  174.       if (arg == 1) {
  175.          inst->src[arg] = value;
  176.          return true;
  177.       }
  178.       break;
  179.  
  180.    case BRW_OPCODE_MACH:
  181.    case BRW_OPCODE_MUL:
  182.    case BRW_OPCODE_ADD:
  183.    case BRW_OPCODE_OR:
  184.    case BRW_OPCODE_AND:
  185.    case BRW_OPCODE_XOR:
  186.    case BRW_OPCODE_ADDC:
  187.       if (arg == 1) {
  188.          inst->src[arg] = value;
  189.          return true;
  190.       } else if (arg == 0 && inst->src[1].file != IMM) {
  191.          /* Fit this constant in by commuting the operands.  Exception: we
  192.           * can't do this for 32-bit integer MUL/MACH because it's asymmetric.
  193.           */
  194.          if ((inst->opcode == BRW_OPCODE_MUL ||
  195.               inst->opcode == BRW_OPCODE_MACH) &&
  196.              (inst->src[1].type == BRW_REGISTER_TYPE_D ||
  197.               inst->src[1].type == BRW_REGISTER_TYPE_UD))
  198.             break;
  199.          inst->src[0] = inst->src[1];
  200.          inst->src[1] = value;
  201.          return true;
  202.       }
  203.       break;
  204.  
  205.    case BRW_OPCODE_CMP:
  206.       if (arg == 1) {
  207.          inst->src[arg] = value;
  208.          return true;
  209.       } else if (arg == 0 && inst->src[1].file != IMM) {
  210.          enum brw_conditional_mod new_cmod;
  211.  
  212.          new_cmod = brw_swap_cmod(inst->conditional_mod);
  213.          if (new_cmod != BRW_CONDITIONAL_NONE) {
  214.             /* Fit this constant in by swapping the operands and
  215.              * flipping the test.
  216.              */
  217.             inst->src[0] = inst->src[1];
  218.             inst->src[1] = value;
  219.             inst->conditional_mod = new_cmod;
  220.             return true;
  221.          }
  222.       }
  223.       break;
  224.  
  225.    case BRW_OPCODE_SEL:
  226.       if (arg == 1) {
  227.          inst->src[arg] = value;
  228.          return true;
  229.       } else if (arg == 0 && inst->src[1].file != IMM) {
  230.          inst->src[0] = inst->src[1];
  231.          inst->src[1] = value;
  232.  
  233.          /* If this was predicated, flipping operands means
  234.           * we also need to flip the predicate.
  235.           */
  236.          if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
  237.             inst->predicate_inverse = !inst->predicate_inverse;
  238.          }
  239.          return true;
  240.       }
  241.       break;
  242.  
  243.    default:
  244.       break;
  245.    }
  246.  
  247.    return false;
  248. }
  249.  
  250. static bool
  251. try_copy_propagate(const struct brw_device_info *devinfo,
  252.                    vec4_instruction *inst,
  253.                    int arg, struct copy_entry *entry)
  254. {
  255.    /* For constant propagation, we only handle the same constant
  256.     * across all 4 channels.  Some day, we should handle the 8-bit
  257.     * float vector format, which would let us constant propagate
  258.     * vectors better.
  259.     */
  260.    src_reg value = *entry->value[0];
  261.    for (int i = 1; i < 4; i++) {
  262.       /* This is equals() except we don't care about the swizzle. */
  263.       if (value.file != entry->value[i]->file ||
  264.           value.reg != entry->value[i]->reg ||
  265.           value.reg_offset != entry->value[i]->reg_offset ||
  266.           value.type != entry->value[i]->type ||
  267.           value.negate != entry->value[i]->negate ||
  268.           value.abs != entry->value[i]->abs) {
  269.          return false;
  270.       }
  271.    }
  272.  
  273.    /* Compute the swizzle of the original register by swizzling the
  274.     * component loaded from each value according to the swizzle of
  275.     * operand we're going to change.
  276.     */
  277.    int s[4];
  278.    for (int i = 0; i < 4; i++) {
  279.       s[i] = BRW_GET_SWZ(entry->value[i]->swizzle, i);
  280.    }
  281.    value.swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
  282.                                        BRW_SWIZZLE4(s[0], s[1], s[2], s[3]));
  283.  
  284.    if (value.file != UNIFORM &&
  285.        value.file != GRF &&
  286.        value.file != ATTR)
  287.       return false;
  288.  
  289.    if (devinfo->gen >= 8 && (value.negate || value.abs) &&
  290.        is_logic_op(inst->opcode)) {
  291.       return false;
  292.    }
  293.  
  294.    if (inst->src[arg].abs) {
  295.       value.negate = false;
  296.       value.abs = true;
  297.    }
  298.    if (inst->src[arg].negate)
  299.       value.negate = !value.negate;
  300.  
  301.    bool has_source_modifiers = value.negate || value.abs;
  302.  
  303.    /* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on
  304.     * instructions.
  305.     */
  306.    if ((has_source_modifiers || value.file == UNIFORM ||
  307.         value.swizzle != BRW_SWIZZLE_XYZW) && !inst->can_do_source_mods(devinfo))
  308.       return false;
  309.  
  310.    if (has_source_modifiers && value.type != inst->src[arg].type)
  311.       return false;
  312.  
  313.    if (has_source_modifiers &&
  314.        inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
  315.       return false;
  316.  
  317.    if (inst->is_3src() && value.file == UNIFORM)
  318.       return false;
  319.  
  320.    if (inst->is_send_from_grf())
  321.       return false;
  322.  
  323.    /* we can't generally copy-propagate UD negations becuse we
  324.     * end up accessing the resulting values as signed integers
  325.     * instead. See also resolve_ud_negate().
  326.     */
  327.    if (value.negate &&
  328.        value.type == BRW_REGISTER_TYPE_UD)
  329.       return false;
  330.  
  331.    /* Don't report progress if this is a noop. */
  332.    if (value.equals(inst->src[arg]))
  333.       return false;
  334.  
  335.    const unsigned dst_saturate_mask = inst->dst.writemask &
  336.       brw_apply_swizzle_to_mask(inst->src[arg].swizzle, entry->saturatemask);
  337.  
  338.    if (dst_saturate_mask) {
  339.       /* We either saturate all or nothing. */
  340.       if (dst_saturate_mask != inst->dst.writemask)
  341.          return false;
  342.  
  343.       /* Limit saturate propagation only to SEL with src1 bounded within 0.0
  344.        * and 1.0, otherwise skip copy propagate altogether.
  345.        */
  346.       switch(inst->opcode) {
  347.       case BRW_OPCODE_SEL:
  348.          if (arg != 0 ||
  349.              inst->src[0].type != BRW_REGISTER_TYPE_F ||
  350.              inst->src[1].file != IMM ||
  351.              inst->src[1].type != BRW_REGISTER_TYPE_F ||
  352.              inst->src[1].fixed_hw_reg.dw1.f < 0.0 ||
  353.              inst->src[1].fixed_hw_reg.dw1.f > 1.0) {
  354.             return false;
  355.          }
  356.          if (!inst->saturate)
  357.             inst->saturate = true;
  358.          break;
  359.       default:
  360.          return false;
  361.       }
  362.    }
  363.  
  364.    value.type = inst->src[arg].type;
  365.    inst->src[arg] = value;
  366.    return true;
  367. }
  368.  
  369. bool
  370. vec4_visitor::opt_copy_propagation(bool do_constant_prop)
  371. {
  372.    bool progress = false;
  373.    struct copy_entry entries[alloc.total_size];
  374.  
  375.    memset(&entries, 0, sizeof(entries));
  376.  
  377.    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
  378.       /* This pass only works on basic blocks.  If there's flow
  379.        * control, throw out all our information and start from
  380.        * scratch.
  381.        *
  382.        * This should really be fixed by using a structure like in
  383.        * src/glsl/opt_copy_propagation.cpp to track available copies.
  384.        */
  385.       if (!is_dominated_by_previous_instruction(inst)) {
  386.          memset(&entries, 0, sizeof(entries));
  387.          continue;
  388.       }
  389.  
  390.       /* For each source arg, see if each component comes from a copy
  391.        * from the same type file (IMM, GRF, UNIFORM), and try
  392.        * optimizing out access to the copy result
  393.        */
  394.       for (int i = 2; i >= 0; i--) {
  395.          /* Copied values end up in GRFs, and we don't track reladdr
  396.           * accesses.
  397.           */
  398.          if (inst->src[i].file != GRF ||
  399.              inst->src[i].reladdr)
  400.             continue;
  401.  
  402.          /* We only handle single-register copies. */
  403.          if (inst->regs_read(i) != 1)
  404.             continue;
  405.  
  406.          int reg = (alloc.offsets[inst->src[i].reg] +
  407.                     inst->src[i].reg_offset);
  408.  
  409.          /* Find the regs that each swizzle component came from.
  410.           */
  411.          struct copy_entry entry;
  412.          memset(&entry, 0, sizeof(copy_entry));
  413.          int c;
  414.          for (c = 0; c < 4; c++) {
  415.             int channel = BRW_GET_SWZ(inst->src[i].swizzle, c);
  416.             entry.value[c] = entries[reg].value[channel];
  417.  
  418.             /* If there's no available copy for this channel, bail.
  419.              * We could be more aggressive here -- some channels might
  420.              * not get used based on the destination writemask.
  421.              */
  422.             if (!entry.value[c])
  423.                break;
  424.  
  425.             entry.saturatemask |=
  426.                (entries[reg].saturatemask & (1 << channel) ? 1 : 0) << c;
  427.  
  428.             /* We'll only be able to copy propagate if the sources are
  429.              * all from the same file -- there's no ability to swizzle
  430.              * 0 or 1 constants in with source registers like in i915.
  431.              */
  432.             if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file)
  433.                break;
  434.          }
  435.  
  436.          if (c != 4)
  437.             continue;
  438.  
  439.          if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
  440.             progress = true;
  441.  
  442.          if (try_copy_propagate(devinfo, inst, i, &entry))
  443.             progress = true;
  444.       }
  445.  
  446.       /* Track available source registers. */
  447.       if (inst->dst.file == GRF) {
  448.          const int reg =
  449.             alloc.offsets[inst->dst.reg] + inst->dst.reg_offset;
  450.  
  451.          /* Update our destination's current channel values.  For a direct copy,
  452.           * the value is the newly propagated source.  Otherwise, we don't know
  453.           * the new value, so clear it.
  454.           */
  455.          bool direct_copy = is_direct_copy(inst);
  456.          entries[reg].saturatemask &= ~inst->dst.writemask;
  457.          for (int i = 0; i < 4; i++) {
  458.             if (inst->dst.writemask & (1 << i)) {
  459.                entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL;
  460.                entries[reg].saturatemask |=
  461.                   inst->saturate && direct_copy ? 1 << i : 0;
  462.             }
  463.          }
  464.  
  465.          /* Clear the records for any registers whose current value came from
  466.           * our destination's updated channels, as the two are no longer equal.
  467.           */
  468.          if (inst->dst.reladdr)
  469.             memset(&entries, 0, sizeof(entries));
  470.          else {
  471.             for (unsigned i = 0; i < alloc.total_size; i++) {
  472.                for (int j = 0; j < 4; j++) {
  473.                   if (is_channel_updated(inst, entries[i].value, j)) {
  474.                      entries[i].value[j] = NULL;
  475.                      entries[i].saturatemask &= ~(1 << j);
  476.                   }
  477.                }
  478.             }
  479.          }
  480.       }
  481.    }
  482.  
  483.    if (progress)
  484.       invalidate_live_intervals();
  485.  
  486.    return progress;
  487. }
  488.  
  489. } /* namespace brw */
  490.