Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012, 2013, 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. #include "brw_vec4.h"
  25. #include "brw_vec4_live_variables.h"
  26. #include "brw_cfg.h"
  27.  
  28. using namespace brw;
  29.  
  30. /** @file brw_vec4_cse.cpp
  31.  *
  32.  * Support for local common subexpression elimination.
  33.  *
  34.  * See Muchnick's Advanced Compiler Design and Implementation, section
  35.  * 13.1 (p378).
  36.  */
  37.  
  38. namespace {
  39. struct aeb_entry : public exec_node {
  40.    /** The instruction that generates the expression value. */
  41.    vec4_instruction *generator;
  42.  
  43.    /** The temporary where the value is stored. */
  44.    src_reg tmp;
  45. };
  46. }
  47.  
  48. static bool
  49. is_expression(const vec4_instruction *const inst)
  50. {
  51.    switch (inst->opcode) {
  52.    case BRW_OPCODE_MOV:
  53.    case BRW_OPCODE_SEL:
  54.    case BRW_OPCODE_NOT:
  55.    case BRW_OPCODE_AND:
  56.    case BRW_OPCODE_OR:
  57.    case BRW_OPCODE_XOR:
  58.    case BRW_OPCODE_SHR:
  59.    case BRW_OPCODE_SHL:
  60.    case BRW_OPCODE_ASR:
  61.    case BRW_OPCODE_CMP:
  62.    case BRW_OPCODE_CMPN:
  63.    case BRW_OPCODE_ADD:
  64.    case BRW_OPCODE_MUL:
  65.    case BRW_OPCODE_FRC:
  66.    case BRW_OPCODE_RNDU:
  67.    case BRW_OPCODE_RNDD:
  68.    case BRW_OPCODE_RNDE:
  69.    case BRW_OPCODE_RNDZ:
  70.    case BRW_OPCODE_LINE:
  71.    case BRW_OPCODE_PLN:
  72.    case BRW_OPCODE_MAD:
  73.    case BRW_OPCODE_LRP:
  74.    case VEC4_OPCODE_UNPACK_UNIFORM:
  75.    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
  76.    case SHADER_OPCODE_BROADCAST:
  77.       return true;
  78.    case SHADER_OPCODE_RCP:
  79.    case SHADER_OPCODE_RSQ:
  80.    case SHADER_OPCODE_SQRT:
  81.    case SHADER_OPCODE_EXP2:
  82.    case SHADER_OPCODE_LOG2:
  83.    case SHADER_OPCODE_POW:
  84.    case SHADER_OPCODE_INT_QUOTIENT:
  85.    case SHADER_OPCODE_INT_REMAINDER:
  86.    case SHADER_OPCODE_SIN:
  87.    case SHADER_OPCODE_COS:
  88.       return inst->mlen == 0;
  89.    default:
  90.       return false;
  91.    }
  92. }
  93.  
  94. static bool
  95. operands_match(const vec4_instruction *a, const vec4_instruction *b)
  96. {
  97.    const src_reg *xs = a->src;
  98.    const src_reg *ys = b->src;
  99.  
  100.    if (a->opcode == BRW_OPCODE_MAD) {
  101.       return xs[0].equals(ys[0]) &&
  102.              ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) ||
  103.               (xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
  104.    } else if (!a->is_commutative()) {
  105.       return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
  106.    } else {
  107.       return (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
  108.              (xs[1].equals(ys[0]) && xs[0].equals(ys[1]));
  109.    }
  110. }
  111.  
  112. static bool
  113. instructions_match(vec4_instruction *a, vec4_instruction *b)
  114. {
  115.    return a->opcode == b->opcode &&
  116.           a->saturate == b->saturate &&
  117.           a->conditional_mod == b->conditional_mod &&
  118.           a->dst.type == b->dst.type &&
  119.           a->dst.writemask == b->dst.writemask &&
  120.           a->force_writemask_all == b->force_writemask_all &&
  121.           a->regs_written == b->regs_written &&
  122.           operands_match(a, b);
  123. }
  124.  
/**
 * Run common subexpression elimination over a single basic block.
 *
 * The function walks the instructions of \p block while maintaining `aeb`,
 * a list of expressions seen so far.  When an instruction matches an entry
 * already in the list, the instruction is removed and (unless its
 * destination is null) replaced by MOVs from a temporary that holds the
 * first occurrence's result.  After each instruction, entries that can no
 * longer be reused are dropped from the list.
 *
 * \param block  the basic block to process.
 * \return true if at least one instruction matched an earlier expression.
 */
bool
vec4_visitor::opt_cse_local(bblock_t *block)
{
   bool progress = false;
   exec_list aeb;

   /* All aeb_entry allocations hang off this context; it is freed as a
    * whole just before returning.
    */
   void *cse_ctx = ralloc_context(NULL);

   /* Tracks the index of the current instruction, starting at the block's
    * first ip; compared against var_range_end() further down.
    */
   int ip = block->start_ip;
   foreach_inst_in_block (vec4_instruction, inst, block) {
      /* Skip some cases: only unpredicated expressions with mlen == 0 are
       * considered, and a HW_REG destination is accepted only when it is
       * the null register.
       */
      if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
          (inst->dst.file != HW_REG || inst->dst.is_null()))
      {
         bool found = false;

         /* `entry` is used after this loop (in the else branch below), so
          * on a match it refers to the matching entry.
          */
         foreach_in_list_use_after(aeb_entry, entry, &aeb) {
            /* Match current instruction's expression against those in AEB.
             * An entry whose generator has a null destination is not
             * accepted for an instruction with a non-null destination.
             */
            if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) &&
                instructions_match(inst, entry->generator)) {
               found = true;
               progress = true;
               break;
            }
         }

         if (!found) {
            /* MOVs are recorded only when their source is an immediate of
             * type VF; every non-MOV expression is recorded.
             */
            if (inst->opcode != BRW_OPCODE_MOV ||
                (inst->opcode == BRW_OPCODE_MOV &&
                 inst->src[0].file == IMM &&
                 inst->src[0].type == BRW_REGISTER_TYPE_VF)) {
               /* Our first sighting of this expression.  Create an entry. */
               aeb_entry *entry = ralloc(cse_ctx, aeb_entry);
               entry->tmp = src_reg(); /* file will be BAD_FILE */
               entry->generator = inst;
               aeb.push_tail(entry);
            }
         } else {
            /* This is at least our second sighting of this expression.
             * If we don't have a temporary already, make one.
             */
            bool no_existing_temp = entry->tmp.file == BAD_FILE;
            if (no_existing_temp && !entry->generator->dst.is_null()) {
               entry->tmp = retype(src_reg(GRF, alloc.allocate(
                                              entry->generator->regs_written),
                                           NULL), inst->dst.type);

               /* Copy the temporary back into the generator's original
                * destination, one MOV per register written, placed right
                * after the generator.
                */
               for (unsigned i = 0; i < entry->generator->regs_written; ++i) {
                  vec4_instruction *copy = MOV(offset(entry->generator->dst, i),
                                               offset(entry->tmp, i));
                  copy->force_writemask_all =
                     entry->generator->force_writemask_all;
                  entry->generator->insert_after(block, copy);
               }

               /* The generator now writes the temporary instead. */
               entry->generator->dst = dst_reg(entry->tmp);
            }

            /* dest <- temp */
            if (!inst->dst.is_null()) {
               assert(inst->dst.type == entry->tmp.type);

               for (unsigned i = 0; i < inst->regs_written; ++i) {
                  vec4_instruction *copy = MOV(offset(inst->dst, i),
                                               offset(entry->tmp, i));
                  copy->force_writemask_all = inst->force_writemask_all;
                  inst->insert_before(block, copy);
               }
            }

            /* Set our iterator so that next time through the loop inst->next
             * will get the instruction in the basic block after the one we've
             * removed.
             *
             * Note that the invalidation loop below therefore examines
             * `prev` rather than the removed instruction.
             */
            vec4_instruction *prev = (vec4_instruction *)inst->prev;

            inst->remove(block);
            inst = prev;
         }
      }

      /* Drop entries that the current instruction invalidates.  The _safe
       * iterator is used because entries are removed while walking.
       */
      foreach_in_list_safe(aeb_entry, entry, &aeb) {
         /* Kill all AEB entries that write a different value to or read from
          * the flag register if we just wrote it.
          */
         if (inst->writes_flag()) {
            if (entry->generator->reads_flag() ||
                (entry->generator->writes_flag() &&
                 !instructions_match(inst, entry->generator))) {
               entry->remove();
               ralloc_free(entry);
               continue;
            }
         }

         /* Inspect each of the generator's three source slots; each kill
          * path breaks out because the entry has just been freed.
          */
         for (int i = 0; i < 3; i++) {
            src_reg *src = &entry->generator->src[i];

            /* Kill all AEB entries that use the destination we just
             * overwrote.  Only the register file and register number are
             * compared here.
             */
            if (inst->dst.file == entry->generator->src[i].file &&
                inst->dst.reg == entry->generator->src[i].reg) {
               entry->remove();
               ralloc_free(entry);
               break;
            }

            /* Kill any AEB entries using registers that don't get reused any
             * more -- a sure sign they'll fail operands_match().
             *
             * NOTE(review): the literal 4 passed to var_range_end() is
             * presumably a component count -- confirm against its
             * declaration.
             */
            if (src->file == GRF) {
               if (var_range_end(var_from_reg(alloc, *src), 4) < ip) {
                  entry->remove();
                  ralloc_free(entry);
                  break;
               }
            }
         }
      }

      ip++;
   }

   ralloc_free(cse_ctx);

   return progress;
}
  253.  
  254. bool
  255. vec4_visitor::opt_cse()
  256. {
  257.    bool progress = false;
  258.  
  259.    calculate_live_intervals();
  260.  
  261.    foreach_block (block, cfg) {
  262.       progress = opt_cse_local(block) || progress;
  263.    }
  264.  
  265.    if (progress)
  266.       invalidate_live_intervals();
  267.  
  268.    return progress;
  269. }
  270.