Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. #include "brw_fs.h"
  25. #include "brw_cfg.h"
  26.  
  27. /** @file brw_fs_cse.cpp
  28.  *
  29.  * Support for local common subexpression elimination.
  30.  *
  31.  * See Muchnick's Advanced Compiler Design and Implementation, section
  32.  * 13.1 (p378).
  33.  */
  34.  
  35. namespace {
  36. struct aeb_entry : public exec_node {
  37.    /** The instruction that generates the expression value. */
  38.    fs_inst *generator;
  39.  
  40.    /** The temporary where the value is stored. */
  41.    fs_reg tmp;
  42. };
  43. }
  44.  
  45. static bool
  46. is_expression(const fs_inst *const inst)
  47. {
  48.    switch (inst->opcode) {
  49.    case BRW_OPCODE_SEL:
  50.    case BRW_OPCODE_NOT:
  51.    case BRW_OPCODE_AND:
  52.    case BRW_OPCODE_OR:
  53.    case BRW_OPCODE_XOR:
  54.    case BRW_OPCODE_SHR:
  55.    case BRW_OPCODE_SHL:
  56.    case BRW_OPCODE_RSR:
  57.    case BRW_OPCODE_RSL:
  58.    case BRW_OPCODE_ASR:
  59.    case BRW_OPCODE_ADD:
  60.    case BRW_OPCODE_MUL:
  61.    case BRW_OPCODE_FRC:
  62.    case BRW_OPCODE_RNDU:
  63.    case BRW_OPCODE_RNDD:
  64.    case BRW_OPCODE_RNDE:
  65.    case BRW_OPCODE_RNDZ:
  66.    case BRW_OPCODE_LINE:
  67.    case BRW_OPCODE_PLN:
  68.    case BRW_OPCODE_MAD:
  69.    case BRW_OPCODE_LRP:
  70.    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
  71.    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
  72.    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
  73.    case FS_OPCODE_CINTERP:
  74.    case FS_OPCODE_LINTERP:
  75.       return true;
  76.    default:
  77.       return false;
  78.    }
  79. }
  80.  
  81. static bool
  82. operands_match(fs_reg *xs, fs_reg *ys)
  83. {
  84.    return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
  85. }
  86.  
  87. bool
  88. fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
  89. {
  90.    bool progress = false;
  91.  
  92.    void *mem_ctx = ralloc_context(this->mem_ctx);
  93.  
  94.    int ip = block->start_ip;
  95.    for (fs_inst *inst = (fs_inst *)block->start;
  96.         inst != block->end->next;
  97.         inst = (fs_inst *) inst->next) {
  98.  
  99.       /* Skip some cases. */
  100.       if (is_expression(inst) && !inst->is_partial_write() &&
  101.           !inst->conditional_mod)
  102.       {
  103.          bool found = false;
  104.  
  105.          aeb_entry *entry;
  106.          foreach_list(entry_node, aeb) {
  107.             entry = (aeb_entry *) entry_node;
  108.  
  109.             /* Match current instruction's expression against those in AEB. */
  110.             if (inst->opcode == entry->generator->opcode &&
  111.                 inst->saturate == entry->generator->saturate &&
  112.                 inst->dst.type == entry->generator->dst.type &&
  113.                 operands_match(entry->generator->src, inst->src)) {
  114.  
  115.                found = true;
  116.                progress = true;
  117.                break;
  118.             }
  119.          }
  120.  
  121.          if (!found) {
  122.             /* Our first sighting of this expression.  Create an entry. */
  123.             aeb_entry *entry = ralloc(mem_ctx, aeb_entry);
  124.             entry->tmp = reg_undef;
  125.             entry->generator = inst;
  126.             aeb->push_tail(entry);
  127.          } else {
  128.             /* This is at least our second sighting of this expression.
  129.              * If we don't have a temporary already, make one.
  130.              */
  131.             bool no_existing_temp = entry->tmp.file == BAD_FILE;
  132.             if (no_existing_temp) {
  133.                int written = entry->generator->regs_written;
  134.  
  135.                fs_reg orig_dst = entry->generator->dst;
  136.                fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
  137.                                    orig_dst.type);
  138.                entry->tmp = tmp;
  139.                entry->generator->dst = tmp;
  140.  
  141.                for (int i = 0; i < written; i++) {
  142.                   fs_inst *copy = MOV(orig_dst, tmp);
  143.                   copy->force_writemask_all =
  144.                      entry->generator->force_writemask_all;
  145.                   entry->generator->insert_after(copy);
  146.  
  147.                   orig_dst.reg_offset++;
  148.                   tmp.reg_offset++;
  149.                }
  150.             }
  151.  
  152.             /* dest <- temp */
  153.             int written = inst->regs_written;
  154.             assert(written == entry->generator->regs_written);
  155.             assert(inst->dst.type == entry->tmp.type);
  156.             fs_reg dst = inst->dst;
  157.             fs_reg tmp = entry->tmp;
  158.             fs_inst *copy = NULL;
  159.             for (int i = 0; i < written; i++) {
  160.                copy = MOV(dst, tmp);
  161.                copy->force_writemask_all = inst->force_writemask_all;
  162.                inst->insert_before(copy);
  163.  
  164.                dst.reg_offset++;
  165.                tmp.reg_offset++;
  166.             }
  167.             inst->remove();
  168.  
  169.             /* Appending an instruction may have changed our bblock end. */
  170.             if (inst == block->end) {
  171.                block->end = copy;
  172.             }
  173.  
  174.             /* Continue iteration with copy->next */
  175.             inst = copy;
  176.          }
  177.       }
  178.  
  179.       foreach_list_safe(entry_node, aeb) {
  180.          aeb_entry *entry = (aeb_entry *)entry_node;
  181.  
  182.          for (int i = 0; i < 3; i++) {
  183.             fs_reg *src_reg = &entry->generator->src[i];
  184.  
  185.             /* Kill all AEB entries that use the destination we just
  186.              * overwrote.
  187.              */
  188.             if (inst->overwrites_reg(entry->generator->src[i])) {
  189.                entry->remove();
  190.                ralloc_free(entry);
  191.                break;
  192.             }
  193.  
  194.             /* Kill any AEB entries using registers that don't get reused any
  195.              * more -- a sure sign they'll fail operands_match().
  196.              */
  197.             if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
  198.                entry->remove();
  199.                ralloc_free(entry);
  200.                break;
  201.             }
  202.          }
  203.       }
  204.  
  205.       ip++;
  206.    }
  207.  
  208.    ralloc_free(mem_ctx);
  209.  
  210.    if (progress)
  211.       this->live_intervals_valid = false;
  212.  
  213.    return progress;
  214. }
  215.  
  216. bool
  217. fs_visitor::opt_cse()
  218. {
  219.    bool progress = false;
  220.  
  221.    calculate_live_intervals();
  222.  
  223.    cfg_t cfg(this);
  224.  
  225.    for (int b = 0; b < cfg.num_blocks; b++) {
  226.       bblock_t *block = cfg.blocks[b];
  227.       exec_list aeb;
  228.  
  229.       progress = opt_cse_local(block, &aeb) || progress;
  230.    }
  231.  
  232.    return progress;
  233. }
  234.