Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *
  26.  */
  27.  
  28. #include "brw_cfg.h"
  29. #include "brw_fs_live_variables.h"
  30.  
  31. using namespace brw;
  32.  
  33. /** @file brw_fs_live_variables.cpp
  34.  *
  35.  * Support for computing at the basic block level which variables
  36.  * (virtual GRFs in our case) are live at entry and exit.
  37.  *
  38.  * See Muchnik's Advanced Compiler Design and Implementation, section
  39.  * 14.1 (p444).
  40.  */
  41.  
  42. /**
  43.  * Sets up the use[] and def[] bitsets.
  44.  *
  45.  * The basic-block-level live variable analysis needs to know which
  46.  * variables get used before they're completely defined, and which
  47.  * variables are completely defined before they're used.
  48.  */
  49. void
  50. fs_live_variables::setup_def_use()
  51. {
  52.    int ip = 0;
  53.  
  54.    for (int b = 0; b < cfg->num_blocks; b++) {
  55.       bblock_t *block = cfg->blocks[b];
  56.  
  57.       assert(ip == block->start_ip);
  58.       if (b > 0)
  59.          assert(cfg->blocks[b - 1]->end_ip == ip - 1);
  60.  
  61.       for (fs_inst *inst = (fs_inst *)block->start;
  62.            inst != block->end->next;
  63.            inst = (fs_inst *)inst->next) {
  64.  
  65.          /* Set use[] for this instruction */
  66.          for (unsigned int i = 0; i < 3; i++) {
  67.             if (inst->src[i].file == GRF) {
  68.                int reg = inst->src[i].reg;
  69.  
  70.                if (!BITSET_TEST(bd[b].def, reg))
  71.                   BITSET_SET(bd[b].use, reg);
  72.             }
  73.          }
  74.  
  75.          /* Check for unconditional writes to whole registers. These
  76.           * are the things that screen off preceding definitions of a
  77.           * variable, and thus qualify for being in def[].
  78.           */
  79.          if (inst->dst.file == GRF &&
  80.              inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
  81.              !inst->is_partial_write()) {
  82.             int reg = inst->dst.reg;
  83.             if (!BITSET_TEST(bd[b].use, reg))
  84.                BITSET_SET(bd[b].def, reg);
  85.          }
  86.  
  87.          ip++;
  88.       }
  89.    }
  90. }
  91.  
  92. /**
  93.  * The algorithm incrementally sets bits in liveout and livein,
  94.  * propagating it through control flow.  It will eventually terminate
  95.  * because it only ever adds bits, and stops when no bits are added in
  96.  * a pass.
  97.  */
  98. void
  99. fs_live_variables::compute_live_variables()
  100. {
  101.    bool cont = true;
  102.  
  103.    while (cont) {
  104.       cont = false;
  105.  
  106.       for (int b = 0; b < cfg->num_blocks; b++) {
  107.          /* Update livein */
  108.          for (int i = 0; i < bitset_words; i++) {
  109.             BITSET_WORD new_livein = (bd[b].use[i] |
  110.                                       (bd[b].liveout[i] & ~bd[b].def[i]));
  111.             if (new_livein & ~bd[b].livein[i]) {
  112.                bd[b].livein[i] |= new_livein;
  113.                cont = true;
  114.             }
  115.          }
  116.  
  117.          /* Update liveout */
  118.          foreach_list(block_node, &cfg->blocks[b]->children) {
  119.             bblock_link *link = (bblock_link *)block_node;
  120.             bblock_t *block = link->block;
  121.  
  122.             for (int i = 0; i < bitset_words; i++) {
  123.                BITSET_WORD new_liveout = (bd[block->block_num].livein[i] &
  124.                                           ~bd[b].liveout[i]);
  125.                if (new_liveout) {
  126.                   bd[b].liveout[i] |= new_liveout;
  127.                   cont = true;
  128.                }
  129.             }
  130.          }
  131.       }
  132.    }
  133. }
  134.  
  135. fs_live_variables::fs_live_variables(fs_visitor *v, cfg_t *cfg)
  136.    : v(v), cfg(cfg)
  137. {
  138.    mem_ctx = ralloc_context(cfg->mem_ctx);
  139.  
  140.    num_vars = v->virtual_grf_count;
  141.    bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
  142.  
  143.    bitset_words = BITSET_WORDS(v->virtual_grf_count);
  144.    for (int i = 0; i < cfg->num_blocks; i++) {
  145.       bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  146.       bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  147.       bd[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  148.       bd[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  149.    }
  150.  
  151.    setup_def_use();
  152.    compute_live_variables();
  153. }
  154.  
  155. fs_live_variables::~fs_live_variables()
  156. {
  157.    ralloc_free(mem_ctx);
  158. }
  159.  
  160. #define MAX_INSTRUCTION (1 << 30)
  161.  
  162. void
  163. fs_visitor::calculate_live_intervals()
  164. {
  165.    int num_vars = this->virtual_grf_count;
  166.  
  167.    if (this->live_intervals_valid)
  168.       return;
  169.  
  170.    int *start = ralloc_array(mem_ctx, int, num_vars);
  171.    int *end = ralloc_array(mem_ctx, int, num_vars);
  172.    ralloc_free(this->virtual_grf_start);
  173.    ralloc_free(this->virtual_grf_end);
  174.    this->virtual_grf_start = start;
  175.    this->virtual_grf_end = end;
  176.  
  177.    for (int i = 0; i < num_vars; i++) {
  178.       start[i] = MAX_INSTRUCTION;
  179.       end[i] = -1;
  180.    }
  181.  
  182.    /* Start by setting up the intervals with no knowledge of control
  183.     * flow.
  184.     */
  185.    int ip = 0;
  186.    foreach_list(node, &this->instructions) {
  187.       fs_inst *inst = (fs_inst *)node;
  188.  
  189.       for (unsigned int i = 0; i < 3; i++) {
  190.          if (inst->src[i].file == GRF) {
  191.             int reg = inst->src[i].reg;
  192.             int end_ip = ip;
  193.  
  194.             /* In most cases, a register can be written over safely by the
  195.              * same instruction that is its last use.  For a single
  196.              * instruction, the sources are dereferenced before writing of the
  197.              * destination starts (naturally).  This gets more complicated for
  198.              * simd16, because the instruction:
  199.              *
  200.              * mov(16)      g4<1>F      g4<8,8,1>F   g6<8,8,1>F
  201.              *
  202.              * is actually decoded in hardware as:
  203.              *
  204.              * mov(8)       g4<1>F      g4<8,8,1>F   g6<8,8,1>F
  205.              * mov(8)       g5<1>F      g5<8,8,1>F   g7<8,8,1>F
  206.              *
  207.              * Which is safe.  However, if we have uniform accesses
  208.              * happening, we get into trouble:
  209.              *
  210.              * mov(8)       g4<1>F      g4<0,1,0>F   g6<8,8,1>F
  211.              * mov(8)       g5<1>F      g4<0,1,0>F   g7<8,8,1>F
  212.              *
  213.              * Now our destination for the first instruction overwrote the
  214.              * second instruction's src0, and we get garbage for those 8
  215.              * pixels.  There's a similar issue for the pre-gen6
  216.              * pixel_x/pixel_y, which are registers of 16-bit values and thus
  217.              * would get stomped by the first decode as well.
  218.              */
  219.             if (dispatch_width == 16 && (inst->src[i].smear >= 0 ||
  220.                                          (this->pixel_x.reg == reg ||
  221.                                           this->pixel_y.reg == reg))) {
  222.                end_ip++;
  223.             }
  224.  
  225.             start[reg] = MIN2(start[reg], ip);
  226.             end[reg] = MAX2(end[reg], end_ip);
  227.          }
  228.       }
  229.  
  230.       if (inst->dst.file == GRF) {
  231.          int reg = inst->dst.reg;
  232.  
  233.          start[reg] = MIN2(start[reg], ip);
  234.          end[reg] = MAX2(end[reg], ip);
  235.       }
  236.  
  237.       ip++;
  238.    }
  239.  
  240.    /* Now, extend those intervals using our analysis of control flow. */
  241.    cfg_t cfg(this);
  242.    fs_live_variables livevars(this, &cfg);
  243.  
  244.    for (int b = 0; b < cfg.num_blocks; b++) {
  245.       for (int i = 0; i < num_vars; i++) {
  246.          if (BITSET_TEST(livevars.bd[b].livein, i)) {
  247.             start[i] = MIN2(start[i], cfg.blocks[b]->start_ip);
  248.             end[i] = MAX2(end[i], cfg.blocks[b]->start_ip);
  249.          }
  250.  
  251.          if (BITSET_TEST(livevars.bd[b].liveout, i)) {
  252.             start[i] = MIN2(start[i], cfg.blocks[b]->end_ip);
  253.             end[i] = MAX2(end[i], cfg.blocks[b]->end_ip);
  254.          }
  255.       }
  256.    }
  257.  
  258.    this->live_intervals_valid = true;
  259. }
  260.  
  261. bool
  262. fs_visitor::virtual_grf_interferes(int a, int b)
  263. {
  264.    return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
  265.             virtual_grf_end[b] <= virtual_grf_start[a]);
  266. }
  267.