Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *
  26.  */
  27.  
  28. #include "brw_cfg.h"
  29. #include "brw_vec4_live_variables.h"
  30.  
  31. using namespace brw;
  32.  
  33. /** @file brw_vec4_live_variables.cpp
  34.  *
  35.  * Support for computing at the basic block level which variables
  36.  * (virtual GRFs in our case) are live at entry and exit.
  37.  *
  38.  * See Muchnik's Advanced Compiler Design and Implementation, section
  39.  * 14.1 (p444).
  40.  */
  41.  
  42. /**
  43.  * Sets up the use[] and def[] arrays.
  44.  *
  45.  * The basic-block-level live variable analysis needs to know which
  46.  * variables get used before they're completely defined, and which
  47.  * variables are completely defined before they're used.
  48.  *
  49.  * We independently track each channel of a vec4.  This is because we need to
  50.  * be able to recognize a sequence like:
  51.  *
  52.  * ...
  53.  * DP4 tmp.x a b;
  54.  * DP4 tmp.y c d;
  55.  * MUL result.xy tmp.xy e.xy
  56.  * ...
  57.  *
  58.  * as having tmp live only across that sequence (assuming it's used nowhere
  59.  * else), because it's a common pattern.  A more conservative approach that
  60.  * doesn't get tmp marked a deffed in this block will tend to result in
  61.  * spilling.
  62.  */
  63. void
  64. vec4_live_variables::setup_def_use()
  65. {
  66.    int ip = 0;
  67.  
  68.    for (int b = 0; b < cfg->num_blocks; b++) {
  69.       bblock_t *block = cfg->blocks[b];
  70.  
  71.       assert(ip == block->start_ip);
  72.       if (b > 0)
  73.          assert(cfg->blocks[b - 1]->end_ip == ip - 1);
  74.  
  75.       for (vec4_instruction *inst = (vec4_instruction *)block->start;
  76.            inst != block->end->next;
  77.            inst = (vec4_instruction *)inst->next) {
  78.  
  79.          /* Set use[] for this instruction */
  80.          for (unsigned int i = 0; i < 3; i++) {
  81.             if (inst->src[i].file == GRF) {
  82.                int reg = inst->src[i].reg;
  83.  
  84.                for (int j = 0; j < 4; j++) {
  85.                   int c = BRW_GET_SWZ(inst->src[i].swizzle, j);
  86.                   if (!bd[b].def[reg * 4 + c])
  87.                      bd[b].use[reg * 4 + c] = true;
  88.                }
  89.             }
  90.          }
  91.  
  92.          /* Check for unconditional writes to whole registers. These
  93.           * are the things that screen off preceding definitions of a
  94.           * variable, and thus qualify for being in def[].
  95.           */
  96.          if (inst->dst.file == GRF &&
  97.              v->virtual_grf_sizes[inst->dst.reg] == 1 &&
  98.              !inst->predicate) {
  99.             for (int c = 0; c < 4; c++) {
  100.                if (inst->dst.writemask & (1 << c)) {
  101.                   int reg = inst->dst.reg;
  102.                   if (!bd[b].use[reg * 4 + c])
  103.                      bd[b].def[reg * 4 + c] = true;
  104.                }
  105.             }
  106.          }
  107.  
  108.          ip++;
  109.       }
  110.    }
  111. }
  112.  
  113. /**
  114.  * The algorithm incrementally sets bits in liveout and livein,
  115.  * propagating it through control flow.  It will eventually terminate
  116.  * because it only ever adds bits, and stops when no bits are added in
  117.  * a pass.
  118.  */
  119. void
  120. vec4_live_variables::compute_live_variables()
  121. {
  122.    bool cont = true;
  123.  
  124.    while (cont) {
  125.       cont = false;
  126.  
  127.       for (int b = 0; b < cfg->num_blocks; b++) {
  128.          /* Update livein */
  129.          for (int i = 0; i < num_vars; i++) {
  130.             if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) {
  131.                if (!bd[b].livein[i]) {
  132.                   bd[b].livein[i] = true;
  133.                   cont = true;
  134.                }
  135.             }
  136.          }
  137.  
  138.          /* Update liveout */
  139.          foreach_list(block_node, &cfg->blocks[b]->children) {
  140.             bblock_link *link = (bblock_link *)block_node;
  141.             bblock_t *block = link->block;
  142.  
  143.             for (int i = 0; i < num_vars; i++) {
  144.                if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) {
  145.                   bd[b].liveout[i] = true;
  146.                   cont = true;
  147.                }
  148.             }
  149.          }
  150.       }
  151.    }
  152. }
  153.  
  154. vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)
  155.    : v(v), cfg(cfg)
  156. {
  157.    mem_ctx = ralloc_context(cfg->mem_ctx);
  158.  
  159.    num_vars = v->virtual_grf_count * 4;
  160.    bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
  161.  
  162.    for (int i = 0; i < cfg->num_blocks; i++) {
  163.       bd[i].def = rzalloc_array(mem_ctx, bool, num_vars);
  164.       bd[i].use = rzalloc_array(mem_ctx, bool, num_vars);
  165.       bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars);
  166.       bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars);
  167.    }
  168.  
  169.    setup_def_use();
  170.    compute_live_variables();
  171. }
  172.  
  173. vec4_live_variables::~vec4_live_variables()
  174. {
  175.    ralloc_free(mem_ctx);
  176. }
  177.  
  178. #define MAX_INSTRUCTION (1 << 30)
  179.  
  180. /**
  181.  * Computes a conservative start/end of the live intervals for each virtual GRF.
  182.  *
  183.  * We could expose per-channel live intervals to the consumer based on the
  184.  * information we computed in vec4_live_variables, except that our only
  185.  * current user is virtual_grf_interferes().  So we instead union the
  186.  * per-channel ranges into a per-vgrf range for virtual_grf_start[] and
  187.  * virtual_grf_end[].
  188.  *
  189.  * We could potentially have virtual_grf_interferes() do the test per-channel,
  190.  * which would let some interesting register allocation occur (particularly on
  191.  * code-generated GLSL sequences from the Cg compiler which does register
  192.  * allocation at the GLSL level and thus reuses components of the variable
  193.  * with distinct lifetimes).  But right now the complexity of doing so doesn't
  194.  * seem worth it, since having virtual_grf_interferes() be cheap is important
  195.  * for register allocation performance.
  196.  */
  197. void
  198. vec4_visitor::calculate_live_intervals()
  199. {
  200.    if (this->live_intervals_valid)
  201.       return;
  202.  
  203.    int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count);
  204.    int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count);
  205.    ralloc_free(this->virtual_grf_start);
  206.    ralloc_free(this->virtual_grf_end);
  207.    this->virtual_grf_start = start;
  208.    this->virtual_grf_end = end;
  209.  
  210.    for (int i = 0; i < this->virtual_grf_count; i++) {
  211.       start[i] = MAX_INSTRUCTION;
  212.       end[i] = -1;
  213.    }
  214.  
  215.    /* Start by setting up the intervals with no knowledge of control
  216.     * flow.
  217.     */
  218.    int ip = 0;
  219.    foreach_list(node, &this->instructions) {
  220.       vec4_instruction *inst = (vec4_instruction *)node;
  221.  
  222.       for (unsigned int i = 0; i < 3; i++) {
  223.          if (inst->src[i].file == GRF) {
  224.             int reg = inst->src[i].reg;
  225.  
  226.             start[reg] = MIN2(start[reg], ip);
  227.             end[reg] = ip;
  228.          }
  229.       }
  230.  
  231.       if (inst->dst.file == GRF) {
  232.          int reg = inst->dst.reg;
  233.  
  234.          start[reg] = MIN2(start[reg], ip);
  235.          end[reg] = ip;
  236.       }
  237.  
  238.       ip++;
  239.    }
  240.  
  241.    /* Now, extend those intervals using our analysis of control flow.
  242.     *
  243.     * The control flow-aware analysis was done at a channel level, while at
  244.     * this point we're distilling it down to vgrfs.
  245.     */
  246.    cfg_t cfg(this);
  247.    vec4_live_variables livevars(this, &cfg);
  248.  
  249.    for (int b = 0; b < cfg.num_blocks; b++) {
  250.       for (int i = 0; i < livevars.num_vars; i++) {
  251.          if (livevars.bd[b].livein[i]) {
  252.             start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->start_ip);
  253.             end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->start_ip);
  254.          }
  255.  
  256.          if (livevars.bd[b].liveout[i]) {
  257.             start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->end_ip);
  258.             end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->end_ip);
  259.          }
  260.       }
  261.    }
  262.  
  263.    this->live_intervals_valid = true;
  264. }
  265.  
  266. bool
  267. vec4_visitor::virtual_grf_interferes(int a, int b)
  268. {
  269.    return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
  270.             virtual_grf_end[b] <= virtual_grf_start[a]);
  271. }
  272.