Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *
  26.  */
  27.  
  28. #include "brw_cfg.h"
  29. #include "brw_vec4_live_variables.h"
  30.  
  31. using namespace brw;
  32.  
  33. /** @file brw_vec4_live_variables.cpp
  34.  *
  35.  * Support for computing at the basic block level which variables
  36.  * (virtual GRFs in our case) are live at entry and exit.
  37.  *
  38.  * See Muchnick's Advanced Compiler Design and Implementation, section
  39.  * 14.1 (p444).
  40.  */
  41.  
/**
 * Sets up the use[] and def[] arrays.
 *
 * The basic-block-level live variable analysis needs to know which
 * variables get used before they're completely defined, and which
 * variables are completely defined before they're used.
 *
 * We independently track each channel of a vec4.  This is because we need to
 * be able to recognize a sequence like:
 *
 * ...
 * DP4 tmp.x a b;
 * DP4 tmp.y c d;
 * MUL result.xy tmp.xy e.xy
 * ...
 *
 * as having tmp live only across that sequence (assuming it's used nowhere
 * else), because it's a common pattern.  A more conservative approach that
 * doesn't get tmp marked as defined in this block will tend to result in
 * spilling.
 */
void
vec4_live_variables::setup_def_use()
{
   int ip = 0;

   foreach_block (block, cfg) {
      /* Sanity check: instruction IPs must be contiguous across blocks. */
      assert(ip == block->start_ip);
      if (block->num > 0)
         assert(cfg->blocks[block->num - 1]->end_ip == ip - 1);

      foreach_inst_in_block(vec4_instruction, inst, block) {
         struct block_data *bd = &block_data[block->num];

         /* Set use[] for this instruction: a channel counts as a use only
          * if it hasn't already been completely defined earlier in this
          * block (checked against def[] below, which is filled in
          * instruction order).
          */
         for (unsigned int i = 0; i < 3; i++) {
            if (inst->src[i].file == GRF) {
               for (unsigned j = 0; j < inst->regs_read(i); j++) {
                  for (int c = 0; c < 4; c++) {
                     const unsigned v =
                        var_from_reg(alloc, offset(inst->src[i], j), c);
                     if (!BITSET_TEST(bd->def, v))
                        BITSET_SET(bd->use, v);
                  }
               }
            }
         }
         /* The flag register is tracked as a single variable at bit 0. */
         if (inst->reads_flag()) {
            if (!BITSET_TEST(bd->flag_def, 0)) {
               BITSET_SET(bd->flag_use, 0);
            }
         }

         /* Check for unconditional writes to whole registers. These
          * are the things that screen off preceding definitions of a
          * variable, and thus qualify for being in def[].
          */
         if (inst->dst.file == GRF && !inst->predicate) {
            for (unsigned i = 0; i < inst->regs_written; i++) {
               for (int c = 0; c < 4; c++) {
                  if (inst->dst.writemask & (1 << c)) {
                     const unsigned v =
                        var_from_reg(alloc, offset(inst->dst, i), c);
                     /* Only a def if the channel wasn't used first. */
                     if (!BITSET_TEST(bd->use, v))
                        BITSET_SET(bd->def, v);
                  }
               }
            }
         }
         if (inst->writes_flag()) {
            if (!BITSET_TEST(bd->flag_use, 0)) {
               BITSET_SET(bd->flag_def, 0);
            }
         }

         ip++;
      }
   }
}
  121.  
  122. /**
  123.  * The algorithm incrementally sets bits in liveout and livein,
  124.  * propagating it through control flow.  It will eventually terminate
  125.  * because it only ever adds bits, and stops when no bits are added in
  126.  * a pass.
  127.  */
  128. void
  129. vec4_live_variables::compute_live_variables()
  130. {
  131.    bool cont = true;
  132.  
  133.    while (cont) {
  134.       cont = false;
  135.  
  136.       foreach_block (block, cfg) {
  137.          struct block_data *bd = &block_data[block->num];
  138.  
  139.          /* Update livein */
  140.          for (int i = 0; i < bitset_words; i++) {
  141.             BITSET_WORD new_livein = (bd->use[i] |
  142.                                       (bd->liveout[i] &
  143.                                        ~bd->def[i]));
  144.             if (new_livein & ~bd->livein[i]) {
  145.                bd->livein[i] |= new_livein;
  146.                cont = true;
  147.             }
  148.          }
  149.          BITSET_WORD new_livein = (bd->flag_use[0] |
  150.                                    (bd->flag_liveout[0] &
  151.                                     ~bd->flag_def[0]));
  152.          if (new_livein & ~bd->flag_livein[0]) {
  153.             bd->flag_livein[0] |= new_livein;
  154.             cont = true;
  155.          }
  156.  
  157.          /* Update liveout */
  158.          foreach_list_typed(bblock_link, child_link, link, &block->children) {
  159.             struct block_data *child_bd = &block_data[child_link->block->num];
  160.  
  161.             for (int i = 0; i < bitset_words; i++) {
  162.                BITSET_WORD new_liveout = (child_bd->livein[i] &
  163.                                           ~bd->liveout[i]);
  164.                if (new_liveout) {
  165.                   bd->liveout[i] |= new_liveout;
  166.                   cont = true;
  167.                }
  168.             }
  169.             BITSET_WORD new_liveout = (child_bd->flag_livein[0] &
  170.                                        ~bd->flag_liveout[0]);
  171.             if (new_liveout) {
  172.                bd->flag_liveout[0] |= new_liveout;
  173.                cont = true;
  174.             }
  175.          }
  176.       }
  177.    }
  178. }
  179.  
  180. vec4_live_variables::vec4_live_variables(const simple_allocator &alloc,
  181.                                          cfg_t *cfg)
  182.    : alloc(alloc), cfg(cfg)
  183. {
  184.    mem_ctx = ralloc_context(NULL);
  185.  
  186.    num_vars = alloc.total_size * 4;
  187.    block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
  188.  
  189.    bitset_words = BITSET_WORDS(num_vars);
  190.    for (int i = 0; i < cfg->num_blocks; i++) {
  191.       block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  192.       block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  193.       block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  194.       block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  195.  
  196.       block_data[i].flag_def[0] = 0;
  197.       block_data[i].flag_use[0] = 0;
  198.       block_data[i].flag_livein[0] = 0;
  199.       block_data[i].flag_liveout[0] = 0;
  200.    }
  201.  
  202.    setup_def_use();
  203.    compute_live_variables();
  204. }
  205.  
vec4_live_variables::~vec4_live_variables()
{
   /* Everything the analysis allocated is parented to mem_ctx, so this
    * single free releases all per-block bitsets as well.
    */
   ralloc_free(mem_ctx);
}
  210.  
/* Sentinel "later than any real IP" used to initialize interval starts. */
#define MAX_INSTRUCTION (1 << 30)

/**
 * Computes a conservative start/end of the live intervals for each virtual GRF.
 *
 * We could expose per-channel live intervals to the consumer based on the
 * information we computed in vec4_live_variables, except that our only
 * current user is virtual_grf_interferes().  So we instead union the
 * per-channel ranges into a per-vgrf range for virtual_grf_start[] and
 * virtual_grf_end[].
 *
 * We could potentially have virtual_grf_interferes() do the test per-channel,
 * which would let some interesting register allocation occur (particularly on
 * code-generated GLSL sequences from the Cg compiler which does register
 * allocation at the GLSL level and thus reuses components of the variable
 * with distinct lifetimes).  But right now the complexity of doing so doesn't
 * seem worth it, since having virtual_grf_interferes() be cheap is important
 * for register allocation performance.
 */
void
vec4_visitor::calculate_live_intervals()
{
   /* Results are cached; invalidate_live_intervals() forces a recompute. */
   if (this->live_intervals)
      return;

   int *start = ralloc_array(mem_ctx, int, this->alloc.total_size * 4);
   int *end = ralloc_array(mem_ctx, int, this->alloc.total_size * 4);
   /* Replace any previous interval arrays with the fresh allocations. */
   ralloc_free(this->virtual_grf_start);
   ralloc_free(this->virtual_grf_end);
   this->virtual_grf_start = start;
   this->virtual_grf_end = end;

   /* Initialize to "never live": start after every IP, end before any. */
   for (unsigned i = 0; i < this->alloc.total_size * 4; i++) {
      start[i] = MAX_INSTRUCTION;
      end[i] = -1;
   }

   /* Start by setting up the intervals with no knowledge of control
    * flow.
    */
   int ip = 0;
   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
      /* Each channel read by a source extends that variable's range. */
      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF) {
            for (unsigned j = 0; j < inst->regs_read(i); j++) {
               for (int c = 0; c < 4; c++) {
                  const unsigned v =
                     var_from_reg(alloc, offset(inst->src[i], j), c);
                  start[v] = MIN2(start[v], ip);
                  end[v] = ip;
               }
            }
         }
      }

      /* Each channel written by the destination does likewise. */
      if (inst->dst.file == GRF) {
         for (unsigned i = 0; i < inst->regs_written; i++) {
            for (int c = 0; c < 4; c++) {
               if (inst->dst.writemask & (1 << c)) {
                  const unsigned v =
                     var_from_reg(alloc, offset(inst->dst, i), c);
                  start[v] = MIN2(start[v], ip);
                  end[v] = ip;
               }
            }
         }
      }

      ip++;
   }

   /* Now, extend those intervals using our analysis of control flow.
    *
    * The control flow-aware analysis was done at a channel level, while at
    * this point we're distilling it down to vgrfs.
    */
   this->live_intervals = new(mem_ctx) vec4_live_variables(alloc, cfg);

   foreach_block (block, cfg) {
      struct block_data *bd = &live_intervals->block_data[block->num];

      /* A variable live into a block must span the block's first IP;
       * one live out of it must span the block's last IP.
       */
      for (int i = 0; i < live_intervals->num_vars; i++) {
         if (BITSET_TEST(bd->livein, i)) {
            start[i] = MIN2(start[i], block->start_ip);
            end[i] = MAX2(end[i], block->start_ip);
         }

         if (BITSET_TEST(bd->liveout, i)) {
            start[i] = MIN2(start[i], block->end_ip);
            end[i] = MAX2(end[i], block->end_ip);
         }
      }
   }
}
  305.  
/**
 * Drops the cached liveness analysis so that the next
 * calculate_live_intervals() call recomputes it from scratch.
 */
void
vec4_visitor::invalidate_live_intervals()
{
   /* Freeing the analysis object also frees its internal mem_ctx. */
   ralloc_free(live_intervals);
   live_intervals = NULL;
}
  312.  
  313. int
  314. vec4_visitor::var_range_start(unsigned v, unsigned n) const
  315. {
  316.    int start = INT_MAX;
  317.  
  318.    for (unsigned i = 0; i < n; i++)
  319.       start = MIN2(start, virtual_grf_start[v + i]);
  320.  
  321.    return start;
  322. }
  323.  
  324. int
  325. vec4_visitor::var_range_end(unsigned v, unsigned n) const
  326. {
  327.    int end = INT_MIN;
  328.  
  329.    for (unsigned i = 0; i < n; i++)
  330.       end = MAX2(end, virtual_grf_end[v + i]);
  331.  
  332.    return end;
  333. }
  334.  
  335. bool
  336. vec4_visitor::virtual_grf_interferes(int a, int b)
  337. {
  338.    return !((var_range_end(4 * alloc.offsets[a], 4 * alloc.sizes[a]) <=
  339.              var_range_start(4 * alloc.offsets[b], 4 * alloc.sizes[b])) ||
  340.             (var_range_end(4 * alloc.offsets[b], 4 * alloc.sizes[b]) <=
  341.              var_range_start(4 * alloc.offsets[a], 4 * alloc.sizes[a])));
  342. }
  343.