Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *
  26.  */
  27.  
  28. #include "brw_cfg.h"
  29. #include "brw_fs_live_variables.h"
  30.  
  31. using namespace brw;
  32.  
  33. #define MAX_INSTRUCTION (1 << 30)
  34.  
  35. /** @file brw_fs_live_variables.cpp
  36.  *
  37.  * Support for calculating liveness information about virtual GRFs.
  38.  *
  39.  * This produces a live interval for each whole virtual GRF.  We could
  40.  * choose to expose per-component live intervals for VGRFs of size > 1,
  41.  * but we currently do not.  It is easier for the consumers of this
  42.  * information to work with whole VGRFs.
  43.  *
  44.  * However, we internally track use/def information at the per-component
  45.  * (reg_offset) level for greater accuracy.  Large VGRFs may be accessed
  46.  * piecemeal over many (possibly non-adjacent) instructions.  In this case,
  47.  * examining a single instruction is insufficient to decide whether a whole
  48.  * VGRF is ultimately used or defined.  Tracking individual components
  49.  * allows us to easily assemble this information.
  50.  *
  51.  * See Muchnick's Advanced Compiler Design and Implementation, section
  52.  * 14.1 (p444).
  53.  */
  54.  
  55. void
  56. fs_live_variables::setup_one_read(struct block_data *bd, fs_inst *inst,
  57.                                   int ip, const fs_reg &reg)
  58. {
  59.    int var = var_from_reg(reg);
  60.    assert(var < num_vars);
  61.  
  62.    /* In most cases, a register can be written over safely by the
  63.     * same instruction that is its last use.  For a single
  64.     * instruction, the sources are dereferenced before writing of the
  65.     * destination starts (naturally).  This gets more complicated for
  66.     * simd16, because the instruction:
  67.     *
  68.     * add(16)      g4<1>F      g4<8,8,1>F   g6<8,8,1>F
  69.     *
  70.     * is actually decoded in hardware as:
  71.     *
  72.     * add(8)       g4<1>F      g4<8,8,1>F   g6<8,8,1>F
  73.     * add(8)       g5<1>F      g5<8,8,1>F   g7<8,8,1>F
  74.     *
  75.     * Which is safe.  However, if we have uniform accesses
  76.     * happening, we get into trouble:
  77.     *
  78.     * add(8)       g4<1>F      g4<0,1,0>F   g6<8,8,1>F
  79.     * add(8)       g5<1>F      g4<0,1,0>F   g7<8,8,1>F
  80.     *
  81.     * Now our destination for the first instruction overwrote the
  82.     * second instruction's src0, and we get garbage for those 8
  83.     * pixels.  There's a similar issue for the pre-gen6
  84.     * pixel_x/pixel_y, which are registers of 16-bit values and thus
  85.     * would get stomped by the first decode as well.
  86.     */
  87.    int end_ip = ip;
  88.    if (inst->exec_size == 16 && (reg.stride == 0 ||
  89.                                  reg.type == BRW_REGISTER_TYPE_UW ||
  90.                                  reg.type == BRW_REGISTER_TYPE_W ||
  91.                                  reg.type == BRW_REGISTER_TYPE_UB ||
  92.                                  reg.type == BRW_REGISTER_TYPE_B)) {
  93.       end_ip++;
  94.    }
  95.  
  96.    start[var] = MIN2(start[var], ip);
  97.    end[var] = MAX2(end[var], end_ip);
  98.  
  99.    /* The use[] bitset marks when the block makes use of a variable (VGRF
  100.     * channel) without having completely defined that variable within the
  101.     * block.
  102.     */
  103.    if (!BITSET_TEST(bd->def, var))
  104.       BITSET_SET(bd->use, var);
  105. }
  106.  
  107. void
  108. fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
  109.                                    int ip, const fs_reg &reg)
  110. {
  111.    int var = var_from_reg(reg);
  112.    assert(var < num_vars);
  113.  
  114.    start[var] = MIN2(start[var], ip);
  115.    end[var] = MAX2(end[var], ip);
  116.  
  117.    /* The def[] bitset marks when an initialization in a block completely
  118.     * screens off previous updates of that variable (VGRF channel).
  119.     */
  120.    if (inst->dst.file == GRF && !inst->is_partial_write()) {
  121.       if (!BITSET_TEST(bd->use, var))
  122.          BITSET_SET(bd->def, var);
  123.    }
  124. }
  125.  
  126. /**
  127.  * Sets up the use[] and def[] bitsets.
  128.  *
  129.  * The basic-block-level live variable analysis needs to know which
  130.  * variables get used before they're completely defined, and which
  131.  * variables are completely defined before they're used.
  132.  *
  133.  * These are tracked at the per-component level, rather than whole VGRFs.
  134.  */
  135. void
  136. fs_live_variables::setup_def_use()
  137. {
  138.    int ip = 0;
  139.  
  140.    foreach_block (block, cfg) {
  141.       assert(ip == block->start_ip);
  142.       if (block->num > 0)
  143.          assert(cfg->blocks[block->num - 1]->end_ip == ip - 1);
  144.  
  145.       struct block_data *bd = &block_data[block->num];
  146.  
  147.       foreach_inst_in_block(fs_inst, inst, block) {
  148.          /* Set use[] for this instruction */
  149.          for (unsigned int i = 0; i < inst->sources; i++) {
  150.             fs_reg reg = inst->src[i];
  151.  
  152.             if (reg.file != GRF)
  153.                continue;
  154.  
  155.             for (int j = 0; j < inst->regs_read(i); j++) {
  156.                setup_one_read(bd, inst, ip, reg);
  157.                reg.reg_offset++;
  158.             }
  159.          }
  160.          if (inst->reads_flag()) {
  161.             /* The vertical combination predicates read f0.0 and f0.1. */
  162.             if (inst->predicate == BRW_PREDICATE_ALIGN1_ANYV ||
  163.                 inst->predicate == BRW_PREDICATE_ALIGN1_ALLV) {
  164.                assert(inst->flag_subreg == 0);
  165.                if (!BITSET_TEST(bd->flag_def, 1)) {
  166.                   BITSET_SET(bd->flag_use, 1);
  167.                }
  168.             }
  169.             if (!BITSET_TEST(bd->flag_def, inst->flag_subreg)) {
  170.                BITSET_SET(bd->flag_use, inst->flag_subreg);
  171.             }
  172.          }
  173.  
  174.          /* Set def[] for this instruction */
  175.          if (inst->dst.file == GRF) {
  176.             fs_reg reg = inst->dst;
  177.             for (int j = 0; j < inst->regs_written; j++) {
  178.                setup_one_write(bd, inst, ip, reg);
  179.                reg.reg_offset++;
  180.             }
  181.          }
  182.          if (inst->writes_flag()) {
  183.             if (!BITSET_TEST(bd->flag_use, inst->flag_subreg)) {
  184.                BITSET_SET(bd->flag_def, inst->flag_subreg);
  185.             }
  186.          }
  187.  
  188.          ip++;
  189.       }
  190.    }
  191. }
  192.  
  193. /**
  194.  * The algorithm incrementally sets bits in liveout and livein,
  195.  * propagating it through control flow.  It will eventually terminate
  196.  * because it only ever adds bits, and stops when no bits are added in
  197.  * a pass.
  198.  */
  199. void
  200. fs_live_variables::compute_live_variables()
  201. {
  202.    bool cont = true;
  203.  
  204.    while (cont) {
  205.       cont = false;
  206.  
  207.       foreach_block (block, cfg) {
  208.          struct block_data *bd = &block_data[block->num];
  209.  
  210.          /* Update livein */
  211.          for (int i = 0; i < bitset_words; i++) {
  212.             BITSET_WORD new_livein = (bd->use[i] |
  213.                                       (bd->liveout[i] &
  214.                                        ~bd->def[i]));
  215.             if (new_livein & ~bd->livein[i]) {
  216.                bd->livein[i] |= new_livein;
  217.                cont = true;
  218.             }
  219.          }
  220.          BITSET_WORD new_livein = (bd->flag_use[0] |
  221.                                    (bd->flag_liveout[0] &
  222.                                     ~bd->flag_def[0]));
  223.          if (new_livein & ~bd->flag_livein[0]) {
  224.             bd->flag_livein[0] |= new_livein;
  225.             cont = true;
  226.          }
  227.  
  228.          /* Update liveout */
  229.          foreach_list_typed(bblock_link, child_link, link, &block->children) {
  230.             struct block_data *child_bd = &block_data[child_link->block->num];
  231.  
  232.             for (int i = 0; i < bitset_words; i++) {
  233.                BITSET_WORD new_liveout = (child_bd->livein[i] &
  234.                                           ~bd->liveout[i]);
  235.                if (new_liveout) {
  236.                   bd->liveout[i] |= new_liveout;
  237.                   cont = true;
  238.                }
  239.             }
  240.             BITSET_WORD new_liveout = (child_bd->flag_livein[0] &
  241.                                        ~bd->flag_liveout[0]);
  242.             if (new_liveout) {
  243.                bd->flag_liveout[0] |= new_liveout;
  244.                cont = true;
  245.             }
  246.          }
  247.       }
  248.    }
  249. }
  250.  
  251. /**
  252.  * Extend the start/end ranges for each variable to account for the
  253.  * new information calculated from control flow.
  254.  */
  255. void
  256. fs_live_variables::compute_start_end()
  257. {
  258.    foreach_block (block, cfg) {
  259.       struct block_data *bd = &block_data[block->num];
  260.  
  261.       for (int i = 0; i < num_vars; i++) {
  262.          if (BITSET_TEST(bd->livein, i)) {
  263.             start[i] = MIN2(start[i], block->start_ip);
  264.             end[i] = MAX2(end[i], block->start_ip);
  265.          }
  266.  
  267.          if (BITSET_TEST(bd->liveout, i)) {
  268.             start[i] = MIN2(start[i], block->end_ip);
  269.             end[i] = MAX2(end[i], block->end_ip);
  270.          }
  271.  
  272.       }
  273.    }
  274. }
  275.  
  276. fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg)
  277.    : v(v), cfg(cfg)
  278. {
  279.    mem_ctx = ralloc_context(NULL);
  280.  
  281.    num_vgrfs = v->alloc.count;
  282.    num_vars = 0;
  283.    var_from_vgrf = rzalloc_array(mem_ctx, int, num_vgrfs);
  284.    for (int i = 0; i < num_vgrfs; i++) {
  285.       var_from_vgrf[i] = num_vars;
  286.       num_vars += v->alloc.sizes[i];
  287.    }
  288.  
  289.    vgrf_from_var = rzalloc_array(mem_ctx, int, num_vars);
  290.    for (int i = 0; i < num_vgrfs; i++) {
  291.       for (unsigned j = 0; j < v->alloc.sizes[i]; j++) {
  292.          vgrf_from_var[var_from_vgrf[i] + j] = i;
  293.       }
  294.    }
  295.  
  296.    start = ralloc_array(mem_ctx, int, num_vars);
  297.    end = rzalloc_array(mem_ctx, int, num_vars);
  298.    for (int i = 0; i < num_vars; i++) {
  299.       start[i] = MAX_INSTRUCTION;
  300.       end[i] = -1;
  301.    }
  302.  
  303.    block_data= rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
  304.  
  305.    bitset_words = BITSET_WORDS(num_vars);
  306.    for (int i = 0; i < cfg->num_blocks; i++) {
  307.       block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  308.       block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  309.       block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  310.       block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
  311.  
  312.       block_data[i].flag_def[0] = 0;
  313.       block_data[i].flag_use[0] = 0;
  314.       block_data[i].flag_livein[0] = 0;
  315.       block_data[i].flag_liveout[0] = 0;
  316.    }
  317.  
  318.    setup_def_use();
  319.    compute_live_variables();
  320.    compute_start_end();
  321. }
  322.  
  323. fs_live_variables::~fs_live_variables()
  324. {
  325.    ralloc_free(mem_ctx);
  326. }
  327.  
  328. void
  329. fs_visitor::invalidate_live_intervals()
  330. {
  331.    ralloc_free(live_intervals);
  332.    live_intervals = NULL;
  333. }
  334.  
  335. /**
  336.  * Compute the live intervals for each virtual GRF.
  337.  *
  338.  * This uses the per-component use/def data, but combines it to produce
  339.  * information about whole VGRFs.
  340.  */
  341. void
  342. fs_visitor::calculate_live_intervals()
  343. {
  344.    if (this->live_intervals)
  345.       return;
  346.  
  347.    int num_vgrfs = this->alloc.count;
  348.    ralloc_free(this->virtual_grf_start);
  349.    ralloc_free(this->virtual_grf_end);
  350.    virtual_grf_start = ralloc_array(mem_ctx, int, num_vgrfs);
  351.    virtual_grf_end = ralloc_array(mem_ctx, int, num_vgrfs);
  352.  
  353.    for (int i = 0; i < num_vgrfs; i++) {
  354.       virtual_grf_start[i] = MAX_INSTRUCTION;
  355.       virtual_grf_end[i] = -1;
  356.    }
  357.  
  358.    this->live_intervals = new(mem_ctx) fs_live_variables(this, cfg);
  359.  
  360.    /* Merge the per-component live ranges to whole VGRF live ranges. */
  361.    for (int i = 0; i < live_intervals->num_vars; i++) {
  362.       int vgrf = live_intervals->vgrf_from_var[i];
  363.       virtual_grf_start[vgrf] = MIN2(virtual_grf_start[vgrf],
  364.                                      live_intervals->start[i]);
  365.       virtual_grf_end[vgrf] = MAX2(virtual_grf_end[vgrf],
  366.                                    live_intervals->end[i]);
  367.    }
  368. }
  369.  
  370. bool
  371. fs_live_variables::vars_interfere(int a, int b)
  372. {
  373.    return !(end[b] <= start[a] ||
  374.             end[a] <= start[b]);
  375. }
  376.  
  377. bool
  378. fs_visitor::virtual_grf_interferes(int a, int b)
  379. {
  380.    return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
  381.             virtual_grf_end[b] <= virtual_grf_start[a]);
  382. }
  383.