Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. extern "C" {
  25. #include "main/macros.h"
  26. #include "program/register_allocate.h"
  27. } /* extern "C" */
  28.  
  29. #include "brw_vec4.h"
  30.  
  31. using namespace brw;
  32.  
  33. namespace brw {
  34.  
  35. static void
  36. assign(unsigned int *reg_hw_locations, reg *reg)
  37. {
  38.    if (reg->file == GRF) {
  39.       reg->reg = reg_hw_locations[reg->reg];
  40.    }
  41. }
  42.  
  43. bool
  44. vec4_visitor::reg_allocate_trivial()
  45. {
  46.    unsigned int hw_reg_mapping[this->virtual_grf_count];
  47.    bool virtual_grf_used[this->virtual_grf_count];
  48.    int i;
  49.    int next;
  50.  
  51.    /* Calculate which virtual GRFs are actually in use after whatever
  52.     * optimization passes have occurred.
  53.     */
  54.    for (int i = 0; i < this->virtual_grf_count; i++) {
  55.       virtual_grf_used[i] = false;
  56.    }
  57.  
  58.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  59.       vec4_instruction *inst = (vec4_instruction *)iter.get();
  60.  
  61.       if (inst->dst.file == GRF)
  62.          virtual_grf_used[inst->dst.reg] = true;
  63.  
  64.       for (int i = 0; i < 3; i++) {
  65.          if (inst->src[i].file == GRF)
  66.             virtual_grf_used[inst->src[i].reg] = true;
  67.       }
  68.    }
  69.  
  70.    hw_reg_mapping[0] = this->first_non_payload_grf;
  71.    next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
  72.    for (i = 1; i < this->virtual_grf_count; i++) {
  73.       if (virtual_grf_used[i]) {
  74.          hw_reg_mapping[i] = next;
  75.          next += this->virtual_grf_sizes[i];
  76.       }
  77.    }
  78.    prog_data->total_grf = next;
  79.  
  80.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  81.       vec4_instruction *inst = (vec4_instruction *)iter.get();
  82.  
  83.       assign(hw_reg_mapping, &inst->dst);
  84.       assign(hw_reg_mapping, &inst->src[0]);
  85.       assign(hw_reg_mapping, &inst->src[1]);
  86.       assign(hw_reg_mapping, &inst->src[2]);
  87.    }
  88.  
  89.    if (prog_data->total_grf > max_grf) {
  90.       fail("Ran out of regs on trivial allocator (%d/%d)\n",
  91.            prog_data->total_grf, max_grf);
  92.       return false;
  93.    }
  94.  
  95.    return true;
  96. }
  97.  
  98. static void
  99. brw_alloc_reg_set_for_classes(struct brw_context *brw,
  100.                               int *class_sizes,
  101.                               int class_count,
  102.                               int base_reg_count)
  103. {
  104.    /* Compute the total number of registers across all classes. */
  105.    int ra_reg_count = 0;
  106.    for (int i = 0; i < class_count; i++) {
  107.       ra_reg_count += base_reg_count - (class_sizes[i] - 1);
  108.    }
  109.  
  110.    ralloc_free(brw->vs.ra_reg_to_grf);
  111.    brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
  112.    ralloc_free(brw->vs.regs);
  113.    brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count);
  114.    if (brw->gen >= 6)
  115.       ra_set_allocate_round_robin(brw->vs.regs);
  116.    ralloc_free(brw->vs.classes);
  117.    brw->vs.classes = ralloc_array(brw, int, class_count + 1);
  118.  
  119.    /* Now, add the registers to their classes, and add the conflicts
  120.     * between them and the base GRF registers (and also each other).
  121.     */
  122.    int reg = 0;
  123.    for (int i = 0; i < class_count; i++) {
  124.       int class_reg_count = base_reg_count - (class_sizes[i] - 1);
  125.       brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
  126.  
  127.       for (int j = 0; j < class_reg_count; j++) {
  128.          ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
  129.  
  130.          brw->vs.ra_reg_to_grf[reg] = j;
  131.  
  132.          for (int base_reg = j;
  133.               base_reg < j + class_sizes[i];
  134.               base_reg++) {
  135.             ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
  136.          }
  137.  
  138.          reg++;
  139.       }
  140.    }
  141.    assert(reg == ra_reg_count);
  142.  
  143.    ra_set_finalize(brw->vs.regs, NULL);
  144. }
  145.  
/**
 * Graph-coloring register allocation for the vec4 backend.
 *
 * Builds one register class per distinct virtual-GRF size seen in the
 * program, constructs an interference graph from the computed live
 * intervals, and asks the shared allocator for an assignment.  On
 * success the instruction stream is rewritten to hardware GRF numbers
 * and true is returned.  On failure a spill candidate is chosen and
 * spilled, and false is returned so the caller can retry allocation.
 */
bool
vec4_visitor::reg_allocate()
{
   unsigned int hw_reg_mapping[virtual_grf_count];
   int first_assigned_grf = this->first_non_payload_grf;
   int base_reg_count = max_grf - first_assigned_grf;
   int class_sizes[base_reg_count];
   int class_count = 0;

   /* Using the trivial allocator can be useful in debugging undefined
    * register access as a result of broken optimization passes.
    */
   if (0)
      return reg_allocate_trivial();

   calculate_live_intervals();

   /* Set up the register classes.
    *
    * The base registers store a vec4.  However, we'll need larger
    * storage for arrays, structures, and matrices, which will be sets
    * of contiguous registers.
    */
   class_sizes[class_count++] = 1;

   for (int r = 0; r < virtual_grf_count; r++) {
      int i;

      /* See whether a class of this GRF's size already exists. */
      for (i = 0; i < class_count; i++) {
         if (class_sizes[i] == this->virtual_grf_sizes[r])
            break;
      }
      if (i == class_count) {
         /* Note: fail() records the error but does not return, so the
          * oversized class is still appended below.
          */
         if (this->virtual_grf_sizes[r] >= base_reg_count) {
            fail("Object too large to register allocate.\n");
         }

         class_sizes[class_count++] = this->virtual_grf_sizes[r];
      }
   }

   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);

   /* One graph node per virtual GRF. */
   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
                                                    virtual_grf_count);

   for (int i = 0; i < virtual_grf_count; i++) {
      /* Put the node in the class matching its size. */
      for (int c = 0; c < class_count; c++) {
         if (class_sizes[c] == this->virtual_grf_sizes[i]) {
            ra_set_node_class(g, i, brw->vs.classes[c]);
            break;
         }
      }

      /* Add an edge for every earlier GRF whose live range overlaps. */
      for (int j = 0; j < i; j++) {
         if (virtual_grf_interferes(i, j)) {
            ra_add_node_interference(g, i, j);
         }
      }
   }

   if (!ra_allocate_no_spills(g)) {
      /* Failed to allocate registers.  Spill a reg, and the caller will
       * loop back into here to try again.
       */
      int reg = choose_spill_reg(g);
      if (reg == -1) {
         fail("no register to spill\n");
      } else {
         spill_reg(reg);
      }
      ralloc_free(g);
      return false;
   }

   /* Get the chosen virtual registers for each node, and map virtual
    * regs in the register classes back down to real hardware reg
    * numbers.
    */
   prog_data->total_grf = first_assigned_grf;
   for (int i = 0; i < virtual_grf_count; i++) {
      int reg = ra_get_node_reg(g, i);

      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
      prog_data->total_grf = MAX2(prog_data->total_grf,
                                  hw_reg_mapping[i] + virtual_grf_sizes[i]);
   }

   /* Rewrite every instruction's GRF references to hardware registers. */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      assign(hw_reg_mapping, &inst->dst);
      assign(hw_reg_mapping, &inst->src[0]);
      assign(hw_reg_mapping, &inst->src[1]);
      assign(hw_reg_mapping, &inst->src[2]);
   }

   ralloc_free(g);

   return true;
}
  247.  
  248. void
  249. vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
  250. {
  251.    float loop_scale = 1.0;
  252.  
  253.    for (int i = 0; i < this->virtual_grf_count; i++) {
  254.       spill_costs[i] = 0.0;
  255.       no_spill[i] = virtual_grf_sizes[i] != 1;
  256.    }
  257.  
  258.    /* Calculate costs for spilling nodes.  Call it a cost of 1 per
  259.     * spill/unspill we'll have to do, and guess that the insides of
  260.     * loops run 10 times.
  261.     */
  262.    foreach_list(node, &this->instructions) {
  263.       vec4_instruction *inst = (vec4_instruction *) node;
  264.  
  265.       for (unsigned int i = 0; i < 3; i++) {
  266.          if (inst->src[i].file == GRF) {
  267.             spill_costs[inst->src[i].reg] += loop_scale;
  268.             if (inst->src[i].reladdr)
  269.                no_spill[inst->src[i].reg] = true;
  270.          }
  271.       }
  272.  
  273.       if (inst->dst.file == GRF) {
  274.          spill_costs[inst->dst.reg] += loop_scale;
  275.          if (inst->dst.reladdr)
  276.             no_spill[inst->dst.reg] = true;
  277.       }
  278.  
  279.       switch (inst->opcode) {
  280.  
  281.       case BRW_OPCODE_DO:
  282.          loop_scale *= 10;
  283.          break;
  284.  
  285.       case BRW_OPCODE_WHILE:
  286.          loop_scale /= 10;
  287.          break;
  288.  
  289.       case VS_OPCODE_SCRATCH_READ:
  290.       case VS_OPCODE_SCRATCH_WRITE:
  291.          for (int i = 0; i < 3; i++) {
  292.             if (inst->src[i].file == GRF)
  293.                no_spill[inst->src[i].reg] = true;
  294.          }
  295.          if (inst->dst.file == GRF)
  296.             no_spill[inst->dst.reg] = true;
  297.          break;
  298.  
  299.       default:
  300.          break;
  301.       }
  302.    }
  303. }
  304.  
  305. int
  306. vec4_visitor::choose_spill_reg(struct ra_graph *g)
  307. {
  308.    float spill_costs[this->virtual_grf_count];
  309.    bool no_spill[this->virtual_grf_count];
  310.  
  311.    evaluate_spill_costs(spill_costs, no_spill);
  312.  
  313.    for (int i = 0; i < this->virtual_grf_count; i++) {
  314.       if (!no_spill[i])
  315.          ra_set_node_spill_cost(g, i, spill_costs[i]);
  316.    }
  317.  
  318.    return ra_get_best_spill_node(g);
  319. }
  320.  
  321. void
  322. vec4_visitor::spill_reg(int spill_reg_nr)
  323. {
  324.    assert(virtual_grf_sizes[spill_reg_nr] == 1);
  325.    unsigned int spill_offset = c->last_scratch++;
  326.  
  327.    /* Generate spill/unspill instructions for the objects being spilled. */
  328.    foreach_list(node, &this->instructions) {
  329.       vec4_instruction *inst = (vec4_instruction *) node;
  330.  
  331.       for (unsigned int i = 0; i < 3; i++) {
  332.          if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
  333.             src_reg spill_reg = inst->src[i];
  334.             inst->src[i].reg = virtual_grf_alloc(1);
  335.             dst_reg temp = dst_reg(inst->src[i]);
  336.  
  337.             /* Only read the necessary channels, to avoid overwriting the rest
  338.              * with data that may not have been written to scratch.
  339.              */
  340.             temp.writemask = 0;
  341.             for (int c = 0; c < 4; c++)
  342.                temp.writemask |= (1 << BRW_GET_SWZ(inst->src[i].swizzle, c));
  343.             assert(temp.writemask != 0);
  344.  
  345.             emit_scratch_read(inst, temp, spill_reg, spill_offset);
  346.          }
  347.       }
  348.  
  349.       if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
  350.          emit_scratch_write(inst, spill_offset);
  351.       }
  352.    }
  353.  
  354.    this->live_intervals_valid = false;
  355. }
  356.  
  357. } /* namespace brw */
  358.