/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

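/** @file
 *
 * Register allocation for the i965 fragment shader backend: a trivial
 * linear allocator (assign_regs_trivial), graph-coloring allocation
 * through Mesa's ra_* interface (assign_regs), and generation of
 * scratch spill/unspill code when coloring fails.
 */
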
#include "brw_fs.h"
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"

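/* Rewrite one virtual-GRF reference to its assigned hardware register,
 * folding the reg_offset within the virtual register into the hardware
 * register number (scaled by reg_width for 16-wide dispatch).
 */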
static void
assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
{
   if (reg->file == GRF) {
      assert(reg->reg_offset >= 0);
      reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
      reg->reg_offset = 0;
   }
}

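/* Assign each virtual GRF its own block of consecutive hardware registers,
 * with no reuse, failing the compile if the program needs more registers
 * than the hardware provides.
 */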
void
fs_visitor::assign_regs_trivial()
{
   int hw_reg_mapping[this->virtual_grf_count + 1];
   int i;
   int reg_width = dispatch_width / 8;

   /* Note that compressed instructions require alignment to 2 registers. */
   hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
   for (i = 1; i <= this->virtual_grf_count; i++) {
      hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
                           this->virtual_grf_sizes[i - 1] * reg_width);
   }
   this->grf_used = hw_reg_mapping[this->virtual_grf_count];

   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;

      assign_reg(hw_reg_mapping, &inst->dst, reg_width);
      assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
      assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
      assign_reg(hw_reg_mapping, &inst->src[2], reg_width);
   }

   if (this->grf_used >= max_grf) {
      fail("Ran out of regs on trivial allocator (%d/%d)\n",
           this->grf_used, max_grf);
   }
}

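/* Build the register set and classes used by the graph-coloring allocator
 * for a given reg_width (1 for 8-wide dispatch, 2 for 16-wide).  The result
 * is cached in brw->wm.reg_sets[reg_width - 1].
 */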
static void
brw_alloc_reg_set(struct brw_context *brw, int reg_width)
{
   int base_reg_count = BRW_MAX_GRF / reg_width;
   int index = reg_width - 1;

   /* Almost all values handled in the compiler are scalar values occupying a
    * single register (or 2 registers in the case of 16-wide, which is
    * handled by dividing base_reg_count by 2 and multiplying allocated
    * register numbers by 2).  Things that were aggregates of scalar values
    * at the GLSL level were split to scalar values by split_virtual_grfs().
    *
    * However, texture SEND messages return a series of contiguous registers.
    * We currently always ask for 4 registers, but we may convert that to use
    * fewer some day.
    *
    * Additionally, on gen5 we need aligned pairs of registers for the PLN
    * instruction, and on gen4 we need 8 contiguous regs for workaround simd16
    * texturing.
    *
    * So we have a need for classes for 1, 2, 4, and 8 registers currently,
    * and we add in '3' to make indexing the array easier for the common case
    * (since we'll probably want it for texturing later).
    */
   const int class_count = 5;
   const int class_sizes[class_count] = {1, 2, 3, 4, 8};

   /* Compute the total number of registers across all classes. */
   int ra_reg_count = 0;
   for (int i = 0; i < class_count; i++) {
      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
   }
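   /* For example, with reg_width == 1 (base_reg_count == 128) this is
    * 128 + 127 + 126 + 125 + 121 = 627 allocator registers: one per
    * possible starting GRF for each class size.
    */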

   uint8_t *ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
   struct ra_regs *regs = ra_alloc_reg_set(brw, ra_reg_count);
   if (brw->gen >= 6)
      ra_set_allocate_round_robin(regs);
   int *classes = ralloc_array(brw, int, class_count);
   int aligned_pairs_class = -1;

   /* Now, add the registers to their classes, and add the conflicts
    * between them and the base GRF registers (and also each other).
    */
   int reg = 0;
   int pairs_base_reg = 0;
   int pairs_reg_count = 0;
   for (int i = 0; i < class_count; i++) {
      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
      classes[i] = ra_alloc_reg_class(regs);

      /* Save this off for the aligned pair class at the end. */
      if (class_sizes[i] == 2) {
         pairs_base_reg = reg;
         pairs_reg_count = class_reg_count;
      }

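      /* Each allocator register in this class stands for class_sizes[i]
       * contiguous GRFs starting at GRF j.  Making it conflict with each
       * of those size-1 base registers (transitively, so anything already
       * conflicting with the base register conflicts with this one too)
       * keeps overlapping allocations in different classes from being
       * handed out simultaneously.
       */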
      for (int j = 0; j < class_reg_count; j++) {
         ra_class_add_reg(regs, classes[i], reg);

         ra_reg_to_grf[reg] = j;

         for (int base_reg = j;
              base_reg < j + class_sizes[i];
              base_reg++) {
            ra_add_transitive_reg_conflict(regs, base_reg, reg);
         }

         reg++;
      }
   }
   assert(reg == ra_reg_count);

   /* Add a special class for aligned pairs, which we'll put delta_x/y
    * in on gen5 so that we can do PLN.
    */
   if (brw->has_pln && reg_width == 1 && brw->gen < 6) {
      aligned_pairs_class = ra_alloc_reg_class(regs);

      for (int i = 0; i < pairs_reg_count; i++) {
         if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
            ra_class_add_reg(regs, aligned_pairs_class, pairs_base_reg + i);
         }
      }
   }

   ra_set_finalize(regs, NULL);

   brw->wm.reg_sets[index].regs = regs;
   brw->wm.reg_sets[index].classes = classes;
   brw->wm.reg_sets[index].ra_reg_to_grf = ra_reg_to_grf;
   brw->wm.reg_sets[index].aligned_pairs_class = aligned_pairs_class;
}

void
brw_fs_alloc_reg_sets(struct brw_context *brw)
{
   brw_alloc_reg_set(brw, 1);
   brw_alloc_reg_set(brw, 2);
}

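/* Count instructions from the given DO through its matching WHILE, both
 * inclusive, tracking nesting depth so inner loops are skipped over.
 */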
int
count_to_loop_end(fs_inst *do_inst)
{
   int depth = 1;
   int ip = 1;
   for (fs_inst *inst = (fs_inst *)do_inst->next;
        depth > 0;
        inst = (fs_inst *)inst->next) {
      switch (inst->opcode) {
      case BRW_OPCODE_DO:
         depth++;
         break;
      case BRW_OPCODE_WHILE:
         depth--;
         break;
      default:
         break;
      }
      ip++;
   }
   return ip;
}

/**
 * Sets up interference between thread payload registers and the virtual GRFs
 * to be allocated for program temporaries.
 *
 * We want to be able to reallocate the payload for our virtual GRFs, notably
 * because the setup coefficients for a full set of 16 FS inputs take up 8 of
 * our 128 registers.
 *
 * The layout of the payload registers is:
 *
 * 0..nr_payload_regs-1: fixed function setup (including barycentric
 * coordinates).
 * nr_payload_regs..nr_payload_regs+curb_read_length-1: uniform data
 * nr_payload_regs+curb_read_length..first_non_payload_grf-1: setup
 * coefficients.
 *
 * And we have payload_node_count nodes covering these registers in order
 * (note that in 16-wide, a node is two registers).
 */
void
fs_visitor::setup_payload_interference(struct ra_graph *g,
                                       int payload_node_count,
                                       int first_payload_node)
{
   int reg_width = dispatch_width / 8;
   int loop_depth = 0;
   int loop_end_ip = 0;

   int payload_last_use_ip[payload_node_count];
   memset(payload_last_use_ip, 0, sizeof(payload_last_use_ip));
   int ip = 0;
   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;

      switch (inst->opcode) {
      case BRW_OPCODE_DO:
         loop_depth++;

         /* Since payload regs are defined only at the start of shader
          * execution, any uses of the payload within a loop mean the live
          * interval extends to the end of the outermost loop.  Find the ip of
          * the end now.
          */
         if (loop_depth == 1)
            loop_end_ip = ip + count_to_loop_end(inst);
         break;
      case BRW_OPCODE_WHILE:
         loop_depth--;
         break;
      default:
         break;
      }

      int use_ip;
      if (loop_depth > 0)
         use_ip = loop_end_ip;
      else
         use_ip = ip;

      /* Note that UNIFORM args have been turned into FIXED_HW_REG by
       * assign_curbe_setup(), and interpolation uses fixed hardware regs from
       * the start (see interp_reg()).
       */
      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file == HW_REG &&
             inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
            int node_nr = inst->src[i].fixed_hw_reg.nr / reg_width;
            if (node_nr >= payload_node_count)
               continue;

            payload_last_use_ip[node_nr] = use_ip;
         }
      }

      /* Special case instructions which have extra implied registers used. */
      switch (inst->opcode) {
      case FS_OPCODE_FB_WRITE:
         /* We could omit this for the !inst->header_present case, except that
          * the simulator apparently incorrectly reads from g0/g1 instead of
          * sideband.  It also really freaks out driver developers to see g0
          * used in unusual places, so just always reserve it.
          */
         payload_last_use_ip[0 / reg_width] = use_ip;
         payload_last_use_ip[1 / reg_width] = use_ip;
         break;

      case FS_OPCODE_LINTERP:
         /* On gen6+ in 16-wide, there are 4 adjacent registers (so 2 nodes)
          * used by PLN's sourcing of the deltas, while we list only the first
          * two in the arguments (1 node).  Pre-gen6, the deltas are computed
          * in normal VGRFs.
          */
         if (brw->gen >= 6) {
            int delta_x_arg = 0;
            if (inst->src[delta_x_arg].file == HW_REG &&
                inst->src[delta_x_arg].fixed_hw_reg.file ==
                BRW_GENERAL_REGISTER_FILE) {
               int sechalf_node = (inst->src[delta_x_arg].fixed_hw_reg.nr /
                                   reg_width) + 1;
               assert(sechalf_node < payload_node_count);
               payload_last_use_ip[sechalf_node] = use_ip;
            }
         }
         break;

      default:
         break;
      }

      ip++;
   }

   for (int i = 0; i < payload_node_count; i++) {
      /* Mark the payload node as interfering with any virtual grf that is
       * live between the start of the program and our last use of the payload
       * node.
       */
      for (int j = 0; j < this->virtual_grf_count; j++) {
         /* Note that we use a <= comparison, unlike virtual_grf_interferes(),
          * in order to not have to worry about the uniform issue described in
          * calculate_live_intervals().
          */
         if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) {
            ra_add_node_interference(g, first_payload_node + i, j);
         }
      }
   }

   for (int i = 0; i < payload_node_count; i++) {
      /* Mark each payload node as being allocated to its physical register.
       *
       * The alternative would be to have per-physical-register classes, which
       * would just be silly.
       */
      ra_set_node_reg(g, first_payload_node + i, i);
   }
}

/**
 * Sets interference between virtual GRFs and usage of the high GRFs for SEND
 * messages (treated as MRFs in code generation).
 */
void
fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
{
   int mrf_count = BRW_MAX_GRF - GEN7_MRF_HACK_START;
   int reg_width = dispatch_width / 8;

   /* Identify all the MRFs used in the program. */
   bool mrf_used[mrf_count];
   memset(mrf_used, 0, sizeof(mrf_used));
   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;

      if (inst->dst.file == MRF) {
         int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
         mrf_used[reg] = true;
         if (reg_width == 2) {
            if (inst->dst.reg & BRW_MRF_COMPR4) {
               mrf_used[reg + 4] = true;
            } else {
               mrf_used[reg + 1] = true;
            }
         }
      }

      if (inst->mlen > 0) {
         for (int i = 0; i < implied_mrf_writes(inst); i++) {
            mrf_used[inst->base_mrf + i] = true;
         }
      }
   }

   for (int i = 0; i < mrf_count; i++) {
      /* Mark each MRF node as being allocated to its physical register.
       *
       * The alternative would be to have per-physical-register classes, which
       * would just be silly.
       */
      ra_set_node_reg(g, first_mrf_node + i,
                      (GEN7_MRF_HACK_START + i) / reg_width);

      /* Since we don't have any live/dead analysis on the MRFs, just mark all
       * that are used as conflicting with all virtual GRFs.
       */
      if (mrf_used[i]) {
         for (int j = 0; j < this->virtual_grf_count; j++) {
            ra_add_node_interference(g, first_mrf_node + i, j);
         }
      }
   }
}

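/* Top-level register allocation for the FS.  Returns true on success; on
 * failure it spills one virtual GRF to scratch (or fails the compile) and
 * returns false so the caller can retry allocation.
 */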
bool
fs_visitor::assign_regs()
{
   /* Most of this allocation was written for a reg_width of 1
    * (dispatch_width == 8).  In extending to 16-wide, the code was
    * left in place and it was converted to have the hardware
    * registers it's allocating be contiguous physical pairs of regs
    * for reg_width == 2.
    */
   int reg_width = dispatch_width / 8;
   int hw_reg_mapping[this->virtual_grf_count];
   int payload_node_count = (ALIGN(this->first_non_payload_grf, reg_width) /
                             reg_width);
   int rsi = reg_width - 1; /* Which brw->wm.reg_sets[] to use */
   calculate_live_intervals();

   int node_count = this->virtual_grf_count;
   int first_payload_node = node_count;
   node_count += payload_node_count;
   int first_mrf_hack_node = node_count;
   if (brw->gen >= 7)
      node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START;
   struct ra_graph *g = ra_alloc_interference_graph(brw->wm.reg_sets[rsi].regs,
                                                    node_count);
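
   /* Node numbering in the interference graph, as set up above:
    * [0, virtual_grf_count) are the virtual GRFs, followed by
    * payload_node_count payload nodes, followed (on gen7+) by one node per
    * MRF-hack register.
    */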

   for (int i = 0; i < this->virtual_grf_count; i++) {
      int size = this->virtual_grf_sizes[i];
      int c;

      if (size == 8) {
         c = 4;
      } else {
         assert(size >= 1 &&
                size <= 4 &&
                "Register allocation relies on split_virtual_grfs()");
         c = brw->wm.reg_sets[rsi].classes[size - 1];
      }

      /* Special case: on pre-GEN6 hardware that supports PLN, the
       * second operand of a PLN instruction needs to be an
       * even-numbered register, so we have a special register class
       * wm_aligned_pairs_class to handle this case.  pre-GEN6 always
       * uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the
       * second operand of a PLN instruction (since it doesn't support
       * any other interpolation modes).  So all we need to do is find
       * that register and set it to the appropriate class.
       */
      if (brw->wm.reg_sets[rsi].aligned_pairs_class >= 0 &&
          this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
         c = brw->wm.reg_sets[rsi].aligned_pairs_class;
      }

      ra_set_node_class(g, i, c);

      for (int j = 0; j < i; j++) {
         if (virtual_grf_interferes(i, j)) {
            ra_add_node_interference(g, i, j);
         }
      }
   }

   setup_payload_interference(g, payload_node_count, first_payload_node);
   if (brw->gen >= 7)
      setup_mrf_hack_interference(g, first_mrf_hack_node);

   if (!ra_allocate_no_spills(g)) {
      /* Failed to allocate registers.  Spill a reg, and the caller will
       * loop back into here to try again.
       */
      int reg = choose_spill_reg(g);

      if (reg == -1) {
         fail("no register to spill:\n");
         dump_instructions();
      } else if (dispatch_width == 16) {
         fail("Failure to register allocate.  Reduce number of live scalar "
              "values to avoid this.");
      } else {
         spill_reg(reg);
      }

      ralloc_free(g);

      return false;
   }

   /* Get the chosen virtual registers for each node, and map virtual
    * regs in the register classes back down to real hardware reg
    * numbers.
    */
   this->grf_used = payload_node_count * reg_width;
   for (int i = 0; i < this->virtual_grf_count; i++) {
      int reg = ra_get_node_reg(g, i);

      hw_reg_mapping[i] = brw->wm.reg_sets[rsi].ra_reg_to_grf[reg] * reg_width;
      this->grf_used = MAX2(this->grf_used,
                            hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
                            reg_width);
   }

   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;

      assign_reg(hw_reg_mapping, &inst->dst, reg_width);
      assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
      assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
      assign_reg(hw_reg_mapping, &inst->src[2], reg_width);
   }

   ralloc_free(g);

   return true;
}

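/* Insert a scratch read (FS_OPCODE_UNSPILL) before inst, loading dst from
 * the given byte offset in the thread's scratch space.
 */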
void
fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset)
{
   fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
   unspill_inst->offset = spill_offset;
   unspill_inst->ir = inst->ir;
   unspill_inst->annotation = inst->annotation;

   /* Choose a MRF that won't conflict with an MRF that's live across the
    * spill.  Nothing else will make it up to MRF 14/15.
    */
   unspill_inst->base_mrf = 14;
   unspill_inst->mlen = 1; /* header contains offset */
   inst->insert_before(unspill_inst);
}

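/* Estimate a spill cost for each virtual GRF (one per spill/unspill that
 * would be needed, with accesses inside loops weighted 10x per nesting
 * level), mark registers that must not be spilled, and ask the allocator
 * for the cheapest spill candidate.
 */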
int
fs_visitor::choose_spill_reg(struct ra_graph *g)
{
   float loop_scale = 1.0;
   float spill_costs[this->virtual_grf_count];
   bool no_spill[this->virtual_grf_count];

   for (int i = 0; i < this->virtual_grf_count; i++) {
      spill_costs[i] = 0.0;
      no_spill[i] = false;
   }

   /* Calculate costs for spilling nodes.  Call it a cost of 1 per
    * spill/unspill we'll have to do, and guess that the insides of
    * loops run 10 times.
    */
   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF) {
            spill_costs[inst->src[i].reg] += loop_scale;

            /* Register spilling logic assumes full-width registers; smeared
             * registers have a width of 1 so if we try to spill them we'll
             * generate invalid assembly.  This shouldn't be a problem because
             * smeared registers are only used as short-term temporaries when
             * loading pull constants, so spilling them is unlikely to reduce
             * register pressure anyhow.
             */
            if (inst->src[i].smear >= 0) {
               no_spill[inst->src[i].reg] = true;
            }
         }
      }

      if (inst->dst.file == GRF) {
         spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;

         if (inst->dst.smear >= 0) {
            no_spill[inst->dst.reg] = true;
         }
      }

      switch (inst->opcode) {
      case BRW_OPCODE_DO:
         loop_scale *= 10;
         break;

      case BRW_OPCODE_WHILE:
         loop_scale /= 10;
         break;

      case FS_OPCODE_SPILL:
         if (inst->src[0].file == GRF)
            no_spill[inst->src[0].reg] = true;
         break;

      case FS_OPCODE_UNSPILL:
         if (inst->dst.file == GRF)
            no_spill[inst->dst.reg] = true;
         break;

      default:
         break;
      }
   }

   for (int i = 0; i < this->virtual_grf_count; i++) {
      if (!no_spill[i])
         ra_set_node_spill_cost(g, i, spill_costs[i]);
   }

   return ra_get_best_spill_node(g);
}

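/* Rewrite every use and def of spill_reg so that the value lives in scratch
 * space: sources are unspilled into fresh temporaries just before each use,
 * and destinations are written back out after each def.
 */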
void
fs_visitor::spill_reg(int spill_reg)
{
   int size = virtual_grf_sizes[spill_reg];
   unsigned int spill_offset = c->last_scratch;
   assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
   c->last_scratch += size * REG_SIZE;

   /* Generate spill/unspill instructions for the objects being
    * spilled.  Right now, we spill or unspill the whole thing to a
    * virtual grf of the same size.  For most instructions, though, we
    * could just spill/unspill the GRF being accessed.
    */
   foreach_list(node, &this->instructions) {
      fs_inst *inst = (fs_inst *)node;

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == GRF &&
             inst->src[i].reg == spill_reg) {
            inst->src[i].reg = virtual_grf_alloc(1);
            emit_unspill(inst, inst->src[i],
                         spill_offset + REG_SIZE * inst->src[i].reg_offset);
         }
      }

      if (inst->dst.file == GRF &&
          inst->dst.reg == spill_reg) {
         int subset_spill_offset = (spill_offset +
                                    REG_SIZE * inst->dst.reg_offset);
         inst->dst.reg = virtual_grf_alloc(inst->regs_written);
         inst->dst.reg_offset = 0;

         /* If our write is going to affect just part of the
          * inst->regs_written(), then we need to unspill the destination
          * since we write back out all of the regs_written().
          */
         if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
            fs_reg unspill_reg = inst->dst;
            for (int chan = 0; chan < inst->regs_written; chan++) {
               emit_unspill(inst, unspill_reg,
                            subset_spill_offset + REG_SIZE * chan);
               unspill_reg.reg_offset++;
            }
         }

         fs_reg spill_src = inst->dst;
         spill_src.reg_offset = 0;
         spill_src.abs = false;
         spill_src.negate = false;
         spill_src.smear = -1;

         for (int chan = 0; chan < inst->regs_written; chan++) {
            fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
                                                       reg_null_f, spill_src);
            spill_src.reg_offset++;
            spill_inst->offset = subset_spill_offset + chan * REG_SIZE;
            spill_inst->ir = inst->ir;
            spill_inst->annotation = inst->annotation;
            spill_inst->base_mrf = 14;
            spill_inst->mlen = 2; /* header, value */
            inst->insert_after(spill_inst);
         }
      }
   }

   this->live_intervals_valid = false;
}