Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright © 2010 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *
  26.  */
  27.  
  28. /** @file register_allocate.c
  29.  *
  30.  * Graph-coloring register allocator.
  31.  */
  32.  
  33. #include <ralloc.h>
  34.  
  35. #include "main/imports.h"
  36. #include "main/macros.h"
  37. #include "main/mtypes.h"
  38. #include "register_allocate.h"
  39.  
  40. struct ra_reg {
  41.    char *name;
  42.    GLboolean *conflicts;
  43. };
  44.  
  45. struct ra_regs {
  46.    struct ra_reg *regs;
  47.    unsigned int count;
  48.  
  49.    struct ra_class **classes;
  50.    unsigned int class_count;
  51. };
  52.  
  53. struct ra_class {
  54.    GLboolean *regs;
  55.  
  56.    /**
  57.     * p_B in Runeson/Nyström paper.
  58.     *
  59.     * This is "how many regs are in the set."
  60.     */
  61.    unsigned int p;
  62.  
  63.    /**
  64.     * q_B,C in Runeson/Nyström paper.
  65.     */
  66.    unsigned int *q;
  67. };
  68.  
  69. struct ra_node {
  70.    GLboolean *adjacency;
  71.    unsigned int class;
  72.    unsigned int adjacency_count;
  73.    unsigned int reg;
  74.    GLboolean in_stack;
  75.    float spill_cost;
  76. };
  77.  
  78. struct ra_graph {
  79.    struct ra_regs *regs;
  80.    /**
  81.     * the variables that need register allocation.
  82.     */
  83.    struct ra_node *nodes;
  84.    unsigned int count; /**< count of nodes. */
  85.  
  86.    unsigned int *stack;
  87.    unsigned int stack_count;
  88. };
  89.  
  90. struct ra_regs *
  91. ra_alloc_reg_set(unsigned int count)
  92. {
  93.    unsigned int i;
  94.    struct ra_regs *regs;
  95.  
  96.    regs = rzalloc(NULL, struct ra_regs);
  97.    regs->count = count;
  98.    regs->regs = rzalloc_array(regs, struct ra_reg, count);
  99.  
  100.    for (i = 0; i < count; i++) {
  101.       regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count);
  102.       regs->regs[i].conflicts[i] = GL_TRUE;
  103.    }
  104.  
  105.    return regs;
  106. }
  107.  
  108. void
  109. ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
  110. {
  111.    regs->regs[r1].conflicts[r2] = GL_TRUE;
  112.    regs->regs[r2].conflicts[r1] = GL_TRUE;
  113. }
  114.  
  115. unsigned int
  116. ra_alloc_reg_class(struct ra_regs *regs)
  117. {
  118.    struct ra_class *class;
  119.  
  120.    regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *,
  121.                             regs->class_count + 1);
  122.  
  123.    class = rzalloc(regs, struct ra_class);
  124.    regs->classes[regs->class_count] = class;
  125.  
  126.    class->regs = rzalloc_array(class, GLboolean, regs->count);
  127.  
  128.    return regs->class_count++;
  129. }
  130.  
  131. void
  132. ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r)
  133. {
  134.    struct ra_class *class = regs->classes[c];
  135.  
  136.    class->regs[r] = GL_TRUE;
  137.    class->p++;
  138. }
  139.  
  140. /**
  141.  * Must be called after all conflicts and register classes have been
  142.  * set up and before the register set is used for allocation.
  143.  */
  144. void
  145. ra_set_finalize(struct ra_regs *regs)
  146. {
  147.    unsigned int b, c;
  148.  
  149.    for (b = 0; b < regs->class_count; b++) {
  150.       regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count);
  151.    }
  152.  
  153.    /* Compute, for each class B and C, how many regs of B an
  154.     * allocation to C could conflict with.
  155.     */
  156.    for (b = 0; b < regs->class_count; b++) {
  157.       for (c = 0; c < regs->class_count; c++) {
  158.          unsigned int rc;
  159.          int max_conflicts = 0;
  160.  
  161.          for (rc = 0; rc < regs->count; rc++) {
  162.             unsigned int rb;
  163.             int conflicts = 0;
  164.  
  165.             if (!regs->classes[c]->regs[rc])
  166.                continue;
  167.  
  168.             for (rb = 0; rb < regs->count; rb++) {
  169.                if (regs->classes[b]->regs[rb] &&
  170.                    regs->regs[rb].conflicts[rc])
  171.                   conflicts++;
  172.             }
  173.             max_conflicts = MAX2(max_conflicts, conflicts);
  174.          }
  175.          regs->classes[b]->q[c] = max_conflicts;
  176.       }
  177.    }
  178. }
  179.  
  180. struct ra_graph *
  181. ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
  182. {
  183.    struct ra_graph *g;
  184.    unsigned int i;
  185.  
  186.    g = rzalloc(regs, struct ra_graph);
  187.    g->regs = regs;
  188.    g->nodes = rzalloc_array(g, struct ra_node, count);
  189.    g->count = count;
  190.  
  191.    g->stack = rzalloc_array(g, unsigned int, count);
  192.  
  193.    for (i = 0; i < count; i++) {
  194.       g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count);
  195.       g->nodes[i].adjacency[i] = GL_TRUE;
  196.       g->nodes[i].reg = ~0;
  197.    }
  198.  
  199.    return g;
  200. }
  201.  
  202. void
  203. ra_set_node_class(struct ra_graph *g,
  204.                   unsigned int n, unsigned int class)
  205. {
  206.    g->nodes[n].class = class;
  207. }
  208.  
  209. void
  210. ra_add_node_interference(struct ra_graph *g,
  211.                          unsigned int n1, unsigned int n2)
  212. {
  213.    if (g->nodes[n1].adjacency[n2])
  214.       return;
  215.  
  216.    g->nodes[n1].adjacency[n2] = GL_TRUE;
  217.    g->nodes[n2].adjacency_count++;
  218.    g->nodes[n2].adjacency[n1] = GL_TRUE;
  219.    g->nodes[n2].adjacency_count++;
  220. }
  221.  
  222. static GLboolean pq_test(struct ra_graph *g, unsigned int n)
  223. {
  224.    unsigned int j;
  225.    unsigned int q = 0;
  226.    int n_class = g->nodes[n].class;
  227.  
  228.    for (j = 0; j < g->count; j++) {
  229.       if (j == n || g->nodes[j].in_stack)
  230.          continue;
  231.  
  232.       if (g->nodes[n].adjacency[j]) {
  233.          unsigned int j_class = g->nodes[j].class;
  234.          q += g->regs->classes[n_class]->q[j_class];
  235.       }
  236.    }
  237.  
  238.    return q < g->regs->classes[n_class]->p;
  239. }
  240.  
  241. /**
  242.  * Simplifies the interference graph by pushing all
  243.  * trivially-colorable nodes into a stack of nodes to be colored,
  244.  * removing them from the graph, and rinsing and repeating.
  245.  *
  246.  * Returns GL_TRUE if all nodes were removed from the graph.  GL_FALSE
  247.  * means that either spilling will be required, or optimistic coloring
  248.  * should be applied.
  249.  */
  250. GLboolean
  251. ra_simplify(struct ra_graph *g)
  252. {
  253.    GLboolean progress = GL_TRUE;
  254.    int i;
  255.  
  256.    while (progress) {
  257.       progress = GL_FALSE;
  258.  
  259.       for (i = g->count - 1; i >= 0; i--) {
  260.          if (g->nodes[i].in_stack)
  261.             continue;
  262.  
  263.          if (pq_test(g, i)) {
  264.             g->stack[g->stack_count] = i;
  265.             g->stack_count++;
  266.             g->nodes[i].in_stack = GL_TRUE;
  267.             progress = GL_TRUE;
  268.          }
  269.       }
  270.    }
  271.  
  272.    for (i = 0; i < g->count; i++) {
  273.       if (!g->nodes[i].in_stack)
  274.          return GL_FALSE;
  275.    }
  276.  
  277.    return GL_TRUE;
  278. }
  279.  
  280. /**
  281.  * Pops nodes from the stack back into the graph, coloring them with
  282.  * registers as they go.
  283.  *
  284.  * If all nodes were trivially colorable, then this must succeed.  If
  285.  * not (optimistic coloring), then it may return GL_FALSE;
  286.  */
  287. GLboolean
  288. ra_select(struct ra_graph *g)
  289. {
  290.    int i;
  291.  
  292.    while (g->stack_count != 0) {
  293.       unsigned int r;
  294.       int n = g->stack[g->stack_count - 1];
  295.       struct ra_class *c = g->regs->classes[g->nodes[n].class];
  296.  
  297.       /* Find the lowest-numbered reg which is not used by a member
  298.        * of the graph adjacent to us.
  299.        */
  300.       for (r = 0; r < g->regs->count; r++) {
  301.          if (!c->regs[r])
  302.             continue;
  303.  
  304.          /* Check if any of our neighbors conflict with this register choice. */
  305.          for (i = 0; i < g->count; i++) {
  306.             if (g->nodes[n].adjacency[i] &&
  307.                !g->nodes[i].in_stack &&
  308.                 g->regs->regs[r].conflicts[g->nodes[i].reg]) {
  309.                break;
  310.             }
  311.          }
  312.          if (i == g->count)
  313.             break;
  314.       }
  315.       if (r == g->regs->count)
  316.          return GL_FALSE;
  317.  
  318.       g->nodes[n].reg = r;
  319.       g->nodes[n].in_stack = GL_FALSE;
  320.       g->stack_count--;
  321.    }
  322.  
  323.    return GL_TRUE;
  324. }
  325.  
  326. /**
  327.  * Optimistic register coloring: Just push the remaining nodes
  328.  * on the stack.  They'll be colored first in ra_select(), and
  329.  * if they succeed then the locally-colorable nodes are still
  330.  * locally-colorable and the rest of the register allocation
  331.  * will succeed.
  332.  */
  333. void
  334. ra_optimistic_color(struct ra_graph *g)
  335. {
  336.    unsigned int i;
  337.  
  338.    for (i = 0; i < g->count; i++) {
  339.       if (g->nodes[i].in_stack)
  340.          continue;
  341.  
  342.       g->stack[g->stack_count] = i;
  343.       g->stack_count++;
  344.       g->nodes[i].in_stack = GL_TRUE;
  345.    }
  346. }
  347.  
  348. GLboolean
  349. ra_allocate_no_spills(struct ra_graph *g)
  350. {
  351.    if (!ra_simplify(g)) {
  352.       ra_optimistic_color(g);
  353.    }
  354.    return ra_select(g);
  355. }
  356.  
  357. unsigned int
  358. ra_get_node_reg(struct ra_graph *g, unsigned int n)
  359. {
  360.    return g->nodes[n].reg;
  361. }
  362.  
  363. static float
  364. ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
  365. {
  366.    int j;
  367.    float benefit = 0;
  368.    int n_class = g->nodes[n].class;
  369.  
  370.    /* Define the benefit of eliminating an interference between n, j
  371.     * through spilling as q(C, B) / p(C).  This is similar to the
  372.     * "count number of edges" approach of traditional graph coloring,
  373.     * but takes classes into account.
  374.     */
  375.    for (j = 0; j < g->count; j++) {
  376.       if (j != n && g->nodes[n].adjacency[j]) {
  377.          unsigned int j_class = g->nodes[j].class;
  378.          benefit += ((float)g->regs->classes[n_class]->q[j_class] /
  379.                      g->regs->classes[n_class]->p);
  380.          break;
  381.       }
  382.    }
  383.  
  384.    return benefit;
  385. }
  386.  
  387. /**
  388.  * Returns a node number to be spilled according to the cost/benefit using
  389.  * the pq test, or -1 if there are no spillable nodes.
  390.  */
  391. int
  392. ra_get_best_spill_node(struct ra_graph *g)
  393. {
  394.    unsigned int best_node = -1;
  395.    unsigned int best_benefit = 0.0;
  396.    unsigned int n;
  397.  
  398.    for (n = 0; n < g->count; n++) {
  399.       float cost = g->nodes[n].spill_cost;
  400.       float benefit;
  401.  
  402.       if (cost <= 0.0)
  403.          continue;
  404.  
  405.       benefit = ra_get_spill_benefit(g, n);
  406.  
  407.       if (benefit / cost > best_benefit) {
  408.          best_benefit = benefit / cost;
  409.          best_node = n;
  410.       }
  411.    }
  412.  
  413.    return best_node;
  414. }
  415.  
  416. /**
  417.  * Only nodes with a spill cost set (cost != 0.0) will be considered
  418.  * for register spilling.
  419.  */
  420. void
  421. ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost)
  422. {
  423.    g->nodes[n].spill_cost = cost;
  424. }
  425.