/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include <stdlib.h> /* for qsort() */
#include <string.h> /* for memmove() and memset() */
#include "toy_compiler.h"
#include "toy_legalize.h"

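/*
 * Register allocation for the toy compiler: assign hardware GRF registers to
 * the virtual VRF registers produced by code generation.  A simple linear
 * scan allocator does the work; a trivial one-to-one allocator is kept as an
 * alternative path.
 */
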
/**
 * Live interval of a VRF register.
 */
struct linear_scan_live_interval {
   int vrf;
   int startpoint;
   int endpoint;

   /*
    * should this interval be assigned a register consecutive to the previous
    * interval's?
    */
   bool consecutive;

   int reg;

   struct list_head list;
};

/**
 * Linear scan.
 */
struct linear_scan {
   struct linear_scan_live_interval *intervals;
   int max_vrf, num_vrfs;

   int num_regs;

   struct list_head active_list;
   int *free_regs;
   int num_free_regs;

   int *vrf_mapping;
};

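/*
 * How the pieces fit together:
 *
 *   intervals    - one live interval per VRF, sorted by startpoint before
 *                  the scan runs
 *   active_list  - intervals currently holding a register, kept sorted by
 *                  endpoint so expired ones can be popped from the front
 *   free_regs    - pool of unallocated register numbers, treated as a stack
 *                  and kept in descending order so that consecutive numbers
 *                  sit in adjacent slots
 *   vrf_mapping  - the result: vrf_mapping[vrf] is the register assigned to
 *                  that VRF
 */
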
/**
 * Return a chunk of registers to the free register pool.
 */
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
   int i;

   for (i = 0; i < count; i++)
      ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
}

static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int *reg1 = elem1;
   const int *reg2 = elem2;

   /* in reverse order */
   return (*reg2 - *reg1);
}

/**
 * Allocate a chunk of registers from the free register pool.
 */
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
   bool sorted = false;
   int reg;

   /* simple cases */
   if (count > ls->num_free_regs)
      return -1;
   else if (count == 1)
      return ls->free_regs[--ls->num_free_regs];

   /* TODO a free register pool */
   /* TODO reserve some regs for spilling */
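   /*
    * The pool is kept in descending order (see linear_scan_compare_regs and
    * how linear_scan_init fills it), so registers with consecutive numbers
    * occupy adjacent slots, with the lowest numbers toward the tail.  Scan
    * from the tail for a run of `count` consecutive numbers; if none is
    * found, sort the pool once and retry before giving up.  For example,
    * with free_regs = { 7, 6, 5, 3, 2, 1, 0 } and count = 3, the run
    * { 2, 1, 0 } at the tail is found and 0 is returned.
    */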
   while (true) {
      bool found = false;
      int start;

      /*
       * find a chunk of registers that have consecutive register
       * numbers
       */
      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
         int i;

         for (i = 1; i < count; i++) {
            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
               break;
         }

         if (i >= count) {
            found = true;
            break;
         }
      }

      if (found) {
         reg = ls->free_regs[start];

         if (start != ls->num_free_regs - 1) {
            start++;
            memmove(&ls->free_regs[start - count],
                    &ls->free_regs[start],
                    sizeof(*ls->free_regs) * (ls->num_free_regs - start));
         }
         ls->num_free_regs -= count;
         break;
      }
      else if (!sorted) {
         /* sort and retry */
         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
               linear_scan_compare_regs);
         sorted = true;
      }
      else {
         /* failed */
         reg = -1;
         break;
      }
   }

   return reg;
}

/**
 * Add an interval to the active list.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct linear_scan_live_interval *pos;

   /* keep the active list sorted by endpoints */
   LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
      if (pos->endpoint >= interval->endpoint)
         break;
   }

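   /*
    * list_addtail() inserts the interval right before `pos`; when the loop
    * above runs to completion without breaking, `pos` refers to the list
    * head sentinel, so the interval goes to the tail.  Either way the
    * endpoint ordering is preserved.
    */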
   list_addtail(&interval->list, &pos->list);
}

/**
 * Remove an interval from the active list.
 */
static void
linear_scan_remove_active(struct linear_scan *ls,
                          struct linear_scan_live_interval *interval)
{
   list_del(&interval->list);
}

/**
 * Remove intervals that are no longer active from the active list.
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *interval, *next;

   LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
      /*
       * since we sort intervals on the active list by their endpoints, we
       * know that this and the rest of the intervals are still active.
       */
      if (interval->endpoint >= pc)
         break;

      linear_scan_remove_active(ls, interval);

      /* recycle the reg */
      linear_scan_free_regs(ls, interval->reg, 1);
   }
}

/**
 * Spill an interval.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}

/**
 * Spill a range of intervals.
 */
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
   int i;

   for (i = 0; i < count; i++) {
      struct linear_scan_live_interval *interval = &ls->intervals[first + i];

      linear_scan_spill(ls, interval, false);
   }
}

/**
 * Perform linear scan to allocate registers for the intervals.
 */
static bool
linear_scan_run(struct linear_scan *ls)
{
   int i;

   i = 0;
   while (i < ls->num_vrfs) {
      struct linear_scan_live_interval *first = &ls->intervals[i];
      int reg, count;

      /*
       * BRW_OPCODE_SEND may write to multiple consecutive registers; group
       * such intervals together so that they can be allocated as one chunk.
       */
      for (count = 1; i + count < ls->num_vrfs; count++) {
         const struct linear_scan_live_interval *interval =
            &ls->intervals[i + count];

         if (interval->startpoint != first->startpoint ||
             !interval->consecutive)
            break;
      }

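      /*
       * Intervals that must be consecutive share a startpoint and sit next
       * to each other after sorting, so `count` now covers the whole group;
       * try to grab that many consecutive registers at once.
       */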
      reg = linear_scan_allocate_regs(ls, count);

      /* expire intervals that are no longer active and try again */
      if (reg < 0) {
         linear_scan_expire_active(ls, first->startpoint);
         reg = linear_scan_allocate_regs(ls, count);
      }

      /* have to spill some intervals */
      if (reg < 0) {
         struct linear_scan_live_interval *last_active =
            container_of(ls->active_list.prev,
                  (struct linear_scan_live_interval *) NULL, list);

         /* heuristically spill the interval that ends last */
         if (count > 1 || last_active->endpoint < first->endpoint) {
            linear_scan_spill_range(ls, i, count);
            i += count;
            continue;
         }

         /* make some room for the new interval */
         linear_scan_spill(ls, last_active, true);
         reg = linear_scan_allocate_regs(ls, count);
         if (reg < 0) {
            assert(!"failed to spill any register");
            return false;
         }
      }

      while (count--) {
         struct linear_scan_live_interval *interval = &ls->intervals[i++];

         interval->reg = reg++;
         linear_scan_add_active(ls, interval);

         ls->vrf_mapping[interval->vrf] = interval->reg;

         /*
          * this must be the case because of how we initialized the
          * intervals
          */
         assert(interval->vrf - first->vrf == interval->reg - first->reg);
      }
   }

   return true;
}

/**
 * Add a new interval.
 */
static void
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
{
   if (ls->intervals[vrf].vrf)
      return;

   ls->intervals[vrf].vrf = vrf;
   ls->intervals[vrf].startpoint = pc;

   ls->num_vrfs++;
   if (vrf > ls->max_vrf)
      ls->max_vrf = vrf;
}

/**
 * Perform (oversimplified?) live variable analysis.
 */
static void
linear_scan_init_live_intervals(struct linear_scan *ls,
                                struct toy_compiler *tc)
{
   const struct toy_inst *inst;
   int pc, do_pc, while_pc;

   pc = 0;
   do_pc = -1;
   while_pc = -1;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
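      /*
       * Inside the outermost loop (do_pc <= pc <= while_pc) every access is
       * treated as spanning the whole [do_pc, while_pc] range, which is what
       * the "live throughout the whole loop" assumption below boils down to.
       */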
      const int startpoint = (pc <= while_pc) ? do_pc : pc;
      const int endpoint = (pc <= while_pc) ? while_pc : pc;
      int vrf, i;

      /*
       * assume all registers used in this outermost loop are live throughout
       * the whole loop
       */
      if (inst->marker) {
         if (pc > while_pc) {
            struct toy_inst *inst2;
            int loop_level = 1;

            assert(inst->opcode == BRW_OPCODE_DO);
            do_pc = pc;
            while_pc = pc + 1;

            /* find the matching BRW_OPCODE_WHILE */
            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
                  &tc->instructions, list) {
               if (inst2->marker) {
                  assert(inst2->opcode == BRW_OPCODE_DO);
                  loop_level++;
                  continue;
               }

               if (inst2->opcode == BRW_OPCODE_WHILE) {
                  loop_level--;
                  if (!loop_level)
                     break;
               }
               while_pc++;
            }
         }

         continue;
      }

      if (inst->dst.file == TOY_FILE_VRF) {
         int num_dst;

         /* TODO this is a hack */
         if (inst->opcode == BRW_OPCODE_SEND ||
             inst->opcode == BRW_OPCODE_SENDC) {
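            /*
             * The destination register count of a SEND comes from the
             * response length field, bits 24:20 of the message descriptor
             * (src[1] here).  The halving below presumably converts GRFs to
             * VRFs for SIMD16, where one VRF covers two GRFs.
             */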
            const uint32_t mdesc = inst->src[1].val32;
            int response_length = (mdesc >> 20) & 0x1f;

            num_dst = response_length;
            if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16)
               num_dst /= 2;
         }
         else {
            num_dst = 1;
         }

         vrf = inst->dst.val32 / TOY_REG_WIDTH;

         for (i = 0; i < num_dst; i++) {
            /* first use */
            if (!ls->intervals[vrf].vrf)
               linear_scan_add_live_interval(ls, vrf, startpoint);

            ls->intervals[vrf].endpoint = endpoint;
            ls->intervals[vrf].consecutive = (i > 0);

            vrf++;
         }
      }

      for (i = 0; i < Elements(inst->src); i++) {
         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         vrf = inst->src[i].val32 / TOY_REG_WIDTH;

         /* first use */
         if (!ls->intervals[vrf].vrf)
            linear_scan_add_live_interval(ls, vrf, startpoint);

         ls->intervals[vrf].endpoint = endpoint;
      }

      pc++;
   }
}

/**
 * Clean up after performing linear scan.
 */
static void
linear_scan_cleanup(struct linear_scan *ls)
{
   FREE(ls->vrf_mapping);
   FREE(ls->intervals);
   FREE(ls->free_regs);
}

static int
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
{
   const struct linear_scan_live_interval *interval1 = elem1;
   const struct linear_scan_live_interval *interval2 = elem2;

   /* make unused elements appear at the end */
   if (!interval1->vrf)
      return 1;
   else if (!interval2->vrf)
      return -1;

   /* sort by startpoints first, and then by vrf */
   if (interval1->startpoint != interval2->startpoint)
      return (interval1->startpoint - interval2->startpoint);
   else
      return (interval1->vrf - interval2->vrf);
}

/**
 * Prepare for linear scan.
 */
static bool
linear_scan_init(struct linear_scan *ls, int num_regs,
                 struct toy_compiler *tc)
{
   int num_intervals, i;

   memset(ls, 0, sizeof(*ls));

   /* this may be much larger than ls->num_vrfs... */
   num_intervals = tc->next_vrf;
   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
   if (!ls->intervals)
      return false;

   linear_scan_init_live_intervals(ls, tc);
   /* sort intervals by startpoints */
   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
         linear_scan_compare_live_intervals);

   ls->num_regs = num_regs;
   ls->num_free_regs = num_regs;

   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
   if (!ls->free_regs) {
      FREE(ls->intervals);
      return false;
   }

   /* add in reverse order as we will allocate from the tail */
   for (i = 0; i < ls->num_regs; i++)
      ls->free_regs[i] = num_regs - i - 1;

   list_inithead(&ls->active_list);

   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
   if (!ls->vrf_mapping) {
      FREE(ls->intervals);
      FREE(ls->free_regs);
      return false;
   }

   return true;
}

/**
 * Allocate registers with linear scan.
 */
static void
linear_scan_allocation(struct toy_compiler *tc,
                       int start_grf, int end_grf,
                       int num_grf_per_vrf)
{
   const int num_grfs = end_grf - start_grf + 1;
   struct linear_scan ls;
   struct toy_inst *inst;

   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
      return;

   if (!linear_scan_run(&ls)) {
      tc_fail(tc, "failed to allocate registers");
      linear_scan_cleanup(&ls);
      return;
   }

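   /* rewrite every VRF operand to the GRF it was assigned */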
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
      }
   }

   linear_scan_cleanup(&ls);
}

/**
 * Trivially allocate registers.
 */
static void
trivial_allocation(struct toy_compiler *tc,
                   int start_grf, int end_grf,
                   int num_grf_per_vrf)
{
   struct toy_inst *inst;
   int max_grf = -1;

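   /*
    * Map VRF n directly to GRF start_grf + (n - 1) * num_grf_per_vrf; VRF
    * numbers appear to start at 1 (VRF 0 means "unused" elsewhere in this
    * file), hence the minus one below.
    */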
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }
   }

   if (max_grf + num_grf_per_vrf - 1 > end_grf)
      tc_fail(tc, "failed to allocate registers");
}

/**
 * Allocate GRF registers to VRF registers.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
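   /* the linear scan allocator is always used; flip the condition below to
    * switch to the trivial allocator */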
   if (true)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}