/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu
 */

#include <stdlib.h> /* for qsort() */
#include "toy_compiler.h"
#include "toy_legalize.h"

/**
 * Live interval of a VRF register.
 */
struct linear_scan_live_interval {
   int vrf;
   int startpoint;
   int endpoint;

   /*
    * should this interval be assigned a register consecutive to the
    * previous interval's?
    */
   bool consecutive;

   int reg;

   struct list_head list;
};

/**
 * Linear scan state.
 */
struct linear_scan {
   struct linear_scan_live_interval *intervals;
   int max_vrf, num_vrfs;

   int num_regs;

   struct list_head active_list;
   int *free_regs;
   int num_free_regs;

   int *vrf_mapping;
};

/**
 * Return a chunk of registers to the free register pool.
 */
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
   int i;

   for (i = 0; i < count; i++)
      ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
}

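/*
 * Example: freeing reg 4 with count 3 appends 6, 5, 4 to free_regs in
 * that order, leaving the lowest register of the chunk at the tail.
 * Because single-register allocations pop from the tail, the next
 * one-register request returns register 4.
 */
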
static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int *reg1 = elem1;
   const int *reg2 = elem2;

   /* in reverse order */
   return (*reg2 - *reg1);
}

/**
 * Allocate a chunk of registers from the free register pool.
 */
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
   bool sorted = false;
   int reg;

   /* simple cases */
   if (count > ls->num_free_regs)
      return -1;
   else if (count == 1)
      return ls->free_regs[--ls->num_free_regs];

   /* TODO a free register pool */
   /* TODO reserve some regs for spilling */
   while (true) {
      bool found = false;
      int start;

      /*
       * find a chunk of registers that have consecutive register
       * numbers
       */
      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
         int i;

         for (i = 1; i < count; i++) {
            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
               break;
         }

         if (i >= count) {
            found = true;
            break;
         }
      }

      if (found) {
         reg = ls->free_regs[start];

         if (start != ls->num_free_regs - 1) {
            start++;
            memmove(&ls->free_regs[start - count],
                    &ls->free_regs[start],
                    sizeof(*ls->free_regs) * (ls->num_free_regs - start));
         }
         ls->num_free_regs -= count;
         break;
      }
      else if (!sorted) {
         /* sort and retry */
         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
               linear_scan_compare_regs);
         sorted = true;
      }
      else {
         /* failed */
         reg = -1;
         break;
      }
   }

   return reg;
}

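/*
 * Example: with free_regs sorted in descending order, say { 7, 6, 5, 2,
 * 1, 0 }, a request for count == 3 scans from the tail, finds the
 * ascending run 0, 1, 2 ending at start == 5, returns register 0, and
 * shrinks the pool to { 7, 6, 5 }.  Had the run been found away from
 * the tail, the memmove above would close the gap first.
 */
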
/**
 * Add an interval to the active list.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct linear_scan_live_interval *pos;

   /* keep the active list sorted by endpoints */
   LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
      if (pos->endpoint >= interval->endpoint)
         break;
   }

   list_addtail(&interval->list, &pos->list);
}

/**
 * Remove an interval from the active list.
 */
static void
linear_scan_remove_active(struct linear_scan *ls,
                          struct linear_scan_live_interval *interval)
{
   list_del(&interval->list);
}

/**
 * Remove intervals that are no longer active from the active list.
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *interval, *next;

   LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
      /*
       * since we sort intervals on the active list by their endpoints, we
       * know that this and the rest of the intervals are still active.
       */
      if (interval->endpoint >= pc)
         break;

      linear_scan_remove_active(ls, interval);

      /* recycle the reg */
      linear_scan_free_regs(ls, interval->reg, 1);
   }
}

/**
 * Spill an interval.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}

/**
 * Spill a range of intervals.
 */
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
   int i;

   for (i = 0; i < count; i++) {
      struct linear_scan_live_interval *interval = &ls->intervals[first + i];

      linear_scan_spill(ls, interval, false);
   }
}

/**
 * Perform linear scan to allocate registers for the intervals.
 */
static bool
linear_scan_run(struct linear_scan *ls)
{
   int i;

   i = 0;
   while (i < ls->num_vrfs) {
      struct linear_scan_live_interval *first = &ls->intervals[i];
      int reg, count;

      /*
       * GEN6_OPCODE_SEND may write to multiple consecutive registers,
       * and we need to support that
       */
      for (count = 1; i + count < ls->num_vrfs; count++) {
         const struct linear_scan_live_interval *interval =
            &ls->intervals[i + count];

         if (interval->startpoint != first->startpoint ||
             !interval->consecutive)
            break;
      }

      reg = linear_scan_allocate_regs(ls, count);

      /* expire intervals that are no longer active and try again */
      if (reg < 0) {
         linear_scan_expire_active(ls, first->startpoint);
         reg = linear_scan_allocate_regs(ls, count);
      }

      /* have to spill some intervals */
      if (reg < 0) {
         struct linear_scan_live_interval *last_active =
            container_of(ls->active_list.prev,
                  (struct linear_scan_live_interval *) NULL, list);

         /* heuristically spill the interval that ends last */
         if (count > 1 || last_active->endpoint < first->endpoint) {
            linear_scan_spill_range(ls, i, count);
            i += count;
            continue;
         }

         /* make some room for the new interval */
         linear_scan_spill(ls, last_active, true);
         reg = linear_scan_allocate_regs(ls, count);
         if (reg < 0) {
            assert(!"failed to spill any register");
            return false;
         }
      }

      while (count--) {
         struct linear_scan_live_interval *interval = &ls->intervals[i++];

         interval->reg = reg++;
         linear_scan_add_active(ls, interval);

         ls->vrf_mapping[interval->vrf] = interval->reg;

         /*
          * this must hold because of how the intervals were initialized
          */
         assert(interval->vrf - first->vrf == interval->reg - first->reg);
      }
   }

   return true;
}

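/*
 * Example: a SEND whose message descriptor encodes a response length of
 * 4 (at SIMD8) yields four intervals that share a startpoint and have
 * consecutive == true on all but the first; the grouping loop above
 * then requests a single chunk of four consecutive registers for them.
 */
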
/**
 * Add a new interval.
 */
static void
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
{
   /* vrf doubles as a "seen" flag, so VRF 0 is never tracked */
   if (ls->intervals[vrf].vrf)
      return;

   ls->intervals[vrf].vrf = vrf;
   ls->intervals[vrf].startpoint = pc;

   ls->num_vrfs++;
   if (vrf > ls->max_vrf)
      ls->max_vrf = vrf;
}

/**
 * Perform (oversimplified?) live variable analysis.
 */
static void
linear_scan_init_live_intervals(struct linear_scan *ls,
                                struct toy_compiler *tc)
{
   const struct toy_inst *inst;
   int pc, do_pc, while_pc;

   pc = 0;
   do_pc = -1;
   while_pc = -1;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      const int startpoint = (pc <= while_pc) ? do_pc : pc;
      const int endpoint = (pc <= while_pc) ? while_pc : pc;
      int vrf, i;

      /*
       * assume all registers used in this outermost loop are live
       * throughout the whole loop
       */
      if (inst->marker) {
         if (pc > while_pc) {
            struct toy_inst *inst2;
            int loop_level = 1;

            assert(inst->opcode == TOY_OPCODE_DO);
            do_pc = pc;
            while_pc = pc + 1;

            /* find the matching GEN6_OPCODE_WHILE */
            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
                  &tc->instructions, list) {
               if (inst2->marker) {
                  assert(inst2->opcode == TOY_OPCODE_DO);
                  loop_level++;
                  continue;
               }

               if (inst2->opcode == GEN6_OPCODE_WHILE) {
                  loop_level--;
                  if (!loop_level)
                     break;
               }
               while_pc++;
            }
         }

         continue;
      }

      if (inst->dst.file == TOY_FILE_VRF) {
         int num_dst;

         /* TODO this is a hack */
         if (inst->opcode == GEN6_OPCODE_SEND ||
             inst->opcode == GEN6_OPCODE_SENDC) {
            const uint32_t mdesc = inst->src[1].val32;
            /* response length lives in bits 24:20 of the descriptor */
            int response_length = (mdesc >> 20) & 0x1f;

            num_dst = response_length;
            if (num_dst > 1 && inst->exec_size == GEN6_EXECSIZE_16)
               num_dst /= 2;
         }
         else {
            num_dst = 1;
         }

         vrf = inst->dst.val32 / TOY_REG_WIDTH;

         for (i = 0; i < num_dst; i++) {
            /* first use */
            if (!ls->intervals[vrf].vrf)
               linear_scan_add_live_interval(ls, vrf, startpoint);

            ls->intervals[vrf].endpoint = endpoint;
            ls->intervals[vrf].consecutive = (i > 0);

            vrf++;
         }
      }

      for (i = 0; i < Elements(inst->src); i++) {
         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         vrf = inst->src[i].val32 / TOY_REG_WIDTH;

         /* first use */
         if (!ls->intervals[vrf].vrf)
            linear_scan_add_live_interval(ls, vrf, startpoint);

         ls->intervals[vrf].endpoint = endpoint;
      }

      pc++;
   }
}

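/*
 * Example: a register written at pc 10 and last read at pc 25, where
 * pcs 20..30 form the outermost DO/WHILE loop, keeps its startpoint of
 * 10 but has its endpoint extended to while_pc == 30 by the read, so it
 * stays live across the entire loop.
 */
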
/**
 * Clean up after performing linear scan.
 */
static void
linear_scan_cleanup(struct linear_scan *ls)
{
   FREE(ls->vrf_mapping);
   FREE(ls->intervals);
   FREE(ls->free_regs);
}

static int
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
{
   const struct linear_scan_live_interval *interval1 = elem1;
   const struct linear_scan_live_interval *interval2 = elem2;

   /* make unused elements appear at the end */
   if (!interval1->vrf)
      return 1;
   else if (!interval2->vrf)
      return -1;

   /* sort by startpoints first, and then by vrf */
   if (interval1->startpoint != interval2->startpoint)
      return (interval1->startpoint - interval2->startpoint);
   else
      return (interval1->vrf - interval2->vrf);
}

/**
 * Prepare for linear scan.
 */
static bool
linear_scan_init(struct linear_scan *ls, int num_regs,
                 struct toy_compiler *tc)
{
   int num_intervals, i;

   memset(ls, 0, sizeof(*ls));

   /* this may be much larger than ls->num_vrfs... */
   num_intervals = tc->next_vrf;
   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
   if (!ls->intervals)
      return false;

   linear_scan_init_live_intervals(ls, tc);
   /* sort intervals by startpoints */
   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
         linear_scan_compare_live_intervals);

   ls->num_regs = num_regs;
   ls->num_free_regs = num_regs;

   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
   if (!ls->free_regs) {
      FREE(ls->intervals);
      return false;
   }

   /* add in reverse order as we will allocate from the tail */
   for (i = 0; i < ls->num_regs; i++)
      ls->free_regs[i] = num_regs - i - 1;

   list_inithead(&ls->active_list);

   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
   if (!ls->vrf_mapping) {
      FREE(ls->intervals);
      FREE(ls->free_regs);
      return false;
   }

   return true;
}

/**
 * Allocate registers with linear scan.
 */
static void
linear_scan_allocation(struct toy_compiler *tc,
                       int start_grf, int end_grf,
                       int num_grf_per_vrf)
{
   const int num_grfs = end_grf - start_grf + 1;
   struct linear_scan ls;
   struct toy_inst *inst;

   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
      return;

   if (!linear_scan_run(&ls)) {
      tc_fail(tc, "failed to allocate registers");
      linear_scan_cleanup(&ls);
      return;
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
      }
   }

   linear_scan_cleanup(&ls);
}

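/*
 * Example (assuming TOY_REG_WIDTH == 32): with start_grf == 2 and
 * num_grf_per_vrf == 1, a source operand with val32 == 5 * 32 + 16
 * (VRF 5, subregister 16) and vrf_mapping[5] == 0 is rewritten to
 * val32 == (0 * 1 + 2) * 32 + 16, i.e. GRF 2, subregister 16.
 */
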
/**
 * Trivially allocate registers.
 */
static void
trivial_allocation(struct toy_compiler *tc,
                   int start_grf, int end_grf,
                   int num_grf_per_vrf)
{
   struct toy_inst *inst;
   int max_grf = -1;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* VRF numbering starts from 1, hence the start_grf - 1 */
         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }
   }

   if (max_grf + num_grf_per_vrf - 1 > end_grf)
      tc_fail(tc, "failed to allocate registers");
}

/**
 * Allocate GRF registers to VRF registers.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
   if (true)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}
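
/*
 * A typical call, sketched with assumed values (the actual GRF range
 * depends on the shader stage and payload layout):
 *
 *    toy_compiler_allocate_registers(tc, first_free_grf, 127, 1);
 *
 * This assigns each VRF one GRF out of [first_free_grf, 127], calling
 * tc_fail() when the range is exhausted.
 */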