Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5563 serge 1
/*
2
 * Copyright © 2012 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 */
23
 
24
/** @file brw_eu_compact.c
25
 *
26
 * Instruction compaction is a feature of gm45 and newer hardware that allows
27
 * for a smaller instruction encoding.
28
 *
29
 * The instruction cache is on the order of 32KB, and many programs generate
30
 * far more instructions than that.  The instruction cache is built to barely
31
 * keep up with instruction dispatch ability in cache hit cases -- L1
32
 * instruction cache misses that still hit in the next level could limit
33
 * throughput by around 50%.
34
 *
35
 * The idea of instruction compaction is that most instructions use a tiny
36
 * subset of the GPU functionality, so we can encode what would be a 16 byte
37
 * instruction in 8 bytes using some lookup tables for various fields.
38
 */
39
 
40
#include "brw_context.h"
41
#include "brw_eu.h"
42
 
43
/* Gen6 control-index lookup table (selected by brw_init_compaction_tables()
 * when brw->gen == 6).
 *
 * The position of an entry is the 5-bit control index stored in a compacted
 * instruction; the value is the uncompacted bit pattern as assembled by
 * set_control_index(): bits 0..15 hold bits 8..23 of instruction dword 0 and
 * bit 16 holds bit 31 of dword 0.
 */
static const uint32_t gen6_control_index_table[32] = {
44
   0b00000000000000000,
45
   0b01000000000000000,
46
   0b00110000000000000,
47
   0b00000000100000000,
48
   0b00010000000000000,
49
   0b00001000100000000,
50
   0b00000000100000010,
51
   0b00000000000000010,
52
   0b01000000100000000,
53
   0b01010000000000000,
54
   0b10110000000000000,
55
   0b00100000000000000,
56
   0b11010000000000000,
57
   0b11000000000000000,
58
   0b01001000100000000,
59
   0b01000000000001000,
60
   0b01000000000000100,
61
   0b00000000000001000,
62
   0b00000000000000100,
63
   0b00111000100000000,
64
   0b00001000100000010,
65
   0b00110000100000000,
66
   0b00110000000000001,
67
   0b00100000000000001,
68
   0b00110000000000010,
69
   0b00110000000000101,
70
   0b00110000000001001,
71
   0b00110000000010000,
72
   0b00110000000000011,
73
   0b00110000000000100,
74
   0b00110000100001000,
75
   0b00100000000001001
76
};
77
 
78
/* Gen6 datatype lookup table (selected by brw_init_compaction_tables() when
 * brw->gen == 6).
 *
 * The position of an entry is the 5-bit data type index stored in a compacted
 * instruction; the value is the pattern assembled by set_datatype_index():
 * bits 0..14 are bits 0..14 of instruction dword 1 (bits1.ud) and bits 15..17
 * are bits 29..31 of that dword.
 */
static const uint32_t gen6_datatype_table[32] = {
79
   0b001001110000000000,
80
   0b001000110000100000,
81
   0b001001110000000001,
82
   0b001000000001100000,
83
   0b001010110100101001,
84
   0b001000000110101101,
85
   0b001100011000101100,
86
   0b001011110110101101,
87
   0b001000000111101100,
88
   0b001000000001100001,
89
   0b001000110010100101,
90
   0b001000000001000001,
91
   0b001000001000110001,
92
   0b001000001000101001,
93
   0b001000000000100000,
94
   0b001000001000110010,
95
   0b001010010100101001,
96
   0b001011010010100101,
97
   0b001000000110100101,
98
   0b001100011000101001,
99
   0b001011011000101100,
100
   0b001011010110100101,
101
   0b001011110110100101,
102
   0b001111011110111101,
103
   0b001111011110111100,
104
   0b001111011110111101,
105
   0b001111011110011101,
106
   0b001111011110111110,
107
   0b001000000000100001,
108
   0b001000000000100010,
109
   0b001001111111011101,
110
   0b001000001110111110,
111
};
112
 
113
/* Gen6 sub-register lookup table (selected by brw_init_compaction_tables()
 * when brw->gen == 6).
 *
 * The position of an entry is the 5-bit sub_reg_index stored in a compacted
 * instruction; the value packs three sub-register numbers the way
 * set_subreg_index() builds them: dest_subreg_nr in bits 0..4, src0_subreg_nr
 * in bits 5..9 and src1_subreg_nr in bits 10..14.
 */
static const uint32_t gen6_subreg_table[32] = {
114
   0b000000000000000,
115
   0b000000000000100,
116
   0b000000110000000,
117
   0b111000000000000,
118
   0b011110000001000,
119
   0b000010000000000,
120
   0b000000000010000,
121
   0b000110000001100,
122
   0b001000000000000,
123
   0b000001000000000,
124
   0b000001010010100,
125
   0b000000001010110,
126
   0b010000000000000,
127
   0b110000000000000,
128
   0b000100000000000,
129
   0b000000010000000,
130
   0b000000000001000,
131
   0b100000000000000,
132
   0b000001010000000,
133
   0b001010000000000,
134
   0b001100000000000,
135
   0b000000001010100,
136
   0b101101010010100,
137
   0b010100000000000,
138
   0b000000010001111,
139
   0b011000000000000,
140
   0b111110000000000,
141
   0b101000000000000,
142
   0b000000000001111,
143
   0b000100010001111,
144
   0b001000010001111,
145
   0b000110000000000,
146
};
147
 
148
/* Gen6 source-index lookup table (selected by brw_init_compaction_tables()
 * when brw->gen == 6).
 *
 * The position of an entry is the 5-bit source index stored in a compacted
 * instruction; the value is the 12-bit field taken from bits 13..24 of
 * instruction dword 2 (for src0) or dword 3 (for src1), as extracted by
 * set_src0_index() / set_src1_index().
 */
static const uint32_t gen6_src_index_table[32] = {
149
   0b000000000000,
150
   0b010110001000,
151
   0b010001101000,
152
   0b001000101000,
153
   0b011010010000,
154
   0b000100100000,
155
   0b010001101100,
156
   0b010101110000,
157
   0b011001111000,
158
   0b001100101000,
159
   0b010110001100,
160
   0b001000100000,
161
   0b010110001010,
162
   0b000000000010,
163
   0b010101010000,
164
   0b010101101000,
165
   0b111101001100,
166
   0b111100101100,
167
   0b011001110000,
168
   0b010110001001,
169
   0b010101011000,
170
   0b001101001000,
171
   0b010000101100,
172
   0b010000000000,
173
   0b001101110000,
174
   0b001100010000,
175
   0b001100000000,
176
   0b010001101010,
177
   0b001101111000,
178
   0b000001110000,
179
   0b001100100000,
180
   0b001101010000,
181
};
182
 
183
/* Gen7 control-index lookup table (selected by brw_init_compaction_tables()
 * when brw->gen == 7).
 *
 * The position of an entry is the 5-bit control index stored in a compacted
 * instruction; the value is the pattern assembled by set_control_index():
 * bits 0..15 hold bits 8..23 of instruction dword 0, bit 16 holds bit 31 of
 * dword 0, and (gen7 only) bits 17..18 hold bits 25..26 of dword 2.
 */
static const uint32_t gen7_control_index_table[32] = {
184
   0b0000000000000000010,
185
   0b0000100000000000000,
186
   0b0000100000000000001,
187
   0b0000100000000000010,
188
   0b0000100000000000011,
189
   0b0000100000000000100,
190
   0b0000100000000000101,
191
   0b0000100000000000111,
192
   0b0000100000000001000,
193
   0b0000100000000001001,
194
   0b0000100000000001101,
195
   0b0000110000000000000,
196
   0b0000110000000000001,
197
   0b0000110000000000010,
198
   0b0000110000000000011,
199
   0b0000110000000000100,
200
   0b0000110000000000101,
201
   0b0000110000000000111,
202
   0b0000110000000001001,
203
   0b0000110000000001101,
204
   0b0000110000000010000,
205
   0b0000110000100000000,
206
   0b0001000000000000000,
207
   0b0001000000000000010,
208
   0b0001000000000000100,
209
   0b0001000000100000000,
210
   0b0010110000000000000,
211
   0b0010110000000010000,
212
   0b0011000000000000000,
213
   0b0011000000100000000,
214
   0b0101000000000000000,
215
   0b0101000000100000000
216
};
217
 
218
/* Gen7 datatype lookup table (selected by brw_init_compaction_tables() when
 * brw->gen == 7).
 *
 * The position of an entry is the 5-bit data type index stored in a compacted
 * instruction; the value is the pattern assembled by set_datatype_index():
 * bits 0..14 are bits 0..14 of instruction dword 1 (bits1.ud) and bits 15..17
 * are bits 29..31 of that dword.
 */
static const uint32_t gen7_datatype_table[32] = {
219
   0b001000000000000001,
220
   0b001000000000100000,
221
   0b001000000000100001,
222
   0b001000000001100001,
223
   0b001000000010111101,
224
   0b001000001011111101,
225
   0b001000001110100001,
226
   0b001000001110100101,
227
   0b001000001110111101,
228
   0b001000010000100001,
229
   0b001000110000100000,
230
   0b001000110000100001,
231
   0b001001010010100101,
232
   0b001001110010100100,
233
   0b001001110010100101,
234
   0b001111001110111101,
235
   0b001111011110011101,
236
   0b001111011110111100,
237
   0b001111011110111101,
238
   0b001111111110111100,
239
   0b000000001000001100,
240
   0b001000000000111101,
241
   0b001000000010100101,
242
   0b001000010000100000,
243
   0b001001010010100100,
244
   0b001001110010000100,
245
   0b001010010100001001,
246
   0b001101111110111101,
247
   0b001111111110111101,
248
   0b001011110110101100,
249
   0b001010010100101000,
250
   0b001010110100101000
251
};
252
 
253
/* Gen7 sub-register lookup table (selected by brw_init_compaction_tables()
 * when brw->gen == 7).
 *
 * The position of an entry is the 5-bit sub_reg_index stored in a compacted
 * instruction; the value packs three sub-register numbers the way
 * set_subreg_index() builds them: dest_subreg_nr in bits 0..4, src0_subreg_nr
 * in bits 5..9 and src1_subreg_nr in bits 10..14.
 */
static const uint32_t gen7_subreg_table[32] = {
254
   0b000000000000000,
255
   0b000000000000001,
256
   0b000000000001000,
257
   0b000000000001111,
258
   0b000000000010000,
259
   0b000000010000000,
260
   0b000000100000000,
261
   0b000000110000000,
262
   0b000001000000000,
263
   0b000001000010000,
264
   0b000010100000000,
265
   0b001000000000000,
266
   0b001000000000001,
267
   0b001000010000001,
268
   0b001000010000010,
269
   0b001000010000011,
270
   0b001000010000100,
271
   0b001000010000111,
272
   0b001000010001000,
273
   0b001000010001110,
274
   0b001000010001111,
275
   0b001000110000000,
276
   0b001000111101000,
277
   0b010000000000000,
278
   0b010000110000000,
279
   0b011000000000000,
280
   0b011110010000111,
281
   0b100000000000000,
282
   0b101000000000000,
283
   0b110000000000000,
284
   0b111000000000000,
285
   0b111000000011100
286
};
287
 
288
/* Gen7 source-index lookup table (selected by brw_init_compaction_tables()
 * when brw->gen == 7).
 *
 * The position of an entry is the 5-bit source index stored in a compacted
 * instruction; the value is the 12-bit field taken from bits 13..24 of
 * instruction dword 2 (for src0) or dword 3 (for src1), as extracted by
 * set_src0_index() / set_src1_index().
 */
static const uint32_t gen7_src_index_table[32] = {
289
   0b000000000000,
290
   0b000000000010,
291
   0b000000010000,
292
   0b000000010010,
293
   0b000000011000,
294
   0b000000100000,
295
   0b000000101000,
296
   0b000001001000,
297
   0b000001010000,
298
   0b000001110000,
299
   0b000001111000,
300
   0b001100000000,
301
   0b001100000010,
302
   0b001100001000,
303
   0b001100010000,
304
   0b001100010010,
305
   0b001100100000,
306
   0b001100101000,
307
   0b001100111000,
308
   0b001101000000,
309
   0b001101000010,
310
   0b001101001000,
311
   0b001101010000,
312
   0b001101100000,
313
   0b001101101000,
314
   0b001101110000,
315
   0b001101110001,
316
   0b001101111000,
317
   0b010001101000,
318
   0b010001101001,
319
   0b010001101010,
320
   0b010110001000
321
};
322
 
323
/* Currently active lookup tables.  brw_init_compaction_tables() points these
 * at the gen6 or gen7 tables according to brw->gen and leaves them untouched
 * for any other generation, so they must be initialized before any of the
 * compact/uncompact helpers in this file dereference them.
 */
static const uint32_t *control_index_table;
324
static const uint32_t *datatype_table;
325
static const uint32_t *subreg_table;
326
static const uint32_t *src_index_table;
327
 
328
static bool
329
set_control_index(struct brw_context *brw,
330
                  struct brw_compact_instruction *dst,
331
                  struct brw_instruction *src)
332
{
333
   uint32_t *src_u32 = (uint32_t *)src;
334
   uint32_t uncompacted = 0;
335
 
336
   uncompacted |= ((src_u32[0] >> 8) & 0xffff) << 0;
337
   uncompacted |= ((src_u32[0] >> 31) & 0x1) << 16;
338
   /* On gen7, the flag register number gets integrated into the control
339
    * index.
340
    */
341
   if (brw->gen >= 7)
342
      uncompacted |= ((src_u32[2] >> 25) & 0x3) << 17;
343
 
344
   for (int i = 0; i < 32; i++) {
345
      if (control_index_table[i] == uncompacted) {
346
	 dst->dw0.control_index = i;
347
	 return true;
348
      }
349
   }
350
 
351
   return false;
352
}
353
 
354
static bool
355
set_datatype_index(struct brw_compact_instruction *dst,
356
                   struct brw_instruction *src)
357
{
358
   uint32_t uncompacted = 0;
359
 
360
   uncompacted |= src->bits1.ud & 0x7fff;
361
   uncompacted |= (src->bits1.ud >> 29) << 15;
362
 
363
   for (int i = 0; i < 32; i++) {
364
      if (datatype_table[i] == uncompacted) {
365
	 dst->dw0.data_type_index = i;
366
	 return true;
367
      }
368
   }
369
 
370
   return false;
371
}
372
 
373
static bool
374
set_subreg_index(struct brw_compact_instruction *dst,
375
                 struct brw_instruction *src)
376
{
377
   uint32_t uncompacted = 0;
378
 
379
   uncompacted |= src->bits1.da1.dest_subreg_nr << 0;
380
   uncompacted |= src->bits2.da1.src0_subreg_nr << 5;
381
   uncompacted |= src->bits3.da1.src1_subreg_nr << 10;
382
 
383
   for (int i = 0; i < 32; i++) {
384
      if (subreg_table[i] == uncompacted) {
385
	 dst->dw0.sub_reg_index = i;
386
	 return true;
387
      }
388
   }
389
 
390
   return false;
391
}
392
 
393
static bool
394
get_src_index(uint32_t uncompacted,
395
              uint32_t *compacted)
396
{
397
   for (int i = 0; i < 32; i++) {
398
      if (src_index_table[i] == uncompacted) {
399
	 *compacted = i;
400
	 return true;
401
      }
402
   }
403
 
404
   return false;
405
}
406
 
407
static bool
408
set_src0_index(struct brw_compact_instruction *dst,
409
               struct brw_instruction *src)
410
{
411
   uint32_t compacted, uncompacted = 0;
412
 
413
   uncompacted |= (src->bits2.ud >> 13) & 0xfff;
414
 
415
   if (!get_src_index(uncompacted, &compacted))
416
      return false;
417
 
418
   dst->dw0.src0_index = compacted & 0x3;
419
   dst->dw1.src0_index = compacted >> 2;
420
 
421
   return true;
422
}
423
 
424
static bool
425
set_src1_index(struct brw_compact_instruction *dst,
426
               struct brw_instruction *src)
427
{
428
   uint32_t compacted, uncompacted = 0;
429
 
430
   uncompacted |= (src->bits3.ud >> 13) & 0xfff;
431
 
432
   if (!get_src_index(uncompacted, &compacted))
433
      return false;
434
 
435
   dst->dw1.src1_index = compacted;
436
 
437
   return true;
438
}
439
 
440
/**
441
 * Tries to compact instruction src into dst.
442
 *
443
 * It doesn't modify dst unless src is compactable, which is relied on by
444
 * brw_compact_instructions().
445
 */
446
bool
447
brw_try_compact_instruction(struct brw_compile *p,
448
                            struct brw_compact_instruction *dst,
449
                            struct brw_instruction *src)
450
{
451
   struct brw_context *brw = p->brw;
452
   struct brw_compact_instruction temp;
453
 
454
   if (src->header.opcode == BRW_OPCODE_IF ||
455
       src->header.opcode == BRW_OPCODE_ELSE ||
456
       src->header.opcode == BRW_OPCODE_ENDIF ||
457
       src->header.opcode == BRW_OPCODE_HALT ||
458
       src->header.opcode == BRW_OPCODE_DO ||
459
       src->header.opcode == BRW_OPCODE_WHILE) {
460
      /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs
461
       * to be able to handle compacted flow control instructions..
462
       */
463
      return false;
464
   }
465
 
466
   /* FINISHME: immediates */
467
   if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
468
       src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
469
      return false;
470
 
471
   memset(&temp, 0, sizeof(temp));
472
 
473
   temp.dw0.opcode = src->header.opcode;
474
   temp.dw0.debug_control = src->header.debug_control;
475
   if (!set_control_index(brw, &temp, src))
476
      return false;
477
   if (!set_datatype_index(&temp, src))
478
      return false;
479
   if (!set_subreg_index(&temp, src))
480
      return false;
481
   temp.dw0.acc_wr_control = src->header.acc_wr_control;
482
   temp.dw0.conditionalmod = src->header.destreg__conditionalmod;
483
   if (brw->gen <= 6)
484
      temp.dw0.flag_subreg_nr = src->bits2.da1.flag_subreg_nr;
485
   temp.dw0.cmpt_ctrl = 1;
486
   if (!set_src0_index(&temp, src))
487
      return false;
488
   if (!set_src1_index(&temp, src))
489
      return false;
490
   temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
491
   temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
492
   temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
493
 
494
   *dst = temp;
495
 
496
   return true;
497
}
498
 
499
static void
500
set_uncompacted_control(struct brw_context *brw,
501
                        struct brw_instruction *dst,
502
                        struct brw_compact_instruction *src)
503
{
504
   uint32_t *dst_u32 = (uint32_t *)dst;
505
   uint32_t uncompacted = control_index_table[src->dw0.control_index];
506
 
507
   dst_u32[0] |= ((uncompacted >> 0) & 0xffff) << 8;
508
   dst_u32[0] |= ((uncompacted >> 16) & 0x1) << 31;
509
 
510
   if (brw->gen >= 7)
511
      dst_u32[2] |= ((uncompacted >> 17) & 0x3) << 25;
512
}
513
 
514
static void
515
set_uncompacted_datatype(struct brw_instruction *dst,
516
                         struct brw_compact_instruction *src)
517
{
518
   uint32_t uncompacted = datatype_table[src->dw0.data_type_index];
519
 
520
   dst->bits1.ud &= ~(0x7 << 29);
521
   dst->bits1.ud |= ((uncompacted >> 15) & 0x7) << 29;
522
   dst->bits1.ud &= ~0x7fff;
523
   dst->bits1.ud |= uncompacted & 0x7fff;
524
}
525
 
526
static void
527
set_uncompacted_subreg(struct brw_instruction *dst,
528
                       struct brw_compact_instruction *src)
529
{
530
   uint32_t uncompacted = subreg_table[src->dw0.sub_reg_index];
531
 
532
   dst->bits1.da1.dest_subreg_nr = (uncompacted >> 0)  & 0x1f;
533
   dst->bits2.da1.src0_subreg_nr = (uncompacted >> 5)  & 0x1f;
534
   dst->bits3.da1.src1_subreg_nr = (uncompacted >> 10) & 0x1f;
535
}
536
 
537
static void
538
set_uncompacted_src0(struct brw_instruction *dst,
539
                     struct brw_compact_instruction *src)
540
{
541
   uint32_t compacted = src->dw0.src0_index | src->dw1.src0_index << 2;
542
   uint32_t uncompacted = src_index_table[compacted];
543
 
544
   dst->bits2.ud |= uncompacted << 13;
545
}
546
 
547
static void
548
set_uncompacted_src1(struct brw_instruction *dst,
549
                     struct brw_compact_instruction *src)
550
{
551
   uint32_t uncompacted = src_index_table[src->dw1.src1_index];
552
 
553
   dst->bits3.ud |= uncompacted << 13;
554
}
555
 
556
void
557
brw_uncompact_instruction(struct brw_context *brw,
558
                          struct brw_instruction *dst,
559
                          struct brw_compact_instruction *src)
560
{
561
   memset(dst, 0, sizeof(*dst));
562
 
563
   dst->header.opcode = src->dw0.opcode;
564
   dst->header.debug_control = src->dw0.debug_control;
565
 
566
   set_uncompacted_control(brw, dst, src);
567
   set_uncompacted_datatype(dst, src);
568
   set_uncompacted_subreg(dst, src);
569
   dst->header.acc_wr_control = src->dw0.acc_wr_control;
570
   dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
571
   if (brw->gen <= 6)
572
      dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr;
573
   set_uncompacted_src0(dst, src);
574
   set_uncompacted_src1(dst, src);
575
   dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
576
   dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
577
   dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
578
}
579
 
580
void brw_debug_compact_uncompact(struct brw_context *brw,
581
                                 struct brw_instruction *orig,
582
                                 struct brw_instruction *uncompacted)
583
{
584
   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
585
           brw->gen);
586
 
587
   fprintf(stderr, "  before: ");
588
   brw_disasm(stderr, orig, brw->gen);
589
 
590
   fprintf(stderr, "  after:  ");
591
   brw_disasm(stderr, uncompacted, brw->gen);
592
 
593
   uint32_t *before_bits = (uint32_t *)orig;
594
   uint32_t *after_bits = (uint32_t *)uncompacted;
595
   printf("  changed bits:\n");
596
   for (int i = 0; i < 128; i++) {
597
      uint32_t before = before_bits[i / 32] & (1 << (i & 31));
598
      uint32_t after = after_bits[i / 32] & (1 << (i & 31));
599
 
600
      if (before != after) {
601
         printf("  bit %d, %s to %s\n", i,
602
                before ? "set" : "unset",
603
                after ? "set" : "unset");
604
      }
605
   }
606
}
607
 
608
/**
 * Returns how many instructions were compacted between two pre-compaction
 * instruction positions.
 *
 * \param old_ip            position of the jumping instruction, as an index
 *                          into \p compacted_counts.
 * \param old_target_ip     position of the jump target, same indexing.
 * \param compacted_counts  per-position count of compacted instructions that
 *                          preceded that position; only read here.
 * \return counts[target] - counts[this]; negative for a backward jump over
 *         compacted instructions.
 */
static int
compacted_between(int old_ip, int old_target_ip, const int *compacted_counts)
{
   int this_compacted_count = compacted_counts[old_ip];
   int target_compacted_count = compacted_counts[old_target_ip];

   return target_compacted_count - this_compacted_count;
}
615
 
616
static void
617
update_uip_jip(struct brw_instruction *insn, int this_old_ip,
618
               int *compacted_counts)
619
{
620
   int target_old_ip;
621
 
622
   target_old_ip = this_old_ip + insn->bits3.break_cont.jip;
623
   insn->bits3.break_cont.jip -= compacted_between(this_old_ip,
624
                                                   target_old_ip,
625
                                                   compacted_counts);
626
 
627
   target_old_ip = this_old_ip + insn->bits3.break_cont.uip;
628
   insn->bits3.break_cont.uip -= compacted_between(this_old_ip,
629
                                                   target_old_ip,
630
                                                   compacted_counts);
631
}
632
 
633
void
634
brw_init_compaction_tables(struct brw_context *brw)
635
{
636
   assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
637
   assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
638
   assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
639
   assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
640
   assert(gen7_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
641
   assert(gen7_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
642
   assert(gen7_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
643
   assert(gen7_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
644
 
645
   switch (brw->gen) {
646
   case 7:
647
      control_index_table = gen7_control_index_table;
648
      datatype_table = gen7_datatype_table;
649
      subreg_table = gen7_subreg_table;
650
      src_index_table = gen7_src_index_table;
651
      break;
652
   case 6:
653
      control_index_table = gen6_control_index_table;
654
      datatype_table = gen6_datatype_table;
655
      subreg_table = gen6_subreg_table;
656
      src_index_table = gen6_src_index_table;
657
      break;
658
   default:
659
      return;
660
   }
661
}
662
 
663
void
664
brw_compact_instructions(struct brw_compile *p)
665
{
666
   struct brw_context *brw = p->brw;
667
   void *store = p->store;
668
   /* For an instruction at byte offset 8*i before compaction, this is the number
669
    * of compacted instructions that preceded it.
670
    */
671
   int compacted_counts[p->next_insn_offset / 8];
672
   /* For an instruction at byte offset 8*i after compaction, this is the
673
    * 8-byte offset it was at before compaction.
674
    */
675
   int old_ip[p->next_insn_offset / 8];
676
 
677
   if (brw->gen < 6)
678
      return;
679
 
680
   int src_offset;
681
   int offset = 0;
682
   int compacted_count = 0;
683
   for (src_offset = 0; src_offset < p->nr_insn * 16;) {
684
      struct brw_instruction *src = store + src_offset;
685
      void *dst = store + offset;
686
 
687
      old_ip[offset / 8] = src_offset / 8;
688
      compacted_counts[src_offset / 8] = compacted_count;
689
 
690
      struct brw_instruction saved = *src;
691
 
692
      if (!src->header.cmpt_control &&
693
          brw_try_compact_instruction(p, dst, src)) {
694
         compacted_count++;
695
 
696
         if (INTEL_DEBUG) {
697
            struct brw_instruction uncompacted;
698
            brw_uncompact_instruction(brw, &uncompacted, dst);
699
            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
700
               brw_debug_compact_uncompact(brw, &saved, &uncompacted);
701
            }
702
         }
703
 
704
         offset += 8;
705
         src_offset += 16;
706
      } else {
707
         int size = src->header.cmpt_control ? 8 : 16;
708
 
709
         /* It appears that the end of thread SEND instruction needs to be
710
          * aligned, or the GPU hangs.
711
          */
712
         if ((src->header.opcode == BRW_OPCODE_SEND ||
713
              src->header.opcode == BRW_OPCODE_SENDC) &&
714
             src->bits3.generic.end_of_thread &&
715
             (offset & 8) != 0) {
716
            struct brw_compact_instruction *align = store + offset;
717
            memset(align, 0, sizeof(*align));
718
            align->dw0.opcode = BRW_OPCODE_NOP;
719
            align->dw0.cmpt_ctrl = 1;
720
            offset += 8;
721
            old_ip[offset / 8] = src_offset / 8;
722
            dst = store + offset;
723
         }
724
 
725
         /* If we didn't compact this intruction, we need to move it down into
726
          * place.
727
          */
728
         if (offset != src_offset) {
729
            memmove(dst, src, size);
730
         }
731
         offset += size;
732
         src_offset += size;
733
      }
734
   }
735
 
736
   /* Fix up control flow offsets. */
737
   p->next_insn_offset = offset;
738
   for (offset = 0; offset < p->next_insn_offset;) {
739
      struct brw_instruction *insn = store + offset;
740
      int this_old_ip = old_ip[offset / 8];
741
      int this_compacted_count = compacted_counts[this_old_ip];
742
      int target_old_ip, target_compacted_count;
743
 
744
      switch (insn->header.opcode) {
745
      case BRW_OPCODE_BREAK:
746
      case BRW_OPCODE_CONTINUE:
747
      case BRW_OPCODE_HALT:
748
         update_uip_jip(insn, this_old_ip, compacted_counts);
749
         break;
750
 
751
      case BRW_OPCODE_IF:
752
      case BRW_OPCODE_ELSE:
753
      case BRW_OPCODE_ENDIF:
754
      case BRW_OPCODE_WHILE:
755
         if (brw->gen == 6) {
756
            target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count;
757
            target_compacted_count = compacted_counts[target_old_ip];
758
            insn->bits1.branch_gen6.jump_count -= (target_compacted_count -
759
                                                   this_compacted_count);
760
         } else {
761
            update_uip_jip(insn, this_old_ip, compacted_counts);
762
         }
763
         break;
764
      }
765
 
766
      if (insn->header.cmpt_control) {
767
         offset += 8;
768
      } else {
769
         offset += 16;
770
      }
771
   }
772
 
773
   /* p->nr_insn is counting the number of uncompacted instructions still, so
774
    * divide.  We do want to be sure there's a valid instruction in any
775
    * alignment padding, so that the next compression pass (for the FS 8/16
776
    * compile passes) parses correctly.
777
    */
778
   if (p->next_insn_offset & 8) {
779
      struct brw_compact_instruction *align = store + offset;
780
      memset(align, 0, sizeof(*align));
781
      align->dw0.opcode = BRW_OPCODE_NOP;
782
      align->dw0.cmpt_ctrl = 1;
783
      p->next_insn_offset += 8;
784
   }
785
   p->nr_insn = p->next_insn_offset / 16;
786
 
787
   if (0) {
788
      fprintf(stdout, "dumping compacted program\n");
789
      brw_dump_compile(p, stdout, 0, p->next_insn_offset);
790
 
791
      int cmp = 0;
792
      for (offset = 0; offset < p->next_insn_offset;) {
793
         struct brw_instruction *insn = store + offset;
794
 
795
         if (insn->header.cmpt_control) {
796
            offset += 8;
797
            cmp++;
798
         } else {
799
            offset += 16;
800
         }
801
      }
802
      fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
803
              cmp * 8 * 100 / (offset + cmp * 8));
804
   }
805
}