Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright © 2012 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 */
23
 
24
/** @file brw_eu_compact.c
25
 *
26
 * Instruction compaction is a feature of G45 and newer hardware that allows
27
 * for a smaller instruction encoding.
28
 *
29
 * The instruction cache is on the order of 32KB, and many programs generate
30
 * far more instructions than that.  The instruction cache is built to barely
31
 * keep up with instruction dispatch ability in cache hit cases -- L1
32
 * instruction cache misses that still hit in the next level could limit
33
 * throughput by around 50%.
34
 *
35
 * The idea of instruction compaction is that most instructions use a tiny
36
 * subset of the GPU functionality, so we can encode what would be a 16 byte
37
 * instruction in 8 bytes using some lookup tables for various fields.
38
 *
39
 *
40
 * Instruction compaction capabilities vary subtly by generation.
41
 *
42
 * G45's support for instruction compaction is very limited. Jump counts on
43
 * this generation are in units of 16-byte uncompacted instructions. As such,
44
 * all jump targets must be 16-byte aligned. Also, all instructions must be
45
 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46
 * A G45-only instruction, NENOP, must be used to provide padding to align
47
 * uncompacted instructions.
48
 *
49
 * Gen5 removes these restrictions and changes jump counts to be in units of
50
 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51
 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52
 *
53
 * Gen6 adds the ability to compact instructions with a limited range of
54
 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55
 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56
 * value of DW3 in the uncompacted instruction word.
57
 *
58
 * On Gen7 we can compact some control flow instructions with a small positive
59
 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60
 * control flow instructions with UIP cannot be compacted, because of the
61
 * replicated 13th bit. No control flow instructions can be compacted on Gen6
62
 * since the jump count field is not in DW3.
63
 *
64
 *    break    JIP/UIP
65
 *    cont     JIP/UIP
66
 *    halt     JIP/UIP
67
 *    if       JIP/UIP
68
 *    else     JIP (plus UIP on BDW+)
69
 *    endif    JIP
70
 *    while    JIP (must be negative)
71
 *
72
 * Gen8 adds support for compacting 3-src instructions.
73
 */
74
 
75
#include "brw_context.h"
76
#include "brw_eu.h"
77
#include "intel_asm_annotation.h"
78
#include "util/u_atomic.h" /* for p_atomic_cmpxchg */
79
 
80
/* G45 control-index patterns, 17 bits each (see the "17b/G45" note in
 * set_control_index()).  Presumably installed as control_index_table on G45;
 * the selection code is not in this part of the file.
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100
};
114
 
115
/* G45 datatype-index patterns, 18 bits each (see the "18b/G45+" note in
 * set_datatype_index()).  Presumably installed as datatype_table on G45;
 * the selection code is not in this part of the file.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000
};
149
 
150
/* G45 subregister-index patterns, 15 bits each (three 5-bit fields, as
 * assembled in set_subreg_index()).  Presumably installed as subreg_table on
 * G45; the selection code is not in this part of the file.
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000
};
184
 
185
/* G45 source-index patterns, 12 bits each (the width read by
 * set_src0_index() and set_src1_index()).  Presumably installed as
 * src_index_table on G45; the selection code is not in this part of the file.
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100
};
219
 
220
/* Gen6 control-index patterns, 17 bits each.  Presumably installed as
 * control_index_table on Gen6; the selection code is not in this part of
 * the file.
 */
static const uint32_t gen6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001
};
254
 
255
/* Gen6 datatype-index patterns, 18 bits each.  Presumably installed as
 * datatype_table on Gen6; the selection code is not in this part of the
 * file.
 */
static const uint32_t gen6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
289
 
290
/* Gen6 subregister-index patterns, 15 bits each.  Presumably installed as
 * subreg_table on Gen6; the selection code is not in this part of the file.
 */
static const uint16_t gen6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
324
 
325
/* Gen6 source-index patterns, 12 bits each.  Presumably installed as
 * src_index_table on Gen6; the selection code is not in this part of the
 * file.
 */
static const uint16_t gen6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
359
 
360
/* Gen7 control-index patterns, 19 bits each (see the "19b/IVB+" note in
 * set_control_index(); on Gen7 the top two bits come from instruction bits
 * 90:89).  Presumably installed as control_index_table on Gen7; the selection
 * code is not in this part of the file.
 */
static const uint32_t gen7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
394
 
395
/* Gen7 datatype-index patterns, 18 bits each.  Presumably installed as
 * datatype_table on Gen7; the selection code is not in this part of the
 * file.
 */
static const uint32_t gen7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000
};
429
 
430
/* Gen7 subregister-index patterns, 15 bits each.  Presumably installed as
 * subreg_table on Gen7; the selection code is not in this part of the file.
 */
static const uint16_t gen7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
464
 
465
/* Gen7 source-index patterns, 12 bits each.  Presumably installed as
 * src_index_table on Gen7; the selection code is not in this part of the
 * file.
 */
static const uint16_t gen7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
499
 
500
/* Gen8 control-index patterns, 19 bits each (assembled from five instruction
 * bit ranges in set_control_index()).  Presumably installed as
 * control_index_table on Gen8+; the selection code is not in this part of
 * the file.
 */
static const uint32_t gen8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
534
 
535
/* Gen8 datatype-index patterns, 21 bits each (see the "21b/BDW+" note in
 * set_datatype_index()).  Presumably installed as datatype_table on Gen8+;
 * the selection code is not in this part of the file.
 */
static const uint32_t gen8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000
};
569
 
570
/* Gen8 subregister-index patterns, 15 bits each.  Presumably installed as
 * subreg_table on Gen8+; the selection code is not in this part of the file.
 */
static const uint16_t gen8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
604
 
605
/* Gen8 source-index patterns, 12 bits each.  Presumably installed as
 * src_index_table on Gen8+; the selection code is not in this part of the
 * file.
 */
static const uint16_t gen8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
639
 
640
/* This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware.
 *
 * Searched by set_3src_control_index(); the position of a matching entry is
 * the value stored in the compacted three-source instruction.
 */
static const uint32_t gen8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001
};
652
 
653
/* This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * Searched by set_3src_source_index(); the position of a matching entry is
 * the value stored in the compacted three-source instruction.
 */
static const uint64_t gen8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000
};
666
 
667
/* Lookup tables currently in use by the compaction helpers in this file.
 * They are file-scope state and start out NULL; presumably they are pointed
 * at the tables matching the hardware generation before any helper runs
 * (that setup code is not in this part of the file).
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint16_t *subreg_table;
static const uint16_t *src_index_table;
671
 
672
static bool
673
set_control_index(const struct brw_device_info *devinfo,
674
                  brw_compact_inst *dst, brw_inst *src)
675
{
676
   uint32_t uncompacted = devinfo->gen >= 8  /* 17b/G45; 19b/IVB+ */
677
      ? (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
678
        (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
679
        (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
680
        (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
681
        (brw_inst_bits(src,  8,  8))         /*  1b */
682
      : (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
683
        (brw_inst_bits(src, 23,  8));        /* 16b */
684
 
685
   /* On gen7, the flag register and subregister numbers are integrated into
686
    * the control index.
687
    */
688
   if (devinfo->gen == 7)
689
      uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
690
 
691
   for (int i = 0; i < 32; i++) {
692
      if (control_index_table[i] == uncompacted) {
693
         brw_compact_inst_set_control_index(dst, i);
694
	 return true;
695
      }
696
   }
697
 
698
   return false;
699
}
700
 
701
static bool
702
set_datatype_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
703
                   brw_inst *src)
704
{
705
   uint32_t uncompacted = devinfo->gen >= 8  /* 18b/G45+; 21b/BDW+ */
706
      ? (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
707
        (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
708
        (brw_inst_bits(src, 46, 35))         /* 12b */
709
      : (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
710
        (brw_inst_bits(src, 46, 32));        /* 15b */
711
 
712
   for (int i = 0; i < 32; i++) {
713
      if (datatype_table[i] == uncompacted) {
714
         brw_compact_inst_set_datatype_index(dst, i);
715
	 return true;
716
      }
717
   }
718
 
719
   return false;
720
}
721
 
722
static bool
723
set_subreg_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
724
                 brw_inst *src, bool is_immediate)
725
{
726
   uint16_t uncompacted =                 /* 15b */
727
      (brw_inst_bits(src, 52, 48) << 0) | /*  5b */
728
      (brw_inst_bits(src, 68, 64) << 5);  /*  5b */
729
 
730
   if (!is_immediate)
731
      uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
732
 
733
   for (int i = 0; i < 32; i++) {
734
      if (subreg_table[i] == uncompacted) {
735
         brw_compact_inst_set_subreg_index(dst, i);
736
	 return true;
737
      }
738
   }
739
 
740
   return false;
741
}
742
 
743
static bool
744
get_src_index(uint16_t uncompacted,
745
              uint16_t *compacted)
746
{
747
   for (int i = 0; i < 32; i++) {
748
      if (src_index_table[i] == uncompacted) {
749
	 *compacted = i;
750
	 return true;
751
      }
752
   }
753
 
754
   return false;
755
}
756
 
757
static bool
758
set_src0_index(const struct brw_device_info *devinfo,
759
               brw_compact_inst *dst, brw_inst *src)
760
{
761
   uint16_t compacted;
762
   uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
763
 
764
   if (!get_src_index(uncompacted, &compacted))
765
      return false;
766
 
767
   brw_compact_inst_set_src0_index(dst, compacted);
768
 
769
   return true;
770
}
771
 
772
static bool
773
set_src1_index(const struct brw_device_info *devinfo, brw_compact_inst *dst,
774
               brw_inst *src, bool is_immediate)
775
{
776
   uint16_t compacted;
777
 
778
   if (is_immediate) {
779
      compacted = (brw_inst_imm_ud(devinfo, src) >> 8) & 0x1f;
780
   } else {
781
      uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
782
 
783
      if (!get_src_index(uncompacted, &compacted))
784
         return false;
785
   }
786
 
787
   brw_compact_inst_set_src1_index(dst, compacted);
788
 
789
   return true;
790
}
791
 
792
static bool
793
set_3src_control_index(const struct brw_device_info *devinfo,
794
                       brw_compact_inst *dst, brw_inst *src)
795
{
796
   assert(devinfo->gen >= 8);
797
 
798
   uint32_t uncompacted =                  /* 24b/BDW; 26b/CHV */
799
      (brw_inst_bits(src, 34, 32) << 21) | /*  3b */
800
      (brw_inst_bits(src, 28,  8));        /* 21b */
801
 
802
   if (devinfo->gen >= 9 || devinfo->is_cherryview)
803
      uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
804
 
805
   for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
806
      if (gen8_3src_control_index_table[i] == uncompacted) {
807
         brw_compact_inst_set_3src_control_index(dst, i);
808
	 return true;
809
      }
810
   }
811
 
812
   return false;
813
}
814
 
815
static bool
816
set_3src_source_index(const struct brw_device_info *devinfo,
817
                      brw_compact_inst *dst, brw_inst *src)
818
{
819
   assert(devinfo->gen >= 8);
820
 
821
   uint64_t uncompacted =                    /* 46b/BDW; 49b/CHV */
822
      (brw_inst_bits(src,  83,  83) << 43) | /*  1b */
823
      (brw_inst_bits(src, 114, 107) << 35) | /*  8b */
824
      (brw_inst_bits(src,  93,  86) << 27) | /*  8b */
825
      (brw_inst_bits(src,  72,  65) << 19) | /*  8b */
826
      (brw_inst_bits(src,  55,  37));        /* 19b */
827
 
828
   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
829
      uncompacted |=
830
         (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
831
         (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
832
         (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
833
   } else {
834
      uncompacted |=
835
         (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
836
         (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
837
   }
838
 
839
   for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
840
      if (gen8_3src_source_index_table[i] == uncompacted) {
841
         brw_compact_inst_set_3src_source_index(dst, i);
842
	 return true;
843
      }
844
   }
845
 
846
   return false;
847
}
848
 
849
static bool
850
has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
851
{
852
   /* Check for instruction bits that don't map to any of the fields of the
853
    * compacted instruction.  The instruction cannot be compacted if any of
854
    * them are set.  They overlap with:
855
    *  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
856
    *  - Dst.AddrImm[9] (bit 47 on Gen8)
857
    *  - Src0.AddrImm[9] (bit 95 on Gen8)
858
    *  - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
859
    *  - UIP[31] (bit 95 on Gen8)
860
    */
861
   if (devinfo->gen >= 8) {
862
      assert(!brw_inst_bits(src, 7,  7));
863
      return brw_inst_bits(src, 95, 95) ||
864
             brw_inst_bits(src, 47, 47) ||
865
             brw_inst_bits(src, 11, 11);
866
   } else {
867
      assert(!brw_inst_bits(src, 7,  7) &&
868
             !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90)));
869
      return brw_inst_bits(src, 95, 91) ||
870
             brw_inst_bits(src, 47, 47);
871
   }
872
}
873
 
874
static bool
875
has_3src_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src)
876
{
877
   /* Check for three-source instruction bits that don't map to any of the
878
    * fields of the compacted instruction.  All of them seem to be reserved
879
    * bits currently.
880
    */
881
   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
882
      assert(!brw_inst_bits(src, 127, 127) &&
883
             !brw_inst_bits(src, 7,  7));
884
   } else {
885
      assert(devinfo->gen >= 8);
886
      assert(!brw_inst_bits(src, 127, 126) &&
887
             !brw_inst_bits(src, 105, 105) &&
888
             !brw_inst_bits(src, 84, 84) &&
889
             !brw_inst_bits(src, 36, 35) &&
890
             !brw_inst_bits(src, 7,  7));
891
   }
892
 
893
   return false;
894
}
895
 
896
static bool
897
brw_try_compact_3src_instruction(const struct brw_device_info *devinfo,
898
                                 brw_compact_inst *dst, brw_inst *src)
899
{
900
   assert(devinfo->gen >= 8);
901
 
902
   if (has_3src_unmapped_bits(devinfo, src))
903
      return false;
904
 
905
#define compact(field) \
906
   brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src))
907
 
908
   compact(opcode);
909
 
910
   if (!set_3src_control_index(devinfo, dst, src))
911
      return false;
912
 
913
   if (!set_3src_source_index(devinfo, dst, src))
914
      return false;
915
 
916
   compact(dst_reg_nr);
917
   compact(src0_rep_ctrl);
918
   brw_compact_inst_set_3src_cmpt_control(dst, true);
919
   compact(debug_control);
920
   compact(saturate);
921
   compact(src1_rep_ctrl);
922
   compact(src2_rep_ctrl);
923
   compact(src0_reg_nr);
924
   compact(src1_reg_nr);
925
   compact(src2_reg_nr);
926
   compact(src0_subreg_nr);
927
   compact(src1_subreg_nr);
928
   compact(src2_subreg_nr);
929
 
930
#undef compact
931
 
932
   return true;
933
}
934
 
935
/* Compacted instructions have 12-bits for immediate sources, and a 13th bit
 * that's replicated through the high 20 bits.
 *
 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
 * of packed vectors as compactable immediates.
 *
 * Returns true when the upper 20 bits of imm are either all zero or all one.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* We get the low 12 bits as-is. */
   imm &= ~0xfff;

   /* We get one bit replicated through the top 20 bits. */
   return imm == 0 || imm == 0xfffff000;
}
950
 
951
/* Returns whether an opcode takes three sources. */
952
static bool
953
is_3src(uint32_t op)
954
{
955
   return opcode_descs[op].nsrc == 3;
956
}
957
 
958
/**
959
 * Tries to compact instruction src into dst.
960
 *
961
 * It doesn't modify dst unless src is compactable, which is relied on by
962
 * brw_compact_instructions().
963
 */
964
bool
965
brw_try_compact_instruction(const struct brw_device_info *devinfo,
966
                            brw_compact_inst *dst, brw_inst *src)
967
{
968
   brw_compact_inst temp;
969
 
970
   assert(brw_inst_cmpt_control(devinfo, src) == 0);
971
 
972
   if (is_3src(brw_inst_opcode(devinfo, src))) {
973
      if (devinfo->gen >= 8) {
974
         memset(&temp, 0, sizeof(temp));
975
         if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
976
            *dst = temp;
977
            return true;
978
         } else {
979
            return false;
980
         }
981
      } else {
982
         return false;
983
      }
984
   }
985
 
986
   bool is_immediate =
987
      brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE ||
988
      brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE;
989
   if (is_immediate &&
990
       (devinfo->gen < 6 ||
991
        !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) {
992
      return false;
993
   }
994
 
995
   if (has_unmapped_bits(devinfo, src))
996
      return false;
997
 
998
   memset(&temp, 0, sizeof(temp));
999
 
1000
   brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src));
1001
   brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, src));
1002
   if (!set_control_index(devinfo, &temp, src))
1003
      return false;
1004
   if (!set_datatype_index(devinfo, &temp, src))
1005
      return false;
1006
   if (!set_subreg_index(devinfo, &temp, src, is_immediate))
1007
      return false;
1008
   brw_compact_inst_set_acc_wr_control(&temp,
1009
                                       brw_inst_acc_wr_control(devinfo, src));
1010
   brw_compact_inst_set_cond_modifier(&temp,
1011
                                      brw_inst_cond_modifier(devinfo, src));
1012
   if (devinfo->gen <= 6)
1013
      brw_compact_inst_set_flag_subreg_nr(&temp,
1014
                                          brw_inst_flag_subreg_nr(devinfo, src));
1015
   brw_compact_inst_set_cmpt_control(&temp, true);
1016
   if (!set_src0_index(devinfo, &temp, src))
1017
      return false;
1018
   if (!set_src1_index(devinfo, &temp, src, is_immediate))
1019
      return false;
1020
   brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src));
1021
   brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src));
1022
   if (is_immediate) {
1023
      brw_compact_inst_set_src1_reg_nr(&temp,
1024
                                       brw_inst_imm_ud(devinfo, src) & 0xff);
1025
   } else {
1026
      brw_compact_inst_set_src1_reg_nr(&temp,
1027
                                       brw_inst_src1_da_reg_nr(devinfo, src));
1028
   }
1029
 
1030
   *dst = temp;
1031
 
1032
   return true;
1033
}
1034
 
1035
static void
1036
set_uncompacted_control(const struct brw_device_info *devinfo, brw_inst *dst,
1037
                        brw_compact_inst *src)
1038
{
1039
   uint32_t uncompacted =
1040
      control_index_table[brw_compact_inst_control_index(src)];
1041
 
1042
   if (devinfo->gen >= 8) {
1043
      brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1044
      brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1045
      brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1046
      brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1047
      brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1048
   } else {
1049
      brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1050
      brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1051
 
1052
      if (devinfo->gen == 7)
1053
         brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1054
   }
1055
}
1056
 
1057
static void
1058
set_uncompacted_datatype(const struct brw_device_info *devinfo, brw_inst *dst,
1059
                         brw_compact_inst *src)
1060
{
1061
   uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)];
1062
 
1063
   if (devinfo->gen >= 8) {
1064
      brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1065
      brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1066
      brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1067
   } else {
1068
      brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1069
      brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1070
   }
1071
}
1072
 
1073
static void
1074
set_uncompacted_subreg(const struct brw_device_info *devinfo, brw_inst *dst,
1075
                       brw_compact_inst *src)
1076
{
1077
   uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)];
1078
 
1079
   brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1080
   brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
1081
   brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
1082
}
1083
 
1084
static void
1085
set_uncompacted_src0(const struct brw_device_info *devinfo, brw_inst *dst,
1086
                     brw_compact_inst *src)
1087
{
1088
   uint32_t compacted = brw_compact_inst_src0_index(src);
1089
   uint16_t uncompacted = src_index_table[compacted];
1090
 
1091
   brw_inst_set_bits(dst, 88, 77, uncompacted);
1092
}
1093
 
1094
static void
1095
set_uncompacted_src1(const struct brw_device_info *devinfo, brw_inst *dst,
1096
                     brw_compact_inst *src, bool is_immediate)
1097
{
1098
   if (is_immediate) {
1099
      signed high5 = brw_compact_inst_src1_index(src);
1100
      /* Replicate top bit of src1_index into high 20 bits of the immediate. */
1101
      brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19);
1102
   } else {
1103
      uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)];
1104
 
1105
      brw_inst_set_bits(dst, 120, 109, uncompacted);
1106
   }
1107
}
1108
 
1109
static void
1110
set_uncompacted_3src_control_index(const struct brw_device_info *devinfo,
1111
                                   brw_inst *dst, brw_compact_inst *src)
1112
{
1113
   assert(devinfo->gen >= 8);
1114
 
1115
   uint32_t compacted = brw_compact_inst_3src_control_index(src);
1116
   uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1117
 
1118
   brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1119
   brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
1120
 
1121
   if (devinfo->gen >= 9 || devinfo->is_cherryview)
1122
      brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1123
}
1124
 
1125
static void
1126
set_uncompacted_3src_source_index(const struct brw_device_info *devinfo,
1127
                                  brw_inst *dst, brw_compact_inst *src)
1128
{
1129
   assert(devinfo->gen >= 8);
1130
 
1131
   uint32_t compacted = brw_compact_inst_3src_source_index(src);
1132
   uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1133
 
1134
   brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
1135
   brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1136
   brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
1137
   brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
1138
   brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
1139
 
1140
   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1141
      brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1142
      brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1143
      brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
1144
   } else {
1145
      brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1146
      brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1147
   }
1148
}
1149
 
1150
static void
1151
brw_uncompact_3src_instruction(const struct brw_device_info *devinfo,
1152
                               brw_inst *dst, brw_compact_inst *src)
1153
{
1154
   assert(devinfo->gen >= 8);
1155
 
1156
#define uncompact(field) \
1157
   brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src))
1158
 
1159
   uncompact(opcode);
1160
 
1161
   set_uncompacted_3src_control_index(devinfo, dst, src);
1162
   set_uncompacted_3src_source_index(devinfo, dst, src);
1163
 
1164
   uncompact(dst_reg_nr);
1165
   uncompact(src0_rep_ctrl);
1166
   brw_inst_set_3src_cmpt_control(devinfo, dst, false);
1167
   uncompact(debug_control);
1168
   uncompact(saturate);
1169
   uncompact(src1_rep_ctrl);
1170
   uncompact(src2_rep_ctrl);
1171
   uncompact(src0_reg_nr);
1172
   uncompact(src1_reg_nr);
1173
   uncompact(src2_reg_nr);
1174
   uncompact(src0_subreg_nr);
1175
   uncompact(src1_subreg_nr);
1176
   uncompact(src2_subreg_nr);
1177
 
1178
#undef uncompact
1179
}
1180
 
1181
void
1182
brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst,
1183
                          brw_compact_inst *src)
1184
{
1185
   memset(dst, 0, sizeof(*dst));
1186
 
1187
   if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) {
1188
      brw_uncompact_3src_instruction(devinfo, dst, src);
1189
      return;
1190
   }
1191
 
1192
   brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src));
1193
   brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src));
1194
 
1195
   set_uncompacted_control(devinfo, dst, src);
1196
   set_uncompacted_datatype(devinfo, dst, src);
1197
 
1198
   /* src0/1 register file fields are in the datatype table. */
1199
   bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE ||
1200
                       brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
1201
 
1202
   set_uncompacted_subreg(devinfo, dst, src);
1203
   brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src));
1204
   brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src));
1205
   if (devinfo->gen <= 6)
1206
      brw_inst_set_flag_subreg_nr(devinfo, dst,
1207
                                  brw_compact_inst_flag_subreg_nr(src));
1208
   set_uncompacted_src0(devinfo, dst, src);
1209
   set_uncompacted_src1(devinfo, dst, src, is_immediate);
1210
   brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src));
1211
   brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src));
1212
   if (is_immediate) {
1213
      brw_inst_set_imm_ud(devinfo, dst,
1214
                          brw_inst_imm_ud(devinfo, dst) |
1215
                          brw_compact_inst_src1_reg_nr(src));
1216
   } else {
1217
      brw_inst_set_src1_da_reg_nr(devinfo, dst, brw_compact_inst_src1_reg_nr(src));
1218
   }
1219
}
1220
 
1221
void brw_debug_compact_uncompact(const struct brw_device_info *devinfo,
1222
                                 brw_inst *orig,
1223
                                 brw_inst *uncompacted)
1224
{
1225
   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
1226
           devinfo->gen);
1227
 
1228
   fprintf(stderr, "  before: ");
1229
   brw_disassemble_inst(stderr, devinfo, orig, true);
1230
 
1231
   fprintf(stderr, "  after:  ");
1232
   brw_disassemble_inst(stderr, devinfo, uncompacted, false);
1233
 
1234
   uint32_t *before_bits = (uint32_t *)orig;
1235
   uint32_t *after_bits = (uint32_t *)uncompacted;
1236
   fprintf(stderr, "  changed bits:\n");
1237
   for (int i = 0; i < 128; i++) {
1238
      uint32_t before = before_bits[i / 32] & (1 << (i & 31));
1239
      uint32_t after = after_bits[i / 32] & (1 << (i & 31));
1240
 
1241
      if (before != after) {
1242
         fprintf(stderr, "  bit %d, %s to %s\n", i,
1243
                 before ? "set" : "unset",
1244
                 after ? "set" : "unset");
1245
      }
1246
   }
1247
}
1248
 
1249
/**
 * Difference between the running compaction counts recorded for two
 * pre-compaction instruction indices.
 *
 * @param old_ip            index of the current instruction
 * @param old_target_ip     index of the target instruction
 * @param compacted_counts  per-instruction running counts, indexed by
 *                          pre-compaction instruction index
 * @return compacted_counts[old_target_ip] - compacted_counts[old_ip]
 *         (negative when the target has the smaller count)
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1256
 
1257
static void
1258
update_uip_jip(const struct brw_device_info *devinfo, brw_inst *insn,
1259
               int this_old_ip, int *compacted_counts)
1260
{
1261
   /* JIP and UIP are in units of:
1262
    *    - bytes on Gen8+; and
1263
    *    - compacted instructions on Gen6+.
1264
    */
1265
   int shift = devinfo->gen >= 8 ? 3 : 0;
1266
 
1267
   int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
1268
   jip_compacted -= compacted_between(this_old_ip,
1269
                                      this_old_ip + (jip_compacted / 2),
1270
                                      compacted_counts);
1271
   brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
1272
 
1273
   if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
1274
       brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
1275
       (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7))
1276
      return;
1277
 
1278
   int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
1279
   uip_compacted -= compacted_between(this_old_ip,
1280
                                      this_old_ip + (uip_compacted / 2),
1281
                                      compacted_counts);
1282
   brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
1283
}
1284
 
1285
static void
1286
update_gen4_jump_count(const struct brw_device_info *devinfo, brw_inst *insn,
1287
                       int this_old_ip, int *compacted_counts)
1288
{
1289
   assert(devinfo->gen == 5 || devinfo->is_g4x);
1290
 
1291
   /* Jump Count is in units of:
1292
    *    - uncompacted instructions on G45; and
1293
    *    - compacted instructions on Gen5.
1294
    */
1295
   int shift = devinfo->is_g4x ? 1 : 0;
1296
 
1297
   int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
1298
 
1299
   int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1300
 
1301
   int this_compacted_count = compacted_counts[this_old_ip];
1302
   int target_compacted_count = compacted_counts[target_old_ip];
1303
 
1304
   jump_count_compacted -= (target_compacted_count - this_compacted_count);
1305
   brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
1306
}
1307
 
1308
void
1309
brw_init_compaction_tables(const struct brw_device_info *devinfo)
1310
{
1311
   static bool initialized;
1312
   if (initialized || p_atomic_cmpxchg(&initialized, false, true) != false)
1313
      return;
1314
 
1315
   assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1316
   assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1317
   assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1318
   assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1319
   assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
1320
   assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
1321
   assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
1322
   assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
1323
   assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
1324
   assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
1325
   assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
1326
   assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
1327
   assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
1328
   assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
1329
   assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
1330
   assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
1331
 
1332
   switch (devinfo->gen) {
1333
   case 9:
1334
   case 8:
1335
      control_index_table = gen8_control_index_table;
1336
      datatype_table = gen8_datatype_table;
1337
      subreg_table = gen8_subreg_table;
1338
      src_index_table = gen8_src_index_table;
1339
      break;
1340
   case 7:
1341
      control_index_table = gen7_control_index_table;
1342
      datatype_table = gen7_datatype_table;
1343
      subreg_table = gen7_subreg_table;
1344
      src_index_table = gen7_src_index_table;
1345
      break;
1346
   case 6:
1347
      control_index_table = gen6_control_index_table;
1348
      datatype_table = gen6_datatype_table;
1349
      subreg_table = gen6_subreg_table;
1350
      src_index_table = gen6_src_index_table;
1351
      break;
1352
   case 5:
1353
   case 4:
1354
      control_index_table = g45_control_index_table;
1355
      datatype_table = g45_datatype_table;
1356
      subreg_table = g45_subreg_table;
1357
      src_index_table = g45_src_index_table;
1358
      break;
1359
   default:
1360
      unreachable("unknown generation");
1361
   }
1362
}
1363
 
1364
void
1365
brw_compact_instructions(struct brw_codegen *p, int start_offset,
1366
                         int num_annotations, struct annotation *annotation)
1367
{
1368
   const struct brw_device_info *devinfo = p->devinfo;
1369
   void *store = p->store + start_offset / 16;
1370
   /* For an instruction at byte offset 16*i before compaction, this is the
1371
    * number of compacted instructions minus the number of padding NOP/NENOPs
1372
    * that preceded it.
1373
    */
1374
   int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
1375
   /* For an instruction at byte offset 8*i after compaction, this was its IP
1376
    * (in 16-byte units) before compaction.
1377
    */
1378
   int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)];
1379
 
1380
   if (devinfo->gen == 4 && !devinfo->is_g4x)
1381
      return;
1382
 
1383
   int offset = 0;
1384
   int compacted_count = 0;
1385
   for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1386
        src_offset += sizeof(brw_inst)) {
1387
      brw_inst *src = store + src_offset;
1388
      void *dst = store + offset;
1389
 
1390
      old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1391
      compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1392
 
1393
      brw_inst saved = *src;
1394
 
1395
      if (brw_try_compact_instruction(devinfo, dst, src)) {
1396
         compacted_count++;
1397
 
1398
         if (INTEL_DEBUG) {
1399
            brw_inst uncompacted;
1400
            brw_uncompact_instruction(devinfo, &uncompacted, dst);
1401
            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1402
               brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
1403
            }
1404
         }
1405
 
1406
         offset += sizeof(brw_compact_inst);
1407
      } else {
1408
         /* All uncompacted instructions need to be aligned on G45. */
1409
         if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
1410
            brw_compact_inst *align = store + offset;
1411
            memset(align, 0, sizeof(*align));
1412
            brw_compact_inst_set_opcode(align, BRW_OPCODE_NENOP);
1413
            brw_compact_inst_set_cmpt_control(align, true);
1414
            offset += sizeof(brw_compact_inst);
1415
            compacted_count--;
1416
            compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1417
            old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1418
 
1419
            dst = store + offset;
1420
         }
1421
 
1422
         /* If we didn't compact this intruction, we need to move it down into
1423
          * place.
1424
          */
1425
         if (offset != src_offset) {
1426
            memmove(dst, src, sizeof(brw_inst));
1427
         }
1428
         offset += sizeof(brw_inst);
1429
      }
1430
   }
1431
 
1432
   /* Fix up control flow offsets. */
1433
   p->next_insn_offset = start_offset + offset;
1434
   for (offset = 0; offset < p->next_insn_offset - start_offset;
1435
        offset = next_offset(devinfo, store, offset)) {
1436
      brw_inst *insn = store + offset;
1437
      int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
1438
      int this_compacted_count = compacted_counts[this_old_ip];
1439
 
1440
      switch (brw_inst_opcode(devinfo, insn)) {
1441
      case BRW_OPCODE_BREAK:
1442
      case BRW_OPCODE_CONTINUE:
1443
      case BRW_OPCODE_HALT:
1444
         if (devinfo->gen >= 6) {
1445
            update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
1446
         } else {
1447
            update_gen4_jump_count(devinfo, insn, this_old_ip,
1448
                                   compacted_counts);
1449
         }
1450
         break;
1451
 
1452
      case BRW_OPCODE_IF:
1453
      case BRW_OPCODE_IFF:
1454
      case BRW_OPCODE_ELSE:
1455
      case BRW_OPCODE_ENDIF:
1456
      case BRW_OPCODE_WHILE:
1457
         if (devinfo->gen >= 7) {
1458
            if (brw_inst_cmpt_control(devinfo, insn)) {
1459
               brw_inst uncompacted;
1460
               brw_uncompact_instruction(devinfo, &uncompacted,
1461
                                         (brw_compact_inst *)insn);
1462
 
1463
               update_uip_jip(devinfo, &uncompacted, this_old_ip,
1464
                              compacted_counts);
1465
 
1466
               bool ret = brw_try_compact_instruction(devinfo,
1467
                                                      (brw_compact_inst *)insn,
1468
                                                      &uncompacted);
1469
               assert(ret); (void)ret;
1470
            } else {
1471
               update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
1472
            }
1473
         } else if (devinfo->gen == 6) {
1474
            assert(!brw_inst_cmpt_control(devinfo, insn));
1475
 
1476
            /* Jump Count is in units of compacted instructions on Gen6. */
1477
            int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
1478
 
1479
            int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1480
            int target_compacted_count = compacted_counts[target_old_ip];
1481
            jump_count_compacted -= (target_compacted_count - this_compacted_count);
1482
            brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
1483
         } else {
1484
            update_gen4_jump_count(devinfo, insn, this_old_ip,
1485
                                   compacted_counts);
1486
         }
1487
         break;
1488
 
1489
      case BRW_OPCODE_ADD:
1490
         /* Add instructions modifying the IP register use an immediate src1,
1491
          * and Gens that use this cannot compact instructions with immediate
1492
          * operands.
1493
          */
1494
         if (brw_inst_cmpt_control(devinfo, insn))
1495
            break;
1496
 
1497
         if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
1498
             brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
1499
            assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
1500
 
1501
            int shift = 3;
1502
            int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
1503
 
1504
            int target_old_ip = this_old_ip + (jump_compacted / 2);
1505
            int target_compacted_count = compacted_counts[target_old_ip];
1506
            jump_compacted -= (target_compacted_count - this_compacted_count);
1507
            brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
1508
         }
1509
         break;
1510
      }
1511
   }
1512
 
1513
   /* p->nr_insn is counting the number of uncompacted instructions still, so
1514
    * divide.  We do want to be sure there's a valid instruction in any
1515
    * alignment padding, so that the next compression pass (for the FS 8/16
1516
    * compile passes) parses correctly.
1517
    */
1518
   if (p->next_insn_offset & sizeof(brw_compact_inst)) {
1519
      brw_compact_inst *align = store + offset;
1520
      memset(align, 0, sizeof(*align));
1521
      brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
1522
      brw_compact_inst_set_cmpt_control(align, true);
1523
      p->next_insn_offset += sizeof(brw_compact_inst);
1524
   }
1525
   p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
1526
 
1527
   /* Update the instruction offsets for each annotation. */
1528
   if (annotation) {
1529
      for (int offset = 0, i = 0; i < num_annotations; i++) {
1530
         while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1531
                sizeof(brw_inst) != annotation[i].offset) {
1532
            assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1533
                   sizeof(brw_inst) < annotation[i].offset);
1534
            offset = next_offset(devinfo, store, offset);
1535
         }
1536
 
1537
         annotation[i].offset = start_offset + offset;
1538
 
1539
         offset = next_offset(devinfo, store, offset);
1540
      }
1541
 
1542
      annotation[num_annotations].offset = p->next_insn_offset;
1543
   }
1544
}