Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright © 2012 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | /** @file brw_eu_compact.c |
||
25 | * |
||
26 | * Instruction compaction is a feature of G45 and newer hardware that allows |
||
27 | * for a smaller instruction encoding. |
||
28 | * |
||
29 | * The instruction cache is on the order of 32KB, and many programs generate |
||
30 | * far more instructions than that. The instruction cache is built to barely |
||
31 | * keep up with instruction dispatch ability in cache hit cases -- L1 |
||
32 | * instruction cache misses that still hit in the next level could limit |
||
33 | * throughput by around 50%. |
||
34 | * |
||
35 | * The idea of instruction compaction is that most instructions use a tiny |
||
36 | * subset of the GPU functionality, so we can encode what would be a 16 byte |
||
37 | * instruction in 8 bytes using some lookup tables for various fields. |
||
38 | * |
||
39 | * |
||
40 | * Instruction compaction capabilities vary subtly by generation. |
||
41 | * |
||
42 | * G45's support for instruction compaction is very limited. Jump counts on |
||
43 | * this generation are in units of 16-byte uncompacted instructions. As such, |
||
44 | * all jump targets must be 16-byte aligned. Also, all instructions must be |
||
45 | * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned. |
||
46 | * A G45-only instruction, NENOP, must be used to provide padding to align |
||
47 | * uncompacted instructions. |
||
48 | * |
||
49 | * Gen5 removes these restrictions and changes jump counts to be in units of |
||
50 | * 8-byte compacted instructions, allowing jump targets to be only 8-byte |
||
51 | * aligned. Uncompacted instructions can also be placed on 8-byte boundaries. |
||
52 | * |
||
53 | * Gen6 adds the ability to compact instructions with a limited range of |
||
54 | * immediate values. Compactable immediates have 12 unrestricted bits, and a |
||
55 | * 13th bit that's replicated through the high 20 bits, to create the 32-bit |
||
56 | * value of DW3 in the uncompacted instruction word. |
||
57 | * |
||
58 | * On Gen7 we can compact some control flow instructions with a small positive |
||
59 | * immediate in the low bits of DW3, like ENDIF with the JIP field. Other |
||
60 | * control flow instructions with UIP cannot be compacted, because of the |
||
61 | * replicated 13th bit. No control flow instructions can be compacted on Gen6 |
||
62 | * since the jump count field is not in DW3. |
||
63 | * |
||
64 | * break JIP/UIP |
||
65 | * cont JIP/UIP |
||
66 | * halt JIP/UIP |
||
67 | * if JIP/UIP |
||
68 | * else JIP (plus UIP on BDW+) |
||
69 | * endif JIP |
||
70 | * while JIP (must be negative) |
||
71 | * |
||
72 | * Gen 8 adds support for compacting 3-src instructions. |
||
73 | */ |
||
74 | |||
75 | #include "brw_context.h" |
||
76 | #include "brw_eu.h" |
||
77 | #include "intel_asm_annotation.h" |
||
78 | #include "util/u_atomic.h" /* for p_atomic_cmpxchg */ |
||
79 | |||
/* G45 control-index candidates.
 *
 * Each entry is a 17-bit uncompacted control pattern, laid out the way
 * set_control_index() assembles it on pre-Gen8 hardware (instruction bit 31
 * placed above bits 23:8).  The position of a pattern in this array is the
 * index that gets written into the compacted instruction.
 *
 * NOTE(review): the code that points control_index_table at this array is
 * not in this part of the file -- confirm it is selected for G45/Gen5.
 */
static const uint32_t g45_control_index_table[32] = {
   /* indices 0 - 7 */
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   /* indices 8 - 15 */
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   /* indices 16 - 23 */
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   /* indices 24 - 31 */
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100
};
||
114 | |||
/* G45 datatype-index candidates.
 *
 * Each entry is an 18-bit uncompacted pattern, laid out the way
 * set_datatype_index() assembles it on pre-Gen8 hardware (instruction bits
 * 63:61 placed above bits 46:32).  The array position of a pattern is the
 * datatype index stored in the compacted instruction.
 *
 * NOTE(review): the code that points datatype_table at this array is not in
 * this part of the file -- confirm it is selected for G45/Gen5.
 */
static const uint32_t g45_datatype_table[32] = {
   /* indices 0 - 7 */
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   /* indices 8 - 15 */
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   /* indices 16 - 23 */
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   /* indices 24 - 31 */
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000
};
||
149 | |||
/* G45 subregister-index candidates.
 *
 * Each entry is a 15-bit pattern built by set_subreg_index() from three
 * 5-bit instruction fields (bits 52:48, 68:64 and, for non-immediate
 * sources, 100:96).  The array position of a pattern is the subreg index
 * stored in the compacted instruction.
 *
 * NOTE(review): the code that points subreg_table at this array is not in
 * this part of the file -- confirm it is selected for G45/Gen5.
 */
static const uint16_t g45_subreg_table[32] = {
   /* indices 0 - 7 */
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   /* indices 8 - 15 */
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   /* indices 16 - 23 */
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   /* indices 24 - 31 */
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000
};
||
184 | |||
/* G45 source-index candidates.
 *
 * Each entry is a 12-bit source-operand pattern (instruction bits 88:77 for
 * src0, 120:109 for src1 -- see set_src0_index()/set_src1_index()).  The
 * array position of a pattern is the source index stored in the compacted
 * instruction.
 *
 * NOTE(review): the code that points src_index_table at this array is not
 * in this part of the file -- confirm it is selected for G45/Gen5.
 */
static const uint16_t g45_src_index_table[32] = {
   /* indices 0 - 7 */
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   /* indices 8 - 15 */
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   /* indices 16 - 23 */
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   /* indices 24 - 31 */
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100
};
||
219 | |||
/* Gen6 control-index candidates.
 *
 * Each entry is a 17-bit uncompacted control pattern, laid out the way
 * set_control_index() assembles it on pre-Gen8 hardware (instruction bit 31
 * placed above bits 23:8).  The array position of a pattern is the control
 * index stored in the compacted instruction.
 *
 * NOTE(review): the code that points control_index_table at this array is
 * not in this part of the file -- confirm it is selected for Gen6.
 */
static const uint32_t gen6_control_index_table[32] = {
   /* indices 0 - 7 */
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   /* indices 8 - 15 */
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   /* indices 16 - 23 */
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   /* indices 24 - 31 */
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001
};
||
254 | |||
/* Gen6 datatype-index candidates.
 *
 * Each entry is an 18-bit uncompacted pattern, laid out the way
 * set_datatype_index() assembles it on pre-Gen8 hardware (instruction bits
 * 63:61 placed above bits 46:32).  The array position of a pattern is the
 * datatype index stored in the compacted instruction.
 *
 * Entries 23 and 25 hold the same pattern; a first-match linear search such
 * as the one in set_datatype_index() will therefore never pick index 25.
 *
 * NOTE(review): the code that points datatype_table at this array is not in
 * this part of the file -- confirm it is selected for Gen6.
 */
static const uint32_t gen6_datatype_table[32] = {
   /* indices 0 - 7 */
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   /* indices 8 - 15 */
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   /* indices 16 - 23 */
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   /* indices 24 - 31 */
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
||
289 | |||
/* Gen6 subregister-index candidates.
 *
 * Each entry is a 15-bit pattern built by set_subreg_index() from three
 * 5-bit instruction fields (bits 52:48, 68:64 and, for non-immediate
 * sources, 100:96).  The array position of a pattern is the subreg index
 * stored in the compacted instruction.
 *
 * NOTE(review): the code that points subreg_table at this array is not in
 * this part of the file -- confirm it is selected for Gen6.
 */
static const uint16_t gen6_subreg_table[32] = {
   /* indices 0 - 7 */
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   /* indices 8 - 15 */
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   /* indices 16 - 23 */
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   /* indices 24 - 31 */
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
||
324 | |||
/* Gen6 source-index candidates.
 *
 * Each entry is a 12-bit source-operand pattern (instruction bits 88:77 for
 * src0, 120:109 for src1 -- see set_src0_index()/set_src1_index()).  The
 * array position of a pattern is the source index stored in the compacted
 * instruction.
 *
 * NOTE(review): the code that points src_index_table at this array is not
 * in this part of the file -- confirm it is selected for Gen6.
 */
static const uint16_t gen6_src_index_table[32] = {
   /* indices 0 - 7 */
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   /* indices 8 - 15 */
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   /* indices 16 - 23 */
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   /* indices 24 - 31 */
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
||
359 | |||
/* Gen7 control-index candidates.
 *
 * Each entry is a 19-bit uncompacted control pattern: the 17 bits that
 * set_control_index() assembles on pre-Gen8 hardware plus, on Gen7 only,
 * instruction bits 90:89 (flag register/subregister) in the two top bits.
 * The array position of a pattern is the control index stored in the
 * compacted instruction.
 *
 * NOTE(review): the code that points control_index_table at this array is
 * not in this part of the file -- confirm it is selected for Gen7.
 */
static const uint32_t gen7_control_index_table[32] = {
   /* indices 0 - 7 */
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   /* indices 8 - 15 */
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   /* indices 16 - 23 */
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   /* indices 24 - 31 */
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
||
394 | |||
/* Gen7 datatype-index candidates.
 *
 * Each entry is an 18-bit uncompacted pattern, laid out the way
 * set_datatype_index() assembles it on pre-Gen8 hardware (instruction bits
 * 63:61 placed above bits 46:32).  The array position of a pattern is the
 * datatype index stored in the compacted instruction.
 *
 * NOTE(review): the code that points datatype_table at this array is not in
 * this part of the file -- confirm it is selected for Gen7.
 */
static const uint32_t gen7_datatype_table[32] = {
   /* indices 0 - 7 */
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   /* indices 8 - 15 */
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   /* indices 16 - 23 */
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   /* indices 24 - 31 */
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000
};
||
429 | |||
/* Gen7 subregister-index candidates.
 *
 * Each entry is a 15-bit pattern built by set_subreg_index() from three
 * 5-bit instruction fields (bits 52:48, 68:64 and, for non-immediate
 * sources, 100:96).  The array position of a pattern is the subreg index
 * stored in the compacted instruction.
 *
 * NOTE(review): the code that points subreg_table at this array is not in
 * this part of the file -- confirm it is selected for Gen7.
 */
static const uint16_t gen7_subreg_table[32] = {
   /* indices 0 - 7 */
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   /* indices 8 - 15 */
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   /* indices 16 - 23 */
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   /* indices 24 - 31 */
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
||
464 | |||
/* Gen7 source-index candidates.
 *
 * Each entry is a 12-bit source-operand pattern (instruction bits 88:77 for
 * src0, 120:109 for src1 -- see set_src0_index()/set_src1_index()).  The
 * array position of a pattern is the source index stored in the compacted
 * instruction.
 *
 * NOTE(review): the code that points src_index_table at this array is not
 * in this part of the file -- confirm it is selected for Gen7.
 */
static const uint16_t gen7_src_index_table[32] = {
   /* indices 0 - 7 */
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   /* indices 8 - 15 */
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   /* indices 16 - 23 */
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   /* indices 24 - 31 */
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
||
499 | |||
/* Gen8 control-index candidates.
 *
 * Each entry is a 19-bit uncompacted control pattern, laid out the way
 * set_control_index() assembles it on Gen8+ (instruction bits 33:31, 23:12,
 * 10:9, 34 and 8, from most to least significant).  The array position of a
 * pattern is the control index stored in the compacted instruction.
 *
 * NOTE(review): the code that points control_index_table at this array is
 * not in this part of the file -- confirm it is selected for Gen8+.
 */
static const uint32_t gen8_control_index_table[32] = {
   /* indices 0 - 7 */
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   /* indices 8 - 15 */
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   /* indices 16 - 23 */
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   /* indices 24 - 31 */
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
||
534 | |||
/* Gen8 datatype-index candidates.
 *
 * Each entry is a 21-bit uncompacted pattern, laid out the way
 * set_datatype_index() assembles it on Gen8+ (instruction bits 63:61, 94:89
 * and 46:35, from most to least significant).  The array position of a
 * pattern is the datatype index stored in the compacted instruction.
 *
 * NOTE(review): the code that points datatype_table at this array is not in
 * this part of the file -- confirm it is selected for Gen8+.
 */
static const uint32_t gen8_datatype_table[32] = {
   /* indices 0 - 7 */
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   /* indices 8 - 15 */
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   /* indices 16 - 23 */
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   /* indices 24 - 31 */
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000
};
||
569 | |||
/* Gen8 subregister-index candidates.
 *
 * Each entry is a 15-bit pattern built by set_subreg_index() from three
 * 5-bit instruction fields (bits 52:48, 68:64 and, for non-immediate
 * sources, 100:96).  The array position of a pattern is the subreg index
 * stored in the compacted instruction.
 *
 * NOTE(review): the code that points subreg_table at this array is not in
 * this part of the file -- confirm it is selected for Gen8+.
 */
static const uint16_t gen8_subreg_table[32] = {
   /* indices 0 - 7 */
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   /* indices 8 - 15 */
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   /* indices 16 - 23 */
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   /* indices 24 - 31 */
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
||
604 | |||
/* Gen8 source-index candidates.
 *
 * Each entry is a 12-bit source-operand pattern (instruction bits 88:77 for
 * src0, 120:109 for src1 -- see set_src0_index()/set_src1_index()).  The
 * array position of a pattern is the source index stored in the compacted
 * instruction.
 *
 * NOTE(review): the code that points src_index_table at this array is not
 * in this part of the file -- confirm it is selected for Gen8+.
 */
static const uint16_t gen8_src_index_table[32] = {
   /* indices 0 - 7 */
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   /* indices 8 - 15 */
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   /* indices 16 - 23 */
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   /* indices 24 - 31 */
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
||
639 | |||
/* Three-source control-index candidates for Gen8+.
 *
 * The patterns are written in the 26-bit Cherryview layout.  Broadwell only
 * forms a 24-bit pattern (see set_3src_control_index(): the two extra bits,
 * instruction bits 36:35, are OR-ed in only for Cherryview and Gen9+), and
 * every entry here has those two top bits clear, so one table serves both.
 *
 * The low 24 bits have the same mapping on both kinds of hardware.
 */
static const uint32_t gen8_3src_control_index_table[4] = {
   /* indices 0 - 3 */
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001
};
||
652 | |||
/* Three-source source-index candidates for Gen8+.
 *
 * The patterns are written in the 49-bit Cherryview layout; Broadwell forms
 * a 46-bit pattern instead (see set_3src_source_index()).
 *
 * The low 44 bits are assembled identically on both kinds of hardware, and
 * every entry here is zero in all bits above those 44, so the Cherryview
 * table can be reused on Broadwell.
 */
static const uint64_t gen8_3src_source_index_table[4] = {
   /* indices 0 - 3 */
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000
};
||
666 | |||
/* Lookup tables currently in effect.  The set_*_index() helpers and
 * get_src_index() search these 32-entry arrays; nothing in this part of the
 * file assigns them.
 *
 * NOTE(review): presumably they are pointed at the per-generation tables
 * above during initialisation -- confirm against the rest of the file.
 */
static const uint32_t *control_index_table;   /* read by set_control_index() */
static const uint32_t *datatype_table;        /* read by set_datatype_index() */
static const uint16_t *subreg_table;          /* read by set_subreg_index() */
static const uint16_t *src_index_table;       /* read by get_src_index() */
||
671 | |||
672 | static bool |
||
673 | set_control_index(const struct brw_device_info *devinfo, |
||
674 | brw_compact_inst *dst, brw_inst *src) |
||
675 | { |
||
676 | uint32_t uncompacted = devinfo->gen >= 8 /* 17b/G45; 19b/IVB+ */ |
||
677 | ? (brw_inst_bits(src, 33, 31) << 16) | /* 3b */ |
||
678 | (brw_inst_bits(src, 23, 12) << 4) | /* 12b */ |
||
679 | (brw_inst_bits(src, 10, 9) << 2) | /* 2b */ |
||
680 | (brw_inst_bits(src, 34, 34) << 1) | /* 1b */ |
||
681 | (brw_inst_bits(src, 8, 8)) /* 1b */ |
||
682 | : (brw_inst_bits(src, 31, 31) << 16) | /* 1b */ |
||
683 | (brw_inst_bits(src, 23, 8)); /* 16b */ |
||
684 | |||
685 | /* On gen7, the flag register and subregister numbers are integrated into |
||
686 | * the control index. |
||
687 | */ |
||
688 | if (devinfo->gen == 7) |
||
689 | uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */ |
||
690 | |||
691 | for (int i = 0; i < 32; i++) { |
||
692 | if (control_index_table[i] == uncompacted) { |
||
693 | brw_compact_inst_set_control_index(dst, i); |
||
694 | return true; |
||
695 | } |
||
696 | } |
||
697 | |||
698 | return false; |
||
699 | } |
||
700 | |||
701 | static bool |
||
702 | set_datatype_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, |
||
703 | brw_inst *src) |
||
704 | { |
||
705 | uint32_t uncompacted = devinfo->gen >= 8 /* 18b/G45+; 21b/BDW+ */ |
||
706 | ? (brw_inst_bits(src, 63, 61) << 18) | /* 3b */ |
||
707 | (brw_inst_bits(src, 94, 89) << 12) | /* 6b */ |
||
708 | (brw_inst_bits(src, 46, 35)) /* 12b */ |
||
709 | : (brw_inst_bits(src, 63, 61) << 15) | /* 3b */ |
||
710 | (brw_inst_bits(src, 46, 32)); /* 15b */ |
||
711 | |||
712 | for (int i = 0; i < 32; i++) { |
||
713 | if (datatype_table[i] == uncompacted) { |
||
714 | brw_compact_inst_set_datatype_index(dst, i); |
||
715 | return true; |
||
716 | } |
||
717 | } |
||
718 | |||
719 | return false; |
||
720 | } |
||
721 | |||
722 | static bool |
||
723 | set_subreg_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, |
||
724 | brw_inst *src, bool is_immediate) |
||
725 | { |
||
726 | uint16_t uncompacted = /* 15b */ |
||
727 | (brw_inst_bits(src, 52, 48) << 0) | /* 5b */ |
||
728 | (brw_inst_bits(src, 68, 64) << 5); /* 5b */ |
||
729 | |||
730 | if (!is_immediate) |
||
731 | uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */ |
||
732 | |||
733 | for (int i = 0; i < 32; i++) { |
||
734 | if (subreg_table[i] == uncompacted) { |
||
735 | brw_compact_inst_set_subreg_index(dst, i); |
||
736 | return true; |
||
737 | } |
||
738 | } |
||
739 | |||
740 | return false; |
||
741 | } |
||
742 | |||
743 | static bool |
||
744 | get_src_index(uint16_t uncompacted, |
||
745 | uint16_t *compacted) |
||
746 | { |
||
747 | for (int i = 0; i < 32; i++) { |
||
748 | if (src_index_table[i] == uncompacted) { |
||
749 | *compacted = i; |
||
750 | return true; |
||
751 | } |
||
752 | } |
||
753 | |||
754 | return false; |
||
755 | } |
||
756 | |||
757 | static bool |
||
758 | set_src0_index(const struct brw_device_info *devinfo, |
||
759 | brw_compact_inst *dst, brw_inst *src) |
||
760 | { |
||
761 | uint16_t compacted; |
||
762 | uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */ |
||
763 | |||
764 | if (!get_src_index(uncompacted, &compacted)) |
||
765 | return false; |
||
766 | |||
767 | brw_compact_inst_set_src0_index(dst, compacted); |
||
768 | |||
769 | return true; |
||
770 | } |
||
771 | |||
772 | static bool |
||
773 | set_src1_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, |
||
774 | brw_inst *src, bool is_immediate) |
||
775 | { |
||
776 | uint16_t compacted; |
||
777 | |||
778 | if (is_immediate) { |
||
779 | compacted = (brw_inst_imm_ud(devinfo, src) >> 8) & 0x1f; |
||
780 | } else { |
||
781 | uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */ |
||
782 | |||
783 | if (!get_src_index(uncompacted, &compacted)) |
||
784 | return false; |
||
785 | } |
||
786 | |||
787 | brw_compact_inst_set_src1_index(dst, compacted); |
||
788 | |||
789 | return true; |
||
790 | } |
||
791 | |||
792 | static bool |
||
793 | set_3src_control_index(const struct brw_device_info *devinfo, |
||
794 | brw_compact_inst *dst, brw_inst *src) |
||
795 | { |
||
796 | assert(devinfo->gen >= 8); |
||
797 | |||
798 | uint32_t uncompacted = /* 24b/BDW; 26b/CHV */ |
||
799 | (brw_inst_bits(src, 34, 32) << 21) | /* 3b */ |
||
800 | (brw_inst_bits(src, 28, 8)); /* 21b */ |
||
801 | |||
802 | if (devinfo->gen >= 9 || devinfo->is_cherryview) |
||
803 | uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */ |
||
804 | |||
805 | for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) { |
||
806 | if (gen8_3src_control_index_table[i] == uncompacted) { |
||
807 | brw_compact_inst_set_3src_control_index(dst, i); |
||
808 | return true; |
||
809 | } |
||
810 | } |
||
811 | |||
812 | return false; |
||
813 | } |
||
814 | |||
815 | static bool |
||
816 | set_3src_source_index(const struct brw_device_info *devinfo, |
||
817 | brw_compact_inst *dst, brw_inst *src) |
||
818 | { |
||
819 | assert(devinfo->gen >= 8); |
||
820 | |||
821 | uint64_t uncompacted = /* 46b/BDW; 49b/CHV */ |
||
822 | (brw_inst_bits(src, 83, 83) << 43) | /* 1b */ |
||
823 | (brw_inst_bits(src, 114, 107) << 35) | /* 8b */ |
||
824 | (brw_inst_bits(src, 93, 86) << 27) | /* 8b */ |
||
825 | (brw_inst_bits(src, 72, 65) << 19) | /* 8b */ |
||
826 | (brw_inst_bits(src, 55, 37)); /* 19b */ |
||
827 | |||
828 | if (devinfo->gen >= 9 || devinfo->is_cherryview) { |
||
829 | uncompacted |= |
||
830 | (brw_inst_bits(src, 126, 125) << 47) | /* 2b */ |
||
831 | (brw_inst_bits(src, 105, 104) << 45) | /* 2b */ |
||
832 | (brw_inst_bits(src, 84, 84) << 44); /* 1b */ |
||
833 | } else { |
||
834 | uncompacted |= |
||
835 | (brw_inst_bits(src, 125, 125) << 45) | /* 1b */ |
||
836 | (brw_inst_bits(src, 104, 104) << 44); /* 1b */ |
||
837 | } |
||
838 | |||
839 | for (int i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) { |
||
840 | if (gen8_3src_source_index_table[i] == uncompacted) { |
||
841 | brw_compact_inst_set_3src_source_index(dst, i); |
||
842 | return true; |
||
843 | } |
||
844 | } |
||
845 | |||
846 | return false; |
||
847 | } |
||
848 | |||
849 | static bool |
||
850 | has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src) |
||
851 | { |
||
852 | /* Check for instruction bits that don't map to any of the fields of the |
||
853 | * compacted instruction. The instruction cannot be compacted if any of |
||
854 | * them are set. They overlap with: |
||
855 | * - NibCtrl (bit 47 on Gen7, bit 11 on Gen8) |
||
856 | * - Dst.AddrImm[9] (bit 47 on Gen8) |
||
857 | * - Src0.AddrImm[9] (bit 95 on Gen8) |
||
858 | * - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8) |
||
859 | * - UIP[31] (bit 95 on Gen8) |
||
860 | */ |
||
861 | if (devinfo->gen >= 8) { |
||
862 | assert(!brw_inst_bits(src, 7, 7)); |
||
863 | return brw_inst_bits(src, 95, 95) || |
||
864 | brw_inst_bits(src, 47, 47) || |
||
865 | brw_inst_bits(src, 11, 11); |
||
866 | } else { |
||
867 | assert(!brw_inst_bits(src, 7, 7) && |
||
868 | !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90))); |
||
869 | return brw_inst_bits(src, 95, 91) || |
||
870 | brw_inst_bits(src, 47, 47); |
||
871 | } |
||
872 | } |
||
873 | |||
874 | static bool |
||
875 | has_3src_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src) |
||
876 | { |
||
877 | /* Check for three-source instruction bits that don't map to any of the |
||
878 | * fields of the compacted instruction. All of them seem to be reserved |
||
879 | * bits currently. |
||
880 | */ |
||
881 | if (devinfo->gen >= 9 || devinfo->is_cherryview) { |
||
882 | assert(!brw_inst_bits(src, 127, 127) && |
||
883 | !brw_inst_bits(src, 7, 7)); |
||
884 | } else { |
||
885 | assert(devinfo->gen >= 8); |
||
886 | assert(!brw_inst_bits(src, 127, 126) && |
||
887 | !brw_inst_bits(src, 105, 105) && |
||
888 | !brw_inst_bits(src, 84, 84) && |
||
889 | !brw_inst_bits(src, 36, 35) && |
||
890 | !brw_inst_bits(src, 7, 7)); |
||
891 | } |
||
892 | |||
893 | return false; |
||
894 | } |
||
895 | |||
896 | static bool |
||
897 | brw_try_compact_3src_instruction(const struct brw_device_info *devinfo, |
||
898 | brw_compact_inst *dst, brw_inst *src) |
||
899 | { |
||
900 | assert(devinfo->gen >= 8); |
||
901 | |||
902 | if (has_3src_unmapped_bits(devinfo, src)) |
||
903 | return false; |
||
904 | |||
905 | #define compact(field) \ |
||
906 | brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src)) |
||
907 | |||
908 | compact(opcode); |
||
909 | |||
910 | if (!set_3src_control_index(devinfo, dst, src)) |
||
911 | return false; |
||
912 | |||
913 | if (!set_3src_source_index(devinfo, dst, src)) |
||
914 | return false; |
||
915 | |||
916 | compact(dst_reg_nr); |
||
917 | compact(src0_rep_ctrl); |
||
918 | brw_compact_inst_set_3src_cmpt_control(dst, true); |
||
919 | compact(debug_control); |
||
920 | compact(saturate); |
||
921 | compact(src1_rep_ctrl); |
||
922 | compact(src2_rep_ctrl); |
||
923 | compact(src0_reg_nr); |
||
924 | compact(src1_reg_nr); |
||
925 | compact(src2_reg_nr); |
||
926 | compact(src0_subreg_nr); |
||
927 | compact(src1_subreg_nr); |
||
928 | compact(src2_subreg_nr); |
||
929 | |||
930 | #undef compact |
||
931 | |||
932 | return true; |
||
933 | } |
||
934 | |||
/* A compacted instruction stores the low 12 bits of an immediate verbatim,
 * plus one further bit that is replicated through the high 20 bits.
 *
 * In practice that leaves 12-bit integers, 0.0f, and a limited set of packed
 * vectors as compactable immediates.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* Ignore the low 12 bits (kept as-is); the remaining high 20 bits must be
    * either all clear or all set to be reproducible from the replicated bit.
    */
   switch (imm & ~0xfffu) {
   case 0:
   case 0xfffff000:
      return true;
   default:
      return false;
   }
}
||
950 | |||
951 | /* Returns whether an opcode takes three sources. */ |
||
952 | static bool |
||
953 | is_3src(uint32_t op) |
||
954 | { |
||
955 | return opcode_descs[op].nsrc == 3; |
||
956 | } |
||
957 | |||
958 | /** |
||
959 | * Tries to compact instruction src into dst. |
||
960 | * |
||
961 | * It doesn't modify dst unless src is compactable, which is relied on by |
||
962 | * brw_compact_instructions(). |
||
963 | */ |
||
964 | bool |
||
965 | brw_try_compact_instruction(const struct brw_device_info *devinfo, |
||
966 | brw_compact_inst *dst, brw_inst *src) |
||
967 | { |
||
968 | brw_compact_inst temp; |
||
969 | |||
970 | assert(brw_inst_cmpt_control(devinfo, src) == 0); |
||
971 | |||
972 | if (is_3src(brw_inst_opcode(devinfo, src))) { |
||
973 | if (devinfo->gen >= 8) { |
||
974 | memset(&temp, 0, sizeof(temp)); |
||
975 | if (brw_try_compact_3src_instruction(devinfo, &temp, src)) { |
||
976 | *dst = temp; |
||
977 | return true; |
||
978 | } else { |
||
979 | return false; |
||
980 | } |
||
981 | } else { |
||
982 | return false; |
||
983 | } |
||
984 | } |
||
985 | |||
986 | bool is_immediate = |
||
987 | brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE || |
||
988 | brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE; |
||
989 | if (is_immediate && |
||
990 | (devinfo->gen < 6 || |
||
991 | !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) { |
||
992 | return false; |
||
993 | } |
||
994 | |||
995 | if (has_unmapped_bits(devinfo, src)) |
||
996 | return false; |
||
997 | |||
998 | memset(&temp, 0, sizeof(temp)); |
||
999 | |||
1000 | brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src)); |
||
1001 | brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, src)); |
||
1002 | if (!set_control_index(devinfo, &temp, src)) |
||
1003 | return false; |
||
1004 | if (!set_datatype_index(devinfo, &temp, src)) |
||
1005 | return false; |
||
1006 | if (!set_subreg_index(devinfo, &temp, src, is_immediate)) |
||
1007 | return false; |
||
1008 | brw_compact_inst_set_acc_wr_control(&temp, |
||
1009 | brw_inst_acc_wr_control(devinfo, src)); |
||
1010 | brw_compact_inst_set_cond_modifier(&temp, |
||
1011 | brw_inst_cond_modifier(devinfo, src)); |
||
1012 | if (devinfo->gen <= 6) |
||
1013 | brw_compact_inst_set_flag_subreg_nr(&temp, |
||
1014 | brw_inst_flag_subreg_nr(devinfo, src)); |
||
1015 | brw_compact_inst_set_cmpt_control(&temp, true); |
||
1016 | if (!set_src0_index(devinfo, &temp, src)) |
||
1017 | return false; |
||
1018 | if (!set_src1_index(devinfo, &temp, src, is_immediate)) |
||
1019 | return false; |
||
1020 | brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src)); |
||
1021 | brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src)); |
||
1022 | if (is_immediate) { |
||
1023 | brw_compact_inst_set_src1_reg_nr(&temp, |
||
1024 | brw_inst_imm_ud(devinfo, src) & 0xff); |
||
1025 | } else { |
||
1026 | brw_compact_inst_set_src1_reg_nr(&temp, |
||
1027 | brw_inst_src1_da_reg_nr(devinfo, src)); |
||
1028 | } |
||
1029 | |||
1030 | *dst = temp; |
||
1031 | |||
1032 | return true; |
||
1033 | } |
||
1034 | |||
1035 | static void |
||
1036 | set_uncompacted_control(const struct brw_device_info *devinfo, brw_inst *dst, |
||
1037 | brw_compact_inst *src) |
||
1038 | { |
||
1039 | uint32_t uncompacted = |
||
1040 | control_index_table[brw_compact_inst_control_index(src)]; |
||
1041 | |||
1042 | if (devinfo->gen >= 8) { |
||
1043 | brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16)); |
||
1044 | brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff); |
||
1045 | brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3); |
||
1046 | brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1); |
||
1047 | brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1); |
||
1048 | } else { |
||
1049 | brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1); |
||
1050 | brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff)); |
||
1051 | |||
1052 | if (devinfo->gen == 7) |
||
1053 | brw_inst_set_bits(dst, 90, 89, uncompacted >> 17); |
||
1054 | } |
||
1055 | } |
||
1056 | |||
1057 | static void |
||
1058 | set_uncompacted_datatype(const struct brw_device_info *devinfo, brw_inst *dst, |
||
1059 | brw_compact_inst *src) |
||
1060 | { |
||
1061 | uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)]; |
||
1062 | |||
1063 | if (devinfo->gen >= 8) { |
||
1064 | brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18)); |
||
1065 | brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f); |
||
1066 | brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff); |
||
1067 | } else { |
||
1068 | brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15)); |
||
1069 | brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff)); |
||
1070 | } |
||
1071 | } |
||
1072 | |||
1073 | static void |
||
1074 | set_uncompacted_subreg(const struct brw_device_info *devinfo, brw_inst *dst, |
||
1075 | brw_compact_inst *src) |
||
1076 | { |
||
1077 | uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)]; |
||
1078 | |||
1079 | brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10)); |
||
1080 | brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f); |
||
1081 | brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f); |
||
1082 | } |
||
1083 | |||
1084 | static void |
||
1085 | set_uncompacted_src0(const struct brw_device_info *devinfo, brw_inst *dst, |
||
1086 | brw_compact_inst *src) |
||
1087 | { |
||
1088 | uint32_t compacted = brw_compact_inst_src0_index(src); |
||
1089 | uint16_t uncompacted = src_index_table[compacted]; |
||
1090 | |||
1091 | brw_inst_set_bits(dst, 88, 77, uncompacted); |
||
1092 | } |
||
1093 | |||
1094 | static void |
||
1095 | set_uncompacted_src1(const struct brw_device_info *devinfo, brw_inst *dst, |
||
1096 | brw_compact_inst *src, bool is_immediate) |
||
1097 | { |
||
1098 | if (is_immediate) { |
||
1099 | signed high5 = brw_compact_inst_src1_index(src); |
||
1100 | /* Replicate top bit of src1_index into high 20 bits of the immediate. */ |
||
1101 | brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19); |
||
1102 | } else { |
||
1103 | uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)]; |
||
1104 | |||
1105 | brw_inst_set_bits(dst, 120, 109, uncompacted); |
||
1106 | } |
||
1107 | } |
||
1108 | |||
1109 | static void |
||
1110 | set_uncompacted_3src_control_index(const struct brw_device_info *devinfo, |
||
1111 | brw_inst *dst, brw_compact_inst *src) |
||
1112 | { |
||
1113 | assert(devinfo->gen >= 8); |
||
1114 | |||
1115 | uint32_t compacted = brw_compact_inst_3src_control_index(src); |
||
1116 | uint32_t uncompacted = gen8_3src_control_index_table[compacted]; |
||
1117 | |||
1118 | brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7); |
||
1119 | brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff); |
||
1120 | |||
1121 | if (devinfo->gen >= 9 || devinfo->is_cherryview) |
||
1122 | brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3); |
||
1123 | } |
||
1124 | |||
1125 | static void |
||
1126 | set_uncompacted_3src_source_index(const struct brw_device_info *devinfo, |
||
1127 | brw_inst *dst, brw_compact_inst *src) |
||
1128 | { |
||
1129 | assert(devinfo->gen >= 8); |
||
1130 | |||
1131 | uint32_t compacted = brw_compact_inst_3src_source_index(src); |
||
1132 | uint64_t uncompacted = gen8_3src_source_index_table[compacted]; |
||
1133 | |||
1134 | brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1); |
||
1135 | brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff); |
||
1136 | brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff); |
||
1137 | brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff); |
||
1138 | brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff); |
||
1139 | |||
1140 | if (devinfo->gen >= 9 || devinfo->is_cherryview) { |
||
1141 | brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3); |
||
1142 | brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3); |
||
1143 | brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1); |
||
1144 | } else { |
||
1145 | brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1); |
||
1146 | brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1); |
||
1147 | } |
||
1148 | } |
||
1149 | |||
1150 | static void |
||
1151 | brw_uncompact_3src_instruction(const struct brw_device_info *devinfo, |
||
1152 | brw_inst *dst, brw_compact_inst *src) |
||
1153 | { |
||
1154 | assert(devinfo->gen >= 8); |
||
1155 | |||
1156 | #define uncompact(field) \ |
||
1157 | brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src)) |
||
1158 | |||
1159 | uncompact(opcode); |
||
1160 | |||
1161 | set_uncompacted_3src_control_index(devinfo, dst, src); |
||
1162 | set_uncompacted_3src_source_index(devinfo, dst, src); |
||
1163 | |||
1164 | uncompact(dst_reg_nr); |
||
1165 | uncompact(src0_rep_ctrl); |
||
1166 | brw_inst_set_3src_cmpt_control(devinfo, dst, false); |
||
1167 | uncompact(debug_control); |
||
1168 | uncompact(saturate); |
||
1169 | uncompact(src1_rep_ctrl); |
||
1170 | uncompact(src2_rep_ctrl); |
||
1171 | uncompact(src0_reg_nr); |
||
1172 | uncompact(src1_reg_nr); |
||
1173 | uncompact(src2_reg_nr); |
||
1174 | uncompact(src0_subreg_nr); |
||
1175 | uncompact(src1_subreg_nr); |
||
1176 | uncompact(src2_subreg_nr); |
||
1177 | |||
1178 | #undef uncompact |
||
1179 | } |
||
1180 | |||
1181 | void |
||
1182 | brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst, |
||
1183 | brw_compact_inst *src) |
||
1184 | { |
||
1185 | memset(dst, 0, sizeof(*dst)); |
||
1186 | |||
1187 | if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) { |
||
1188 | brw_uncompact_3src_instruction(devinfo, dst, src); |
||
1189 | return; |
||
1190 | } |
||
1191 | |||
1192 | brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src)); |
||
1193 | brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src)); |
||
1194 | |||
1195 | set_uncompacted_control(devinfo, dst, src); |
||
1196 | set_uncompacted_datatype(devinfo, dst, src); |
||
1197 | |||
1198 | /* src0/1 register file fields are in the datatype table. */ |
||
1199 | bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE || |
||
1200 | brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE; |
||
1201 | |||
1202 | set_uncompacted_subreg(devinfo, dst, src); |
||
1203 | brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src)); |
||
1204 | brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src)); |
||
1205 | if (devinfo->gen <= 6) |
||
1206 | brw_inst_set_flag_subreg_nr(devinfo, dst, |
||
1207 | brw_compact_inst_flag_subreg_nr(src)); |
||
1208 | set_uncompacted_src0(devinfo, dst, src); |
||
1209 | set_uncompacted_src1(devinfo, dst, src, is_immediate); |
||
1210 | brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src)); |
||
1211 | brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src)); |
||
1212 | if (is_immediate) { |
||
1213 | brw_inst_set_imm_ud(devinfo, dst, |
||
1214 | brw_inst_imm_ud(devinfo, dst) | |
||
1215 | brw_compact_inst_src1_reg_nr(src)); |
||
1216 | } else { |
||
1217 | brw_inst_set_src1_da_reg_nr(devinfo, dst, brw_compact_inst_src1_reg_nr(src)); |
||
1218 | } |
||
1219 | } |
||
1220 | |||
1221 | void brw_debug_compact_uncompact(const struct brw_device_info *devinfo, |
||
1222 | brw_inst *orig, |
||
1223 | brw_inst *uncompacted) |
||
1224 | { |
||
1225 | fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n", |
||
1226 | devinfo->gen); |
||
1227 | |||
1228 | fprintf(stderr, " before: "); |
||
1229 | brw_disassemble_inst(stderr, devinfo, orig, true); |
||
1230 | |||
1231 | fprintf(stderr, " after: "); |
||
1232 | brw_disassemble_inst(stderr, devinfo, uncompacted, false); |
||
1233 | |||
1234 | uint32_t *before_bits = (uint32_t *)orig; |
||
1235 | uint32_t *after_bits = (uint32_t *)uncompacted; |
||
1236 | fprintf(stderr, " changed bits:\n"); |
||
1237 | for (int i = 0; i < 128; i++) { |
||
1238 | uint32_t before = before_bits[i / 32] & (1 << (i & 31)); |
||
1239 | uint32_t after = after_bits[i / 32] & (1 << (i & 31)); |
||
1240 | |||
1241 | if (before != after) { |
||
1242 | fprintf(stderr, " bit %d, %s to %s\n", i, |
||
1243 | before ? "set" : "unset", |
||
1244 | after ? "set" : "unset"); |
||
1245 | } |
||
1246 | } |
||
1247 | } |
||
1248 | |||
/* Returns the difference between the compacted-instruction counts recorded
 * for old_target_ip and for old_ip (target minus source).  The result is
 * negative when the target's recorded count is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
||
1256 | |||
1257 | static void |
||
1258 | update_uip_jip(const struct brw_device_info *devinfo, brw_inst *insn, |
||
1259 | int this_old_ip, int *compacted_counts) |
||
1260 | { |
||
1261 | /* JIP and UIP are in units of: |
||
1262 | * - bytes on Gen8+; and |
||
1263 | * - compacted instructions on Gen6+. |
||
1264 | */ |
||
1265 | int shift = devinfo->gen >= 8 ? 3 : 0; |
||
1266 | |||
1267 | int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift; |
||
1268 | jip_compacted -= compacted_between(this_old_ip, |
||
1269 | this_old_ip + (jip_compacted / 2), |
||
1270 | compacted_counts); |
||
1271 | brw_inst_set_jip(devinfo, insn, jip_compacted << shift); |
||
1272 | |||
1273 | if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF || |
||
1274 | brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE || |
||
1275 | (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7)) |
||
1276 | return; |
||
1277 | |||
1278 | int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift; |
||
1279 | uip_compacted -= compacted_between(this_old_ip, |
||
1280 | this_old_ip + (uip_compacted / 2), |
||
1281 | compacted_counts); |
||
1282 | brw_inst_set_uip(devinfo, insn, uip_compacted << shift); |
||
1283 | } |
||
1284 | |||
1285 | static void |
||
1286 | update_gen4_jump_count(const struct brw_device_info *devinfo, brw_inst *insn, |
||
1287 | int this_old_ip, int *compacted_counts) |
||
1288 | { |
||
1289 | assert(devinfo->gen == 5 || devinfo->is_g4x); |
||
1290 | |||
1291 | /* Jump Count is in units of: |
||
1292 | * - uncompacted instructions on G45; and |
||
1293 | * - compacted instructions on Gen5. |
||
1294 | */ |
||
1295 | int shift = devinfo->is_g4x ? 1 : 0; |
||
1296 | |||
1297 | int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift; |
||
1298 | |||
1299 | int target_old_ip = this_old_ip + (jump_count_compacted / 2); |
||
1300 | |||
1301 | int this_compacted_count = compacted_counts[this_old_ip]; |
||
1302 | int target_compacted_count = compacted_counts[target_old_ip]; |
||
1303 | |||
1304 | jump_count_compacted -= (target_compacted_count - this_compacted_count); |
||
1305 | brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift); |
||
1306 | } |
||
1307 | |||
1308 | void |
||
1309 | brw_init_compaction_tables(const struct brw_device_info *devinfo) |
||
1310 | { |
||
1311 | static bool initialized; |
||
1312 | if (initialized || p_atomic_cmpxchg(&initialized, false, true) != false) |
||
1313 | return; |
||
1314 | |||
1315 | assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0); |
||
1316 | assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0); |
||
1317 | assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0); |
||
1318 | assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0); |
||
1319 | assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0); |
||
1320 | assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); |
||
1321 | assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); |
||
1322 | assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0); |
||
1323 | assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0); |
||
1324 | assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0); |
||
1325 | assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0); |
||
1326 | assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0); |
||
1327 | assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0); |
||
1328 | assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0); |
||
1329 | assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0); |
||
1330 | assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0); |
||
1331 | |||
1332 | switch (devinfo->gen) { |
||
1333 | case 9: |
||
1334 | case 8: |
||
1335 | control_index_table = gen8_control_index_table; |
||
1336 | datatype_table = gen8_datatype_table; |
||
1337 | subreg_table = gen8_subreg_table; |
||
1338 | src_index_table = gen8_src_index_table; |
||
1339 | break; |
||
1340 | case 7: |
||
1341 | control_index_table = gen7_control_index_table; |
||
1342 | datatype_table = gen7_datatype_table; |
||
1343 | subreg_table = gen7_subreg_table; |
||
1344 | src_index_table = gen7_src_index_table; |
||
1345 | break; |
||
1346 | case 6: |
||
1347 | control_index_table = gen6_control_index_table; |
||
1348 | datatype_table = gen6_datatype_table; |
||
1349 | subreg_table = gen6_subreg_table; |
||
1350 | src_index_table = gen6_src_index_table; |
||
1351 | break; |
||
1352 | case 5: |
||
1353 | case 4: |
||
1354 | control_index_table = g45_control_index_table; |
||
1355 | datatype_table = g45_datatype_table; |
||
1356 | subreg_table = g45_subreg_table; |
||
1357 | src_index_table = g45_src_index_table; |
||
1358 | break; |
||
1359 | default: |
||
1360 | unreachable("unknown generation"); |
||
1361 | } |
||
1362 | } |
||
1363 | |||
/**
 * Compacts, in place, the instructions stored in p starting at byte offset
 * start_offset, then repairs everything that depended on the old layout.
 *
 * The work happens in four steps:
 *   1. Walk the instructions, replacing each compactable one with its
 *      compacted form and sliding the others down, while recording the
 *      old_ip / compacted_counts bookkeeping.
 *   2. Walk the compacted stream and fix up jump offsets of control-flow
 *      instructions (and of ADDs that write the IP register).
 *   3. If the stream ends on a half-instruction boundary, append a compacted
 *      NOP and update p->next_insn_offset and p->nr_insn.
 *   4. If annotations were supplied, translate their offsets to the new
 *      layout.
 *
 * Nothing is done on Gen4 parts that are not G4x.
 *
 * \param p                code generator state holding the instruction store
 * \param start_offset     byte offset of the first instruction to process
 * \param num_annotations  number of entries in annotation to translate
 * \param annotation       may be NULL; otherwise entry [num_annotations] is
 *                         also written, so the array needs one extra slot
 */
void
brw_compact_instructions(struct brw_codegen *p, int start_offset,
                         int num_annotations, struct annotation *annotation)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* NOTE(review): presumably p->store is a brw_inst pointer, so dividing the
    * byte offset by 16 yields an instruction index -- confirm.  All later
    * "store + n" arithmetic is on a void pointer (byte offsets).
    */
   void *store = p->store + start_offset / 16;
   /* For an instruction at byte offset 16*i before compaction, this is the
    * number of compacted instructions minus the number of padding NOP/NENOPs
    * that preceded it.
    */
   int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
   /* For an instruction at byte offset 8*i after compaction, this was its IP
    * (in 16-byte units) before compaction.
    */
   int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)];

   if (devinfo->gen == 4 && !devinfo->is_g4x)
      return;

   /* offset is the write position (bytes) in the compacted stream;
    * src_offset is the read position in the original stream.
    */
   int offset = 0;
   int compacted_count = 0;
   for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
        src_offset += sizeof(brw_inst)) {
      brw_inst *src = store + src_offset;
      void *dst = store + offset;

      old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
      compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;

      /* dst may overlap src, so keep a copy for the debug comparison below. */
      brw_inst saved = *src;

      if (brw_try_compact_instruction(devinfo, dst, src)) {
         compacted_count++;

         /* Debug aid: verify that uncompacting reproduces the original. */
         if (INTEL_DEBUG) {
            brw_inst uncompacted;
            brw_uncompact_instruction(devinfo, &uncompacted, dst);
            if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
               brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
            }
         }

         offset += sizeof(brw_compact_inst);
      } else {
         /* All uncompacted instructions need to be aligned on G45. */
         if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
            /* Insert a compacted NENOP as padding.  It is not a compacted
             * program instruction, so the count is decremented and the
             * bookkeeping for this source instruction is redone.
             */
            brw_compact_inst *align = store + offset;
            memset(align, 0, sizeof(*align));
            brw_compact_inst_set_opcode(align, BRW_OPCODE_NENOP);
            brw_compact_inst_set_cmpt_control(align, true);
            offset += sizeof(brw_compact_inst);
            compacted_count--;
            compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
            old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);

            dst = store + offset;
         }

         /* If we didn't compact this instruction, we need to move it down into
          * place.
          */
         if (offset != src_offset) {
            memmove(dst, src, sizeof(brw_inst));
         }
         offset += sizeof(brw_inst);
      }
   }

   /* Fix up control flow offsets. */
   p->next_insn_offset = start_offset + offset;
   for (offset = 0; offset < p->next_insn_offset - start_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;
      int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
      int this_compacted_count = compacted_counts[this_old_ip];

      switch (brw_inst_opcode(devinfo, insn)) {
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
      case BRW_OPCODE_HALT:
         if (devinfo->gen >= 6) {
            update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
         } else {
            update_gen4_jump_count(devinfo, insn, this_old_ip,
                                   compacted_counts);
         }
         break;

      case BRW_OPCODE_IF:
      case BRW_OPCODE_IFF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
         if (devinfo->gen >= 7) {
            if (brw_inst_cmpt_control(devinfo, insn)) {
               /* The instruction was compacted: expand it, patch the jump
                * fields, and compact it again in place.
                */
               brw_inst uncompacted;
               brw_uncompact_instruction(devinfo, &uncompacted,
                                         (brw_compact_inst *)insn);

               update_uip_jip(devinfo, &uncompacted, this_old_ip,
                              compacted_counts);

               bool ret = brw_try_compact_instruction(devinfo,
                                                      (brw_compact_inst *)insn,
                                                      &uncompacted);
               assert(ret); (void)ret;
            } else {
               update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
            }
         } else if (devinfo->gen == 6) {
            assert(!brw_inst_cmpt_control(devinfo, insn));

            /* Jump Count is in units of compacted instructions on Gen6. */
            int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);

            int target_old_ip = this_old_ip + (jump_count_compacted / 2);
            int target_compacted_count = compacted_counts[target_old_ip];
            jump_count_compacted -= (target_compacted_count - this_compacted_count);
            brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
         } else {
            update_gen4_jump_count(devinfo, insn, this_old_ip,
                                   compacted_counts);
         }
         break;

      case BRW_OPCODE_ADD:
         /* Add instructions modifying the IP register use an immediate src1,
          * and Gens that use this cannot compact instructions with immediate
          * operands.
          */
         if (brw_inst_cmpt_control(devinfo, insn))
            break;

         if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
             brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
            assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);

            /* The immediate is shifted by 3 here, i.e. treated as a byte
             * offset converted to 8-byte (compacted instruction) units.
             */
            int shift = 3;
            int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;

            int target_old_ip = this_old_ip + (jump_compacted / 2);
            int target_compacted_count = compacted_counts[target_old_ip];
            jump_compacted -= (target_compacted_count - this_compacted_count);
            brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
         }
         break;
      }
   }

   /* p->nr_insn is counting the number of uncompacted instructions still, so
    * divide.  We do want to be sure there's a valid instruction in any
    * alignment padding, so that the next compression pass (for the FS 8/16
    * compile passes) parses correctly.
    */
   if (p->next_insn_offset & sizeof(brw_compact_inst)) {
      brw_compact_inst *align = store + offset;
      memset(align, 0, sizeof(*align));
      brw_compact_inst_set_opcode(align, BRW_OPCODE_NOP);
      brw_compact_inst_set_cmpt_control(align, true);
      p->next_insn_offset += sizeof(brw_compact_inst);
   }
   p->nr_insn = p->next_insn_offset / sizeof(brw_inst);

   /* Update the instruction offsets for each annotation. */
   if (annotation) {
      /* Advance through the compacted stream until the instruction whose
       * pre-compaction offset matches the annotation is found; the assert
       * requires annotation offsets to be reached without being passed.
       */
      for (int offset = 0, i = 0; i < num_annotations; i++) {
         while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
                sizeof(brw_inst) != annotation[i].offset) {
            assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
                   sizeof(brw_inst) < annotation[i].offset);
            offset = next_offset(devinfo, store, offset);
         }

         annotation[i].offset = start_offset + offset;

         offset = next_offset(devinfo, store, offset);
      }

      /* The entry after the last annotation records the end of the program. */
      annotation[num_annotations].offset = p->next_insn_offset;
   }
}