Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5563 | serge | 1 | /* |
2 | * Copyright © 2012 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | /** @file brw_eu_compact.c |
||
25 | * |
||
26 | * Instruction compaction is a feature of gm45 and newer hardware that allows |
||
27 | * for a smaller instruction encoding. |
||
28 | * |
||
29 | * The instruction cache is on the order of 32KB, and many programs generate |
||
30 | * far more instructions than that. The instruction cache is built to barely |
||
31 | * keep up with instruction dispatch ability in cache hit cases -- L1 |
||
32 | * instruction cache misses that still hit in the next level could limit |
||
33 | * throughput by around 50%. |
||
34 | * |
||
35 | * The idea of instruction compaction is that most instructions use a tiny |
||
36 | * subset of the GPU functionality, so we can encode what would be a 16 byte |
||
37 | * instruction in 8 bytes using some lookup tables for various fields. |
||
38 | */ |
||
39 | |||
40 | #include "brw_context.h" |
||
41 | #include "brw_eu.h" |
||
42 | |||
/**
 * Gen6 control index lookup table.
 *
 * Each entry is a 17-bit key as assembled by set_control_index(): bits 0..15
 * are bits 8..23 of the first instruction dword and bit 16 is bit 31 of that
 * dword.  The position of a key in the table is its compacted control index.
 */
static const uint32_t gen6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001,
};
||
77 | |||
/**
 * Gen6 datatype index lookup table.
 *
 * Each entry is an 18-bit key as assembled by set_datatype_index(): bits
 * 0..14 are bits 0..14 of instruction dword 1 and bits 15..17 are bits
 * 29..31 of that dword.  The position of a key is its compacted index.
 */
static const uint32_t gen6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
||
112 | |||
/**
 * Gen6 subregister index lookup table.
 *
 * Each entry is a 15-bit key as assembled by set_subreg_index(): bits 0..4
 * hold the destination subregister number, bits 5..9 the src0 subregister
 * number and bits 10..14 the src1 subregister number.
 */
static const uint32_t gen6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
||
147 | |||
/**
 * Gen6 source index lookup table, shared by src0 and src1.
 *
 * Each entry is a 12-bit key taken from bits 13..24 of the source's
 * instruction dword (dword 2 for src0, dword 3 for src1); see
 * set_src0_index() and set_src1_index().
 */
static const uint32_t gen6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
||
182 | |||
/**
 * Gen7 control index lookup table.
 *
 * Each entry is a 19-bit key as assembled by set_control_index(): bits 0..15
 * are bits 8..23 of the first instruction dword, bit 16 is bit 31 of that
 * dword, and bits 17..18 are bits 25..26 of the third dword (the flag
 * register bits that gen7 folds into the control index).
 */
static const uint32_t gen7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
||
217 | |||
/**
 * Gen7 datatype index lookup table.
 *
 * Each entry is an 18-bit key as assembled by set_datatype_index(): bits
 * 0..14 are bits 0..14 of instruction dword 1 and bits 15..17 are bits
 * 29..31 of that dword.  The position of a key is its compacted index.
 */
static const uint32_t gen7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
||
252 | |||
/**
 * Gen7 subregister index lookup table.
 *
 * Each entry is a 15-bit key as assembled by set_subreg_index(): bits 0..4
 * hold the destination subregister number, bits 5..9 the src0 subregister
 * number and bits 10..14 the src1 subregister number.
 */
static const uint32_t gen7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
||
287 | |||
/**
 * Gen7 source index lookup table, shared by src0 and src1.
 *
 * Each entry is a 12-bit key taken from bits 13..24 of the source's
 * instruction dword (dword 2 for src0, dword 3 for src1); see
 * set_src0_index() and set_src1_index().
 */
static const uint32_t gen7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
||
322 | |||
/* Lookup tables currently in use.  brw_init_compaction_tables() points these
 * at the gen6 or gen7 arrays above; for any other generation it returns
 * without assigning them.
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint32_t *subreg_table;
static const uint32_t *src_index_table;
||
327 | |||
328 | static bool |
||
329 | set_control_index(struct brw_context *brw, |
||
330 | struct brw_compact_instruction *dst, |
||
331 | struct brw_instruction *src) |
||
332 | { |
||
333 | uint32_t *src_u32 = (uint32_t *)src; |
||
334 | uint32_t uncompacted = 0; |
||
335 | |||
336 | uncompacted |= ((src_u32[0] >> 8) & 0xffff) << 0; |
||
337 | uncompacted |= ((src_u32[0] >> 31) & 0x1) << 16; |
||
338 | /* On gen7, the flag register number gets integrated into the control |
||
339 | * index. |
||
340 | */ |
||
341 | if (brw->gen >= 7) |
||
342 | uncompacted |= ((src_u32[2] >> 25) & 0x3) << 17; |
||
343 | |||
344 | for (int i = 0; i < 32; i++) { |
||
345 | if (control_index_table[i] == uncompacted) { |
||
346 | dst->dw0.control_index = i; |
||
347 | return true; |
||
348 | } |
||
349 | } |
||
350 | |||
351 | return false; |
||
352 | } |
||
353 | |||
354 | static bool |
||
355 | set_datatype_index(struct brw_compact_instruction *dst, |
||
356 | struct brw_instruction *src) |
||
357 | { |
||
358 | uint32_t uncompacted = 0; |
||
359 | |||
360 | uncompacted |= src->bits1.ud & 0x7fff; |
||
361 | uncompacted |= (src->bits1.ud >> 29) << 15; |
||
362 | |||
363 | for (int i = 0; i < 32; i++) { |
||
364 | if (datatype_table[i] == uncompacted) { |
||
365 | dst->dw0.data_type_index = i; |
||
366 | return true; |
||
367 | } |
||
368 | } |
||
369 | |||
370 | return false; |
||
371 | } |
||
372 | |||
373 | static bool |
||
374 | set_subreg_index(struct brw_compact_instruction *dst, |
||
375 | struct brw_instruction *src) |
||
376 | { |
||
377 | uint32_t uncompacted = 0; |
||
378 | |||
379 | uncompacted |= src->bits1.da1.dest_subreg_nr << 0; |
||
380 | uncompacted |= src->bits2.da1.src0_subreg_nr << 5; |
||
381 | uncompacted |= src->bits3.da1.src1_subreg_nr << 10; |
||
382 | |||
383 | for (int i = 0; i < 32; i++) { |
||
384 | if (subreg_table[i] == uncompacted) { |
||
385 | dst->dw0.sub_reg_index = i; |
||
386 | return true; |
||
387 | } |
||
388 | } |
||
389 | |||
390 | return false; |
||
391 | } |
||
392 | |||
393 | static bool |
||
394 | get_src_index(uint32_t uncompacted, |
||
395 | uint32_t *compacted) |
||
396 | { |
||
397 | for (int i = 0; i < 32; i++) { |
||
398 | if (src_index_table[i] == uncompacted) { |
||
399 | *compacted = i; |
||
400 | return true; |
||
401 | } |
||
402 | } |
||
403 | |||
404 | return false; |
||
405 | } |
||
406 | |||
407 | static bool |
||
408 | set_src0_index(struct brw_compact_instruction *dst, |
||
409 | struct brw_instruction *src) |
||
410 | { |
||
411 | uint32_t compacted, uncompacted = 0; |
||
412 | |||
413 | uncompacted |= (src->bits2.ud >> 13) & 0xfff; |
||
414 | |||
415 | if (!get_src_index(uncompacted, &compacted)) |
||
416 | return false; |
||
417 | |||
418 | dst->dw0.src0_index = compacted & 0x3; |
||
419 | dst->dw1.src0_index = compacted >> 2; |
||
420 | |||
421 | return true; |
||
422 | } |
||
423 | |||
424 | static bool |
||
425 | set_src1_index(struct brw_compact_instruction *dst, |
||
426 | struct brw_instruction *src) |
||
427 | { |
||
428 | uint32_t compacted, uncompacted = 0; |
||
429 | |||
430 | uncompacted |= (src->bits3.ud >> 13) & 0xfff; |
||
431 | |||
432 | if (!get_src_index(uncompacted, &compacted)) |
||
433 | return false; |
||
434 | |||
435 | dst->dw1.src1_index = compacted; |
||
436 | |||
437 | return true; |
||
438 | } |
||
439 | |||
440 | /** |
||
441 | * Tries to compact instruction src into dst. |
||
442 | * |
||
443 | * It doesn't modify dst unless src is compactable, which is relied on by |
||
444 | * brw_compact_instructions(). |
||
445 | */ |
||
446 | bool |
||
447 | brw_try_compact_instruction(struct brw_compile *p, |
||
448 | struct brw_compact_instruction *dst, |
||
449 | struct brw_instruction *src) |
||
450 | { |
||
451 | struct brw_context *brw = p->brw; |
||
452 | struct brw_compact_instruction temp; |
||
453 | |||
454 | if (src->header.opcode == BRW_OPCODE_IF || |
||
455 | src->header.opcode == BRW_OPCODE_ELSE || |
||
456 | src->header.opcode == BRW_OPCODE_ENDIF || |
||
457 | src->header.opcode == BRW_OPCODE_HALT || |
||
458 | src->header.opcode == BRW_OPCODE_DO || |
||
459 | src->header.opcode == BRW_OPCODE_WHILE) { |
||
460 | /* FINISHME: The fixup code below, and brw_set_uip_jip and friends, needs |
||
461 | * to be able to handle compacted flow control instructions.. |
||
462 | */ |
||
463 | return false; |
||
464 | } |
||
465 | |||
466 | /* FINISHME: immediates */ |
||
467 | if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE || |
||
468 | src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) |
||
469 | return false; |
||
470 | |||
471 | memset(&temp, 0, sizeof(temp)); |
||
472 | |||
473 | temp.dw0.opcode = src->header.opcode; |
||
474 | temp.dw0.debug_control = src->header.debug_control; |
||
475 | if (!set_control_index(brw, &temp, src)) |
||
476 | return false; |
||
477 | if (!set_datatype_index(&temp, src)) |
||
478 | return false; |
||
479 | if (!set_subreg_index(&temp, src)) |
||
480 | return false; |
||
481 | temp.dw0.acc_wr_control = src->header.acc_wr_control; |
||
482 | temp.dw0.conditionalmod = src->header.destreg__conditionalmod; |
||
483 | if (brw->gen <= 6) |
||
484 | temp.dw0.flag_subreg_nr = src->bits2.da1.flag_subreg_nr; |
||
485 | temp.dw0.cmpt_ctrl = 1; |
||
486 | if (!set_src0_index(&temp, src)) |
||
487 | return false; |
||
488 | if (!set_src1_index(&temp, src)) |
||
489 | return false; |
||
490 | temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr; |
||
491 | temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr; |
||
492 | temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr; |
||
493 | |||
494 | *dst = temp; |
||
495 | |||
496 | return true; |
||
497 | } |
||
498 | |||
499 | static void |
||
500 | set_uncompacted_control(struct brw_context *brw, |
||
501 | struct brw_instruction *dst, |
||
502 | struct brw_compact_instruction *src) |
||
503 | { |
||
504 | uint32_t *dst_u32 = (uint32_t *)dst; |
||
505 | uint32_t uncompacted = control_index_table[src->dw0.control_index]; |
||
506 | |||
507 | dst_u32[0] |= ((uncompacted >> 0) & 0xffff) << 8; |
||
508 | dst_u32[0] |= ((uncompacted >> 16) & 0x1) << 31; |
||
509 | |||
510 | if (brw->gen >= 7) |
||
511 | dst_u32[2] |= ((uncompacted >> 17) & 0x3) << 25; |
||
512 | } |
||
513 | |||
514 | static void |
||
515 | set_uncompacted_datatype(struct brw_instruction *dst, |
||
516 | struct brw_compact_instruction *src) |
||
517 | { |
||
518 | uint32_t uncompacted = datatype_table[src->dw0.data_type_index]; |
||
519 | |||
520 | dst->bits1.ud &= ~(0x7 << 29); |
||
521 | dst->bits1.ud |= ((uncompacted >> 15) & 0x7) << 29; |
||
522 | dst->bits1.ud &= ~0x7fff; |
||
523 | dst->bits1.ud |= uncompacted & 0x7fff; |
||
524 | } |
||
525 | |||
526 | static void |
||
527 | set_uncompacted_subreg(struct brw_instruction *dst, |
||
528 | struct brw_compact_instruction *src) |
||
529 | { |
||
530 | uint32_t uncompacted = subreg_table[src->dw0.sub_reg_index]; |
||
531 | |||
532 | dst->bits1.da1.dest_subreg_nr = (uncompacted >> 0) & 0x1f; |
||
533 | dst->bits2.da1.src0_subreg_nr = (uncompacted >> 5) & 0x1f; |
||
534 | dst->bits3.da1.src1_subreg_nr = (uncompacted >> 10) & 0x1f; |
||
535 | } |
||
536 | |||
537 | static void |
||
538 | set_uncompacted_src0(struct brw_instruction *dst, |
||
539 | struct brw_compact_instruction *src) |
||
540 | { |
||
541 | uint32_t compacted = src->dw0.src0_index | src->dw1.src0_index << 2; |
||
542 | uint32_t uncompacted = src_index_table[compacted]; |
||
543 | |||
544 | dst->bits2.ud |= uncompacted << 13; |
||
545 | } |
||
546 | |||
547 | static void |
||
548 | set_uncompacted_src1(struct brw_instruction *dst, |
||
549 | struct brw_compact_instruction *src) |
||
550 | { |
||
551 | uint32_t uncompacted = src_index_table[src->dw1.src1_index]; |
||
552 | |||
553 | dst->bits3.ud |= uncompacted << 13; |
||
554 | } |
||
555 | |||
556 | void |
||
557 | brw_uncompact_instruction(struct brw_context *brw, |
||
558 | struct brw_instruction *dst, |
||
559 | struct brw_compact_instruction *src) |
||
560 | { |
||
561 | memset(dst, 0, sizeof(*dst)); |
||
562 | |||
563 | dst->header.opcode = src->dw0.opcode; |
||
564 | dst->header.debug_control = src->dw0.debug_control; |
||
565 | |||
566 | set_uncompacted_control(brw, dst, src); |
||
567 | set_uncompacted_datatype(dst, src); |
||
568 | set_uncompacted_subreg(dst, src); |
||
569 | dst->header.acc_wr_control = src->dw0.acc_wr_control; |
||
570 | dst->header.destreg__conditionalmod = src->dw0.conditionalmod; |
||
571 | if (brw->gen <= 6) |
||
572 | dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr; |
||
573 | set_uncompacted_src0(dst, src); |
||
574 | set_uncompacted_src1(dst, src); |
||
575 | dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr; |
||
576 | dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr; |
||
577 | dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr; |
||
578 | } |
||
579 | |||
580 | void brw_debug_compact_uncompact(struct brw_context *brw, |
||
581 | struct brw_instruction *orig, |
||
582 | struct brw_instruction *uncompacted) |
||
583 | { |
||
584 | fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n", |
||
585 | brw->gen); |
||
586 | |||
587 | fprintf(stderr, " before: "); |
||
588 | brw_disasm(stderr, orig, brw->gen); |
||
589 | |||
590 | fprintf(stderr, " after: "); |
||
591 | brw_disasm(stderr, uncompacted, brw->gen); |
||
592 | |||
593 | uint32_t *before_bits = (uint32_t *)orig; |
||
594 | uint32_t *after_bits = (uint32_t *)uncompacted; |
||
595 | printf(" changed bits:\n"); |
||
596 | for (int i = 0; i < 128; i++) { |
||
597 | uint32_t before = before_bits[i / 32] & (1 << (i & 31)); |
||
598 | uint32_t after = after_bits[i / 32] & (1 << (i & 31)); |
||
599 | |||
600 | if (before != after) { |
||
601 | printf(" bit %d, %s to %s\n", i, |
||
602 | before ? "set" : "unset", |
||
603 | after ? "set" : "unset"); |
||
604 | } |
||
605 | } |
||
606 | } |
||
607 | |||
/**
 * Returns how many more instructions had been compacted before
 * \p old_target_ip than before \p old_ip, according to the running totals
 * in \p compacted_counts.  The result is negative when the target's total
 * is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
||
615 | |||
616 | static void |
||
617 | update_uip_jip(struct brw_instruction *insn, int this_old_ip, |
||
618 | int *compacted_counts) |
||
619 | { |
||
620 | int target_old_ip; |
||
621 | |||
622 | target_old_ip = this_old_ip + insn->bits3.break_cont.jip; |
||
623 | insn->bits3.break_cont.jip -= compacted_between(this_old_ip, |
||
624 | target_old_ip, |
||
625 | compacted_counts); |
||
626 | |||
627 | target_old_ip = this_old_ip + insn->bits3.break_cont.uip; |
||
628 | insn->bits3.break_cont.uip -= compacted_between(this_old_ip, |
||
629 | target_old_ip, |
||
630 | compacted_counts); |
||
631 | } |
||
632 | |||
633 | void |
||
634 | brw_init_compaction_tables(struct brw_context *brw) |
||
635 | { |
||
636 | assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0); |
||
637 | assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); |
||
638 | assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); |
||
639 | assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0); |
||
640 | assert(gen7_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0); |
||
641 | assert(gen7_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0); |
||
642 | assert(gen7_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0); |
||
643 | assert(gen7_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0); |
||
644 | |||
645 | switch (brw->gen) { |
||
646 | case 7: |
||
647 | control_index_table = gen7_control_index_table; |
||
648 | datatype_table = gen7_datatype_table; |
||
649 | subreg_table = gen7_subreg_table; |
||
650 | src_index_table = gen7_src_index_table; |
||
651 | break; |
||
652 | case 6: |
||
653 | control_index_table = gen6_control_index_table; |
||
654 | datatype_table = gen6_datatype_table; |
||
655 | subreg_table = gen6_subreg_table; |
||
656 | src_index_table = gen6_src_index_table; |
||
657 | break; |
||
658 | default: |
||
659 | return; |
||
660 | } |
||
661 | } |
||
662 | |||
663 | void |
||
664 | brw_compact_instructions(struct brw_compile *p) |
||
665 | { |
||
666 | struct brw_context *brw = p->brw; |
||
667 | void *store = p->store; |
||
668 | /* For an instruction at byte offset 8*i before compaction, this is the number |
||
669 | * of compacted instructions that preceded it. |
||
670 | */ |
||
671 | int compacted_counts[p->next_insn_offset / 8]; |
||
672 | /* For an instruction at byte offset 8*i after compaction, this is the |
||
673 | * 8-byte offset it was at before compaction. |
||
674 | */ |
||
675 | int old_ip[p->next_insn_offset / 8]; |
||
676 | |||
677 | if (brw->gen < 6) |
||
678 | return; |
||
679 | |||
680 | int src_offset; |
||
681 | int offset = 0; |
||
682 | int compacted_count = 0; |
||
683 | for (src_offset = 0; src_offset < p->nr_insn * 16;) { |
||
684 | struct brw_instruction *src = store + src_offset; |
||
685 | void *dst = store + offset; |
||
686 | |||
687 | old_ip[offset / 8] = src_offset / 8; |
||
688 | compacted_counts[src_offset / 8] = compacted_count; |
||
689 | |||
690 | struct brw_instruction saved = *src; |
||
691 | |||
692 | if (!src->header.cmpt_control && |
||
693 | brw_try_compact_instruction(p, dst, src)) { |
||
694 | compacted_count++; |
||
695 | |||
696 | if (INTEL_DEBUG) { |
||
697 | struct brw_instruction uncompacted; |
||
698 | brw_uncompact_instruction(brw, &uncompacted, dst); |
||
699 | if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) { |
||
700 | brw_debug_compact_uncompact(brw, &saved, &uncompacted); |
||
701 | } |
||
702 | } |
||
703 | |||
704 | offset += 8; |
||
705 | src_offset += 16; |
||
706 | } else { |
||
707 | int size = src->header.cmpt_control ? 8 : 16; |
||
708 | |||
709 | /* It appears that the end of thread SEND instruction needs to be |
||
710 | * aligned, or the GPU hangs. |
||
711 | */ |
||
712 | if ((src->header.opcode == BRW_OPCODE_SEND || |
||
713 | src->header.opcode == BRW_OPCODE_SENDC) && |
||
714 | src->bits3.generic.end_of_thread && |
||
715 | (offset & 8) != 0) { |
||
716 | struct brw_compact_instruction *align = store + offset; |
||
717 | memset(align, 0, sizeof(*align)); |
||
718 | align->dw0.opcode = BRW_OPCODE_NOP; |
||
719 | align->dw0.cmpt_ctrl = 1; |
||
720 | offset += 8; |
||
721 | old_ip[offset / 8] = src_offset / 8; |
||
722 | dst = store + offset; |
||
723 | } |
||
724 | |||
725 | /* If we didn't compact this intruction, we need to move it down into |
||
726 | * place. |
||
727 | */ |
||
728 | if (offset != src_offset) { |
||
729 | memmove(dst, src, size); |
||
730 | } |
||
731 | offset += size; |
||
732 | src_offset += size; |
||
733 | } |
||
734 | } |
||
735 | |||
736 | /* Fix up control flow offsets. */ |
||
737 | p->next_insn_offset = offset; |
||
738 | for (offset = 0; offset < p->next_insn_offset;) { |
||
739 | struct brw_instruction *insn = store + offset; |
||
740 | int this_old_ip = old_ip[offset / 8]; |
||
741 | int this_compacted_count = compacted_counts[this_old_ip]; |
||
742 | int target_old_ip, target_compacted_count; |
||
743 | |||
744 | switch (insn->header.opcode) { |
||
745 | case BRW_OPCODE_BREAK: |
||
746 | case BRW_OPCODE_CONTINUE: |
||
747 | case BRW_OPCODE_HALT: |
||
748 | update_uip_jip(insn, this_old_ip, compacted_counts); |
||
749 | break; |
||
750 | |||
751 | case BRW_OPCODE_IF: |
||
752 | case BRW_OPCODE_ELSE: |
||
753 | case BRW_OPCODE_ENDIF: |
||
754 | case BRW_OPCODE_WHILE: |
||
755 | if (brw->gen == 6) { |
||
756 | target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count; |
||
757 | target_compacted_count = compacted_counts[target_old_ip]; |
||
758 | insn->bits1.branch_gen6.jump_count -= (target_compacted_count - |
||
759 | this_compacted_count); |
||
760 | } else { |
||
761 | update_uip_jip(insn, this_old_ip, compacted_counts); |
||
762 | } |
||
763 | break; |
||
764 | } |
||
765 | |||
766 | if (insn->header.cmpt_control) { |
||
767 | offset += 8; |
||
768 | } else { |
||
769 | offset += 16; |
||
770 | } |
||
771 | } |
||
772 | |||
773 | /* p->nr_insn is counting the number of uncompacted instructions still, so |
||
774 | * divide. We do want to be sure there's a valid instruction in any |
||
775 | * alignment padding, so that the next compression pass (for the FS 8/16 |
||
776 | * compile passes) parses correctly. |
||
777 | */ |
||
778 | if (p->next_insn_offset & 8) { |
||
779 | struct brw_compact_instruction *align = store + offset; |
||
780 | memset(align, 0, sizeof(*align)); |
||
781 | align->dw0.opcode = BRW_OPCODE_NOP; |
||
782 | align->dw0.cmpt_ctrl = 1; |
||
783 | p->next_insn_offset += 8; |
||
784 | } |
||
785 | p->nr_insn = p->next_insn_offset / 16; |
||
786 | |||
787 | if (0) { |
||
788 | fprintf(stdout, "dumping compacted program\n"); |
||
789 | brw_dump_compile(p, stdout, 0, p->next_insn_offset); |
||
790 | |||
791 | int cmp = 0; |
||
792 | for (offset = 0; offset < p->next_insn_offset;) { |
||
793 | struct brw_instruction *insn = store + offset; |
||
794 | |||
795 | if (insn->header.cmpt_control) { |
||
796 | offset += 8; |
||
797 | cmp++; |
||
798 | } else { |
||
799 | offset += 16; |
||
800 | } |
||
801 | } |
||
802 | fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8, |
||
803 | cmp * 8 * 100 / (offset + cmp * 8)); |
||
804 | } |
||
805 | }>>>>><>><>>=>><>><>><>><>><>><>><>><>=>>>><>><>><>>><>>><>><>><> |