Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * |
||
4 | * Copyright (C) 2012-2013 LunarG, Inc. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included |
||
14 | * in all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
22 | * DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * Authors: |
||
25 | * Chia-I Wu |
||
26 | */ |
||
27 | |||
28 | #include "toy_compiler.h" |
||
29 | |||
/* An operand origin packs (RegNum << CG_REG_SHIFT | SubRegNumInBytes). */
#define CG_REG_SHIFT 5
/* Extract the register number from a packed operand origin. */
#define CG_REG_NUM(org) ((org) >> CG_REG_SHIFT)
||
32 | |||
/* Destination operand as consumed by the encoders in this file. */
struct codegen_dst {
   unsigned file;
   unsigned type;
   bool indirect;
   unsigned indirect_subreg;
   unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

   unsigned horz_stride;

   unsigned writemask;
};

/* Source operand as consumed by the encoders in this file. */
struct codegen_src {
   unsigned file;
   unsigned type;
   bool indirect;
   unsigned indirect_subreg;
   unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

   /* region description */
   unsigned vert_stride;
   unsigned width;
   unsigned horz_stride;

   /* one swizzle selector per channel, plus the source modifiers */
   unsigned swizzle[4];
   bool absolute;
   bool negate;
};

/*
 * Per-instruction codegen state: the device, the instruction being encoded,
 * the flag register selection, and the operands (one dst, up to three srcs).
 */
struct codegen {
   const struct ilo_dev *dev;
   const struct toy_inst *inst;
   int pc;

   unsigned flag_reg_num;
   unsigned flag_sub_reg_num;

   struct codegen_dst dst;
   struct codegen_src src[3];
};
||
69 | |||
70 | /* |
||
71 | * From the Sandy Bridge PRM, volume 4 part 2, page 107-108: |
||
72 | * |
||
73 | * "(Src0Index) The 5-bit index for source 0. The 12-bit table-look-up |
||
74 | * result forms bits [88:77], the source 0 register region fields, of the |
||
75 | * 128-bit instruction word." |
||
76 | * |
||
77 | * "(SubRegIndex) The 5-bit index for sub-register fields. The 15-bit |
||
78 | * table-look-up result forms bits [100:96], [68,64] and [52,48] of the |
||
79 | * 128-bit instruction word." |
||
80 | * |
||
81 | * "(DataTypeIndex) The 5-bit index for data type fields. The 18-bit |
||
82 | * table-look-up result forms bits [63:61] and [46, 32] of the 128-bit |
||
83 | * instruction word." |
||
84 | * |
||
85 | * "(ControlIndex) The 5-bit index for data type fields. The 17-bit |
||
86 | * table-look-up result forms bits[31], and [23, 8] of the 128-bit |
||
87 | * instruction word." |
||
88 | */ |
||
89 | static const struct toy_compaction_table toy_compaction_table_gen6 = { |
||
90 | .control = { |
||
91 | [0] = 0x00000, /* 00000000000000000 */ |
||
92 | [1] = 0x08000, /* 01000000000000000 */ |
||
93 | [2] = 0x06000, /* 00110000000000000 */ |
||
94 | [3] = 0x00100, /* 00000000100000000 */ |
||
95 | [4] = 0x02000, /* 00010000000000000 */ |
||
96 | [5] = 0x01100, /* 00001000100000000 */ |
||
97 | [6] = 0x00102, /* 00000000100000010 */ |
||
98 | [7] = 0x00002, /* 00000000000000010 */ |
||
99 | [8] = 0x08100, /* 01000000100000000 */ |
||
100 | [9] = 0x0a000, /* 01010000000000000 */ |
||
101 | [10] = 0x16000, /* 10110000000000000 */ |
||
102 | [11] = 0x04000, /* 00100000000000000 */ |
||
103 | [12] = 0x1a000, /* 11010000000000000 */ |
||
104 | [13] = 0x18000, /* 11000000000000000 */ |
||
105 | [14] = 0x09100, /* 01001000100000000 */ |
||
106 | [15] = 0x08008, /* 01000000000001000 */ |
||
107 | [16] = 0x08004, /* 01000000000000100 */ |
||
108 | [17] = 0x00008, /* 00000000000001000 */ |
||
109 | [18] = 0x00004, /* 00000000000000100 */ |
||
110 | [19] = 0x01100, /* 00111000100000000 */ |
||
111 | [20] = 0x01102, /* 00001000100000010 */ |
||
112 | [21] = 0x06100, /* 00110000100000000 */ |
||
113 | [22] = 0x06001, /* 00110000000000001 */ |
||
114 | [23] = 0x04001, /* 00100000000000001 */ |
||
115 | [24] = 0x06002, /* 00110000000000010 */ |
||
116 | [25] = 0x06005, /* 00110000000000101 */ |
||
117 | [26] = 0x06009, /* 00110000000001001 */ |
||
118 | [27] = 0x06010, /* 00110000000010000 */ |
||
119 | [28] = 0x06003, /* 00110000000000011 */ |
||
120 | [29] = 0x06004, /* 00110000000000100 */ |
||
121 | [30] = 0x06108, /* 00110000100001000 */ |
||
122 | [31] = 0x04009, /* 00100000000001001 */ |
||
123 | }, |
||
124 | .datatype = { |
||
125 | [0] = 0x09c00, /* 001001110000000000 */ |
||
126 | [1] = 0x08c20, /* 001000110000100000 */ |
||
127 | [2] = 0x09c01, /* 001001110000000001 */ |
||
128 | [3] = 0x08060, /* 001000000001100000 */ |
||
129 | [4] = 0x0ad29, /* 001010110100101001 */ |
||
130 | [5] = 0x081ad, /* 001000000110101101 */ |
||
131 | [6] = 0x0c62c, /* 001100011000101100 */ |
||
132 | [7] = 0x0bdad, /* 001011110110101101 */ |
||
133 | [8] = 0x081ec, /* 001000000111101100 */ |
||
134 | [9] = 0x08061, /* 001000000001100001 */ |
||
135 | [10] = 0x08ca5, /* 001000110010100101 */ |
||
136 | [11] = 0x08041, /* 001000000001000001 */ |
||
137 | [12] = 0x08231, /* 001000001000110001 */ |
||
138 | [13] = 0x08229, /* 001000001000101001 */ |
||
139 | [14] = 0x08020, /* 001000000000100000 */ |
||
140 | [15] = 0x08232, /* 001000001000110010 */ |
||
141 | [16] = 0x0a529, /* 001010010100101001 */ |
||
142 | [17] = 0x0b4a5, /* 001011010010100101 */ |
||
143 | [18] = 0x081a5, /* 001000000110100101 */ |
||
144 | [19] = 0x0c629, /* 001100011000101001 */ |
||
145 | [20] = 0x0b62c, /* 001011011000101100 */ |
||
146 | [21] = 0x0b5a5, /* 001011010110100101 */ |
||
147 | [22] = 0x0bda5, /* 001011110110100101 */ |
||
148 | [23] = 0x0f1bd, /* 001111011110111101 */ |
||
149 | [24] = 0x0f1bc, /* 001111011110111100 */ |
||
150 | [25] = 0x0f1bd, /* 001111011110111101 */ |
||
151 | [26] = 0x0f19d, /* 001111011110011101 */ |
||
152 | [27] = 0x0f1be, /* 001111011110111110 */ |
||
153 | [28] = 0x08021, /* 001000000000100001 */ |
||
154 | [29] = 0x08022, /* 001000000000100010 */ |
||
155 | [30] = 0x09fdd, /* 001001111111011101 */ |
||
156 | [31] = 0x083be, /* 001000001110111110 */ |
||
157 | }, |
||
158 | .subreg = { |
||
159 | [0] = 0x0000, /* 000000000000000 */ |
||
160 | [1] = 0x0004, /* 000000000000100 */ |
||
161 | [2] = 0x0180, /* 000000110000000 */ |
||
162 | [3] = 0x1000, /* 111000000000000 */ |
||
163 | [4] = 0x3c08, /* 011110000001000 */ |
||
164 | [5] = 0x0400, /* 000010000000000 */ |
||
165 | [6] = 0x0010, /* 000000000010000 */ |
||
166 | [7] = 0x0c0c, /* 000110000001100 */ |
||
167 | [8] = 0x1000, /* 001000000000000 */ |
||
168 | [9] = 0x0200, /* 000001000000000 */ |
||
169 | [10] = 0x0294, /* 000001010010100 */ |
||
170 | [11] = 0x0056, /* 000000001010110 */ |
||
171 | [12] = 0x2000, /* 010000000000000 */ |
||
172 | [13] = 0x6000, /* 110000000000000 */ |
||
173 | [14] = 0x0800, /* 000100000000000 */ |
||
174 | [15] = 0x0080, /* 000000010000000 */ |
||
175 | [16] = 0x0008, /* 000000000001000 */ |
||
176 | [17] = 0x4000, /* 100000000000000 */ |
||
177 | [18] = 0x0280, /* 000001010000000 */ |
||
178 | [19] = 0x1400, /* 001010000000000 */ |
||
179 | [20] = 0x1800, /* 001100000000000 */ |
||
180 | [21] = 0x0054, /* 000000001010100 */ |
||
181 | [22] = 0x5a94, /* 101101010010100 */ |
||
182 | [23] = 0x2800, /* 010100000000000 */ |
||
183 | [24] = 0x008f, /* 000000010001111 */ |
||
184 | [25] = 0x3000, /* 011000000000000 */ |
||
185 | [26] = 0x1c00, /* 111110000000000 */ |
||
186 | [27] = 0x5000, /* 101000000000000 */ |
||
187 | [28] = 0x000f, /* 000000000001111 */ |
||
188 | [29] = 0x088f, /* 000100010001111 */ |
||
189 | [30] = 0x108f, /* 001000010001111 */ |
||
190 | [31] = 0x0c00, /* 000110000000000 */ |
||
191 | }, |
||
192 | .src = { |
||
193 | [0] = 0x000, /* 000000000000 */ |
||
194 | [1] = 0x588, /* 010110001000 */ |
||
195 | [2] = 0x468, /* 010001101000 */ |
||
196 | [3] = 0x228, /* 001000101000 */ |
||
197 | [4] = 0x690, /* 011010010000 */ |
||
198 | [5] = 0x120, /* 000100100000 */ |
||
199 | [6] = 0x46c, /* 010001101100 */ |
||
200 | [7] = 0x510, /* 010101110000 */ |
||
201 | [8] = 0x618, /* 011001111000 */ |
||
202 | [9] = 0x328, /* 001100101000 */ |
||
203 | [10] = 0x58c, /* 010110001100 */ |
||
204 | [11] = 0x220, /* 001000100000 */ |
||
205 | [12] = 0x58a, /* 010110001010 */ |
||
206 | [13] = 0x002, /* 000000000010 */ |
||
207 | [14] = 0x550, /* 010101010000 */ |
||
208 | [15] = 0x568, /* 010101101000 */ |
||
209 | [16] = 0xf4c, /* 111101001100 */ |
||
210 | [17] = 0xf2c, /* 111100101100 */ |
||
211 | [18] = 0x610, /* 011001110000 */ |
||
212 | [19] = 0x589, /* 010110001001 */ |
||
213 | [20] = 0x558, /* 010101011000 */ |
||
214 | [21] = 0x348, /* 001101001000 */ |
||
215 | [22] = 0x42c, /* 010000101100 */ |
||
216 | [23] = 0x400, /* 010000000000 */ |
||
217 | [24] = 0x310, /* 001101110000 */ |
||
218 | [25] = 0x310, /* 001100010000 */ |
||
219 | [26] = 0x300, /* 001100000000 */ |
||
220 | [27] = 0x46a, /* 010001101010 */ |
||
221 | [28] = 0x318, /* 001101111000 */ |
||
222 | [29] = 0x010, /* 000001110000 */ |
||
223 | [30] = 0x320, /* 001100100000 */ |
||
224 | [31] = 0x350, /* 001101010000 */ |
||
225 | }, |
||
226 | }; |
||
227 | |||
/*
 * From the Ivy Bridge PRM, volume 4 part 3, page 128:
 *
 *     "(Src0Index) Lookup one of 32 12-bit values. That value is used (from
 *      MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride,
 *      Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields."
 *
 *     "(SubRegIndex) Lookup one of 32 15-bit values. That value is used (from
 *      MSB to LSB) for various fields for Src1, Src0, and Dst, including
 *      ChanEn/ChanSel, SubRegNum, and AddrImm[4] or AddrImm[4:0], depending
 *      on AddrMode and AccessMode."
 *
 *     "(DataTypeIndex) Lookup one of 32 18-bit values. That value is used
 *      (from MSB to LSB) for the Dst.AddrMode, Dst.HorzStride, Dst.DstType,
 *      Dst.RegFile, Src0.SrcType, Src0.RegFile, Src1.SrcType, and
 *      Src1.RegType bit fields."
 *
 *     "(ControlIndex) Lookup one of 32 19-bit values. That value is used
 *      (from MSB to LSB) for the FlagRegNum, FlagSubRegNum, Saturate,
 *      ExecSize, PredInv, PredCtrl, ThreadCtrl, QtrCtrl, DepCtrl, MaskCtrl,
 *      and AccessMode bit fields."
 *
 * Each entry is the hexadecimal form of the bit pattern spelled out in the
 * trailing comment.  This is the table returned for both Gen7 and Gen7.5.
 */
static const struct toy_compaction_table toy_compaction_table_gen7 = {
   .control = {
      [0]   = 0x00002, /* 0000000000000000010 */
      [1]   = 0x04000, /* 0000100000000000000 */
      [2]   = 0x04001, /* 0000100000000000001 */
      [3]   = 0x04002, /* 0000100000000000010 */
      [4]   = 0x04003, /* 0000100000000000011 */
      [5]   = 0x04004, /* 0000100000000000100 */
      [6]   = 0x04005, /* 0000100000000000101 */
      [7]   = 0x04007, /* 0000100000000000111 */
      [8]   = 0x04008, /* 0000100000000001000 */
      [9]   = 0x04009, /* 0000100000000001001 */
      [10]  = 0x0400d, /* 0000100000000001101 */
      [11]  = 0x06000, /* 0000110000000000000 */
      [12]  = 0x06001, /* 0000110000000000001 */
      [13]  = 0x06002, /* 0000110000000000010 */
      [14]  = 0x06003, /* 0000110000000000011 */
      [15]  = 0x06004, /* 0000110000000000100 */
      [16]  = 0x06005, /* 0000110000000000101 */
      [17]  = 0x06007, /* 0000110000000000111 */
      [18]  = 0x06009, /* 0000110000000001001 */
      [19]  = 0x0600d, /* 0000110000000001101 */
      [20]  = 0x06010, /* 0000110000000010000 */
      [21]  = 0x06100, /* 0000110000100000000 */
      [22]  = 0x08000, /* 0001000000000000000 */
      [23]  = 0x08002, /* 0001000000000000010 */
      [24]  = 0x08004, /* 0001000000000000100 */
      [25]  = 0x08100, /* 0001000000100000000 */
      [26]  = 0x16000, /* 0010110000000000000 */
      [27]  = 0x16010, /* 0010110000000010000 */
      [28]  = 0x18000, /* 0011000000000000000 */
      [29]  = 0x18100, /* 0011000000100000000 */
      [30]  = 0x28000, /* 0101000000000000000 */
      [31]  = 0x28100, /* 0101000000100000000 */
   },
   .datatype = {
      [0]   = 0x08001, /* 001000000000000001 */
      [1]   = 0x08020, /* 001000000000100000 */
      [2]   = 0x08021, /* 001000000000100001 */
      [3]   = 0x08061, /* 001000000001100001 */
      [4]   = 0x080bd, /* 001000000010111101 */
      [5]   = 0x082fd, /* 001000001011111101 */
      [6]   = 0x083a1, /* 001000001110100001 */
      [7]   = 0x083a5, /* 001000001110100101 */
      [8]   = 0x083bd, /* 001000001110111101 */
      [9]   = 0x08421, /* 001000010000100001 */
      [10]  = 0x08c20, /* 001000110000100000 */
      [11]  = 0x08c21, /* 001000110000100001 */
      [12]  = 0x094a5, /* 001001010010100101 */
      [13]  = 0x09ca4, /* 001001110010100100 */
      [14]  = 0x09ca5, /* 001001110010100101 */
      [15]  = 0x0f3bd, /* 001111001110111101 */
      [16]  = 0x0f79d, /* 001111011110011101 */
      [17]  = 0x0f7bc, /* 001111011110111100 */
      [18]  = 0x0f7bd, /* 001111011110111101 */
      [19]  = 0x0ffbc, /* 001111111110111100 */
      [20]  = 0x0020c, /* 000000001000001100 */
      [21]  = 0x0803d, /* 001000000000111101 */
      [22]  = 0x080a5, /* 001000000010100101 */
      [23]  = 0x08420, /* 001000010000100000 */
      [24]  = 0x094a4, /* 001001010010100100 */
      [25]  = 0x09c84, /* 001001110010000100 */
      [26]  = 0x0a509, /* 001010010100001001 */
      [27]  = 0x0dfbd, /* 001101111110111101 */
      [28]  = 0x0ffbd, /* 001111111110111101 */
      [29]  = 0x0bdac, /* 001011110110101100 */
      [30]  = 0x0a528, /* 001010010100101000 */
      [31]  = 0x0ad28, /* 001010110100101000 */
   },
   .subreg = {
      [0]   = 0x0000, /* 000000000000000 */
      [1]   = 0x0001, /* 000000000000001 */
      [2]   = 0x0008, /* 000000000001000 */
      [3]   = 0x000f, /* 000000000001111 */
      [4]   = 0x0010, /* 000000000010000 */
      [5]   = 0x0080, /* 000000010000000 */
      [6]   = 0x0100, /* 000000100000000 */
      [7]   = 0x0180, /* 000000110000000 */
      [8]   = 0x0200, /* 000001000000000 */
      [9]   = 0x0210, /* 000001000010000 */
      [10]  = 0x0280, /* 000001010000000 */
      [11]  = 0x1000, /* 001000000000000 */
      [12]  = 0x1001, /* 001000000000001 */
      [13]  = 0x1081, /* 001000010000001 */
      [14]  = 0x1082, /* 001000010000010 */
      [15]  = 0x1083, /* 001000010000011 */
      [16]  = 0x1084, /* 001000010000100 */
      [17]  = 0x1087, /* 001000010000111 */
      [18]  = 0x1088, /* 001000010001000 */
      [19]  = 0x108e, /* 001000010001110 */
      [20]  = 0x108f, /* 001000010001111 */
      [21]  = 0x1180, /* 001000110000000 */
      [22]  = 0x11e8, /* 001000111101000 */
      [23]  = 0x2000, /* 010000000000000 */
      [24]  = 0x2180, /* 010000110000000 */
      [25]  = 0x3000, /* 011000000000000 */
      [26]  = 0x3c87, /* 011110010000111 */
      [27]  = 0x4000, /* 100000000000000 */
      [28]  = 0x5000, /* 101000000000000 */
      [29]  = 0x6000, /* 110000000000000 */
      [30]  = 0x7000, /* 111000000000000 */
      [31]  = 0x701c, /* 111000000011100 */
   },
   .src = {
      [0]   = 0x000, /* 000000000000 */
      [1]   = 0x002, /* 000000000010 */
      [2]   = 0x010, /* 000000010000 */
      [3]   = 0x012, /* 000000010010 */
      [4]   = 0x018, /* 000000011000 */
      [5]   = 0x020, /* 000000100000 */
      [6]   = 0x028, /* 000000101000 */
      [7]   = 0x048, /* 000001001000 */
      [8]   = 0x050, /* 000001010000 */
      [9]   = 0x070, /* 000001110000 */
      [10]  = 0x078, /* 000001111000 */
      [11]  = 0x300, /* 001100000000 */
      [12]  = 0x302, /* 001100000010 */
      [13]  = 0x308, /* 001100001000 */
      [14]  = 0x310, /* 001100010000 */
      [15]  = 0x312, /* 001100010010 */
      [16]  = 0x320, /* 001100100000 */
      [17]  = 0x328, /* 001100101000 */
      [18]  = 0x338, /* 001100111000 */
      [19]  = 0x340, /* 001101000000 */
      [20]  = 0x342, /* 001101000010 */
      [21]  = 0x348, /* 001101001000 */
      [22]  = 0x350, /* 001101010000 */
      [23]  = 0x360, /* 001101100000 */
      [24]  = 0x368, /* 001101101000 */
      [25]  = 0x370, /* 001101110000 */
      [26]  = 0x371, /* 001101110001 */
      [27]  = 0x378, /* 001101111000 */
      [28]  = 0x468, /* 010001101000 */
      [29]  = 0x469, /* 010001101001 */
      [30]  = 0x46a, /* 010001101010 */
      [31]  = 0x588, /* 010110001000 */
   },
};
||
388 | |||
389 | static const struct toy_compaction_table toy_compaction_table_gen8 = { |
||
390 | .control = { |
||
391 | }, |
||
392 | .datatype = { |
||
393 | }, |
||
394 | .subreg = { |
||
395 | }, |
||
396 | .src = { |
||
397 | }, |
||
398 | .control_3src = { |
||
399 | }, |
||
400 | .source_3src = { |
||
401 | }, |
||
402 | }; |
||
403 | |||
404 | const struct toy_compaction_table * |
||
405 | toy_compiler_get_compaction_table(const struct ilo_dev *dev) |
||
406 | { |
||
407 | switch (ilo_dev_gen(dev)) { |
||
408 | case ILO_GEN(8): |
||
409 | return &toy_compaction_table_gen8; |
||
410 | case ILO_GEN(7.5): |
||
411 | case ILO_GEN(7): |
||
412 | return &toy_compaction_table_gen7; |
||
413 | case ILO_GEN(6): |
||
414 | return &toy_compaction_table_gen6; |
||
415 | default: |
||
416 | assert(!"unsupported gen"); |
||
417 | return NULL; |
||
418 | } |
||
419 | } |
||
420 | |||
421 | /** |
||
422 | * Return true if the source operand is null. |
||
423 | */ |
||
424 | static bool |
||
425 | src_is_null(const struct codegen *cg, int idx) |
||
426 | { |
||
427 | const struct codegen_src *src = &cg->src[idx]; |
||
428 | |||
429 | return (src->file == GEN6_FILE_ARF && |
||
430 | src->origin == GEN6_ARF_NULL << CG_REG_SHIFT); |
||
431 | } |
||
432 | |||
433 | /** |
||
434 | * Translate a source operand to DW2 or DW3 of the 1-src/2-src format. |
||
435 | */ |
||
436 | static uint32_t |
||
437 | translate_src_gen6(const struct codegen *cg, int idx) |
||
438 | { |
||
439 | const struct codegen_src *src = &cg->src[idx]; |
||
440 | uint32_t dw; |
||
441 | |||
442 | ILO_DEV_ASSERT(cg->dev, 6, 8); |
||
443 | |||
444 | /* special treatment may be needed if any of the operand is immediate */ |
||
445 | if (cg->src[0].file == GEN6_FILE_IMM) { |
||
446 | assert(!cg->src[0].absolute && !cg->src[0].negate); |
||
447 | |||
448 | /* only the last src operand can be an immediate unless it is Gen8+ */ |
||
449 | assert(ilo_dev_gen(cg->dev) >= ILO_GEN(8) || src_is_null(cg, 1)); |
||
450 | |||
451 | if (!src_is_null(cg, 1)) |
||
452 | return cg->src[idx].origin; |
||
453 | |||
454 | if (idx == 0) { |
||
455 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
||
456 | return cg->src[1].type << 27 | |
||
457 | cg->src[1].file << 25; |
||
458 | } else { |
||
459 | return cg->flag_sub_reg_num << 25; |
||
460 | } |
||
461 | } else { |
||
462 | return cg->src[0].origin; |
||
463 | } |
||
464 | } |
||
465 | else if (idx && cg->src[1].file == GEN6_FILE_IMM) { |
||
466 | assert(!cg->src[1].absolute && !cg->src[1].negate); |
||
467 | return cg->src[1].origin; |
||
468 | } |
||
469 | |||
470 | assert(src->file != GEN6_FILE_IMM); |
||
471 | |||
472 | if (src->indirect) { |
||
473 | const int offset = (int) src->origin; |
||
474 | |||
475 | assert(src->file == GEN6_FILE_GRF); |
||
476 | assert(offset < 512 && offset >= -512); |
||
477 | |||
478 | if (cg->inst->access_mode == GEN6_ALIGN_16) { |
||
479 | assert(src->width == GEN6_WIDTH_4); |
||
480 | assert(src->horz_stride == GEN6_HORZSTRIDE_1); |
||
481 | |||
482 | /* the lower 4 bits are reserved for the swizzle_[xy] */ |
||
483 | assert(!(src->origin & 0xf)); |
||
484 | |||
485 | dw = src->vert_stride << 21 | |
||
486 | src->swizzle[3] << 18 | |
||
487 | src->swizzle[2] << 16 | |
||
488 | GEN6_ADDRMODE_INDIRECT << 15 | |
||
489 | src->negate << 14 | |
||
490 | src->absolute << 13 | |
||
491 | src->swizzle[1] << 2 | |
||
492 | src->swizzle[0]; |
||
493 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
||
494 | dw |= src->indirect_subreg << 9 | |
||
495 | (src->origin & 0x1f0); |
||
496 | } else { |
||
497 | dw |= src->indirect_subreg << 10 | |
||
498 | (src->origin & 0x3f0); |
||
499 | } |
||
500 | } |
||
501 | else { |
||
502 | assert(src->swizzle[0] == TOY_SWIZZLE_X && |
||
503 | src->swizzle[1] == TOY_SWIZZLE_Y && |
||
504 | src->swizzle[2] == TOY_SWIZZLE_Z && |
||
505 | src->swizzle[3] == TOY_SWIZZLE_W); |
||
506 | |||
507 | dw = src->vert_stride << 21 | |
||
508 | src->width << 18 | |
||
509 | src->horz_stride << 16 | |
||
510 | GEN6_ADDRMODE_INDIRECT << 15 | |
||
511 | src->negate << 14 | |
||
512 | src->absolute << 13; |
||
513 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
||
514 | dw |= src->indirect_subreg << 9 | |
||
515 | (src->origin & 0x1ff); |
||
516 | } else { |
||
517 | dw |= src->indirect_subreg << 10 | |
||
518 | (src->origin & 0x3ff); |
||
519 | } |
||
520 | } |
||
521 | } |
||
522 | else { |
||
523 | switch (src->file) { |
||
524 | case GEN6_FILE_ARF: |
||
525 | break; |
||
526 | case GEN6_FILE_GRF: |
||
527 | assert(CG_REG_NUM(src->origin) < 128); |
||
528 | break; |
||
529 | case GEN6_FILE_MRF: |
||
530 | assert(cg->inst->opcode == GEN6_OPCODE_SEND || |
||
531 | cg->inst->opcode == GEN6_OPCODE_SENDC); |
||
532 | assert(CG_REG_NUM(src->origin) < 16); |
||
533 | break; |
||
534 | case GEN6_FILE_IMM: |
||
535 | default: |
||
536 | assert(!"invalid src file"); |
||
537 | break; |
||
538 | } |
||
539 | |||
540 | if (cg->inst->access_mode == GEN6_ALIGN_16) { |
||
541 | assert(src->width == GEN6_WIDTH_4); |
||
542 | assert(src->horz_stride == GEN6_HORZSTRIDE_1); |
||
543 | |||
544 | /* the lower 4 bits are reserved for the swizzle_[xy] */ |
||
545 | assert(!(src->origin & 0xf)); |
||
546 | |||
547 | dw = src->vert_stride << 21 | |
||
548 | src->swizzle[3] << 18 | |
||
549 | src->swizzle[2] << 16 | |
||
550 | GEN6_ADDRMODE_DIRECT << 15 | |
||
551 | src->negate << 14 | |
||
552 | src->absolute << 13 | |
||
553 | src->origin | |
||
554 | src->swizzle[1] << 2 | |
||
555 | src->swizzle[0]; |
||
556 | } |
||
557 | else { |
||
558 | assert(src->swizzle[0] == TOY_SWIZZLE_X && |
||
559 | src->swizzle[1] == TOY_SWIZZLE_Y && |
||
560 | src->swizzle[2] == TOY_SWIZZLE_Z && |
||
561 | src->swizzle[3] == TOY_SWIZZLE_W); |
||
562 | |||
563 | dw = src->vert_stride << 21 | |
||
564 | src->width << 18 | |
||
565 | src->horz_stride << 16 | |
||
566 | GEN6_ADDRMODE_DIRECT << 15 | |
||
567 | src->negate << 14 | |
||
568 | src->absolute << 13 | |
||
569 | src->origin; |
||
570 | } |
||
571 | } |
||
572 | |||
573 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
||
574 | const bool indirect_origin_bit9 = (cg->dst.indirect) ? |
||
575 | (src->origin & 0x200) : 0; |
||
576 | |||
577 | if (idx == 0) { |
||
578 | dw |= indirect_origin_bit9 << 31 | |
||
579 | cg->src[1].type << 27 | |
||
580 | cg->src[1].file << 25; |
||
581 | } else { |
||
582 | dw |= indirect_origin_bit9 << 25; |
||
583 | } |
||
584 | } else { |
||
585 | if (idx == 0) |
||
586 | dw |= cg->flag_sub_reg_num << 25; |
||
587 | } |
||
588 | |||
589 | return dw; |
||
590 | } |
||
591 | |||
592 | /** |
||
593 | * Translate the destination operand to the higher 16 bits of DW1 of the |
||
594 | * 1-src/2-src format. |
||
595 | */ |
||
596 | static uint16_t |
||
597 | translate_dst_region_gen6(const struct codegen *cg) |
||
598 | { |
||
599 | const struct codegen_dst *dst = &cg->dst; |
||
600 | uint16_t dw1_region; |
||
601 | |||
602 | ILO_DEV_ASSERT(cg->dev, 6, 8); |
||
603 | |||
604 | if (dst->file == GEN6_FILE_IMM) { |
||
605 | /* dst is immediate (JIP) when the opcode is a conditional branch */ |
||
606 | switch (cg->inst->opcode) { |
||
607 | case GEN6_OPCODE_IF: |
||
608 | case GEN6_OPCODE_ELSE: |
||
609 | case GEN6_OPCODE_ENDIF: |
||
610 | case GEN6_OPCODE_WHILE: |
||
611 | assert(dst->type == GEN6_TYPE_W); |
||
612 | dw1_region = (dst->origin & 0xffff); |
||
613 | break; |
||
614 | default: |
||
615 | assert(!"dst cannot be immediate"); |
||
616 | dw1_region = 0; |
||
617 | break; |
||
618 | } |
||
619 | |||
620 | return dw1_region; |
||
621 | } |
||
622 | |||
623 | if (dst->indirect) { |
||
624 | const int offset = (int) dst->origin; |
||
625 | |||
626 | assert(dst->file == GEN6_FILE_GRF); |
||
627 | assert(offset < 512 && offset >= -512); |
||
628 | |||
629 | if (cg->inst->access_mode == GEN6_ALIGN_16) { |
||
630 | /* |
||
631 | * From the Sandy Bridge PRM, volume 4 part 2, page 144: |
||
632 | * |
||
633 | * "Allthough Dst.HorzStride is a don't care for Align16, HW |
||
634 | * needs this to be programmed as 01." |
||
635 | */ |
||
636 | assert(dst->horz_stride == GEN6_HORZSTRIDE_1); |
||
637 | /* the lower 4 bits are reserved for the writemask */ |
||
638 | assert(!(dst->origin & 0xf)); |
||
639 | |||
640 | dw1_region = GEN6_ADDRMODE_INDIRECT << 15 | |
||
641 | dst->horz_stride << 13 | |
||
642 | dst->writemask; |
||
643 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
||
644 | dw1_region |= dst->indirect_subreg << 9 | |
||
645 | (dst->origin & 0x1f0); |
||
646 | } else { |
||
647 | dw1_region |= dst->indirect_subreg << 10 | |
||
648 | (dst->origin & 0x3f0); |
||
649 | } |
||
650 | } |
||
651 | else { |
||
652 | assert(dst->writemask == TOY_WRITEMASK_XYZW); |
||
653 | |||
654 | dw1_region = GEN6_ADDRMODE_INDIRECT << 15 | |
||
655 | dst->horz_stride << 13; |
||
656 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
||
657 | dw1_region |= dst->indirect_subreg << 9 | |
||
658 | (dst->origin & 0x1ff); |
||
659 | } else { |
||
660 | dw1_region |= dst->indirect_subreg << 10 | |
||
661 | (dst->origin & 0x3ff); |
||
662 | } |
||
663 | } |
||
664 | } |
||
665 | else { |
||
666 | assert((dst->file == GEN6_FILE_GRF && |
||
667 | CG_REG_NUM(dst->origin) < 128) || |
||
668 | (dst->file == GEN6_FILE_MRF && |
||
669 | CG_REG_NUM(dst->origin) < 16) || |
||
670 | (dst->file == GEN6_FILE_ARF)); |
||
671 | |||
672 | if (cg->inst->access_mode == GEN6_ALIGN_16) { |
||
673 | /* similar to the indirect case */ |
||
674 | assert(dst->horz_stride == GEN6_HORZSTRIDE_1); |
||
675 | assert(!(dst->origin & 0xf)); |
||
676 | |||
677 | dw1_region = GEN6_ADDRMODE_DIRECT << 15 | |
||
678 | dst->horz_stride << 13 | |
||
679 | dst->origin | |
||
680 | dst->writemask; |
||
681 | } |
||
682 | else { |
||
683 | assert(dst->writemask == TOY_WRITEMASK_XYZW); |
||
684 | |||
685 | dw1_region = GEN6_ADDRMODE_DIRECT << 15 | |
||
686 | dst->horz_stride << 13 | |
||
687 | dst->origin; |
||
688 | } |
||
689 | } |
||
690 | |||
691 | return dw1_region; |
||
692 | } |
||
693 | |||
694 | /** |
||
695 | * Translate the destination operand to DW1 of the 1-src/2-src format. |
||
696 | */ |
||
697 | static uint32_t |
||
698 | translate_dst_gen6(const struct codegen *cg) |
||
699 | { |
||
700 | ILO_DEV_ASSERT(cg->dev, 6, 7.5); |
||
701 | |||
702 | return translate_dst_region_gen6(cg) << 16 | |
||
703 | cg->src[1].type << 12 | |
||
704 | cg->src[1].file << 10 | |
||
705 | cg->src[0].type << 7 | |
||
706 | cg->src[0].file << 5 | |
||
707 | cg->dst.type << 2 | |
||
708 | cg->dst.file; |
||
709 | } |
||
710 | |||
711 | static uint32_t |
||
712 | translate_dst_gen8(const struct codegen *cg) |
||
713 | { |
||
714 | const bool indirect_origin_bit9 = (cg->dst.indirect) ? |
||
715 | (cg->dst.origin & 0x200) : 0; |
||
716 | |||
717 | ILO_DEV_ASSERT(cg->dev, 8, 8); |
||
718 | |||
719 | return translate_dst_region_gen6(cg) << 16 | |
||
720 | indirect_origin_bit9 << 15 | |
||
721 | cg->src[0].type << 11 | |
||
722 | cg->src[0].file << 9 | |
||
723 | cg->dst.type << 5 | |
||
724 | cg->dst.file << 3 | |
||
725 | cg->inst->mask_ctrl << 2 | |
||
726 | cg->flag_reg_num << 1 | |
||
727 | cg->flag_sub_reg_num; |
||
728 | } |
||
729 | |||
730 | /** |
||
731 | * Translate the instruction to DW0 of the 1-src/2-src format. |
||
732 | */ |
||
733 | static uint32_t |
||
734 | translate_inst_gen6(const struct codegen *cg) |
||
735 | { |
||
736 | const bool debug_ctrl = false; |
||
737 | const bool cmpt_ctrl = false; |
||
738 | |||
739 | ILO_DEV_ASSERT(cg->dev, 6, 7.5); |
||
740 | |||
741 | assert(cg->inst->opcode < 128); |
||
742 | |||
743 | return cg->inst->saturate << 31 | |
||
744 | debug_ctrl << 30 | |
||
745 | cmpt_ctrl << 29 | |
||
746 | cg->inst->acc_wr_ctrl << 28 | |
||
747 | cg->inst->cond_modifier << 24 | |
||
748 | cg->inst->exec_size << 21 | |
||
749 | cg->inst->pred_inv << 20 | |
||
750 | cg->inst->pred_ctrl << 16 | |
||
751 | cg->inst->thread_ctrl << 14 | |
||
752 | cg->inst->qtr_ctrl << 12 | |
||
753 | cg->inst->dep_ctrl << 10 | |
||
754 | cg->inst->mask_ctrl << 9 | |
||
755 | cg->inst->access_mode << 8 | |
||
756 | cg->inst->opcode; |
||
757 | } |
||
758 | |||
759 | static uint32_t |
||
760 | translate_inst_gen8(const struct codegen *cg) |
||
761 | { |
||
762 | const bool debug_ctrl = false; |
||
763 | const bool cmpt_ctrl = false; |
||
764 | |||
765 | ILO_DEV_ASSERT(cg->dev, 8, 8); |
||
766 | |||
767 | assert(cg->inst->opcode < 128); |
||
768 | |||
769 | return cg->inst->saturate << 31 | |
||
770 | debug_ctrl << 30 | |
||
771 | cmpt_ctrl << 29 | |
||
772 | cg->inst->acc_wr_ctrl << 28 | |
||
773 | cg->inst->cond_modifier << 24 | |
||
774 | cg->inst->exec_size << 21 | |
||
775 | cg->inst->pred_inv << 20 | |
||
776 | cg->inst->pred_ctrl << 16 | |
||
777 | cg->inst->thread_ctrl << 14 | |
||
778 | cg->inst->qtr_ctrl << 12 | |
||
779 | cg->inst->dep_ctrl << 9 | |
||
780 | cg->inst->access_mode << 8 | |
||
781 | cg->inst->opcode; |
||
782 | } |
||
783 | |||
784 | /** |
||
785 | * Codegen an instruction in 1-src/2-src format. |
||
786 | */ |
||
787 | static void |
||
788 | codegen_inst_gen6(const struct codegen *cg, uint32_t *code) |
||
789 | { |
||
790 | ILO_DEV_ASSERT(cg->dev, 6, 8); |
||
791 | |||
792 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
||
793 | code[0] = translate_inst_gen8(cg); |
||
794 | code[1] = translate_dst_gen8(cg); |
||
795 | } else { |
||
796 | code[0] = translate_inst_gen6(cg); |
||
797 | code[1] = translate_dst_gen6(cg); |
||
798 | } |
||
799 | |||
800 | code[2] = translate_src_gen6(cg, 0); |
||
801 | code[3] = translate_src_gen6(cg, 1); |
||
802 | assert(src_is_null(cg, 2)); |
||
803 | } |
||
804 | |||
805 | /** |
||
806 | * Codegen an instruction in 3-src format. |
||
807 | */ |
||
808 | static void |
||
809 | codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code) |
||
810 | { |
||
811 | const struct codegen_dst *dst = &cg->dst; |
||
812 | uint32_t dw0, dw1, dw_src[3]; |
||
813 | int i; |
||
814 | |||
815 | ILO_DEV_ASSERT(cg->dev, 6, 8); |
||
816 | |||
817 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) |
||
818 | dw0 = translate_inst_gen8(cg); |
||
819 | else |
||
820 | dw0 = translate_inst_gen6(cg); |
||
821 | |||
822 | /* |
||
823 | * 3-src instruction restrictions |
||
824 | * |
||
825 | * - align16 with direct addressing |
||
826 | * - GRF or MRF dst |
||
827 | * - GRF src |
||
828 | * - sub_reg_num is DWORD aligned |
||
829 | * - no regioning except replication control |
||
830 | * (vert_stride == 0 && horz_stride == 0) |
||
831 | */ |
||
832 | assert(cg->inst->access_mode == GEN6_ALIGN_16); |
||
833 | |||
834 | assert(!dst->indirect); |
||
835 | assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) || |
||
836 | (dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16)); |
||
837 | assert(!(dst->origin & 0x3)); |
||
838 | assert(dst->horz_stride == GEN6_HORZSTRIDE_1); |
||
839 | |||
840 | if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
||
841 | dw1 = dst->origin << 19 | |
||
842 | dst->writemask << 17 | |
||
843 | cg->src[2].negate << 10 | |
||
844 | cg->src[2].negate << 10 | |
||
845 | cg->src[2].absolute << 9 | |
||
846 | cg->src[1].negate << 8 | |
||
847 | cg->src[1].absolute << 7 | |
||
848 | cg->src[0].negate << 6 | |
||
849 | cg->src[0].absolute << 5 | |
||
850 | cg->inst->mask_ctrl << 2 | |
||
851 | cg->flag_reg_num << 1 | |
||
852 | cg->flag_sub_reg_num; |
||
853 | } else { |
||
854 | dw1 = dst->origin << 19 | |
||
855 | dst->writemask << 17 | |
||
856 | cg->src[2].negate << 9 | |
||
857 | cg->src[2].absolute << 8 | |
||
858 | cg->src[1].negate << 7 | |
||
859 | cg->src[1].absolute << 6 | |
||
860 | cg->src[0].negate << 5 | |
||
861 | cg->src[0].absolute << 4 | |
||
862 | cg->flag_sub_reg_num << 1 | |
||
863 | (dst->file == GEN6_FILE_MRF); |
||
864 | } |
||
865 | |||
866 | for (i = 0; i < 3; i++) { |
||
867 | const struct codegen_src *src = &cg->src[i]; |
||
868 | |||
869 | assert(!src->indirect); |
||
870 | assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128); |
||
871 | assert(!(src->origin & 0x3)); |
||
872 | |||
873 | assert((src->vert_stride == GEN6_VERTSTRIDE_4 && |
||
874 | src->horz_stride == GEN6_HORZSTRIDE_1) || |
||
875 | (src->vert_stride == GEN6_VERTSTRIDE_0 && |
||
876 | src->horz_stride == GEN6_HORZSTRIDE_0)); |
||
877 | assert(src->width == GEN6_WIDTH_4); |
||
878 | |||
879 | dw_src[i] = src->origin << 7 | |
||
880 | src->swizzle[3] << 7 | |
||
881 | src->swizzle[2] << 5 | |
||
882 | src->swizzle[1] << 3 | |
||
883 | src->swizzle[0] << 1 | |
||
884 | (src->vert_stride == GEN6_VERTSTRIDE_0 && |
||
885 | src->horz_stride == GEN6_HORZSTRIDE_0); |
||
886 | |||
887 | /* only the lower 20 bits are used */ |
||
888 | assert((dw_src[i] & 0xfffff) == dw_src[i]); |
||
889 | } |
||
890 | |||
891 | code[0] = dw0; |
||
892 | code[1] = dw1; |
||
893 | /* concatenate the bits of dw_src */ |
||
894 | code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0]; |
||
895 | code[3] = dw_src[2] << 10 | (dw_src[1] >> 11); |
||
896 | } |
||
897 | |||
898 | /** |
||
899 | * Sanity check the region parameters of the operands. |
||
900 | */ |
||
901 | static void |
||
902 | codegen_validate_region_restrictions(const struct codegen *cg) |
||
903 | { |
||
904 | const int exec_size_map[] = { |
||
905 | [GEN6_EXECSIZE_1] = 1, |
||
906 | [GEN6_EXECSIZE_2] = 2, |
||
907 | [GEN6_EXECSIZE_4] = 4, |
||
908 | [GEN6_EXECSIZE_8] = 8, |
||
909 | [GEN6_EXECSIZE_16] = 16, |
||
910 | [GEN6_EXECSIZE_32] = 32, |
||
911 | }; |
||
912 | const int width_map[] = { |
||
913 | [GEN6_WIDTH_1] = 1, |
||
914 | [GEN6_WIDTH_2] = 2, |
||
915 | [GEN6_WIDTH_4] = 4, |
||
916 | [GEN6_WIDTH_8] = 8, |
||
917 | [GEN6_WIDTH_16] = 16, |
||
918 | }; |
||
919 | const int horz_stride_map[] = { |
||
920 | [GEN6_HORZSTRIDE_0] = 0, |
||
921 | [GEN6_HORZSTRIDE_1] = 1, |
||
922 | [GEN6_HORZSTRIDE_2] = 2, |
||
923 | [GEN6_HORZSTRIDE_4] = 4, |
||
924 | }; |
||
925 | const int vert_stride_map[] = { |
||
926 | [GEN6_VERTSTRIDE_0] = 0, |
||
927 | [GEN6_VERTSTRIDE_1] = 1, |
||
928 | [GEN6_VERTSTRIDE_2] = 2, |
||
929 | [GEN6_VERTSTRIDE_4] = 4, |
||
930 | [GEN6_VERTSTRIDE_8] = 8, |
||
931 | [GEN6_VERTSTRIDE_16] = 16, |
||
932 | [GEN6_VERTSTRIDE_32] = 32, |
||
933 | [7] = 64, |
||
934 | [8] = 128, |
||
935 | [9] = 256, |
||
936 | [GEN6_VERTSTRIDE_VXH] = 0, |
||
937 | }; |
||
938 | const int exec_size = exec_size_map[cg->inst->exec_size]; |
||
939 | int i; |
||
940 | |||
941 | /* Sandy Bridge PRM, volume 4 part 2, page 94 */ |
||
942 | |||
943 | /* 1. (we don't do 32 anyway) */ |
||
944 | assert(exec_size <= 16); |
||
945 | |||
946 | for (i = 0; i < Elements(cg->src); i++) { |
||
947 | const int width = width_map[cg->src[i].width]; |
||
948 | const int horz_stride = horz_stride_map[cg->src[i].horz_stride]; |
||
949 | const int vert_stride = vert_stride_map[cg->src[i].vert_stride]; |
||
950 | |||
951 | if (src_is_null(cg, i)) |
||
952 | break; |
||
953 | |||
954 | /* 3. */ |
||
955 | assert(exec_size >= width); |
||
956 | |||
957 | if (exec_size == width) { |
||
958 | /* 4. & 5. */ |
||
959 | if (horz_stride) |
||
960 | assert(vert_stride == width * horz_stride); |
||
961 | } |
||
962 | |||
963 | if (width == 1) { |
||
964 | /* 6. */ |
||
965 | assert(horz_stride == 0); |
||
966 | |||
967 | /* 7. */ |
||
968 | if (exec_size == 1) |
||
969 | assert(vert_stride == 0); |
||
970 | } |
||
971 | |||
972 | /* 8. */ |
||
973 | if (!vert_stride && !horz_stride) |
||
974 | assert(width == 1); |
||
975 | } |
||
976 | |||
977 | /* derived from 10.1.2. & 10.2. */ |
||
978 | assert(cg->dst.horz_stride != GEN6_HORZSTRIDE_0); |
||
979 | } |
||
980 | |||
981 | static unsigned |
||
982 | translate_vfile(enum toy_file file) |
||
983 | { |
||
984 | switch (file) { |
||
985 | case TOY_FILE_ARF: return GEN6_FILE_ARF; |
||
986 | case TOY_FILE_GRF: return GEN6_FILE_GRF; |
||
987 | case TOY_FILE_MRF: return GEN6_FILE_MRF; |
||
988 | case TOY_FILE_IMM: return GEN6_FILE_IMM; |
||
989 | default: |
||
990 | assert(!"unhandled toy file"); |
||
991 | return GEN6_FILE_GRF; |
||
992 | } |
||
993 | } |
||
994 | |||
995 | static unsigned |
||
996 | translate_vtype(enum toy_type type) |
||
997 | { |
||
998 | switch (type) { |
||
999 | case TOY_TYPE_F: return GEN6_TYPE_F; |
||
1000 | case TOY_TYPE_D: return GEN6_TYPE_D; |
||
1001 | case TOY_TYPE_UD: return GEN6_TYPE_UD; |
||
1002 | case TOY_TYPE_W: return GEN6_TYPE_W; |
||
1003 | case TOY_TYPE_UW: return GEN6_TYPE_UW; |
||
1004 | case TOY_TYPE_V: return GEN6_TYPE_V_IMM; |
||
1005 | default: |
||
1006 | assert(!"unhandled toy type"); |
||
1007 | return GEN6_TYPE_F; |
||
1008 | } |
||
1009 | } |
||
1010 | |||
1011 | static unsigned |
||
1012 | translate_writemask(enum toy_writemask writemask) |
||
1013 | { |
||
1014 | /* TOY_WRITEMASK_* are compatible with the hardware definitions */ |
||
1015 | assert(writemask <= 0xf); |
||
1016 | return writemask; |
||
1017 | } |
||
1018 | |||
1019 | static unsigned |
||
1020 | translate_swizzle(enum toy_swizzle swizzle) |
||
1021 | { |
||
1022 | /* TOY_SWIZZLE_* are compatible with the hardware definitions */ |
||
1023 | assert(swizzle <= 3); |
||
1024 | return swizzle; |
||
1025 | } |
||
1026 | |||
1027 | /** |
||
1028 | * Prepare for generating an instruction. |
||
1029 | */ |
||
1030 | static void |
||
1031 | codegen_prepare(struct codegen *cg, const struct ilo_dev *dev, |
||
1032 | const struct toy_inst *inst, int pc, int rect_linear_width) |
||
1033 | { |
||
1034 | int i; |
||
1035 | |||
1036 | cg->dev = dev; |
||
1037 | cg->inst = inst; |
||
1038 | cg->pc = pc; |
||
1039 | |||
1040 | cg->flag_reg_num = 0; |
||
1041 | cg->flag_sub_reg_num = 0; |
||
1042 | |||
1043 | cg->dst.file = translate_vfile(inst->dst.file); |
||
1044 | cg->dst.type = translate_vtype(inst->dst.type); |
||
1045 | cg->dst.indirect = inst->dst.indirect; |
||
1046 | cg->dst.indirect_subreg = inst->dst.indirect_subreg; |
||
1047 | cg->dst.origin = inst->dst.val32; |
||
1048 | |||
1049 | /* |
||
1050 | * From the Sandy Bridge PRM, volume 4 part 2, page 81: |
||
1051 | * |
||
1052 | * "For a word or an unsigned word immediate data, software must |
||
1053 | * replicate the same 16-bit immediate value to both the lower word |
||
1054 | * and the high word of the 32-bit immediate field in an instruction." |
||
1055 | */ |
||
1056 | if (inst->dst.file == TOY_FILE_IMM) { |
||
1057 | switch (inst->dst.type) { |
||
1058 | case TOY_TYPE_W: |
||
1059 | case TOY_TYPE_UW: |
||
1060 | cg->dst.origin &= 0xffff; |
||
1061 | cg->dst.origin |= cg->dst.origin << 16; |
||
1062 | break; |
||
1063 | default: |
||
1064 | break; |
||
1065 | } |
||
1066 | } |
||
1067 | |||
1068 | cg->dst.writemask = translate_writemask(inst->dst.writemask); |
||
1069 | |||
1070 | switch (inst->dst.rect) { |
||
1071 | case TOY_RECT_LINEAR: |
||
1072 | cg->dst.horz_stride = GEN6_HORZSTRIDE_1; |
||
1073 | break; |
||
1074 | default: |
||
1075 | assert(!"unsupported dst region"); |
||
1076 | cg->dst.horz_stride = GEN6_HORZSTRIDE_1; |
||
1077 | break; |
||
1078 | } |
||
1079 | |||
1080 | for (i = 0; i < Elements(cg->src); i++) { |
||
1081 | struct codegen_src *src = &cg->src[i]; |
||
1082 | |||
1083 | src->file = translate_vfile(inst->src[i].file); |
||
1084 | src->type = translate_vtype(inst->src[i].type); |
||
1085 | src->indirect = inst->src[i].indirect; |
||
1086 | src->indirect_subreg = inst->src[i].indirect_subreg; |
||
1087 | src->origin = inst->src[i].val32; |
||
1088 | |||
1089 | /* do the same for src */ |
||
1090 | if (inst->dst.file == TOY_FILE_IMM) { |
||
1091 | switch (inst->src[i].type) { |
||
1092 | case TOY_TYPE_W: |
||
1093 | case TOY_TYPE_UW: |
||
1094 | src->origin &= 0xffff; |
||
1095 | src->origin |= src->origin << 16; |
||
1096 | break; |
||
1097 | default: |
||
1098 | break; |
||
1099 | } |
||
1100 | } |
||
1101 | |||
1102 | src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x); |
||
1103 | src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y); |
||
1104 | src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z); |
||
1105 | src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w); |
||
1106 | src->absolute = inst->src[i].absolute; |
||
1107 | src->negate = inst->src[i].negate; |
||
1108 | |||
1109 | switch (inst->src[i].rect) { |
||
1110 | case TOY_RECT_LINEAR: |
||
1111 | switch (rect_linear_width) { |
||
1112 | case 1: |
||
1113 | src->vert_stride = GEN6_VERTSTRIDE_1; |
||
1114 | src->width = GEN6_WIDTH_1; |
||
1115 | break; |
||
1116 | case 2: |
||
1117 | src->vert_stride = GEN6_VERTSTRIDE_2; |
||
1118 | src->width = GEN6_WIDTH_2; |
||
1119 | break; |
||
1120 | case 4: |
||
1121 | src->vert_stride = GEN6_VERTSTRIDE_4; |
||
1122 | src->width = GEN6_WIDTH_4; |
||
1123 | break; |
||
1124 | case 8: |
||
1125 | src->vert_stride = GEN6_VERTSTRIDE_8; |
||
1126 | src->width = GEN6_WIDTH_8; |
||
1127 | break; |
||
1128 | case 16: |
||
1129 | src->vert_stride = GEN6_VERTSTRIDE_16; |
||
1130 | src->width = GEN6_WIDTH_16; |
||
1131 | break; |
||
1132 | default: |
||
1133 | assert(!"unsupported TOY_RECT_LINEAR width"); |
||
1134 | src->vert_stride = GEN6_VERTSTRIDE_1; |
||
1135 | src->width = GEN6_WIDTH_1; |
||
1136 | break; |
||
1137 | } |
||
1138 | src->horz_stride = GEN6_HORZSTRIDE_1; |
||
1139 | break; |
||
1140 | case TOY_RECT_041: |
||
1141 | src->vert_stride = GEN6_VERTSTRIDE_0; |
||
1142 | src->width = GEN6_WIDTH_4; |
||
1143 | src->horz_stride = GEN6_HORZSTRIDE_1; |
||
1144 | break; |
||
1145 | case TOY_RECT_010: |
||
1146 | src->vert_stride = GEN6_VERTSTRIDE_0; |
||
1147 | src->width = GEN6_WIDTH_1; |
||
1148 | src->horz_stride = GEN6_HORZSTRIDE_0; |
||
1149 | break; |
||
1150 | case TOY_RECT_220: |
||
1151 | src->vert_stride = GEN6_VERTSTRIDE_2; |
||
1152 | src->width = GEN6_WIDTH_2; |
||
1153 | src->horz_stride = GEN6_HORZSTRIDE_0; |
||
1154 | break; |
||
1155 | case TOY_RECT_440: |
||
1156 | src->vert_stride = GEN6_VERTSTRIDE_4; |
||
1157 | src->width = GEN6_WIDTH_4; |
||
1158 | src->horz_stride = GEN6_HORZSTRIDE_0; |
||
1159 | break; |
||
1160 | case TOY_RECT_240: |
||
1161 | src->vert_stride = GEN6_VERTSTRIDE_2; |
||
1162 | src->width = GEN6_WIDTH_4; |
||
1163 | src->horz_stride = GEN6_HORZSTRIDE_0; |
||
1164 | break; |
||
1165 | default: |
||
1166 | assert(!"unsupported src region"); |
||
1167 | src->vert_stride = GEN6_VERTSTRIDE_1; |
||
1168 | src->width = GEN6_WIDTH_1; |
||
1169 | src->horz_stride = GEN6_HORZSTRIDE_1; |
||
1170 | break; |
||
1171 | } |
||
1172 | } |
||
1173 | } |
||
1174 | |||
1175 | /** |
||
1176 | * Generate HW shader code. The instructions should have been legalized. |
||
1177 | */ |
||
1178 | void * |
||
1179 | toy_compiler_assemble(struct toy_compiler *tc, int *size) |
||
1180 | { |
||
1181 | const struct toy_inst *inst; |
||
1182 | uint32_t *code; |
||
1183 | int pc; |
||
1184 | |||
1185 | code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t)); |
||
1186 | if (!code) |
||
1187 | return NULL; |
||
1188 | |||
1189 | pc = 0; |
||
1190 | tc_head(tc); |
||
1191 | while ((inst = tc_next(tc)) != NULL) { |
||
1192 | uint32_t *dw = &code[pc * 4]; |
||
1193 | struct codegen cg; |
||
1194 | |||
1195 | if (pc >= tc->num_instructions) { |
||
1196 | tc_fail(tc, "wrong instructoun count"); |
||
1197 | break; |
||
1198 | } |
||
1199 | |||
1200 | codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width); |
||
1201 | codegen_validate_region_restrictions(&cg); |
||
1202 | |||
1203 | switch (inst->opcode) { |
||
1204 | case GEN6_OPCODE_MAD: |
||
1205 | codegen_inst_3src_gen6(&cg, dw); |
||
1206 | break; |
||
1207 | default: |
||
1208 | codegen_inst_gen6(&cg, dw); |
||
1209 | break; |
||
1210 | } |
||
1211 | |||
1212 | pc++; |
||
1213 | } |
||
1214 | |||
1215 | /* never return an invalid kernel */ |
||
1216 | if (tc->fail) { |
||
1217 | FREE(code); |
||
1218 | return NULL; |
||
1219 | } |
||
1220 | |||
1221 | if (size) |
||
1222 | *size = pc * 4 * sizeof(uint32_t); |
||
1223 | |||
1224 | return code; |
||
1225 | }><>>><>=>=>>=>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>><>><>><>><> |