Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5563 | serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * |
||
4 | * Copyright (C) 2012-2013 LunarG, Inc. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included |
||
14 | * in all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
22 | * DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * Authors: |
||
25 | * Chia-I Wu |
||
26 | */ |
||
27 | |||
28 | #include "toy_compiler.h" |
||
29 | |||
/*
 * An operand origin is encoded as (RegNum << 5 | SubRegNumInBytes);
 * CG_REG_NUM() recovers the register number from such an origin.
 */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)

/* Destination operand, already translated to hardware encodings. */
struct codegen_dst {
   unsigned file;
   unsigned type;

   /* register-indirect addressing and the sub-register it goes through */
   bool indirect;
   unsigned indirect_subreg;

   unsigned origin;        /* (RegNum << 5 | SubRegNumInBytes) */

   unsigned horz_stride;

   unsigned writemask;
};

/* Source operand, already translated to hardware encodings. */
struct codegen_src {
   unsigned file;
   unsigned type;

   /* register-indirect addressing and the sub-register it goes through */
   bool indirect;
   unsigned indirect_subreg;

   unsigned origin;        /* (RegNum << 5 | SubRegNumInBytes) */

   /* region description */
   unsigned vert_stride;
   unsigned width;
   unsigned horz_stride;

   /* per-channel swizzle followed by the source modifiers */
   unsigned swizzle[4];
   bool absolute;
   bool negate;
};

/* Everything needed to emit the machine code of one instruction. */
struct codegen {
   const struct toy_inst *inst;  /* the instruction being assembled */
   int pc;                       /* its index in the instruction stream */

   unsigned flag_sub_reg_num;

   struct codegen_dst dst;
   struct codegen_src src[3];
};
||
67 | |||
68 | /** |
||
69 | * Return true if the source operand is null. |
||
70 | */ |
||
71 | static bool |
||
72 | src_is_null(const struct codegen *cg, int idx) |
||
73 | { |
||
74 | const struct codegen_src *src = &cg->src[idx]; |
||
75 | |||
76 | return (src->file == BRW_ARCHITECTURE_REGISTER_FILE && |
||
77 | src->origin == BRW_ARF_NULL << CG_REG_SHIFT); |
||
78 | } |
||
79 | |||
80 | /** |
||
81 | * Translate a source operand to DW2 or DW3 of the 1-src/2-src format. |
||
82 | */ |
||
83 | static uint32_t |
||
84 | translate_src(const struct codegen *cg, int idx) |
||
85 | { |
||
86 | const struct codegen_src *src = &cg->src[idx]; |
||
87 | uint32_t dw; |
||
88 | |||
89 | /* special treatment may be needed if any of the operand is immediate */ |
||
90 | if (cg->src[0].file == BRW_IMMEDIATE_VALUE) { |
||
91 | assert(!cg->src[0].absolute && !cg->src[0].negate); |
||
92 | /* only the last src operand can be an immediate */ |
||
93 | assert(src_is_null(cg, 1)); |
||
94 | |||
95 | if (idx == 0) |
||
96 | return cg->flag_sub_reg_num << 25; |
||
97 | else |
||
98 | return cg->src[0].origin; |
||
99 | } |
||
100 | else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) { |
||
101 | assert(!cg->src[1].absolute && !cg->src[1].negate); |
||
102 | return cg->src[1].origin; |
||
103 | } |
||
104 | |||
105 | assert(src->file != BRW_IMMEDIATE_VALUE); |
||
106 | |||
107 | if (src->indirect) { |
||
108 | const int offset = (int) src->origin; |
||
109 | |||
110 | assert(src->file == BRW_GENERAL_REGISTER_FILE); |
||
111 | assert(offset < 512 && offset >= -512); |
||
112 | |||
113 | if (cg->inst->access_mode == BRW_ALIGN_16) { |
||
114 | assert(src->width == BRW_WIDTH_4); |
||
115 | assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1); |
||
116 | |||
117 | /* the lower 4 bits are reserved for the swizzle_[xy] */ |
||
118 | assert(!(src->origin & 0xf)); |
||
119 | |||
120 | dw = src->vert_stride << 21 | |
||
121 | src->swizzle[3] << 18 | |
||
122 | src->swizzle[2] << 16 | |
||
123 | BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 | |
||
124 | src->negate << 14 | |
||
125 | src->absolute << 13 | |
||
126 | src->indirect_subreg << 10 | |
||
127 | (src->origin & 0x3f0) | |
||
128 | src->swizzle[1] << 2 | |
||
129 | src->swizzle[0]; |
||
130 | } |
||
131 | else { |
||
132 | assert(src->swizzle[0] == TOY_SWIZZLE_X && |
||
133 | src->swizzle[1] == TOY_SWIZZLE_Y && |
||
134 | src->swizzle[2] == TOY_SWIZZLE_Z && |
||
135 | src->swizzle[3] == TOY_SWIZZLE_W); |
||
136 | |||
137 | dw = src->vert_stride << 21 | |
||
138 | src->width << 18 | |
||
139 | src->horz_stride << 16 | |
||
140 | BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 | |
||
141 | src->negate << 14 | |
||
142 | src->absolute << 13 | |
||
143 | src->indirect_subreg << 10 | |
||
144 | (src->origin & 0x3ff); |
||
145 | } |
||
146 | } |
||
147 | else { |
||
148 | switch (src->file) { |
||
149 | case BRW_ARCHITECTURE_REGISTER_FILE: |
||
150 | break; |
||
151 | case BRW_GENERAL_REGISTER_FILE: |
||
152 | assert(CG_REG_NUM(src->origin) < 128); |
||
153 | break; |
||
154 | case BRW_MESSAGE_REGISTER_FILE: |
||
155 | assert(cg->inst->opcode == BRW_OPCODE_SEND || |
||
156 | cg->inst->opcode == BRW_OPCODE_SENDC); |
||
157 | assert(CG_REG_NUM(src->origin) < 16); |
||
158 | break; |
||
159 | case BRW_IMMEDIATE_VALUE: |
||
160 | default: |
||
161 | assert(!"invalid src file"); |
||
162 | break; |
||
163 | } |
||
164 | |||
165 | if (cg->inst->access_mode == BRW_ALIGN_16) { |
||
166 | assert(src->width == BRW_WIDTH_4); |
||
167 | assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1); |
||
168 | |||
169 | /* the lower 4 bits are reserved for the swizzle_[xy] */ |
||
170 | assert(!(src->origin & 0xf)); |
||
171 | |||
172 | dw = src->vert_stride << 21 | |
||
173 | src->swizzle[3] << 18 | |
||
174 | src->swizzle[2] << 16 | |
||
175 | BRW_ADDRESS_DIRECT << 15 | |
||
176 | src->negate << 14 | |
||
177 | src->absolute << 13 | |
||
178 | src->origin | |
||
179 | src->swizzle[1] << 2 | |
||
180 | src->swizzle[0]; |
||
181 | } |
||
182 | else { |
||
183 | assert(src->swizzle[0] == TOY_SWIZZLE_X && |
||
184 | src->swizzle[1] == TOY_SWIZZLE_Y && |
||
185 | src->swizzle[2] == TOY_SWIZZLE_Z && |
||
186 | src->swizzle[3] == TOY_SWIZZLE_W); |
||
187 | |||
188 | dw = src->vert_stride << 21 | |
||
189 | src->width << 18 | |
||
190 | src->horz_stride << 16 | |
||
191 | BRW_ADDRESS_DIRECT << 15 | |
||
192 | src->negate << 14 | |
||
193 | src->absolute << 13 | |
||
194 | src->origin; |
||
195 | } |
||
196 | } |
||
197 | |||
198 | if (idx == 0) |
||
199 | dw |= cg->flag_sub_reg_num << 25; |
||
200 | |||
201 | return dw; |
||
202 | } |
||
203 | |||
204 | /** |
||
205 | * Translate the destination operand to the higher 16 bits of DW1 of the |
||
206 | * 1-src/2-src format. |
||
207 | */ |
||
208 | static uint16_t |
||
209 | translate_dst_region(const struct codegen *cg) |
||
210 | { |
||
211 | const struct codegen_dst *dst = &cg->dst; |
||
212 | uint16_t dw1_region; |
||
213 | |||
214 | if (dst->file == BRW_IMMEDIATE_VALUE) { |
||
215 | /* dst is immediate (JIP) when the opcode is a conditional branch */ |
||
216 | switch (cg->inst->opcode) { |
||
217 | case BRW_OPCODE_IF: |
||
218 | case BRW_OPCODE_ELSE: |
||
219 | case BRW_OPCODE_ENDIF: |
||
220 | case BRW_OPCODE_WHILE: |
||
221 | assert(dst->type == BRW_REGISTER_TYPE_W); |
||
222 | dw1_region = (dst->origin & 0xffff); |
||
223 | break; |
||
224 | default: |
||
225 | assert(!"dst cannot be immediate"); |
||
226 | dw1_region = 0; |
||
227 | break; |
||
228 | } |
||
229 | |||
230 | return dw1_region; |
||
231 | } |
||
232 | |||
233 | if (dst->indirect) { |
||
234 | const int offset = (int) dst->origin; |
||
235 | |||
236 | assert(dst->file == BRW_GENERAL_REGISTER_FILE); |
||
237 | assert(offset < 512 && offset >= -512); |
||
238 | |||
239 | if (cg->inst->access_mode == BRW_ALIGN_16) { |
||
240 | /* |
||
241 | * From the Sandy Bridge PRM, volume 4 part 2, page 144: |
||
242 | * |
||
243 | * "Allthough Dst.HorzStride is a don't care for Align16, HW |
||
244 | * needs this to be programmed as 01." |
||
245 | */ |
||
246 | assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1); |
||
247 | /* the lower 4 bits are reserved for the writemask */ |
||
248 | assert(!(dst->origin & 0xf)); |
||
249 | |||
250 | dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 | |
||
251 | dst->horz_stride << 13 | |
||
252 | dst->indirect_subreg << 10 | |
||
253 | (dst->origin & 0x3f0) | |
||
254 | dst->writemask; |
||
255 | } |
||
256 | else { |
||
257 | assert(dst->writemask == TOY_WRITEMASK_XYZW); |
||
258 | |||
259 | dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 | |
||
260 | dst->horz_stride << 13 | |
||
261 | dst->indirect_subreg << 10 | |
||
262 | (dst->origin & 0x3ff); |
||
263 | } |
||
264 | } |
||
265 | else { |
||
266 | assert((dst->file == BRW_GENERAL_REGISTER_FILE && |
||
267 | CG_REG_NUM(dst->origin) < 128) || |
||
268 | (dst->file == BRW_MESSAGE_REGISTER_FILE && |
||
269 | CG_REG_NUM(dst->origin) < 16) || |
||
270 | (dst->file == BRW_ARCHITECTURE_REGISTER_FILE)); |
||
271 | |||
272 | if (cg->inst->access_mode == BRW_ALIGN_16) { |
||
273 | /* similar to the indirect case */ |
||
274 | assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1); |
||
275 | assert(!(dst->origin & 0xf)); |
||
276 | |||
277 | dw1_region = BRW_ADDRESS_DIRECT << 15 | |
||
278 | dst->horz_stride << 13 | |
||
279 | dst->origin | |
||
280 | dst->writemask; |
||
281 | } |
||
282 | else { |
||
283 | assert(dst->writemask == TOY_WRITEMASK_XYZW); |
||
284 | |||
285 | dw1_region = BRW_ADDRESS_DIRECT << 15 | |
||
286 | dst->horz_stride << 13 | |
||
287 | dst->origin; |
||
288 | } |
||
289 | } |
||
290 | |||
291 | return dw1_region; |
||
292 | } |
||
293 | |||
294 | /** |
||
295 | * Translate the destination operand to DW1 of the 1-src/2-src format. |
||
296 | */ |
||
297 | static uint32_t |
||
298 | translate_dst(const struct codegen *cg) |
||
299 | { |
||
300 | return translate_dst_region(cg) << 16 | |
||
301 | cg->src[1].type << 12 | |
||
302 | cg->src[1].file << 10 | |
||
303 | cg->src[0].type << 7 | |
||
304 | cg->src[0].file << 5 | |
||
305 | cg->dst.type << 2 | |
||
306 | cg->dst.file; |
||
307 | } |
||
308 | |||
309 | /** |
||
310 | * Translate the instruction to DW0 of the 1-src/2-src format. |
||
311 | */ |
||
312 | static uint32_t |
||
313 | translate_inst(const struct codegen *cg) |
||
314 | { |
||
315 | const bool debug_ctrl = false; |
||
316 | const bool cmpt_ctrl = false; |
||
317 | |||
318 | assert(cg->inst->opcode < 128); |
||
319 | |||
320 | return cg->inst->saturate << 31 | |
||
321 | debug_ctrl << 30 | |
||
322 | cmpt_ctrl << 29 | |
||
323 | cg->inst->acc_wr_ctrl << 28 | |
||
324 | cg->inst->cond_modifier << 24 | |
||
325 | cg->inst->exec_size << 21 | |
||
326 | cg->inst->pred_inv << 20 | |
||
327 | cg->inst->pred_ctrl << 16 | |
||
328 | cg->inst->thread_ctrl << 14 | |
||
329 | cg->inst->qtr_ctrl << 12 | |
||
330 | cg->inst->dep_ctrl << 10 | |
||
331 | cg->inst->mask_ctrl << 9 | |
||
332 | cg->inst->access_mode << 8 | |
||
333 | cg->inst->opcode; |
||
334 | } |
||
335 | |||
/**
 * Codegen an instruction in 1-src/2-src format.
 *
 * Four dwords are written to \p code: the instruction header, the
 * destination, and the first two sources.  The third source must be null.
 */
static void
codegen_inst(const struct codegen *cg, uint32_t *code)
{
   uint32_t *dw = code;

   *dw++ = translate_inst(cg);
   *dw++ = translate_dst(cg);
   *dw++ = translate_src(cg, 0);
   *dw = translate_src(cg, 1);

   /* this format has no slot for a third source */
   assert(src_is_null(cg, 2));
}
||
348 | |||
349 | /** |
||
350 | * Codegen an instruction in 3-src format. |
||
351 | */ |
||
352 | static void |
||
353 | codegen_inst_3src(const struct codegen *cg, uint32_t *code) |
||
354 | { |
||
355 | const struct codegen_dst *dst = &cg->dst; |
||
356 | uint32_t dw0, dw1, dw_src[3]; |
||
357 | int i; |
||
358 | |||
359 | dw0 = translate_inst(cg); |
||
360 | |||
361 | /* |
||
362 | * 3-src instruction restrictions |
||
363 | * |
||
364 | * - align16 with direct addressing |
||
365 | * - GRF or MRF dst |
||
366 | * - GRF src |
||
367 | * - sub_reg_num is DWORD aligned |
||
368 | * - no regioning except replication control |
||
369 | * (vert_stride == 0 && horz_stride == 0) |
||
370 | */ |
||
371 | assert(cg->inst->access_mode == BRW_ALIGN_16); |
||
372 | |||
373 | assert(!dst->indirect); |
||
374 | assert((dst->file == BRW_GENERAL_REGISTER_FILE && |
||
375 | CG_REG_NUM(dst->origin) < 128) || |
||
376 | (dst->file == BRW_MESSAGE_REGISTER_FILE && |
||
377 | CG_REG_NUM(dst->origin) < 16)); |
||
378 | assert(!(dst->origin & 0x3)); |
||
379 | assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1); |
||
380 | |||
381 | dw1 = dst->origin << 19 | |
||
382 | dst->writemask << 17 | |
||
383 | cg->src[2].negate << 9 | |
||
384 | cg->src[2].absolute << 8 | |
||
385 | cg->src[1].negate << 7 | |
||
386 | cg->src[1].absolute << 6 | |
||
387 | cg->src[0].negate << 5 | |
||
388 | cg->src[0].absolute << 4 | |
||
389 | cg->flag_sub_reg_num << 1 | |
||
390 | (dst->file == BRW_MESSAGE_REGISTER_FILE); |
||
391 | |||
392 | for (i = 0; i < 3; i++) { |
||
393 | const struct codegen_src *src = &cg->src[i]; |
||
394 | |||
395 | assert(!src->indirect); |
||
396 | assert(src->file == BRW_GENERAL_REGISTER_FILE && |
||
397 | CG_REG_NUM(src->origin) < 128); |
||
398 | assert(!(src->origin & 0x3)); |
||
399 | |||
400 | assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 && |
||
401 | src->horz_stride == BRW_HORIZONTAL_STRIDE_1) || |
||
402 | (src->vert_stride == BRW_VERTICAL_STRIDE_0 && |
||
403 | src->horz_stride == BRW_HORIZONTAL_STRIDE_0)); |
||
404 | assert(src->width == BRW_WIDTH_4); |
||
405 | |||
406 | dw_src[i] = src->origin << 7 | |
||
407 | src->swizzle[3] << 7 | |
||
408 | src->swizzle[2] << 5 | |
||
409 | src->swizzle[1] << 3 | |
||
410 | src->swizzle[0] << 1 | |
||
411 | (src->vert_stride == BRW_VERTICAL_STRIDE_0 && |
||
412 | src->horz_stride == BRW_HORIZONTAL_STRIDE_0); |
||
413 | |||
414 | /* only the lower 20 bits are used */ |
||
415 | assert((dw_src[i] & 0xfffff) == dw_src[i]); |
||
416 | } |
||
417 | |||
418 | code[0] = dw0; |
||
419 | code[1] = dw1; |
||
420 | /* concatenate the bits of dw_src */ |
||
421 | code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0]; |
||
422 | code[3] = dw_src[2] << 10 | (dw_src[1] >> 11); |
||
423 | } |
||
424 | |||
425 | /** |
||
426 | * Sanity check the region parameters of the operands. |
||
427 | */ |
||
428 | static void |
||
429 | codegen_validate_region_restrictions(const struct codegen *cg) |
||
430 | { |
||
431 | const int exec_size_map[] = { |
||
432 | [BRW_EXECUTE_1] = 1, |
||
433 | [BRW_EXECUTE_2] = 2, |
||
434 | [BRW_EXECUTE_4] = 4, |
||
435 | [BRW_EXECUTE_8] = 8, |
||
436 | [BRW_EXECUTE_16] = 16, |
||
437 | [BRW_EXECUTE_32] = 32, |
||
438 | }; |
||
439 | const int width_map[] = { |
||
440 | [BRW_WIDTH_1] = 1, |
||
441 | [BRW_WIDTH_2] = 2, |
||
442 | [BRW_WIDTH_4] = 4, |
||
443 | [BRW_WIDTH_8] = 8, |
||
444 | [BRW_WIDTH_16] = 16, |
||
445 | }; |
||
446 | const int horz_stride_map[] = { |
||
447 | [BRW_HORIZONTAL_STRIDE_0] = 0, |
||
448 | [BRW_HORIZONTAL_STRIDE_1] = 1, |
||
449 | [BRW_HORIZONTAL_STRIDE_2] = 2, |
||
450 | [BRW_HORIZONTAL_STRIDE_4] = 4, |
||
451 | }; |
||
452 | const int vert_stride_map[] = { |
||
453 | [BRW_VERTICAL_STRIDE_0] = 0, |
||
454 | [BRW_VERTICAL_STRIDE_1] = 1, |
||
455 | [BRW_VERTICAL_STRIDE_2] = 2, |
||
456 | [BRW_VERTICAL_STRIDE_4] = 4, |
||
457 | [BRW_VERTICAL_STRIDE_8] = 8, |
||
458 | [BRW_VERTICAL_STRIDE_16] = 16, |
||
459 | [BRW_VERTICAL_STRIDE_32] = 32, |
||
460 | [BRW_VERTICAL_STRIDE_64] = 64, |
||
461 | [BRW_VERTICAL_STRIDE_128] = 128, |
||
462 | [BRW_VERTICAL_STRIDE_256] = 256, |
||
463 | [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0, |
||
464 | }; |
||
465 | const int exec_size = exec_size_map[cg->inst->exec_size]; |
||
466 | int i; |
||
467 | |||
468 | /* Sandy Bridge PRM, volume 4 part 2, page 94 */ |
||
469 | |||
470 | /* 1. (we don't do 32 anyway) */ |
||
471 | assert(exec_size <= 16); |
||
472 | |||
473 | for (i = 0; i < Elements(cg->src); i++) { |
||
474 | const int width = width_map[cg->src[i].width]; |
||
475 | const int horz_stride = horz_stride_map[cg->src[i].horz_stride]; |
||
476 | const int vert_stride = vert_stride_map[cg->src[i].vert_stride]; |
||
477 | |||
478 | if (src_is_null(cg, i)) |
||
479 | break; |
||
480 | |||
481 | /* 3. */ |
||
482 | assert(exec_size >= width); |
||
483 | |||
484 | if (exec_size == width) { |
||
485 | /* 4. & 5. */ |
||
486 | if (horz_stride) |
||
487 | assert(vert_stride == width * horz_stride); |
||
488 | } |
||
489 | |||
490 | if (width == 1) { |
||
491 | /* 6. */ |
||
492 | assert(horz_stride == 0); |
||
493 | |||
494 | /* 7. */ |
||
495 | if (exec_size == 1) |
||
496 | assert(vert_stride == 0); |
||
497 | } |
||
498 | |||
499 | /* 8. */ |
||
500 | if (!vert_stride && !horz_stride) |
||
501 | assert(width == 1); |
||
502 | } |
||
503 | |||
504 | /* derived from 10.1.2. & 10.2. */ |
||
505 | assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0); |
||
506 | } |
||
507 | |||
508 | static unsigned |
||
509 | translate_vfile(enum toy_file file) |
||
510 | { |
||
511 | switch (file) { |
||
512 | case TOY_FILE_ARF: return BRW_ARCHITECTURE_REGISTER_FILE; |
||
513 | case TOY_FILE_GRF: return BRW_GENERAL_REGISTER_FILE; |
||
514 | case TOY_FILE_MRF: return BRW_MESSAGE_REGISTER_FILE; |
||
515 | case TOY_FILE_IMM: return BRW_IMMEDIATE_VALUE; |
||
516 | default: |
||
517 | assert(!"unhandled toy file"); |
||
518 | return BRW_GENERAL_REGISTER_FILE; |
||
519 | } |
||
520 | } |
||
521 | |||
522 | static unsigned |
||
523 | translate_vtype(enum toy_type type) |
||
524 | { |
||
525 | switch (type) { |
||
526 | case TOY_TYPE_F: return BRW_REGISTER_TYPE_F; |
||
527 | case TOY_TYPE_D: return BRW_REGISTER_TYPE_D; |
||
528 | case TOY_TYPE_UD: return BRW_REGISTER_TYPE_UD; |
||
529 | case TOY_TYPE_W: return BRW_REGISTER_TYPE_W; |
||
530 | case TOY_TYPE_UW: return BRW_REGISTER_TYPE_UW; |
||
531 | case TOY_TYPE_V: return BRW_REGISTER_TYPE_V; |
||
532 | default: |
||
533 | assert(!"unhandled toy type"); |
||
534 | return BRW_REGISTER_TYPE_F; |
||
535 | } |
||
536 | } |
||
537 | |||
538 | static unsigned |
||
539 | translate_writemask(enum toy_writemask writemask) |
||
540 | { |
||
541 | /* TOY_WRITEMASK_* are compatible with the hardware definitions */ |
||
542 | assert(writemask <= 0xf); |
||
543 | return writemask; |
||
544 | } |
||
545 | |||
546 | static unsigned |
||
547 | translate_swizzle(enum toy_swizzle swizzle) |
||
548 | { |
||
549 | /* TOY_SWIZZLE_* are compatible with the hardware definitions */ |
||
550 | assert(swizzle <= 3); |
||
551 | return swizzle; |
||
552 | } |
||
553 | |||
554 | /** |
||
555 | * Prepare for generating an instruction. |
||
556 | */ |
||
557 | static void |
||
558 | codegen_prepare(struct codegen *cg, const struct toy_inst *inst, |
||
559 | int pc, int rect_linear_width) |
||
560 | { |
||
561 | int i; |
||
562 | |||
563 | cg->inst = inst; |
||
564 | cg->pc = pc; |
||
565 | |||
566 | cg->flag_sub_reg_num = 0; |
||
567 | |||
568 | cg->dst.file = translate_vfile(inst->dst.file); |
||
569 | cg->dst.type = translate_vtype(inst->dst.type); |
||
570 | cg->dst.indirect = inst->dst.indirect; |
||
571 | cg->dst.indirect_subreg = inst->dst.indirect_subreg; |
||
572 | cg->dst.origin = inst->dst.val32; |
||
573 | |||
574 | /* |
||
575 | * From the Sandy Bridge PRM, volume 4 part 2, page 81: |
||
576 | * |
||
577 | * "For a word or an unsigned word immediate data, software must |
||
578 | * replicate the same 16-bit immediate value to both the lower word |
||
579 | * and the high word of the 32-bit immediate field in an instruction." |
||
580 | */ |
||
581 | if (inst->dst.file == TOY_FILE_IMM) { |
||
582 | switch (inst->dst.type) { |
||
583 | case TOY_TYPE_W: |
||
584 | case TOY_TYPE_UW: |
||
585 | cg->dst.origin &= 0xffff; |
||
586 | cg->dst.origin |= cg->dst.origin << 16; |
||
587 | break; |
||
588 | default: |
||
589 | break; |
||
590 | } |
||
591 | } |
||
592 | |||
593 | cg->dst.writemask = translate_writemask(inst->dst.writemask); |
||
594 | |||
595 | switch (inst->dst.rect) { |
||
596 | case TOY_RECT_LINEAR: |
||
597 | cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1; |
||
598 | break; |
||
599 | default: |
||
600 | assert(!"unsupported dst region"); |
||
601 | cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1; |
||
602 | break; |
||
603 | } |
||
604 | |||
605 | for (i = 0; i < Elements(cg->src); i++) { |
||
606 | struct codegen_src *src = &cg->src[i]; |
||
607 | |||
608 | src->file = translate_vfile(inst->src[i].file); |
||
609 | src->type = translate_vtype(inst->src[i].type); |
||
610 | src->indirect = inst->src[i].indirect; |
||
611 | src->indirect_subreg = inst->src[i].indirect_subreg; |
||
612 | src->origin = inst->src[i].val32; |
||
613 | |||
614 | /* do the same for src */ |
||
615 | if (inst->dst.file == TOY_FILE_IMM) { |
||
616 | switch (inst->src[i].type) { |
||
617 | case TOY_TYPE_W: |
||
618 | case TOY_TYPE_UW: |
||
619 | src->origin &= 0xffff; |
||
620 | src->origin |= src->origin << 16; |
||
621 | break; |
||
622 | default: |
||
623 | break; |
||
624 | } |
||
625 | } |
||
626 | |||
627 | src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x); |
||
628 | src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y); |
||
629 | src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z); |
||
630 | src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w); |
||
631 | src->absolute = inst->src[i].absolute; |
||
632 | src->negate = inst->src[i].negate; |
||
633 | |||
634 | switch (inst->src[i].rect) { |
||
635 | case TOY_RECT_LINEAR: |
||
636 | switch (rect_linear_width) { |
||
637 | case 1: |
||
638 | src->vert_stride = BRW_VERTICAL_STRIDE_1; |
||
639 | src->width = BRW_WIDTH_1; |
||
640 | break; |
||
641 | case 2: |
||
642 | src->vert_stride = BRW_VERTICAL_STRIDE_2; |
||
643 | src->width = BRW_WIDTH_2; |
||
644 | break; |
||
645 | case 4: |
||
646 | src->vert_stride = BRW_VERTICAL_STRIDE_4; |
||
647 | src->width = BRW_WIDTH_4; |
||
648 | break; |
||
649 | case 8: |
||
650 | src->vert_stride = BRW_VERTICAL_STRIDE_8; |
||
651 | src->width = BRW_WIDTH_8; |
||
652 | break; |
||
653 | case 16: |
||
654 | src->vert_stride = BRW_VERTICAL_STRIDE_16; |
||
655 | src->width = BRW_WIDTH_16; |
||
656 | break; |
||
657 | default: |
||
658 | assert(!"unsupported TOY_RECT_LINEAR width"); |
||
659 | src->vert_stride = BRW_VERTICAL_STRIDE_1; |
||
660 | src->width = BRW_WIDTH_1; |
||
661 | break; |
||
662 | } |
||
663 | src->horz_stride = BRW_HORIZONTAL_STRIDE_1; |
||
664 | break; |
||
665 | case TOY_RECT_041: |
||
666 | src->vert_stride = BRW_VERTICAL_STRIDE_0; |
||
667 | src->width = BRW_WIDTH_4; |
||
668 | src->horz_stride = BRW_HORIZONTAL_STRIDE_1; |
||
669 | break; |
||
670 | case TOY_RECT_010: |
||
671 | src->vert_stride = BRW_VERTICAL_STRIDE_0; |
||
672 | src->width = BRW_WIDTH_1; |
||
673 | src->horz_stride = BRW_HORIZONTAL_STRIDE_0; |
||
674 | break; |
||
675 | case TOY_RECT_220: |
||
676 | src->vert_stride = BRW_VERTICAL_STRIDE_2; |
||
677 | src->width = BRW_WIDTH_2; |
||
678 | src->horz_stride = BRW_HORIZONTAL_STRIDE_0; |
||
679 | break; |
||
680 | case TOY_RECT_440: |
||
681 | src->vert_stride = BRW_VERTICAL_STRIDE_4; |
||
682 | src->width = BRW_WIDTH_4; |
||
683 | src->horz_stride = BRW_HORIZONTAL_STRIDE_0; |
||
684 | break; |
||
685 | case TOY_RECT_240: |
||
686 | src->vert_stride = BRW_VERTICAL_STRIDE_2; |
||
687 | src->width = BRW_WIDTH_4; |
||
688 | src->horz_stride = BRW_HORIZONTAL_STRIDE_0; |
||
689 | break; |
||
690 | default: |
||
691 | assert(!"unsupported src region"); |
||
692 | src->vert_stride = BRW_VERTICAL_STRIDE_1; |
||
693 | src->width = BRW_WIDTH_1; |
||
694 | src->horz_stride = BRW_HORIZONTAL_STRIDE_1; |
||
695 | break; |
||
696 | } |
||
697 | } |
||
698 | } |
||
699 | |||
700 | /** |
||
701 | * Generate HW shader code. The instructions should have been legalized. |
||
702 | */ |
||
703 | void * |
||
704 | toy_compiler_assemble(struct toy_compiler *tc, int *size) |
||
705 | { |
||
706 | const struct toy_inst *inst; |
||
707 | uint32_t *code; |
||
708 | int pc; |
||
709 | |||
710 | code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t)); |
||
711 | if (!code) |
||
712 | return NULL; |
||
713 | |||
714 | pc = 0; |
||
715 | tc_head(tc); |
||
716 | while ((inst = tc_next(tc)) != NULL) { |
||
717 | uint32_t *dw = &code[pc * 4]; |
||
718 | struct codegen cg; |
||
719 | |||
720 | if (pc >= tc->num_instructions) { |
||
721 | tc_fail(tc, "wrong instructoun count"); |
||
722 | break; |
||
723 | } |
||
724 | |||
725 | codegen_prepare(&cg, inst, pc, tc->rect_linear_width); |
||
726 | codegen_validate_region_restrictions(&cg); |
||
727 | |||
728 | switch (inst->opcode) { |
||
729 | case BRW_OPCODE_MAD: |
||
730 | codegen_inst_3src(&cg, dw); |
||
731 | break; |
||
732 | default: |
||
733 | codegen_inst(&cg, dw); |
||
734 | break; |
||
735 | } |
||
736 | |||
737 | pc++; |
||
738 | } |
||
739 | |||
740 | /* never return an invalid kernel */ |
||
741 | if (tc->fail) { |
||
742 | FREE(code); |
||
743 | return NULL; |
||
744 | } |
||
745 | |||
746 | if (size) |
||
747 | *size = pc * 4 * sizeof(uint32_t); |
||
748 | |||
749 | return code; |
||
750 | }><>>><>=>=>>=>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>>>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>><>><> |