Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ |
2 | |||
3 | /* |
||
4 | * Copyright (C) 2013 Rob Clark |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice (including the next |
||
14 | * paragraph) shall be included in all copies or substantial portions of the |
||
15 | * Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
23 | * SOFTWARE. |
||
24 | * |
||
25 | * Authors: |
||
26 | * Rob Clark |
||
27 | */ |
||
28 | |||
29 | #include <stdarg.h> |
||
30 | |||
31 | #include "pipe/p_state.h" |
||
32 | #include "util/u_string.h" |
||
33 | #include "util/u_memory.h" |
||
34 | #include "util/u_inlines.h" |
||
35 | #include "tgsi/tgsi_parse.h" |
||
36 | #include "tgsi/tgsi_ureg.h" |
||
37 | #include "tgsi/tgsi_info.h" |
||
38 | #include "tgsi/tgsi_strings.h" |
||
39 | #include "tgsi/tgsi_dump.h" |
||
40 | #include "tgsi/tgsi_scan.h" |
||
41 | |||
42 | #include "fd3_compiler.h" |
||
43 | #include "fd3_program.h" |
||
44 | #include "fd3_util.h" |
||
45 | |||
46 | #include "instr-a3xx.h" |
||
47 | #include "ir-a3xx.h" |
||
48 | |||
49 | /* ************************************************************************* */ |
||
/* TODO: split this out, or find an existing helper to use instead
 * (something like main/bitset.h).
 */

/* Number of register slots tracked for each precision (full/half). */
#define MAX_REG 256

/* Bitmask with one bit per register slot: 2 * MAX_REG bits in total,
 * packed eight to a byte.
 */
typedef uint8_t regmask_t[(2 * MAX_REG) / 8];
||
55 | |||
56 | static unsigned regmask_idx(struct ir3_register *reg) |
||
57 | { |
||
58 | unsigned num = reg->num; |
||
59 | assert(num < MAX_REG); |
||
60 | if (reg->flags & IR3_REG_HALF) |
||
61 | num += MAX_REG; |
||
62 | return num; |
||
63 | } |
||
64 | |||
4401 | Serge | 65 | static void regmask_set(regmask_t regmask, struct ir3_register *reg, |
66 | unsigned wrmask) |
||
4358 | Serge | 67 | { |
4401 | Serge | 68 | unsigned i; |
69 | for (i = 0; i < 4; i++) { |
||
70 | if (wrmask & (1 << i)) { |
||
71 | unsigned idx = regmask_idx(reg) + i; |
||
72 | regmask[idx / 8] |= 1 << (idx % 8); |
||
73 | } |
||
74 | } |
||
4358 | Serge | 75 | } |
76 | |||
77 | static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg) |
||
78 | { |
||
79 | unsigned idx = regmask_idx(reg); |
||
80 | return regmask[idx / 8] & (1 << (idx % 8)); |
||
81 | } |
||
82 | |||
83 | /* ************************************************************************* */ |
||
84 | |||
85 | struct fd3_compile_context { |
||
86 | const struct tgsi_token *tokens; |
||
87 | struct ir3_shader *ir; |
||
88 | struct fd3_shader_stateobj *so; |
||
89 | |||
90 | struct tgsi_parse_context parser; |
||
91 | unsigned type; |
||
92 | |||
93 | struct tgsi_shader_info info; |
||
94 | |||
95 | /* last input dst (for setting (ei) flag): */ |
||
96 | struct ir3_register *last_input; |
||
97 | |||
98 | unsigned next_inloc; |
||
99 | unsigned num_internal_temps; |
||
4401 | Serge | 100 | struct tgsi_src_register internal_temps[6]; |
4358 | Serge | 101 | |
102 | /* track registers which need to synchronize w/ "complex alu" cat3 |
||
103 | * instruction pipeline: |
||
104 | */ |
||
105 | regmask_t needs_ss; |
||
106 | |||
107 | /* track registers which need to synchronize with texture fetch |
||
108 | * pipeline: |
||
109 | */ |
||
110 | regmask_t needs_sy; |
||
111 | |||
112 | /* inputs start at r0, temporaries start after last input, and |
||
113 | * outputs start after last temporary. |
||
114 | * |
||
115 | * We could be more clever, because this is not a hw restriction, |
||
116 | * but probably best just to implement an optimizing pass to |
||
117 | * reduce the # of registers used and get rid of redundant mov's |
||
118 | * (to output register). |
||
119 | */ |
||
120 | unsigned base_reg[TGSI_FILE_COUNT]; |
||
121 | |||
122 | /* idx/slot for last compiler generated immediate */ |
||
123 | unsigned immediate_idx; |
||
124 | |||
125 | /* stack of branch instructions that start (potentially nested) |
||
126 | * branch instructions, so that we can fix up the branch targets |
||
127 | * so that we can fix up the branch target on the corresponding |
||
128 | * END instruction |
||
129 | */ |
||
130 | struct ir3_instruction *branch[16]; |
||
131 | unsigned int branch_count; |
||
132 | |||
133 | /* used when dst is same as one of the src, to avoid overwriting a |
||
134 | * src element before the remaining scalar instructions that make |
||
135 | * up the vector operation |
||
136 | */ |
||
137 | struct tgsi_dst_register tmp_dst; |
||
4401 | Serge | 138 | struct tgsi_src_register *tmp_src; |
4358 | Serge | 139 | }; |
140 | |||
4401 | Serge | 141 | |
/* forward declarations for helpers that are used before their
 * definition:
 */
static void
vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
		struct tgsi_dst_register *dst, int nsrcs, ...);

static void
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
		struct tgsi_src_register *src);
||
147 | |||
4358 | Serge | 148 | static unsigned |
149 | compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, |
||
150 | const struct tgsi_token *tokens) |
||
151 | { |
||
152 | unsigned ret; |
||
153 | |||
154 | ctx->tokens = tokens; |
||
155 | ctx->ir = so->ir; |
||
156 | ctx->so = so; |
||
157 | ctx->last_input = NULL; |
||
158 | ctx->next_inloc = 8; |
||
159 | ctx->num_internal_temps = 0; |
||
160 | ctx->branch_count = 0; |
||
161 | |||
162 | memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); |
||
163 | memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); |
||
164 | memset(ctx->base_reg, 0, sizeof(ctx->base_reg)); |
||
165 | |||
166 | tgsi_scan_shader(tokens, &ctx->info); |
||
167 | |||
168 | /* Immediates go after constants: */ |
||
169 | ctx->base_reg[TGSI_FILE_CONSTANT] = 0; |
||
170 | ctx->base_reg[TGSI_FILE_IMMEDIATE] = |
||
4401 | Serge | 171 | ctx->info.file_max[TGSI_FILE_CONSTANT] + 1; |
4358 | Serge | 172 | |
173 | /* Temporaries after outputs after inputs: */ |
||
174 | ctx->base_reg[TGSI_FILE_INPUT] = 0; |
||
175 | ctx->base_reg[TGSI_FILE_OUTPUT] = |
||
4401 | Serge | 176 | ctx->info.file_max[TGSI_FILE_INPUT] + 1; |
4358 | Serge | 177 | ctx->base_reg[TGSI_FILE_TEMPORARY] = |
4401 | Serge | 178 | ctx->info.file_max[TGSI_FILE_INPUT] + 1 + |
179 | ctx->info.file_max[TGSI_FILE_OUTPUT] + 1; |
||
4358 | Serge | 180 | |
181 | so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; |
||
4401 | Serge | 182 | ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 + |
183 | ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); |
||
4358 | Serge | 184 | |
185 | ret = tgsi_parse_init(&ctx->parser, tokens); |
||
186 | if (ret != TGSI_PARSE_OK) |
||
187 | return ret; |
||
188 | |||
189 | ctx->type = ctx->parser.FullHeader.Processor.Processor; |
||
190 | |||
191 | return ret; |
||
192 | } |
||
193 | |||
194 | static void |
||
4401 | Serge | 195 | compile_error(struct fd3_compile_context *ctx, const char *format, ...) |
196 | { |
||
197 | va_list ap; |
||
198 | va_start(ap, format); |
||
199 | _debug_vprintf(format, ap); |
||
200 | va_end(ap); |
||
201 | tgsi_dump(ctx->tokens, 0); |
||
202 | assert(0); |
||
203 | } |
||
204 | |||
/* Like assert(), but a failure is reported through compile_error(),
 * with the failed condition spelled out in the message.
 */
#define compile_assert(ctx, cond)                                     \
	do {                                                              \
		if (!(cond))                                                  \
			compile_error((ctx), "failed assert: "#cond"\n");         \
	} while (0)
||
208 | |||
209 | static void |
||
4358 | Serge | 210 | compile_free(struct fd3_compile_context *ctx) |
211 | { |
||
212 | tgsi_parse_free(&ctx->parser); |
||
213 | } |
||
214 | |||
215 | struct instr_translater { |
||
216 | void (*fxn)(const struct instr_translater *t, |
||
217 | struct fd3_compile_context *ctx, |
||
218 | struct tgsi_full_instruction *inst); |
||
219 | unsigned tgsi_opc; |
||
220 | opc_t opc; |
||
221 | opc_t hopc; /* opc to use for half_precision mode, if different */ |
||
222 | unsigned arg; |
||
223 | }; |
||
224 | |||
4401 | Serge | 225 | static unsigned |
226 | src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg) |
||
227 | { |
||
228 | unsigned flags = 0; |
||
229 | |||
230 | if (regmask_get(ctx->needs_ss, reg)) { |
||
231 | flags |= IR3_INSTR_SS; |
||
232 | memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); |
||
233 | } |
||
234 | |||
235 | if (regmask_get(ctx->needs_sy, reg)) { |
||
236 | flags |= IR3_INSTR_SY; |
||
237 | memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); |
||
238 | } |
||
239 | |||
240 | return flags; |
||
241 | } |
||
242 | |||
4358 | Serge | 243 | static struct ir3_register * |
244 | add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, |
||
245 | const struct tgsi_dst_register *dst, unsigned chan) |
||
246 | { |
||
247 | unsigned flags = 0, num = 0; |
||
248 | |||
249 | switch (dst->File) { |
||
250 | case TGSI_FILE_OUTPUT: |
||
251 | case TGSI_FILE_TEMPORARY: |
||
252 | num = dst->Index + ctx->base_reg[dst->File]; |
||
253 | break; |
||
254 | default: |
||
4401 | Serge | 255 | compile_error(ctx, "unsupported dst register file: %s\n", |
4358 | Serge | 256 | tgsi_file_name(dst->File)); |
257 | break; |
||
258 | } |
||
259 | |||
260 | if (ctx->so->half_precision) |
||
261 | flags |= IR3_REG_HALF; |
||
262 | |||
263 | return ir3_reg_create(instr, regid(num, chan), flags); |
||
264 | } |
||
265 | |||
266 | static struct ir3_register * |
||
267 | add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, |
||
268 | const struct tgsi_src_register *src, unsigned chan) |
||
269 | { |
||
270 | unsigned flags = 0, num = 0; |
||
271 | struct ir3_register *reg; |
||
272 | |||
273 | switch (src->File) { |
||
274 | case TGSI_FILE_IMMEDIATE: |
||
275 | /* TODO if possible, use actual immediate instead of const.. but |
||
276 | * TGSI has vec4 immediates, we can only embed scalar (of limited |
||
277 | * size, depending on instruction..) |
||
278 | */ |
||
279 | case TGSI_FILE_CONSTANT: |
||
280 | flags |= IR3_REG_CONST; |
||
281 | num = src->Index + ctx->base_reg[src->File]; |
||
282 | break; |
||
4401 | Serge | 283 | case TGSI_FILE_OUTPUT: |
284 | /* NOTE: we should only end up w/ OUTPUT file for things like |
||
285 | * clamp()'ing saturated dst instructions |
||
286 | */ |
||
4358 | Serge | 287 | case TGSI_FILE_INPUT: |
288 | case TGSI_FILE_TEMPORARY: |
||
289 | num = src->Index + ctx->base_reg[src->File]; |
||
290 | break; |
||
291 | default: |
||
4401 | Serge | 292 | compile_error(ctx, "unsupported src register file: %s\n", |
4358 | Serge | 293 | tgsi_file_name(src->File)); |
294 | break; |
||
295 | } |
||
296 | |||
297 | if (src->Absolute) |
||
298 | flags |= IR3_REG_ABS; |
||
299 | if (src->Negate) |
||
300 | flags |= IR3_REG_NEGATE; |
||
301 | if (ctx->so->half_precision) |
||
302 | flags |= IR3_REG_HALF; |
||
303 | |||
304 | reg = ir3_reg_create(instr, regid(num, chan), flags); |
||
305 | |||
4401 | Serge | 306 | instr->flags |= src_flags(ctx, reg); |
4358 | Serge | 307 | |
308 | return reg; |
||
309 | } |
||
310 | |||
311 | static void |
||
312 | src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) |
||
313 | { |
||
314 | src->File = dst->File; |
||
315 | src->Indirect = dst->Indirect; |
||
316 | src->Dimension = dst->Dimension; |
||
317 | src->Index = dst->Index; |
||
318 | src->Absolute = 0; |
||
319 | src->Negate = 0; |
||
320 | src->SwizzleX = TGSI_SWIZZLE_X; |
||
321 | src->SwizzleY = TGSI_SWIZZLE_Y; |
||
322 | src->SwizzleZ = TGSI_SWIZZLE_Z; |
||
323 | src->SwizzleW = TGSI_SWIZZLE_W; |
||
324 | } |
||
325 | |||
326 | /* Get internal-temp src/dst to use for a sequence of instructions |
||
327 | * generated by a single TGSI op. |
||
328 | */ |
||
4401 | Serge | 329 | static struct tgsi_src_register * |
4358 | Serge | 330 | get_internal_temp(struct fd3_compile_context *ctx, |
4401 | Serge | 331 | struct tgsi_dst_register *tmp_dst) |
4358 | Serge | 332 | { |
4401 | Serge | 333 | struct tgsi_src_register *tmp_src; |
4358 | Serge | 334 | int n; |
335 | |||
336 | tmp_dst->File = TGSI_FILE_TEMPORARY; |
||
337 | tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; |
||
338 | tmp_dst->Indirect = 0; |
||
339 | tmp_dst->Dimension = 0; |
||
340 | |||
341 | /* assign next temporary: */ |
||
342 | n = ctx->num_internal_temps++; |
||
4401 | Serge | 343 | compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); |
344 | tmp_src = &ctx->internal_temps[n]; |
||
4358 | Serge | 345 | |
4401 | Serge | 346 | tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; |
4358 | Serge | 347 | |
348 | src_from_dst(tmp_src, tmp_dst); |
||
4401 | Serge | 349 | |
350 | return tmp_src; |
||
4358 | Serge | 351 | } |
352 | |||
353 | /* same as get_internal_temp, but w/ src.xxxx (for instructions that |
||
354 | * replicate their results) |
||
355 | */ |
||
4401 | Serge | 356 | static struct tgsi_src_register * |
4358 | Serge | 357 | get_internal_temp_repl(struct fd3_compile_context *ctx, |
4401 | Serge | 358 | struct tgsi_dst_register *tmp_dst) |
4358 | Serge | 359 | { |
4401 | Serge | 360 | struct tgsi_src_register *tmp_src = |
361 | get_internal_temp(ctx, tmp_dst); |
||
4358 | Serge | 362 | tmp_src->SwizzleX = tmp_src->SwizzleY = |
363 | tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X; |
||
4401 | Serge | 364 | return tmp_src; |
4358 | Serge | 365 | } |
366 | |||
4401 | Serge | 367 | static inline bool |
368 | is_const(struct tgsi_src_register *src) |
||
369 | { |
||
370 | return (src->File == TGSI_FILE_CONSTANT) || |
||
371 | (src->File == TGSI_FILE_IMMEDIATE); |
||
372 | } |
||
373 | |||
374 | static type_t |
||
375 | get_ftype(struct fd3_compile_context *ctx) |
||
376 | { |
||
377 | return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; |
||
378 | } |
||
379 | |||
380 | static type_t |
||
381 | get_utype(struct fd3_compile_context *ctx) |
||
382 | { |
||
383 | return ctx->so->half_precision ? TYPE_U16 : TYPE_U32; |
||
384 | } |
||
385 | |||
386 | static unsigned |
||
387 | src_swiz(struct tgsi_src_register *src, int chan) |
||
388 | { |
||
389 | switch (chan) { |
||
390 | case 0: return src->SwizzleX; |
||
391 | case 1: return src->SwizzleY; |
||
392 | case 2: return src->SwizzleZ; |
||
393 | case 3: return src->SwizzleW; |
||
394 | } |
||
395 | assert(0); |
||
396 | return 0; |
||
397 | } |
||
398 | |||
399 | /* for instructions that cannot take a const register as src, if needed |
||
400 | * generate a move to temporary gpr: |
||
401 | */ |
||
402 | static struct tgsi_src_register * |
||
403 | get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) |
||
404 | { |
||
405 | struct tgsi_dst_register tmp_dst; |
||
406 | struct tgsi_src_register *tmp_src; |
||
407 | |||
408 | compile_assert(ctx, is_const(src)); |
||
409 | |||
410 | tmp_src = get_internal_temp(ctx, &tmp_dst); |
||
411 | |||
412 | create_mov(ctx, &tmp_dst, src); |
||
413 | |||
414 | return tmp_src; |
||
415 | } |
||
416 | |||
4358 | Serge | 417 | static void |
418 | get_immediate(struct fd3_compile_context *ctx, |
||
419 | struct tgsi_src_register *reg, uint32_t val) |
||
420 | { |
||
421 | unsigned neg, swiz, idx, i; |
||
422 | /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ |
||
423 | static const unsigned swiz2tgsi[] = { |
||
424 | TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, |
||
425 | }; |
||
426 | |||
427 | for (i = 0; i < ctx->immediate_idx; i++) { |
||
428 | swiz = i % 4; |
||
429 | idx = i / 4; |
||
430 | |||
431 | if (ctx->so->immediates[idx].val[swiz] == val) { |
||
432 | neg = 0; |
||
433 | break; |
||
434 | } |
||
435 | |||
436 | if (ctx->so->immediates[idx].val[swiz] == -val) { |
||
437 | neg = 1; |
||
438 | break; |
||
439 | } |
||
440 | } |
||
441 | |||
442 | if (i == ctx->immediate_idx) { |
||
443 | /* need to generate a new immediate: */ |
||
444 | swiz = i % 4; |
||
445 | idx = i / 4; |
||
446 | neg = 0; |
||
447 | ctx->so->immediates[idx].val[swiz] = val; |
||
448 | ctx->so->immediates_count = idx + 1; |
||
449 | ctx->immediate_idx++; |
||
450 | } |
||
451 | |||
452 | reg->File = TGSI_FILE_IMMEDIATE; |
||
453 | reg->Indirect = 0; |
||
454 | reg->Dimension = 0; |
||
455 | reg->Index = idx; |
||
456 | reg->Absolute = 0; |
||
457 | reg->Negate = neg; |
||
458 | reg->SwizzleX = swiz2tgsi[swiz]; |
||
459 | reg->SwizzleY = swiz2tgsi[swiz]; |
||
460 | reg->SwizzleZ = swiz2tgsi[swiz]; |
||
461 | reg->SwizzleW = swiz2tgsi[swiz]; |
||
462 | } |
||
463 | |||
464 | static void |
||
465 | create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, |
||
466 | struct tgsi_src_register *src) |
||
467 | { |
||
4401 | Serge | 468 | type_t type_mov = get_ftype(ctx); |
4358 | Serge | 469 | unsigned i; |
470 | |||
471 | for (i = 0; i < 4; i++) { |
||
472 | /* move to destination: */ |
||
473 | if (dst->WriteMask & (1 << i)) { |
||
474 | struct ir3_instruction *instr = |
||
475 | ir3_instr_create(ctx->ir, 1, 0); |
||
476 | instr->cat1.src_type = type_mov; |
||
477 | instr->cat1.dst_type = type_mov; |
||
478 | add_dst_reg(ctx, instr, dst, i); |
||
479 | add_src_reg(ctx, instr, src, src_swiz(src, i)); |
||
480 | } else { |
||
481 | ir3_instr_create(ctx->ir, 0, OPC_NOP); |
||
482 | } |
||
483 | } |
||
4401 | Serge | 484 | } |
4358 | Serge | 485 | |
4401 | Serge | 486 | static void |
487 | create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, |
||
488 | struct tgsi_src_register *minval, struct tgsi_src_register *maxval) |
||
489 | { |
||
490 | struct ir3_instruction *instr; |
||
491 | struct tgsi_src_register src; |
||
492 | |||
493 | src_from_dst(&src, dst); |
||
494 | |||
495 | instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); |
||
496 | vectorize(ctx, instr, dst, 2, &src, 0, minval, 0); |
||
497 | |||
498 | instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F); |
||
499 | vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0); |
||
4358 | Serge | 500 | } |
501 | |||
4401 | Serge | 502 | static void |
503 | create_clamp_imm(struct fd3_compile_context *ctx, |
||
504 | struct tgsi_dst_register *dst, |
||
505 | uint32_t minval, uint32_t maxval) |
||
506 | { |
||
507 | struct tgsi_src_register minconst, maxconst; |
||
508 | |||
509 | get_immediate(ctx, &minconst, minval); |
||
510 | get_immediate(ctx, &maxconst, maxval); |
||
511 | |||
512 | create_clamp(ctx, dst, &minconst, &maxconst); |
||
513 | } |
||
514 | |||
4358 | Serge | 515 | static struct tgsi_dst_register * |
516 | get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) |
||
517 | { |
||
518 | struct tgsi_dst_register *dst = &inst->Dst[0].Register; |
||
519 | unsigned i; |
||
520 | for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { |
||
521 | struct tgsi_src_register *src = &inst->Src[i].Register; |
||
522 | if ((src->File == dst->File) && (src->Index == dst->Index)) { |
||
4401 | Serge | 523 | ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); |
4358 | Serge | 524 | ctx->tmp_dst.WriteMask = dst->WriteMask; |
525 | dst = &ctx->tmp_dst; |
||
526 | break; |
||
527 | } |
||
528 | } |
||
529 | return dst; |
||
530 | } |
||
531 | |||
532 | static void |
||
533 | put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, |
||
534 | struct tgsi_dst_register *dst) |
||
535 | { |
||
536 | /* if necessary, add mov back into original dst: */ |
||
537 | if (dst != &inst->Dst[0].Register) { |
||
4401 | Serge | 538 | create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); |
4358 | Serge | 539 | } |
540 | } |
||
541 | |||
542 | /* helper to generate the necessary repeat and/or additional instructions |
||
543 | * to turn a scalar instruction into a vector operation: |
||
544 | */ |
||
545 | static void |
||
546 | vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, |
||
547 | struct tgsi_dst_register *dst, int nsrcs, ...) |
||
548 | { |
||
549 | va_list ap; |
||
550 | int i, j, n = 0; |
||
551 | |||
552 | add_dst_reg(ctx, instr, dst, 0); |
||
553 | |||
554 | va_start(ap, nsrcs); |
||
555 | for (j = 0; j < nsrcs; j++) { |
||
556 | struct tgsi_src_register *src = |
||
557 | va_arg(ap, struct tgsi_src_register *); |
||
558 | unsigned flags = va_arg(ap, unsigned); |
||
559 | add_src_reg(ctx, instr, src, 0)->flags |= flags; |
||
560 | } |
||
561 | va_end(ap); |
||
562 | |||
563 | for (i = 0; i < 4; i++) { |
||
564 | if (dst->WriteMask & (1 << i)) { |
||
565 | struct ir3_instruction *cur; |
||
566 | |||
567 | if (n++ == 0) { |
||
568 | cur = instr; |
||
569 | } else { |
||
570 | cur = ir3_instr_clone(instr); |
||
571 | cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP); |
||
572 | } |
||
573 | |||
574 | /* fix-up dst register component: */ |
||
575 | cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i); |
||
576 | |||
577 | /* fix-up src register component: */ |
||
578 | va_start(ap, nsrcs); |
||
579 | for (j = 0; j < nsrcs; j++) { |
||
580 | struct tgsi_src_register *src = |
||
581 | va_arg(ap, struct tgsi_src_register *); |
||
582 | (void)va_arg(ap, unsigned); |
||
583 | cur->regs[j+1]->num = |
||
584 | regid(cur->regs[j+1]->num >> 2, |
||
585 | src_swiz(src, i)); |
||
4401 | Serge | 586 | cur->flags |= src_flags(ctx, cur->regs[j+1]); |
4358 | Serge | 587 | } |
588 | va_end(ap); |
||
589 | } |
||
590 | } |
||
591 | |||
592 | /* pad w/ nop's.. at least until we are clever enough to |
||
593 | * figure out if we really need to.. |
||
594 | */ |
||
595 | for (; n < 4; n++) { |
||
596 | ir3_instr_create(instr->shader, 0, OPC_NOP); |
||
597 | } |
||
598 | } |
||
599 | |||
600 | /* |
||
601 | * Handlers for TGSI instructions which do not have a 1:1 mapping to |
||
602 | * native instructions: |
||
603 | */ |
||
604 | |||
4401 | Serge | 605 | static inline void |
606 | get_swiz(unsigned *swiz, struct tgsi_src_register *src) |
||
607 | { |
||
608 | swiz[0] = src->SwizzleX; |
||
609 | swiz[1] = src->SwizzleY; |
||
610 | swiz[2] = src->SwizzleZ; |
||
611 | swiz[3] = src->SwizzleW; |
||
612 | } |
||
613 | |||
4358 | Serge | 614 | static void |
615 | trans_dotp(const struct instr_translater *t, |
||
616 | struct fd3_compile_context *ctx, |
||
617 | struct tgsi_full_instruction *inst) |
||
618 | { |
||
619 | struct ir3_instruction *instr; |
||
620 | struct tgsi_dst_register tmp_dst; |
||
4401 | Serge | 621 | struct tgsi_src_register *tmp_src; |
4358 | Serge | 622 | struct tgsi_dst_register *dst = &inst->Dst[0].Register; |
623 | struct tgsi_src_register *src0 = &inst->Src[0].Register; |
||
624 | struct tgsi_src_register *src1 = &inst->Src[1].Register; |
||
4401 | Serge | 625 | unsigned swiz0[4]; |
626 | unsigned swiz1[4]; |
||
4358 | Serge | 627 | opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32; |
628 | unsigned n = t->arg; /* number of components */ |
||
4401 | Serge | 629 | unsigned i, swapped = 0; |
4358 | Serge | 630 | |
4401 | Serge | 631 | tmp_src = get_internal_temp_repl(ctx, &tmp_dst); |
4358 | Serge | 632 | |
4401 | Serge | 633 | /* in particular, can't handle const for src1 for cat3/mad: |
4358 | Serge | 634 | */ |
4401 | Serge | 635 | if (is_const(src1)) { |
636 | if (!is_const(src0)) { |
||
637 | struct tgsi_src_register *tmp; |
||
638 | tmp = src0; |
||
639 | src0 = src1; |
||
640 | src1 = tmp; |
||
641 | swapped = 1; |
||
642 | } else { |
||
643 | src0 = get_unconst(ctx, src0); |
||
644 | } |
||
645 | } |
||
4358 | Serge | 646 | |
4401 | Serge | 647 | get_swiz(swiz0, src0); |
648 | get_swiz(swiz1, src1); |
||
4358 | Serge | 649 | |
650 | instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); |
||
651 | add_dst_reg(ctx, instr, &tmp_dst, 0); |
||
652 | add_src_reg(ctx, instr, src0, swiz0[0]); |
||
653 | add_src_reg(ctx, instr, src1, swiz1[0]); |
||
654 | |||
655 | for (i = 1; i < n; i++) { |
||
656 | ir3_instr_create(ctx->ir, 0, OPC_NOP); |
||
657 | |||
658 | instr = ir3_instr_create(ctx->ir, 3, opc_mad); |
||
659 | add_dst_reg(ctx, instr, &tmp_dst, 0); |
||
660 | add_src_reg(ctx, instr, src0, swiz0[i]); |
||
661 | add_src_reg(ctx, instr, src1, swiz1[i]); |
||
4401 | Serge | 662 | add_src_reg(ctx, instr, tmp_src, 0); |
4358 | Serge | 663 | } |
664 | |||
665 | /* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */ |
||
666 | if (t->tgsi_opc == TGSI_OPCODE_DPH) { |
||
4401 | Serge | 667 | ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1; |
4358 | Serge | 668 | |
669 | instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); |
||
670 | add_dst_reg(ctx, instr, &tmp_dst, 0); |
||
4401 | Serge | 671 | if (swapped) |
672 | add_src_reg(ctx, instr, src0, swiz0[i]); |
||
673 | else |
||
674 | add_src_reg(ctx, instr, src1, swiz1[i]); |
||
675 | add_src_reg(ctx, instr, tmp_src, 0); |
||
4358 | Serge | 676 | |
677 | n++; |
||
678 | } |
||
679 | |||
4401 | Serge | 680 | ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2; |
4358 | Serge | 681 | |
4401 | Serge | 682 | create_mov(ctx, dst, tmp_src); |
4358 | Serge | 683 | } |
684 | |||
685 | /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ |
||
686 | static void |
||
687 | trans_lrp(const struct instr_translater *t, |
||
688 | struct fd3_compile_context *ctx, |
||
689 | struct tgsi_full_instruction *inst) |
||
690 | { |
||
691 | struct ir3_instruction *instr; |
||
692 | struct tgsi_dst_register tmp_dst1, tmp_dst2; |
||
4401 | Serge | 693 | struct tgsi_src_register *tmp_src1, *tmp_src2; |
4358 | Serge | 694 | struct tgsi_src_register tmp_const; |
4401 | Serge | 695 | struct tgsi_src_register *src0 = &inst->Src[0].Register; |
696 | struct tgsi_src_register *src1 = &inst->Src[1].Register; |
||
4358 | Serge | 697 | |
4401 | Serge | 698 | if (is_const(src0) && is_const(src1)) |
699 | src0 = get_unconst(ctx, src0); |
||
4358 | Serge | 700 | |
4401 | Serge | 701 | tmp_src1 = get_internal_temp(ctx, &tmp_dst1); |
702 | tmp_src2 = get_internal_temp(ctx, &tmp_dst2); |
||
703 | |||
4358 | Serge | 704 | get_immediate(ctx, &tmp_const, fui(1.0)); |
705 | |||
706 | /* tmp1 = (a * b) */ |
||
707 | instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); |
||
4401 | Serge | 708 | vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0); |
4358 | Serge | 709 | |
710 | /* tmp2 = (1 - a) */ |
||
711 | instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); |
||
4401 | Serge | 712 | vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0, |
713 | src0, IR3_REG_NEGATE); |
||
4358 | Serge | 714 | |
715 | /* tmp2 = tmp2 * c */ |
||
716 | instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); |
||
717 | vectorize(ctx, instr, &tmp_dst2, 2, |
||
4401 | Serge | 718 | tmp_src2, 0, |
4358 | Serge | 719 | &inst->Src[2].Register, 0); |
720 | |||
721 | /* dst = tmp1 + tmp2 */ |
||
722 | instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); |
||
723 | vectorize(ctx, instr, &inst->Dst[0].Register, 2, |
||
4401 | Serge | 724 | tmp_src1, 0, |
725 | tmp_src2, 0); |
||
4358 | Serge | 726 | } |
727 | |||
728 | /* FRC(x) = x - FLOOR(x) */ |
||
729 | static void |
||
730 | trans_frac(const struct instr_translater *t, |
||
731 | struct fd3_compile_context *ctx, |
||
732 | struct tgsi_full_instruction *inst) |
||
733 | { |
||
734 | struct ir3_instruction *instr; |
||
735 | struct tgsi_dst_register tmp_dst; |
||
4401 | Serge | 736 | struct tgsi_src_register *tmp_src; |
4358 | Serge | 737 | |
4401 | Serge | 738 | tmp_src = get_internal_temp(ctx, &tmp_dst); |
4358 | Serge | 739 | |
740 | /* tmp = FLOOR(x) */ |
||
741 | instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F); |
||
742 | vectorize(ctx, instr, &tmp_dst, 1, |
||
743 | &inst->Src[0].Register, 0); |
||
744 | |||
745 | /* dst = x - tmp */ |
||
746 | instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); |
||
747 | vectorize(ctx, instr, &inst->Dst[0].Register, 2, |
||
748 | &inst->Src[0].Register, 0, |
||
4401 | Serge | 749 | tmp_src, IR3_REG_NEGATE); |
4358 | Serge | 750 | } |
751 | |||
752 | /* POW(a,b) = EXP2(b * LOG2(a)) */ |
||
753 | static void |
||
754 | trans_pow(const struct instr_translater *t, |
||
755 | struct fd3_compile_context *ctx, |
||
756 | struct tgsi_full_instruction *inst) |
||
757 | { |
||
758 | struct ir3_instruction *instr; |
||
759 | struct ir3_register *r; |
||
760 | struct tgsi_dst_register tmp_dst; |
||
4401 | Serge | 761 | struct tgsi_src_register *tmp_src; |
4358 | Serge | 762 | struct tgsi_dst_register *dst = &inst->Dst[0].Register; |
763 | struct tgsi_src_register *src0 = &inst->Src[0].Register; |
||
764 | struct tgsi_src_register *src1 = &inst->Src[1].Register; |
||
765 | |||
4401 | Serge | 766 | tmp_src = get_internal_temp_repl(ctx, &tmp_dst); |
4358 | Serge | 767 | |
768 | /* log2 Rtmp, Rsrc0 */ |
||
769 | ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; |
||
770 | instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2); |
||
771 | r = add_dst_reg(ctx, instr, &tmp_dst, 0); |
||
772 | add_src_reg(ctx, instr, src0, src0->SwizzleX); |
||
4401 | Serge | 773 | regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); |
4358 | Serge | 774 | |
775 | /* mul.f Rtmp, Rtmp, Rsrc1 */ |
||
776 | instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); |
||
777 | add_dst_reg(ctx, instr, &tmp_dst, 0); |
||
4401 | Serge | 778 | add_src_reg(ctx, instr, tmp_src, 0); |
4358 | Serge | 779 | add_src_reg(ctx, instr, src1, src1->SwizzleX); |
780 | |||
781 | /* blob compiler seems to ensure there are at least 6 instructions |
||
782 | * between a "simple" (non-cat4) instruction and a dependent cat4.. |
||
783 | * probably we need to handle this in some other places too. |
||
784 | */ |
||
785 | ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; |
||
786 | |||
787 | /* exp2 Rdst, Rtmp */ |
||
788 | instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); |
||
789 | r = add_dst_reg(ctx, instr, &tmp_dst, 0); |
||
4401 | Serge | 790 | add_src_reg(ctx, instr, tmp_src, 0); |
791 | regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X); |
||
4358 | Serge | 792 | |
4401 | Serge | 793 | create_mov(ctx, dst, tmp_src); |
4358 | Serge | 794 | } |
795 | |||
796 | /* texture fetch/sample instructions: */ |
||
797 | static void |
||
798 | trans_samp(const struct instr_translater *t, |
||
799 | struct fd3_compile_context *ctx, |
||
800 | struct tgsi_full_instruction *inst) |
||
801 | { |
||
802 | struct ir3_register *r; |
||
803 | struct ir3_instruction *instr; |
||
804 | struct tgsi_src_register *coord = &inst->Src[0].Register; |
||
805 | struct tgsi_src_register *samp = &inst->Src[1].Register; |
||
806 | unsigned tex = inst->Texture.Texture; |
||
807 | int8_t *order; |
||
808 | unsigned i, j, flags = 0; |
||
809 | |||
810 | switch (t->arg) { |
||
811 | case TGSI_OPCODE_TEX: |
||
812 | order = (tex == TGSI_TEXTURE_2D) ? |
||
813 | (int8_t[4]){ 0, 1, -1, -1 } : /* 2D */ |
||
814 | (int8_t[4]){ 0, 1, 2, -1 }; /* 3D */ |
||
815 | break; |
||
816 | case TGSI_OPCODE_TXP: |
||
817 | order = (tex == TGSI_TEXTURE_2D) ? |
||
818 | (int8_t[4]){ 0, 1, 3, -1 } : /* 2D */ |
||
819 | (int8_t[4]){ 0, 1, 2, 3 }; /* 3D */ |
||
820 | flags |= IR3_INSTR_P; |
||
821 | break; |
||
822 | default: |
||
4401 | Serge | 823 | compile_assert(ctx, 0); |
4358 | Serge | 824 | break; |
825 | } |
||
826 | |||
827 | if (tex == TGSI_TEXTURE_3D) |
||
828 | flags |= IR3_INSTR_3D; |
||
829 | |||
830 | /* The texture sample instructions need to coord in successive |
||
831 | * registers/components (ie. src.xy but not src.yx). And TXP |
||
832 | * needs the .w component in .z for 2D.. so in some cases we |
||
833 | * might need to emit some mov instructions to shuffle things |
||
834 | * around: |
||
835 | */ |
||
836 | for (i = 1; (i < 4) && (order[i] >= 0); i++) { |
||
837 | if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) { |
||
4401 | Serge | 838 | struct tgsi_dst_register tmp_dst; |
839 | struct tgsi_src_register *tmp_src; |
||
4358 | Serge | 840 | |
4401 | Serge | 841 | type_t type_mov = get_ftype(ctx); |
842 | |||
4358 | Serge | 843 | /* need to move things around: */ |
4401 | Serge | 844 | tmp_src = get_internal_temp(ctx, &tmp_dst); |
4358 | Serge | 845 | |
846 | for (j = 0; (j < 4) && (order[j] >= 0); j++) { |
||
847 | instr = ir3_instr_create(ctx->ir, 1, 0); |
||
848 | instr->cat1.src_type = type_mov; |
||
849 | instr->cat1.dst_type = type_mov; |
||
850 | add_dst_reg(ctx, instr, &tmp_dst, j); |
||
851 | add_src_reg(ctx, instr, coord, |
||
852 | src_swiz(coord, order[j])); |
||
853 | } |
||
854 | |||
4401 | Serge | 855 | coord = tmp_src; |
4358 | Serge | 856 | |
857 | if (j < 4) |
||
858 | ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1; |
||
859 | |||
860 | break; |
||
861 | } |
||
862 | } |
||
863 | |||
864 | instr = ir3_instr_create(ctx->ir, 5, t->opc); |
||
4401 | Serge | 865 | instr->cat5.type = get_ftype(ctx); |
4358 | Serge | 866 | instr->cat5.samp = samp->Index; |
867 | instr->cat5.tex = samp->Index; |
||
868 | instr->flags |= flags; |
||
869 | |||
870 | r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0); |
||
871 | r->wrmask = inst->Dst[0].Register.WriteMask; |
||
872 | |||
873 | add_src_reg(ctx, instr, coord, coord->SwizzleX); |
||
874 | |||
4401 | Serge | 875 | regmask_set(ctx->needs_sy, r, r->wrmask); |
4358 | Serge | 876 | } |
877 | |||
4401 | Serge | 878 | /* |
879 | * SEQ(a,b) = (a == b) ? 1.0 : 0.0 |
||
880 | * cmps.f.eq tmp0, b, a |
||
881 | * cov.u16f16 dst, tmp0 |
||
882 | * |
||
883 | * SNE(a,b) = (a != b) ? 1.0 : 0.0 |
||
884 | * cmps.f.eq tmp0, b, a |
||
885 | * add.s tmp0, tmp0, -1 |
||
886 | * sel.f16 dst, {0.0}, tmp0, {1.0} |
||
887 | * |
||
888 | * SGE(a,b) = (a >= b) ? 1.0 : 0.0 |
||
889 | * cmps.f.ge tmp0, a, b |
||
890 | * cov.u16f16 dst, tmp0 |
||
891 | * |
||
892 | * SLE(a,b) = (a <= b) ? 1.0 : 0.0 |
||
893 | * cmps.f.ge tmp0, b, a |
||
894 | * cov.u16f16 dst, tmp0 |
||
895 | * |
||
896 | * SGT(a,b) = (a > b) ? 1.0 : 0.0 |
||
897 | * cmps.f.ge tmp0, b, a |
||
898 | * add.s tmp0, tmp0, -1 |
||
899 | * sel.f16 dst, {0.0}, tmp0, {1.0} |
||
900 | * |
||
901 | * SLT(a,b) = (a < b) ? 1.0 : 0.0 |
||
902 | * cmps.f.ge tmp0, a, b |
||
903 | * add.s tmp0, tmp0, -1 |
||
904 | * sel.f16 dst, {0.0}, tmp0, {1.0} |
||
905 | * |
||
906 | * CMP(a,b,c) = (a < 0.0) ? b : c |
||
907 | * cmps.f.ge tmp0, a, {0.0} |
||
908 | * add.s tmp0, tmp0, -1 |
||
909 | * sel.f16 dst, c, tmp0, b |
||
910 | */ |
||
4358 | Serge | 911 | static void |
912 | trans_cmp(const struct instr_translater *t, |
||
913 | struct fd3_compile_context *ctx, |
||
914 | struct tgsi_full_instruction *inst) |
||
915 | { |
||
916 | struct ir3_instruction *instr; |
||
917 | struct tgsi_dst_register tmp_dst; |
||
4401 | Serge | 918 | struct tgsi_src_register *tmp_src; |
919 | struct tgsi_src_register constval0, constval1; |
||
920 | /* final instruction for CMP() uses orig src1 and src2: */ |
||
4358 | Serge | 921 | struct tgsi_dst_register *dst = get_dst(ctx, inst); |
4401 | Serge | 922 | struct tgsi_src_register *a0, *a1; |
923 | unsigned condition; |
||
4358 | Serge | 924 | |
4401 | Serge | 925 | tmp_src = get_internal_temp(ctx, &tmp_dst); |
4358 | Serge | 926 | |
4401 | Serge | 927 | switch (t->tgsi_opc) { |
928 | case TGSI_OPCODE_SEQ: |
||
929 | case TGSI_OPCODE_SNE: |
||
930 | a0 = &inst->Src[1].Register; /* b */ |
||
931 | a1 = &inst->Src[0].Register; /* a */ |
||
932 | condition = IR3_COND_EQ; |
||
933 | break; |
||
934 | case TGSI_OPCODE_SGE: |
||
935 | case TGSI_OPCODE_SLT: |
||
936 | a0 = &inst->Src[0].Register; /* a */ |
||
937 | a1 = &inst->Src[1].Register; /* b */ |
||
938 | condition = IR3_COND_GE; |
||
939 | break; |
||
940 | case TGSI_OPCODE_SLE: |
||
941 | case TGSI_OPCODE_SGT: |
||
942 | a0 = &inst->Src[1].Register; /* b */ |
||
943 | a1 = &inst->Src[0].Register; /* a */ |
||
944 | condition = IR3_COND_GE; |
||
945 | break; |
||
946 | case TGSI_OPCODE_CMP: |
||
947 | get_immediate(ctx, &constval0, fui(0.0)); |
||
948 | a0 = &inst->Src[0].Register; /* a */ |
||
949 | a1 = &constval0; /* {0.0} */ |
||
950 | condition = IR3_COND_GE; |
||
951 | break; |
||
952 | default: |
||
953 | compile_assert(ctx, 0); |
||
954 | return; |
||
955 | } |
||
956 | |||
957 | if (is_const(a0) && is_const(a1)) |
||
958 | a0 = get_unconst(ctx, a0); |
||
959 | |||
960 | /* cmps.f.ge tmp, a0, a1 */ |
||
4358 | Serge | 961 | instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); |
4401 | Serge | 962 | instr->cat2.condition = condition; |
963 | vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); |
||
4358 | Serge | 964 | |
4401 | Serge | 965 | switch (t->tgsi_opc) { |
966 | case TGSI_OPCODE_SEQ: |
||
967 | case TGSI_OPCODE_SGE: |
||
968 | case TGSI_OPCODE_SLE: |
||
969 | /* cov.u16f16 dst, tmp0 */ |
||
970 | instr = ir3_instr_create(ctx->ir, 1, 0); |
||
971 | instr->cat1.src_type = get_utype(ctx); |
||
972 | instr->cat1.dst_type = get_ftype(ctx); |
||
973 | vectorize(ctx, instr, dst, 1, tmp_src, 0); |
||
974 | break; |
||
975 | case TGSI_OPCODE_SNE: |
||
976 | case TGSI_OPCODE_SGT: |
||
977 | case TGSI_OPCODE_SLT: |
||
978 | case TGSI_OPCODE_CMP: |
||
979 | /* add.s tmp, tmp, -1 */ |
||
980 | instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); |
||
981 | instr->repeat = 3; |
||
982 | add_dst_reg(ctx, instr, &tmp_dst, 0); |
||
983 | add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R; |
||
984 | ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; |
||
4358 | Serge | 985 | |
4401 | Serge | 986 | if (t->tgsi_opc == TGSI_OPCODE_CMP) { |
987 | /* sel.{f32,f16} dst, src2, tmp, src1 */ |
||
988 | instr = ir3_instr_create(ctx->ir, 3, |
||
989 | ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); |
||
990 | vectorize(ctx, instr, dst, 3, |
||
991 | &inst->Src[2].Register, 0, |
||
992 | tmp_src, 0, |
||
993 | &inst->Src[1].Register, 0); |
||
994 | } else { |
||
995 | get_immediate(ctx, &constval0, fui(0.0)); |
||
996 | get_immediate(ctx, &constval1, fui(1.0)); |
||
997 | /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ |
||
998 | instr = ir3_instr_create(ctx->ir, 3, |
||
999 | ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); |
||
1000 | vectorize(ctx, instr, dst, 3, |
||
1001 | &constval0, 0, tmp_src, 0, &constval1, 0); |
||
1002 | } |
||
4358 | Serge | 1003 | |
4401 | Serge | 1004 | break; |
1005 | } |
||
1006 | |||
4358 | Serge | 1007 | put_dst(ctx, inst, dst); |
1008 | } |
||
1009 | |||
1010 | /* |
||
1011 | * Conditional / Flow control |
||
1012 | */ |
||
1013 | |||
1014 | static unsigned |
||
1015 | find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr) |
||
1016 | { |
||
1017 | unsigned i; |
||
1018 | for (i = 0; i < ctx->ir->instrs_count; i++) |
||
1019 | if (ctx->ir->instrs[i] == instr) |
||
1020 | return i; |
||
1021 | return ~0; |
||
1022 | } |
||
1023 | |||
1024 | static void |
||
1025 | push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr) |
||
1026 | { |
||
1027 | ctx->branch[ctx->branch_count++] = instr; |
||
1028 | } |
||
1029 | |||
1030 | static void |
||
1031 | pop_branch(struct fd3_compile_context *ctx) |
||
1032 | { |
||
1033 | struct ir3_instruction *instr; |
||
1034 | |||
1035 | /* if we were clever enough, we'd patch this up after the fact, |
||
1036 | * and set (jp) flag on whatever the next instruction was, rather |
||
1037 | * than inserting an extra nop.. |
||
1038 | */ |
||
1039 | instr = ir3_instr_create(ctx->ir, 0, OPC_NOP); |
||
1040 | instr->flags |= IR3_INSTR_JP; |
||
1041 | |||
1042 | /* pop the branch instruction from the stack and fix up branch target: */ |
||
1043 | instr = ctx->branch[--ctx->branch_count]; |
||
1044 | instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1; |
||
1045 | } |
||
1046 | |||
1047 | /* We probably don't really want to translate if/else/endif into branches.. |
||
1048 | * the blob driver evaluates both legs of the if and then uses the sel |
||
1049 | * instruction to pick which sides of the branch to "keep".. but figuring |
||
1050 | * that out will take somewhat more compiler smarts. So hopefully branches |
||
1051 | * don't kill performance too badly. |
||
1052 | */ |
||
1053 | static void |
||
1054 | trans_if(const struct instr_translater *t, |
||
1055 | struct fd3_compile_context *ctx, |
||
1056 | struct tgsi_full_instruction *inst) |
||
1057 | { |
||
1058 | struct ir3_instruction *instr; |
||
1059 | struct tgsi_src_register *src = &inst->Src[0].Register; |
||
1060 | struct tgsi_src_register constval; |
||
1061 | |||
1062 | get_immediate(ctx, &constval, fui(0.0)); |
||
1063 | |||
4401 | Serge | 1064 | if (is_const(src)) |
1065 | src = get_unconst(ctx, src); |
||
1066 | |||
4358 | Serge | 1067 | instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); |
1068 | ir3_reg_create(instr, regid(REG_P0, 0), 0); |
||
4401 | Serge | 1069 | add_src_reg(ctx, instr, src, src->SwizzleX); |
4358 | Serge | 1070 | add_src_reg(ctx, instr, &constval, constval.SwizzleX); |
1071 | instr->cat2.condition = IR3_COND_EQ; |
||
1072 | |||
1073 | instr = ir3_instr_create(ctx->ir, 0, OPC_BR); |
||
1074 | push_branch(ctx, instr); |
||
1075 | } |
||
1076 | |||
1077 | static void |
||
1078 | trans_else(const struct instr_translater *t, |
||
1079 | struct fd3_compile_context *ctx, |
||
1080 | struct tgsi_full_instruction *inst) |
||
1081 | { |
||
1082 | struct ir3_instruction *instr; |
||
1083 | |||
1084 | /* for first half of if/else/endif, generate a jump past the else: */ |
||
1085 | instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP); |
||
1086 | |||
1087 | pop_branch(ctx); |
||
1088 | push_branch(ctx, instr); |
||
1089 | } |
||
1090 | |||
/*
 * ENDIF: resolve whichever branch is on top of the branch stack (the
 * one pushed by the matching IF or ELSE) so that it targets this point.
 */
static void
trans_endif(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	(void)t;
	(void)inst;

	pop_branch(ctx);
}
||
1098 | |||
1099 | /* |
||
1100 | * Handlers for TGSI instructions which do have 1:1 mapping to native |
||
1101 | * instructions: |
||
1102 | */ |
||
1103 | |||
1104 | static void |
||
1105 | instr_cat0(const struct instr_translater *t, |
||
1106 | struct fd3_compile_context *ctx, |
||
1107 | struct tgsi_full_instruction *inst) |
||
1108 | { |
||
1109 | ir3_instr_create(ctx->ir, 0, t->opc); |
||
1110 | } |
||
1111 | |||
1112 | static void |
||
1113 | instr_cat1(const struct instr_translater *t, |
||
1114 | struct fd3_compile_context *ctx, |
||
1115 | struct tgsi_full_instruction *inst) |
||
1116 | { |
||
1117 | struct tgsi_dst_register *dst = get_dst(ctx, inst); |
||
1118 | struct tgsi_src_register *src = &inst->Src[0].Register; |
||
1119 | |||
1120 | /* mov instructions can't handle a negate on src: */ |
||
1121 | if (src->Negate) { |
||
1122 | struct tgsi_src_register constval; |
||
1123 | struct ir3_instruction *instr; |
||
1124 | |||
1125 | /* since right now, we are using uniformly either TYPE_F16 or |
||
1126 | * TYPE_F32, and we don't utilize the conversion possibilities |
||
1127 | * of mov instructions, we can get away with substituting an |
||
1128 | * add.f which can handle negate. Might need to revisit this |
||
1129 | * in the future if we start supporting widening/narrowing or |
||
1130 | * conversion to/from integer.. |
||
1131 | */ |
||
1132 | instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); |
||
1133 | get_immediate(ctx, &constval, fui(0.0)); |
||
1134 | vectorize(ctx, instr, dst, 2, src, 0, &constval, 0); |
||
1135 | } else { |
||
1136 | create_mov(ctx, dst, src); |
||
1137 | /* create_mov() generates vector sequence, so no vectorize() */ |
||
1138 | } |
||
1139 | put_dst(ctx, inst, dst); |
||
1140 | } |
||
1141 | |||
1142 | static void |
||
1143 | instr_cat2(const struct instr_translater *t, |
||
1144 | struct fd3_compile_context *ctx, |
||
1145 | struct tgsi_full_instruction *inst) |
||
1146 | { |
||
1147 | struct tgsi_dst_register *dst = get_dst(ctx, inst); |
||
4401 | Serge | 1148 | struct tgsi_src_register *src0 = &inst->Src[0].Register; |
1149 | struct tgsi_src_register *src1 = &inst->Src[1].Register; |
||
4358 | Serge | 1150 | struct ir3_instruction *instr; |
1151 | unsigned src0_flags = 0; |
||
1152 | |||
1153 | switch (t->tgsi_opc) { |
||
1154 | case TGSI_OPCODE_ABS: |
||
1155 | src0_flags = IR3_REG_ABS; |
||
1156 | break; |
||
1157 | } |
||
1158 | |||
1159 | switch (t->opc) { |
||
1160 | case OPC_ABSNEG_F: |
||
1161 | case OPC_ABSNEG_S: |
||
1162 | case OPC_CLZ_B: |
||
1163 | case OPC_CLZ_S: |
||
1164 | case OPC_SIGN_F: |
||
1165 | case OPC_FLOOR_F: |
||
1166 | case OPC_CEIL_F: |
||
1167 | case OPC_RNDNE_F: |
||
1168 | case OPC_RNDAZ_F: |
||
1169 | case OPC_TRUNC_F: |
||
1170 | case OPC_NOT_B: |
||
1171 | case OPC_BFREV_B: |
||
1172 | case OPC_SETRM: |
||
1173 | case OPC_CBITS_B: |
||
1174 | /* these only have one src reg */ |
||
4401 | Serge | 1175 | instr = ir3_instr_create(ctx->ir, 2, t->opc); |
1176 | vectorize(ctx, instr, dst, 1, src0, src0_flags); |
||
4358 | Serge | 1177 | break; |
1178 | default: |
||
4401 | Serge | 1179 | if (is_const(src0) && is_const(src1)) |
1180 | src0 = get_unconst(ctx, src0); |
||
1181 | |||
1182 | instr = ir3_instr_create(ctx->ir, 2, t->opc); |
||
1183 | vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0); |
||
4358 | Serge | 1184 | break; |
1185 | } |
||
1186 | |||
1187 | put_dst(ctx, inst, dst); |
||
1188 | } |
||
1189 | |||
4401 | Serge | 1190 | static bool is_mad(opc_t opc) |
1191 | { |
||
1192 | switch (opc) { |
||
1193 | case OPC_MAD_U16: |
||
1194 | case OPC_MADSH_U16: |
||
1195 | case OPC_MAD_S16: |
||
1196 | case OPC_MADSH_M16: |
||
1197 | case OPC_MAD_U24: |
||
1198 | case OPC_MAD_S24: |
||
1199 | case OPC_MAD_F16: |
||
1200 | case OPC_MAD_F32: |
||
1201 | return true; |
||
1202 | default: |
||
1203 | return false; |
||
1204 | } |
||
1205 | } |
||
1206 | |||
4358 | Serge | 1207 | static void |
1208 | instr_cat3(const struct instr_translater *t, |
||
1209 | struct fd3_compile_context *ctx, |
||
1210 | struct tgsi_full_instruction *inst) |
||
1211 | { |
||
1212 | struct tgsi_dst_register *dst = get_dst(ctx, inst); |
||
4401 | Serge | 1213 | struct tgsi_src_register *src0 = &inst->Src[0].Register; |
4358 | Serge | 1214 | struct tgsi_src_register *src1 = &inst->Src[1].Register; |
1215 | struct ir3_instruction *instr; |
||
1216 | |||
4401 | Serge | 1217 | /* in particular, can't handle const for src1 for cat3.. |
1218 | * for mad, we can swap first two src's if needed: |
||
4358 | Serge | 1219 | */ |
4401 | Serge | 1220 | if (is_const(src1)) { |
1221 | if (is_mad(t->opc) && !is_const(src0)) { |
||
1222 | struct tgsi_src_register *tmp; |
||
1223 | tmp = src0; |
||
1224 | src0 = src1; |
||
1225 | src1 = tmp; |
||
1226 | } else { |
||
1227 | src0 = get_unconst(ctx, src0); |
||
1228 | } |
||
4358 | Serge | 1229 | } |
1230 | |||
1231 | instr = ir3_instr_create(ctx->ir, 3, |
||
1232 | ctx->so->half_precision ? t->hopc : t->opc); |
||
4401 | Serge | 1233 | vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, |
4358 | Serge | 1234 | &inst->Src[2].Register, 0); |
1235 | put_dst(ctx, inst, dst); |
||
1236 | } |
||
1237 | |||
1238 | static void |
||
1239 | instr_cat4(const struct instr_translater *t, |
||
1240 | struct fd3_compile_context *ctx, |
||
1241 | struct tgsi_full_instruction *inst) |
||
1242 | { |
||
1243 | struct tgsi_dst_register *dst = get_dst(ctx, inst); |
||
4401 | Serge | 1244 | struct tgsi_src_register *src = &inst->Src[0].Register; |
4358 | Serge | 1245 | struct ir3_instruction *instr; |
1246 | |||
4401 | Serge | 1247 | /* seems like blob compiler avoids const as src.. */ |
1248 | if (is_const(src)) |
||
1249 | src = get_unconst(ctx, src); |
||
1250 | |||
4358 | Serge | 1251 | ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; |
1252 | instr = ir3_instr_create(ctx->ir, 4, t->opc); |
||
1253 | |||
4401 | Serge | 1254 | vectorize(ctx, instr, dst, 1, src, 0); |
4358 | Serge | 1255 | |
4401 | Serge | 1256 | regmask_set(ctx->needs_ss, instr->regs[0], |
1257 | inst->Dst[0].Register.WriteMask); |
||
4358 | Serge | 1258 | |
1259 | put_dst(ctx, inst, dst); |
||
1260 | } |
||
1261 | |||
1262 | static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { |
||
1263 | #define INSTR(n, f, ...) \ |
||
1264 | [TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ } |
||
1265 | |||
1266 | INSTR(MOV, instr_cat1), |
||
1267 | INSTR(RCP, instr_cat4, .opc = OPC_RCP), |
||
1268 | INSTR(RSQ, instr_cat4, .opc = OPC_RSQ), |
||
1269 | INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), |
||
1270 | INSTR(MUL, instr_cat2, .opc = OPC_MUL_F), |
||
1271 | INSTR(ADD, instr_cat2, .opc = OPC_ADD_F), |
||
1272 | INSTR(DP2, trans_dotp, .arg = 2), |
||
1273 | INSTR(DP3, trans_dotp, .arg = 3), |
||
1274 | INSTR(DP4, trans_dotp, .arg = 4), |
||
1275 | INSTR(DPH, trans_dotp, .arg = 3), /* almost like DP3 */ |
||
1276 | INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), |
||
1277 | INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), |
||
1278 | INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), |
||
1279 | INSTR(LRP, trans_lrp), |
||
1280 | INSTR(FRC, trans_frac), |
||
1281 | INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), |
||
4401 | Serge | 1282 | INSTR(ARL, instr_cat2, .opc = OPC_FLOOR_F), |
4358 | Serge | 1283 | INSTR(EX2, instr_cat4, .opc = OPC_EXP2), |
1284 | INSTR(LG2, instr_cat4, .opc = OPC_LOG2), |
||
1285 | INSTR(POW, trans_pow), |
||
1286 | INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F), |
||
1287 | INSTR(COS, instr_cat4, .opc = OPC_SIN), |
||
1288 | INSTR(SIN, instr_cat4, .opc = OPC_COS), |
||
1289 | INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), |
||
1290 | INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), |
||
4401 | Serge | 1291 | INSTR(SGT, trans_cmp), |
1292 | INSTR(SLT, trans_cmp), |
||
1293 | INSTR(SGE, trans_cmp), |
||
1294 | INSTR(SLE, trans_cmp), |
||
1295 | INSTR(SNE, trans_cmp), |
||
1296 | INSTR(SEQ, trans_cmp), |
||
4358 | Serge | 1297 | INSTR(CMP, trans_cmp), |
1298 | INSTR(IF, trans_if), |
||
1299 | INSTR(ELSE, trans_else), |
||
1300 | INSTR(ENDIF, trans_endif), |
||
1301 | INSTR(END, instr_cat0, .opc = OPC_END), |
||
1302 | }; |
||
1303 | |||
1304 | static int |
||
1305 | decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) |
||
1306 | { |
||
1307 | struct fd3_shader_stateobj *so = ctx->so; |
||
1308 | unsigned base = ctx->base_reg[TGSI_FILE_INPUT]; |
||
1309 | unsigned i, flags = 0; |
||
1310 | int nop = 0; |
||
1311 | |||
1312 | if (ctx->so->half_precision) |
||
1313 | flags |= IR3_REG_HALF; |
||
1314 | |||
1315 | for (i = decl->Range.First; i <= decl->Range.Last; i++) { |
||
1316 | unsigned n = so->inputs_count++; |
||
1317 | unsigned r = regid(i + base, 0); |
||
1318 | unsigned ncomp; |
||
1319 | |||
1320 | /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */ |
||
1321 | ncomp = 4; |
||
1322 | |||
1323 | DBG("decl in -> r%d", i + base); // XXX |
||
1324 | |||
1325 | so->inputs[n].compmask = (1 << ncomp) - 1; |
||
1326 | so->inputs[n].regid = r; |
||
1327 | so->inputs[n].inloc = ctx->next_inloc; |
||
1328 | ctx->next_inloc += ncomp; |
||
1329 | |||
1330 | so->total_in += ncomp; |
||
1331 | |||
1332 | /* for frag shaders, we need to generate the corresponding bary instr: */ |
||
1333 | if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { |
||
1334 | struct ir3_instruction *instr; |
||
1335 | |||
1336 | instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F); |
||
1337 | instr->repeat = ncomp - 1; |
||
1338 | |||
1339 | /* dst register: */ |
||
1340 | ctx->last_input = ir3_reg_create(instr, r, flags); |
||
1341 | |||
1342 | /* input position: */ |
||
1343 | ir3_reg_create(instr, 0, IR3_REG_IMMED | IR3_REG_R)->iim_val = |
||
1344 | so->inputs[n].inloc - 8; |
||
1345 | |||
1346 | /* input base (always r0.x): */ |
||
1347 | ir3_reg_create(instr, regid(0,0), 0); |
||
1348 | |||
1349 | nop = 6; |
||
1350 | } |
||
1351 | } |
||
1352 | |||
1353 | return nop; |
||
1354 | } |
||
1355 | |||
1356 | static void |
||
1357 | decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) |
||
1358 | { |
||
1359 | struct fd3_shader_stateobj *so = ctx->so; |
||
1360 | unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT]; |
||
1361 | unsigned name = decl->Semantic.Name; |
||
1362 | unsigned i; |
||
1363 | |||
4401 | Serge | 1364 | compile_assert(ctx, decl->Declaration.Semantic); // TODO is this ever not true? |
4358 | Serge | 1365 | |
1366 | DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX |
||
1367 | |||
1368 | if (ctx->type == TGSI_PROCESSOR_VERTEX) { |
||
1369 | switch (name) { |
||
1370 | case TGSI_SEMANTIC_POSITION: |
||
1371 | so->pos_regid = regid(decl->Range.First + base, 0); |
||
1372 | break; |
||
1373 | case TGSI_SEMANTIC_PSIZE: |
||
1374 | so->psize_regid = regid(decl->Range.First + base, 0); |
||
1375 | break; |
||
1376 | case TGSI_SEMANTIC_COLOR: |
||
1377 | case TGSI_SEMANTIC_GENERIC: |
||
1378 | case TGSI_SEMANTIC_FOG: |
||
1379 | case TGSI_SEMANTIC_TEXCOORD: |
||
1380 | for (i = decl->Range.First; i <= decl->Range.Last; i++) |
||
1381 | so->outputs[so->outputs_count++].regid = regid(i + base, 0); |
||
1382 | break; |
||
1383 | default: |
||
4401 | Serge | 1384 | compile_error(ctx, "unknown VS semantic name: %s\n", |
4358 | Serge | 1385 | tgsi_semantic_names[name]); |
1386 | } |
||
1387 | } else { |
||
1388 | switch (name) { |
||
1389 | case TGSI_SEMANTIC_COLOR: |
||
1390 | so->color_regid = regid(decl->Range.First + base, 0); |
||
1391 | break; |
||
1392 | default: |
||
4401 | Serge | 1393 | compile_error(ctx, "unknown VS semantic name: %s\n", |
4358 | Serge | 1394 | tgsi_semantic_names[name]); |
1395 | } |
||
1396 | } |
||
1397 | } |
||
1398 | |||
1399 | static void |
||
1400 | decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) |
||
1401 | { |
||
1402 | ctx->so->samplers_count++; |
||
1403 | } |
||
1404 | |||
1405 | static void |
||
1406 | compile_instructions(struct fd3_compile_context *ctx) |
||
1407 | { |
||
1408 | struct ir3_shader *ir = ctx->ir; |
||
1409 | int nop = 0; |
||
1410 | |||
1411 | while (!tgsi_parse_end_of_tokens(&ctx->parser)) { |
||
1412 | tgsi_parse_token(&ctx->parser); |
||
1413 | |||
1414 | switch (ctx->parser.FullToken.Token.Type) { |
||
1415 | case TGSI_TOKEN_TYPE_DECLARATION: { |
||
1416 | struct tgsi_full_declaration *decl = |
||
1417 | &ctx->parser.FullToken.FullDeclaration; |
||
1418 | if (decl->Declaration.File == TGSI_FILE_OUTPUT) { |
||
1419 | decl_out(ctx, decl); |
||
1420 | } else if (decl->Declaration.File == TGSI_FILE_INPUT) { |
||
1421 | nop = decl_in(ctx, decl); |
||
1422 | } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { |
||
1423 | decl_samp(ctx, decl); |
||
1424 | } |
||
1425 | break; |
||
1426 | } |
||
1427 | case TGSI_TOKEN_TYPE_IMMEDIATE: { |
||
1428 | /* TODO: if we know the immediate is small enough, and only |
||
1429 | * used with instructions that can embed an immediate, we |
||
1430 | * can skip this: |
||
1431 | */ |
||
1432 | struct tgsi_full_immediate *imm = |
||
1433 | &ctx->parser.FullToken.FullImmediate; |
||
1434 | unsigned n = ctx->so->immediates_count++; |
||
1435 | memcpy(ctx->so->immediates[n].val, imm->u, 16); |
||
1436 | break; |
||
1437 | } |
||
1438 | case TGSI_TOKEN_TYPE_INSTRUCTION: { |
||
1439 | struct tgsi_full_instruction *inst = |
||
1440 | &ctx->parser.FullToken.FullInstruction; |
||
1441 | unsigned opc = inst->Instruction.Opcode; |
||
1442 | const struct instr_translater *t = &translaters[opc]; |
||
1443 | |||
1444 | if (nop) { |
||
1445 | ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = nop - 1; |
||
1446 | nop = 0; |
||
1447 | } |
||
1448 | |||
1449 | if (t->fxn) { |
||
1450 | t->fxn(t, ctx, inst); |
||
1451 | ctx->num_internal_temps = 0; |
||
1452 | } else { |
||
4401 | Serge | 1453 | compile_error(ctx, "unknown TGSI opc: %s\n", |
4358 | Serge | 1454 | tgsi_get_opcode_name(opc)); |
1455 | } |
||
1456 | |||
4401 | Serge | 1457 | switch (inst->Instruction.Saturate) { |
1458 | case TGSI_SAT_ZERO_ONE: |
||
1459 | create_clamp_imm(ctx, &inst->Dst[0].Register, |
||
1460 | fui(0.0), fui(1.0)); |
||
1461 | break; |
||
1462 | case TGSI_SAT_MINUS_PLUS_ONE: |
||
1463 | create_clamp_imm(ctx, &inst->Dst[0].Register, |
||
1464 | fui(-1.0), fui(1.0)); |
||
1465 | break; |
||
1466 | } |
||
1467 | |||
4358 | Serge | 1468 | break; |
1469 | } |
||
1470 | default: |
||
1471 | break; |
||
1472 | } |
||
1473 | } |
||
1474 | |||
1475 | if (ir->instrs_count > 0) |
||
1476 | ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; |
||
1477 | |||
1478 | if (ctx->last_input) |
||
1479 | ctx->last_input->flags |= IR3_REG_EI; |
||
1480 | } |
||
1481 | |||
1482 | int |
||
1483 | fd3_compile_shader(struct fd3_shader_stateobj *so, |
||
1484 | const struct tgsi_token *tokens) |
||
1485 | { |
||
1486 | struct fd3_compile_context ctx; |
||
1487 | |||
1488 | assert(!so->ir); |
||
1489 | |||
1490 | so->ir = ir3_shader_create(); |
||
1491 | |||
4401 | Serge | 1492 | assert(so->ir); |
1493 | |||
4358 | Serge | 1494 | so->color_regid = regid(63,0); |
1495 | so->pos_regid = regid(63,0); |
||
1496 | so->psize_regid = regid(63,0); |
||
1497 | |||
1498 | if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) |
||
1499 | return -1; |
||
1500 | |||
1501 | compile_instructions(&ctx); |
||
1502 | |||
1503 | compile_free(&ctx); |
||
1504 | |||
1505 | return 0; |
||
1506 | }=>><>=>>>>=>>>>>>>><>>>>><>>>>><>><>><>>> |