Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ |
2 | |||
3 | /* |
||
4 | * Copyright (C) 2012 Rob Clark |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice (including the next |
||
14 | * paragraph) shall be included in all copies or substantial portions of the |
||
15 | * Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
23 | * SOFTWARE. |
||
24 | * |
||
25 | * Authors: |
||
26 | * Rob Clark |
||
27 | */ |
||
28 | |||
29 | #include "pipe/p_state.h" |
||
30 | #include "util/u_string.h" |
||
31 | #include "util/u_memory.h" |
||
32 | #include "util/u_inlines.h" |
||
33 | #include "tgsi/tgsi_parse.h" |
||
34 | #include "tgsi/tgsi_ureg.h" |
||
35 | #include "tgsi/tgsi_info.h" |
||
36 | #include "tgsi/tgsi_strings.h" |
||
37 | #include "tgsi/tgsi_dump.h" |
||
38 | |||
39 | #include "fd2_compiler.h" |
||
40 | #include "fd2_program.h" |
||
41 | #include "fd2_util.h" |
||
42 | |||
43 | #include "instr-a2xx.h" |
||
44 | #include "ir-a2xx.h" |
||
45 | |||
46 | struct fd2_compile_context { |
||
47 | struct fd_program_stateobj *prog; |
||
48 | struct fd2_shader_stateobj *so; |
||
49 | |||
50 | struct tgsi_parse_context parser; |
||
51 | unsigned type; |
||
52 | |||
53 | /* predicate stack: */ |
||
54 | int pred_depth; |
||
55 | enum ir2_pred pred_stack[8]; |
||
56 | |||
57 | /* Internal-Temporary and Predicate register assignment: |
||
58 | * |
||
59 | * Some TGSI instructions which translate into multiple actual |
||
60 | * instructions need one or more temporary registers, which are not |
||
61 | * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY). |
||
62 | * And some instructions (texture fetch) cannot write directly to |
||
63 | * output registers. We could be more clever and re-use dst or a |
||
64 | * src register in some cases. But for now don't try to be clever. |
||
65 | * Eventually we should implement an optimization pass that re- |
||
66 | * juggles the register usage and gets rid of unneeded temporaries. |
||
67 | * |
||
68 | * The predicate register must be valid across multiple TGSI |
||
69 | * instructions, but internal temporary's do not. For this reason, |
||
70 | * once the predicate register is requested, until it is no longer |
||
71 | * needed, it gets the first register slot after after the TGSI |
||
72 | * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the |
||
73 | * internal temporaries get the register slots above this. |
||
74 | */ |
||
75 | |||
76 | int pred_reg; |
||
77 | int num_internal_temps; |
||
78 | |||
79 | uint8_t num_regs[TGSI_FILE_COUNT]; |
||
80 | |||
81 | /* maps input register idx to prog->export_linkage idx: */ |
||
82 | uint8_t input_export_idx[64]; |
||
83 | |||
84 | /* maps output register idx to prog->export_linkage idx: */ |
||
85 | uint8_t output_export_idx[64]; |
||
86 | |||
87 | /* idx/slot for last compiler generated immediate */ |
||
88 | unsigned immediate_idx; |
||
89 | |||
90 | // TODO we can skip emit exports in the VS that the FS doesn't need.. |
||
91 | // and get rid perhaps of num_param.. |
||
92 | unsigned num_position, num_param; |
||
93 | unsigned position, psize; |
||
94 | |||
95 | uint64_t need_sync; |
||
96 | |||
97 | /* current exec CF instruction */ |
||
98 | struct ir2_cf *cf; |
||
99 | }; |
||
100 | |||
101 | static int |
||
102 | semantic_idx(struct tgsi_declaration_semantic *semantic) |
||
103 | { |
||
104 | int idx = semantic->Name; |
||
105 | if (idx == TGSI_SEMANTIC_GENERIC) |
||
106 | idx = TGSI_SEMANTIC_COUNT + semantic->Index; |
||
107 | return idx; |
||
108 | } |
||
109 | |||
110 | /* assign/get the input/export register # for given semantic idx as |
||
111 | * returned by semantic_idx(): |
||
112 | */ |
||
113 | static int |
||
114 | export_linkage(struct fd2_compile_context *ctx, int idx) |
||
115 | { |
||
116 | struct fd_program_stateobj *prog = ctx->prog; |
||
117 | |||
118 | /* if first time we've seen this export, assign the next available slot: */ |
||
119 | if (prog->export_linkage[idx] == 0xff) |
||
120 | prog->export_linkage[idx] = prog->num_exports++; |
||
121 | |||
122 | return prog->export_linkage[idx]; |
||
123 | } |
||
124 | |||
125 | static unsigned |
||
126 | compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog, |
||
127 | struct fd2_shader_stateobj *so) |
||
128 | { |
||
129 | unsigned ret; |
||
130 | |||
131 | ctx->prog = prog; |
||
132 | ctx->so = so; |
||
133 | ctx->cf = NULL; |
||
134 | ctx->pred_depth = 0; |
||
135 | |||
136 | ret = tgsi_parse_init(&ctx->parser, so->tokens); |
||
137 | if (ret != TGSI_PARSE_OK) |
||
138 | return ret; |
||
139 | |||
140 | ctx->type = ctx->parser.FullHeader.Processor.Processor; |
||
141 | ctx->position = ~0; |
||
142 | ctx->psize = ~0; |
||
143 | ctx->num_position = 0; |
||
144 | ctx->num_param = 0; |
||
145 | ctx->need_sync = 0; |
||
146 | ctx->immediate_idx = 0; |
||
147 | ctx->pred_reg = -1; |
||
148 | ctx->num_internal_temps = 0; |
||
149 | |||
150 | memset(ctx->num_regs, 0, sizeof(ctx->num_regs)); |
||
151 | memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx)); |
||
152 | memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx)); |
||
153 | |||
154 | /* do first pass to extract declarations: */ |
||
155 | while (!tgsi_parse_end_of_tokens(&ctx->parser)) { |
||
156 | tgsi_parse_token(&ctx->parser); |
||
157 | |||
158 | switch (ctx->parser.FullToken.Token.Type) { |
||
159 | case TGSI_TOKEN_TYPE_DECLARATION: { |
||
160 | struct tgsi_full_declaration *decl = |
||
161 | &ctx->parser.FullToken.FullDeclaration; |
||
162 | if (decl->Declaration.File == TGSI_FILE_OUTPUT) { |
||
163 | unsigned name = decl->Semantic.Name; |
||
164 | |||
165 | assert(decl->Declaration.Semantic); // TODO is this ever not true? |
||
166 | |||
167 | ctx->output_export_idx[decl->Range.First] = |
||
168 | semantic_idx(&decl->Semantic); |
||
169 | |||
170 | if (ctx->type == TGSI_PROCESSOR_VERTEX) { |
||
171 | switch (name) { |
||
172 | case TGSI_SEMANTIC_POSITION: |
||
173 | ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT]; |
||
174 | ctx->num_position++; |
||
175 | break; |
||
176 | case TGSI_SEMANTIC_PSIZE: |
||
177 | ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT]; |
||
178 | ctx->num_position++; |
||
179 | break; |
||
180 | case TGSI_SEMANTIC_COLOR: |
||
181 | case TGSI_SEMANTIC_GENERIC: |
||
182 | ctx->num_param++; |
||
183 | break; |
||
184 | default: |
||
185 | DBG("unknown VS semantic name: %s", |
||
186 | tgsi_semantic_names[name]); |
||
187 | assert(0); |
||
188 | } |
||
189 | } else { |
||
190 | switch (name) { |
||
191 | case TGSI_SEMANTIC_COLOR: |
||
192 | case TGSI_SEMANTIC_GENERIC: |
||
193 | ctx->num_param++; |
||
194 | break; |
||
195 | default: |
||
196 | DBG("unknown PS semantic name: %s", |
||
197 | tgsi_semantic_names[name]); |
||
198 | assert(0); |
||
199 | } |
||
200 | } |
||
201 | } else if (decl->Declaration.File == TGSI_FILE_INPUT) { |
||
202 | ctx->input_export_idx[decl->Range.First] = |
||
203 | semantic_idx(&decl->Semantic); |
||
204 | } |
||
205 | ctx->num_regs[decl->Declaration.File] = |
||
206 | MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1); |
||
207 | break; |
||
208 | } |
||
209 | case TGSI_TOKEN_TYPE_IMMEDIATE: { |
||
210 | struct tgsi_full_immediate *imm = |
||
211 | &ctx->parser.FullToken.FullImmediate; |
||
212 | unsigned n = ctx->so->num_immediates++; |
||
213 | memcpy(ctx->so->immediates[n].val, imm->u, 16); |
||
214 | break; |
||
215 | } |
||
216 | default: |
||
217 | break; |
||
218 | } |
||
219 | } |
||
220 | |||
221 | /* TGSI generated immediates are always entire vec4's, ones we |
||
222 | * generate internally are not: |
||
223 | */ |
||
224 | ctx->immediate_idx = ctx->so->num_immediates * 4; |
||
225 | |||
226 | ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT]; |
||
227 | |||
228 | tgsi_parse_free(&ctx->parser); |
||
229 | |||
230 | return tgsi_parse_init(&ctx->parser, so->tokens); |
||
231 | } |
||
232 | |||
233 | static void |
||
234 | compile_free(struct fd2_compile_context *ctx) |
||
235 | { |
||
236 | tgsi_parse_free(&ctx->parser); |
||
237 | } |
||
238 | |||
239 | static struct ir2_cf * |
||
240 | next_exec_cf(struct fd2_compile_context *ctx) |
||
241 | { |
||
242 | struct ir2_cf *cf = ctx->cf; |
||
243 | if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs)) |
||
244 | ctx->cf = cf = ir2_cf_create(ctx->so->ir, EXEC); |
||
245 | return cf; |
||
246 | } |
||
247 | |||
248 | static void |
||
249 | compile_vtx_fetch(struct fd2_compile_context *ctx) |
||
250 | { |
||
251 | struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs; |
||
252 | int i; |
||
253 | for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { |
||
254 | struct ir2_instruction *instr = ir2_instr_create( |
||
255 | next_exec_cf(ctx), IR2_FETCH); |
||
256 | instr->fetch.opc = VTX_FETCH; |
||
257 | |||
258 | ctx->need_sync |= 1 << (i+1); |
||
259 | |||
260 | ir2_reg_create(instr, i+1, "xyzw", 0); |
||
261 | ir2_reg_create(instr, 0, "x", 0); |
||
262 | |||
263 | if (i == 0) |
||
264 | instr->sync = true; |
||
265 | |||
266 | vfetch_instrs[i] = instr; |
||
267 | } |
||
268 | ctx->so->num_vfetch_instrs = i; |
||
269 | ctx->cf = NULL; |
||
270 | } |
||
271 | |||
272 | /* |
||
273 | * For vertex shaders (VS): |
||
274 | * --- ------ ------------- |
||
275 | * |
||
276 | * Inputs: R1-R(num_input) |
||
277 | * Constants: C0-C(num_const-1) |
||
278 | * Immediates: C(num_const)-C(num_const+num_imm-1) |
||
279 | * Outputs: export0-export(n) and export62, export63 |
||
280 | * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63) |
||
281 | * Temps: R(num_input+1)-R(num_input+num_temps) |
||
282 | * |
||
283 | * R0 could be clobbered after the vertex fetch instructions.. so we |
||
284 | * could use it for one of the temporaries. |
||
285 | * |
||
286 | * TODO: maybe the vertex fetch part could fetch first input into R0 as |
||
287 | * the last vtx fetch instruction, which would let us use the same |
||
288 | * register layout in either case.. although this is not what the blob |
||
289 | * compiler does. |
||
290 | * |
||
291 | * |
||
292 | * For frag shaders (PS): |
||
293 | * --- ---- ------------- |
||
294 | * |
||
295 | * Inputs: R0-R(num_input-1) |
||
296 | * Constants: same as VS |
||
297 | * Immediates: same as VS |
||
298 | * Outputs: export0-export(num_outputs) |
||
299 | * Temps: R(num_input)-R(num_input+num_temps-1) |
||
300 | * |
||
301 | * In either case, immediates are appended to the constants |
||
302 | * (uniforms). |
||
303 | * |
||
304 | */ |
||
305 | |||
306 | static unsigned |
||
307 | get_temp_gpr(struct fd2_compile_context *ctx, int idx) |
||
308 | { |
||
309 | unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT]; |
||
310 | if (ctx->type == TGSI_PROCESSOR_VERTEX) |
||
311 | num++; |
||
312 | return num; |
||
313 | } |
||
314 | |||
315 | static struct ir2_register * |
||
316 | add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, |
||
317 | const struct tgsi_dst_register *dst) |
||
318 | { |
||
319 | unsigned flags = 0, num = 0; |
||
320 | char swiz[5]; |
||
321 | |||
322 | switch (dst->File) { |
||
323 | case TGSI_FILE_OUTPUT: |
||
324 | flags |= IR2_REG_EXPORT; |
||
325 | if (ctx->type == TGSI_PROCESSOR_VERTEX) { |
||
326 | if (dst->Index == ctx->position) { |
||
327 | num = 62; |
||
328 | } else if (dst->Index == ctx->psize) { |
||
329 | num = 63; |
||
330 | } else { |
||
331 | num = export_linkage(ctx, |
||
332 | ctx->output_export_idx[dst->Index]); |
||
333 | } |
||
334 | } else { |
||
335 | num = dst->Index; |
||
336 | } |
||
337 | break; |
||
338 | case TGSI_FILE_TEMPORARY: |
||
339 | num = get_temp_gpr(ctx, dst->Index); |
||
340 | break; |
||
341 | default: |
||
342 | DBG("unsupported dst register file: %s", |
||
343 | tgsi_file_name(dst->File)); |
||
344 | assert(0); |
||
345 | break; |
||
346 | } |
||
347 | |||
348 | swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_'; |
||
349 | swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_'; |
||
350 | swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_'; |
||
351 | swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_'; |
||
352 | swiz[4] = '\0'; |
||
353 | |||
354 | return ir2_reg_create(alu, num, swiz, flags); |
||
355 | } |
||
356 | |||
357 | static struct ir2_register * |
||
358 | add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, |
||
359 | const struct tgsi_src_register *src) |
||
360 | { |
||
361 | static const char swiz_vals[] = { |
||
362 | 'x', 'y', 'z', 'w', |
||
363 | }; |
||
364 | char swiz[5]; |
||
365 | unsigned flags = 0, num = 0; |
||
366 | |||
367 | switch (src->File) { |
||
368 | case TGSI_FILE_CONSTANT: |
||
369 | num = src->Index; |
||
370 | flags |= IR2_REG_CONST; |
||
371 | break; |
||
372 | case TGSI_FILE_INPUT: |
||
373 | if (ctx->type == TGSI_PROCESSOR_VERTEX) { |
||
374 | num = src->Index + 1; |
||
375 | } else { |
||
376 | num = export_linkage(ctx, |
||
377 | ctx->input_export_idx[src->Index]); |
||
378 | } |
||
379 | break; |
||
380 | case TGSI_FILE_TEMPORARY: |
||
381 | num = get_temp_gpr(ctx, src->Index); |
||
382 | break; |
||
383 | case TGSI_FILE_IMMEDIATE: |
||
384 | num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; |
||
385 | flags |= IR2_REG_CONST; |
||
386 | break; |
||
387 | default: |
||
388 | DBG("unsupported src register file: %s", |
||
389 | tgsi_file_name(src->File)); |
||
390 | assert(0); |
||
391 | break; |
||
392 | } |
||
393 | |||
394 | if (src->Absolute) |
||
395 | flags |= IR2_REG_ABS; |
||
396 | if (src->Negate) |
||
397 | flags |= IR2_REG_NEGATE; |
||
398 | |||
399 | swiz[0] = swiz_vals[src->SwizzleX]; |
||
400 | swiz[1] = swiz_vals[src->SwizzleY]; |
||
401 | swiz[2] = swiz_vals[src->SwizzleZ]; |
||
402 | swiz[3] = swiz_vals[src->SwizzleW]; |
||
403 | swiz[4] = '\0'; |
||
404 | |||
405 | if ((ctx->need_sync & (uint64_t)(1 << num)) && |
||
406 | !(flags & IR2_REG_CONST)) { |
||
407 | alu->sync = true; |
||
408 | ctx->need_sync &= ~(uint64_t)(1 << num); |
||
409 | } |
||
410 | |||
411 | return ir2_reg_create(alu, num, swiz, flags); |
||
412 | } |
||
413 | |||
414 | static void |
||
415 | add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) |
||
416 | { |
||
417 | switch (inst->Instruction.Saturate) { |
||
418 | case TGSI_SAT_NONE: |
||
419 | break; |
||
420 | case TGSI_SAT_ZERO_ONE: |
||
421 | alu->alu.vector_clamp = true; |
||
422 | break; |
||
423 | case TGSI_SAT_MINUS_PLUS_ONE: |
||
424 | DBG("unsupported saturate"); |
||
425 | assert(0); |
||
426 | break; |
||
427 | } |
||
428 | } |
||
429 | |||
430 | static void |
||
431 | add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) |
||
432 | { |
||
433 | switch (inst->Instruction.Saturate) { |
||
434 | case TGSI_SAT_NONE: |
||
435 | break; |
||
436 | case TGSI_SAT_ZERO_ONE: |
||
437 | alu->alu.scalar_clamp = true; |
||
438 | break; |
||
439 | case TGSI_SAT_MINUS_PLUS_ONE: |
||
440 | DBG("unsupported saturate"); |
||
441 | assert(0); |
||
442 | break; |
||
443 | } |
||
444 | } |
||
445 | |||
446 | static void |
||
447 | add_regs_vector_1(struct fd2_compile_context *ctx, |
||
448 | struct tgsi_full_instruction *inst, struct ir2_instruction *alu) |
||
449 | { |
||
450 | assert(inst->Instruction.NumSrcRegs == 1); |
||
451 | assert(inst->Instruction.NumDstRegs == 1); |
||
452 | |||
453 | add_dst_reg(ctx, alu, &inst->Dst[0].Register); |
||
454 | add_src_reg(ctx, alu, &inst->Src[0].Register); |
||
455 | add_src_reg(ctx, alu, &inst->Src[0].Register); |
||
456 | add_vector_clamp(inst, alu); |
||
457 | } |
||
458 | |||
459 | static void |
||
460 | add_regs_vector_2(struct fd2_compile_context *ctx, |
||
461 | struct tgsi_full_instruction *inst, struct ir2_instruction *alu) |
||
462 | { |
||
463 | assert(inst->Instruction.NumSrcRegs == 2); |
||
464 | assert(inst->Instruction.NumDstRegs == 1); |
||
465 | |||
466 | add_dst_reg(ctx, alu, &inst->Dst[0].Register); |
||
467 | add_src_reg(ctx, alu, &inst->Src[0].Register); |
||
468 | add_src_reg(ctx, alu, &inst->Src[1].Register); |
||
469 | add_vector_clamp(inst, alu); |
||
470 | } |
||
471 | |||
472 | static void |
||
473 | add_regs_vector_3(struct fd2_compile_context *ctx, |
||
474 | struct tgsi_full_instruction *inst, struct ir2_instruction *alu) |
||
475 | { |
||
476 | assert(inst->Instruction.NumSrcRegs == 3); |
||
477 | assert(inst->Instruction.NumDstRegs == 1); |
||
478 | |||
479 | add_dst_reg(ctx, alu, &inst->Dst[0].Register); |
||
480 | /* maybe should re-arrange the syntax some day, but |
||
481 | * in assembler/disassembler and what ir.c expects |
||
482 | * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 |
||
483 | */ |
||
484 | add_src_reg(ctx, alu, &inst->Src[2].Register); |
||
485 | add_src_reg(ctx, alu, &inst->Src[0].Register); |
||
486 | add_src_reg(ctx, alu, &inst->Src[1].Register); |
||
487 | add_vector_clamp(inst, alu); |
||
488 | } |
||
489 | |||
/*
 * Fill the vector slot of 'alu' with placeholder registers (a dst that
 * writes no component plus two sources) for instructions that only use
 * the scalar slot.
 */
static void
add_regs_dummy_vector(struct ir2_instruction *alu)
{
	int src;

	/* vector dst, no components written: */
	ir2_reg_create(alu, 0, "____", 0);

	/* vector src1 and src2: */
	for (src = 0; src < 2; src++)
		ir2_reg_create(alu, 0, NULL, 0);
}
||
500 | |||
501 | static void |
||
502 | add_regs_scalar_1(struct fd2_compile_context *ctx, |
||
503 | struct tgsi_full_instruction *inst, struct ir2_instruction *alu) |
||
504 | { |
||
505 | assert(inst->Instruction.NumSrcRegs == 1); |
||
506 | assert(inst->Instruction.NumDstRegs == 1); |
||
507 | |||
508 | add_regs_dummy_vector(alu); |
||
509 | |||
510 | add_dst_reg(ctx, alu, &inst->Dst[0].Register); |
||
511 | add_src_reg(ctx, alu, &inst->Src[0].Register); |
||
512 | add_scalar_clamp(inst, alu); |
||
513 | } |
||
514 | |||
515 | /* |
||
516 | * Helpers for TGSI instructions that don't map to a single shader instr: |
||
517 | */ |
||
518 | |||
519 | static void |
||
520 | src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) |
||
521 | { |
||
522 | src->File = dst->File; |
||
523 | src->Indirect = dst->Indirect; |
||
524 | src->Dimension = dst->Dimension; |
||
525 | src->Index = dst->Index; |
||
526 | src->Absolute = 0; |
||
527 | src->Negate = 0; |
||
528 | src->SwizzleX = TGSI_SWIZZLE_X; |
||
529 | src->SwizzleY = TGSI_SWIZZLE_Y; |
||
530 | src->SwizzleZ = TGSI_SWIZZLE_Z; |
||
531 | src->SwizzleW = TGSI_SWIZZLE_W; |
||
532 | } |
||
533 | |||
534 | /* Get internal-temp src/dst to use for a sequence of instructions |
||
535 | * generated by a single TGSI op. |
||
536 | */ |
||
537 | static void |
||
538 | get_internal_temp(struct fd2_compile_context *ctx, |
||
539 | struct tgsi_dst_register *tmp_dst, |
||
540 | struct tgsi_src_register *tmp_src) |
||
541 | { |
||
542 | int n; |
||
543 | |||
544 | tmp_dst->File = TGSI_FILE_TEMPORARY; |
||
545 | tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; |
||
546 | tmp_dst->Indirect = 0; |
||
547 | tmp_dst->Dimension = 0; |
||
548 | |||
549 | /* assign next temporary: */ |
||
550 | n = ctx->num_internal_temps++; |
||
551 | if (ctx->pred_reg != -1) |
||
552 | n++; |
||
553 | |||
554 | tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n; |
||
555 | |||
556 | src_from_dst(tmp_src, tmp_dst); |
||
557 | } |
||
558 | |||
559 | static void |
||
560 | get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst, |
||
561 | struct tgsi_src_register *src) |
||
562 | { |
||
563 | assert(ctx->pred_reg != -1); |
||
564 | |||
565 | dst->File = TGSI_FILE_TEMPORARY; |
||
566 | dst->WriteMask = TGSI_WRITEMASK_W; |
||
567 | dst->Indirect = 0; |
||
568 | dst->Dimension = 0; |
||
569 | dst->Index = get_temp_gpr(ctx, ctx->pred_reg); |
||
570 | |||
571 | if (src) { |
||
572 | src_from_dst(src, dst); |
||
573 | src->SwizzleX = TGSI_SWIZZLE_W; |
||
574 | src->SwizzleY = TGSI_SWIZZLE_W; |
||
575 | src->SwizzleZ = TGSI_SWIZZLE_W; |
||
576 | src->SwizzleW = TGSI_SWIZZLE_W; |
||
577 | } |
||
578 | } |
||
579 | |||
580 | static void |
||
581 | push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src) |
||
582 | { |
||
583 | struct ir2_instruction *alu; |
||
584 | struct tgsi_dst_register pred_dst; |
||
585 | |||
586 | /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by |
||
587 | * themselves: |
||
588 | */ |
||
589 | ctx->cf = NULL; |
||
590 | |||
591 | if (ctx->pred_depth == 0) { |
||
592 | /* assign predicate register: */ |
||
593 | ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY]; |
||
594 | |||
595 | get_predicate(ctx, &pred_dst, NULL); |
||
596 | |||
597 | alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs); |
||
598 | add_regs_dummy_vector(alu); |
||
599 | add_dst_reg(ctx, alu, &pred_dst); |
||
600 | add_src_reg(ctx, alu, src); |
||
601 | } else { |
||
602 | struct tgsi_src_register pred_src; |
||
603 | |||
604 | get_predicate(ctx, &pred_dst, &pred_src); |
||
605 | |||
606 | alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); |
||
607 | add_dst_reg(ctx, alu, &pred_dst); |
||
608 | add_src_reg(ctx, alu, &pred_src); |
||
609 | add_src_reg(ctx, alu, src); |
||
610 | |||
611 | // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make |
||
612 | // sure src reg is valid if it was calculated with a predicate |
||
613 | // condition.. |
||
614 | alu->pred = IR2_PRED_NONE; |
||
615 | } |
||
616 | |||
617 | /* save previous pred state to restore in pop_predicate(): */ |
||
618 | ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred; |
||
619 | |||
620 | ctx->cf = NULL; |
||
621 | } |
||
622 | |||
623 | static void |
||
624 | pop_predicate(struct fd2_compile_context *ctx) |
||
625 | { |
||
626 | /* NOTE blob compiler seems to always puts PRED_* instrs in a CF by |
||
627 | * themselves: |
||
628 | */ |
||
629 | ctx->cf = NULL; |
||
630 | |||
631 | /* restore previous predicate state: */ |
||
632 | ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth]; |
||
633 | |||
634 | if (ctx->pred_depth != 0) { |
||
635 | struct ir2_instruction *alu; |
||
636 | struct tgsi_dst_register pred_dst; |
||
637 | struct tgsi_src_register pred_src; |
||
638 | |||
639 | get_predicate(ctx, &pred_dst, &pred_src); |
||
640 | |||
641 | alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs); |
||
642 | add_regs_dummy_vector(alu); |
||
643 | add_dst_reg(ctx, alu, &pred_dst); |
||
644 | add_src_reg(ctx, alu, &pred_src); |
||
645 | alu->pred = IR2_PRED_NONE; |
||
646 | } else { |
||
647 | /* predicate register no longer needed: */ |
||
648 | ctx->pred_reg = -1; |
||
649 | } |
||
650 | |||
651 | ctx->cf = NULL; |
||
652 | } |
||
653 | |||
654 | static void |
||
655 | get_immediate(struct fd2_compile_context *ctx, |
||
656 | struct tgsi_src_register *reg, uint32_t val) |
||
657 | { |
||
658 | unsigned neg, swiz, idx, i; |
||
659 | /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ |
||
660 | static const unsigned swiz2tgsi[] = { |
||
661 | TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, |
||
662 | }; |
||
663 | |||
664 | for (i = 0; i < ctx->immediate_idx; i++) { |
||
665 | swiz = i % 4; |
||
666 | idx = i / 4; |
||
667 | |||
668 | if (ctx->so->immediates[idx].val[swiz] == val) { |
||
669 | neg = 0; |
||
670 | break; |
||
671 | } |
||
672 | |||
673 | if (ctx->so->immediates[idx].val[swiz] == -val) { |
||
674 | neg = 1; |
||
675 | break; |
||
676 | } |
||
677 | } |
||
678 | |||
679 | if (i == ctx->immediate_idx) { |
||
680 | /* need to generate a new immediate: */ |
||
681 | swiz = i % 4; |
||
682 | idx = i / 4; |
||
683 | neg = 0; |
||
684 | ctx->so->immediates[idx].val[swiz] = val; |
||
685 | ctx->so->num_immediates = idx + 1; |
||
686 | ctx->immediate_idx++; |
||
687 | } |
||
688 | |||
689 | reg->File = TGSI_FILE_IMMEDIATE; |
||
690 | reg->Indirect = 0; |
||
691 | reg->Dimension = 0; |
||
692 | reg->Index = idx; |
||
693 | reg->Absolute = 0; |
||
694 | reg->Negate = neg; |
||
695 | reg->SwizzleX = swiz2tgsi[swiz]; |
||
696 | reg->SwizzleY = swiz2tgsi[swiz]; |
||
697 | reg->SwizzleZ = swiz2tgsi[swiz]; |
||
698 | reg->SwizzleW = swiz2tgsi[swiz]; |
||
699 | } |
||
700 | |||
701 | /* POW(a,b) = EXP2(b * LOG2(a)) */ |
||
702 | static void |
||
703 | translate_pow(struct fd2_compile_context *ctx, |
||
704 | struct tgsi_full_instruction *inst) |
||
705 | { |
||
706 | struct tgsi_dst_register tmp_dst; |
||
707 | struct tgsi_src_register tmp_src; |
||
708 | struct ir2_instruction *alu; |
||
709 | |||
710 | get_internal_temp(ctx, &tmp_dst, &tmp_src); |
||
711 | |||
712 | alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP); |
||
713 | add_regs_dummy_vector(alu); |
||
714 | add_dst_reg(ctx, alu, &tmp_dst); |
||
715 | add_src_reg(ctx, alu, &inst->Src[0].Register); |
||
716 | |||
717 | alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); |
||
718 | add_dst_reg(ctx, alu, &tmp_dst); |
||
719 | add_src_reg(ctx, alu, &tmp_src); |
||
720 | add_src_reg(ctx, alu, &inst->Src[1].Register); |
||
721 | |||
722 | /* NOTE: some of the instructions, like EXP_IEEE, seem hard- |
||
723 | * coded to take their input from the w component. |
||
724 | */ |
||
725 | switch(inst->Dst[0].Register.WriteMask) { |
||
726 | case TGSI_WRITEMASK_X: |
||
727 | tmp_src.SwizzleW = TGSI_SWIZZLE_X; |
||
728 | break; |
||
729 | case TGSI_WRITEMASK_Y: |
||
730 | tmp_src.SwizzleW = TGSI_SWIZZLE_Y; |
||
731 | break; |
||
732 | case TGSI_WRITEMASK_Z: |
||
733 | tmp_src.SwizzleW = TGSI_SWIZZLE_Z; |
||
734 | break; |
||
735 | case TGSI_WRITEMASK_W: |
||
736 | tmp_src.SwizzleW = TGSI_SWIZZLE_W; |
||
737 | break; |
||
738 | default: |
||
739 | DBG("invalid writemask!"); |
||
740 | assert(0); |
||
741 | break; |
||
742 | } |
||
743 | |||
744 | alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE); |
||
745 | add_regs_dummy_vector(alu); |
||
746 | add_dst_reg(ctx, alu, &inst->Dst[0].Register); |
||
747 | add_src_reg(ctx, alu, &tmp_src); |
||
748 | add_scalar_clamp(inst, alu); |
||
749 | } |
||
750 | |||
751 | static void |
||
752 | translate_tex(struct fd2_compile_context *ctx, |
||
753 | struct tgsi_full_instruction *inst, unsigned opc) |
||
754 | { |
||
755 | struct ir2_instruction *instr; |
||
756 | struct ir2_register *reg; |
||
757 | struct tgsi_dst_register tmp_dst; |
||
758 | struct tgsi_src_register tmp_src; |
||
759 | const struct tgsi_src_register *coord; |
||
760 | bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || |
||
761 | (inst->Instruction.Saturate != TGSI_SAT_NONE); |
||
762 | int idx; |
||
763 | |||
764 | if (using_temp || (opc == TGSI_OPCODE_TXP)) |
||
765 | get_internal_temp(ctx, &tmp_dst, &tmp_src); |
||
766 | |||
767 | if (opc == TGSI_OPCODE_TXP) { |
||
768 | static const char *swiz[] = { |
||
769 | [TGSI_SWIZZLE_X] = "xxxx", |
||
770 | [TGSI_SWIZZLE_Y] = "yyyy", |
||
771 | [TGSI_SWIZZLE_Z] = "zzzz", |
||
772 | [TGSI_SWIZZLE_W] = "wwww", |
||
773 | }; |
||
774 | |||
775 | /* TXP - Projective Texture Lookup: |
||
776 | * |
||
777 | * coord.x = src0.x / src.w |
||
778 | * coord.y = src0.y / src.w |
||
779 | * coord.z = src0.z / src.w |
||
780 | * coord.w = src0.w |
||
781 | * bias = 0.0 |
||
782 | * |
||
783 | * dst = texture_sample(unit, coord, bias) |
||
784 | */ |
||
785 | instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE); |
||
786 | |||
787 | /* MAXv: */ |
||
788 | add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; |
||
789 | add_src_reg(ctx, instr, &inst->Src[0].Register); |
||
790 | add_src_reg(ctx, instr, &inst->Src[0].Register); |
||
791 | |||
792 | /* RECIP_IEEE: */ |
||
793 | add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; |
||
794 | add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = |
||
795 | swiz[inst->Src[0].Register.SwizzleW]; |
||
796 | |||
797 | instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); |
||
798 | add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; |
||
799 | add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; |
||
800 | add_src_reg(ctx, instr, &inst->Src[0].Register); |
||
801 | |||
802 | coord = &tmp_src; |
||
803 | } else { |
||
804 | coord = &inst->Src[0].Register; |
||
805 | } |
||
806 | |||
807 | instr = ir2_instr_create(next_exec_cf(ctx), IR2_FETCH); |
||
808 | instr->fetch.opc = TEX_FETCH; |
||
809 | instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D); |
||
810 | assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases? |
||
811 | |||
812 | /* save off the tex fetch to be patched later with correct const_idx: */ |
||
813 | idx = ctx->so->num_tfetch_instrs++; |
||
814 | ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; |
||
815 | ctx->so->tfetch_instrs[idx].instr = instr; |
||
816 | |||
817 | add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register); |
||
818 | reg = add_src_reg(ctx, instr, coord); |
||
819 | |||
820 | /* blob compiler always sets 3rd component to same as 1st for 2d: */ |
||
821 | if (inst->Texture.Texture == TGSI_TEXTURE_2D) |
||
822 | reg->swizzle[2] = reg->swizzle[0]; |
||
823 | |||
824 | /* dst register needs to be marked for sync: */ |
||
825 | ctx->need_sync |= 1 << instr->regs[0]->num; |
||
826 | |||
827 | /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */ |
||
828 | instr->sync = true; |
||
829 | |||
830 | if (using_temp) { |
||
831 | /* texture fetch can't write directly to export, so if tgsi |
||
832 | * is telling us the dst register is in output file, we load |
||
833 | * the texture to a temp and the use ALU instruction to move |
||
834 | * to output |
||
835 | */ |
||
836 | instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, ~0); |
||
837 | |||
838 | add_dst_reg(ctx, instr, &inst->Dst[0].Register); |
||
839 | add_src_reg(ctx, instr, &tmp_src); |
||
840 | add_src_reg(ctx, instr, &tmp_src); |
||
841 | add_vector_clamp(inst, instr); |
||
842 | } |
||
843 | } |
||
844 | |||
845 | /* SGE(a,b) = GTE((b - a), 1.0, 0.0) */ |
||
846 | /* SLT(a,b) = GTE((b - a), 0.0, 1.0) */ |
||
847 | static void |
||
848 | translate_sge_slt(struct fd2_compile_context *ctx, |
||
849 | struct tgsi_full_instruction *inst, unsigned opc) |
||
850 | { |
||
851 | struct ir2_instruction *instr; |
||
852 | struct tgsi_dst_register tmp_dst; |
||
853 | struct tgsi_src_register tmp_src; |
||
854 | struct tgsi_src_register tmp_const; |
||
855 | float c0, c1; |
||
856 | |||
857 | switch (opc) { |
||
858 | default: |
||
859 | assert(0); |
||
860 | case TGSI_OPCODE_SGE: |
||
861 | c0 = 1.0; |
||
862 | c1 = 0.0; |
||
863 | break; |
||
864 | case TGSI_OPCODE_SLT: |
||
865 | c0 = 0.0; |
||
866 | c1 = 1.0; |
||
867 | break; |
||
868 | } |
||
869 | |||
870 | get_internal_temp(ctx, &tmp_dst, &tmp_src); |
||
871 | |||
872 | instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); |
||
873 | add_dst_reg(ctx, instr, &tmp_dst); |
||
874 | add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; |
||
875 | add_src_reg(ctx, instr, &inst->Src[1].Register); |
||
876 | |||
877 | instr = ir2_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0); |
||
878 | add_dst_reg(ctx, instr, &inst->Dst[0].Register); |
||
879 | /* maybe should re-arrange the syntax some day, but |
||
880 | * in assembler/disassembler and what ir.c expects |
||
881 | * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 |
||
882 | */ |
||
883 | get_immediate(ctx, &tmp_const, fui(c0)); |
||
884 | add_src_reg(ctx, instr, &tmp_const); |
||
885 | add_src_reg(ctx, instr, &tmp_src); |
||
886 | get_immediate(ctx, &tmp_const, fui(c1)); |
||
887 | add_src_reg(ctx, instr, &tmp_const); |
||
888 | } |
||
889 | |||
890 | /* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ |
||
891 | static void |
||
892 | translate_lrp(struct fd2_compile_context *ctx, |
||
893 | struct tgsi_full_instruction *inst, |
||
894 | unsigned opc) |
||
895 | { |
||
896 | struct ir2_instruction *instr; |
||
897 | struct tgsi_dst_register tmp_dst1, tmp_dst2; |
||
898 | struct tgsi_src_register tmp_src1, tmp_src2; |
||
899 | struct tgsi_src_register tmp_const; |
||
900 | |||
901 | get_internal_temp(ctx, &tmp_dst1, &tmp_src1); |
||
902 | get_internal_temp(ctx, &tmp_dst2, &tmp_src2); |
||
903 | |||
904 | get_immediate(ctx, &tmp_const, fui(1.0)); |
||
905 | |||
906 | /* tmp1 = (a * b) */ |
||
907 | instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); |
||
908 | add_dst_reg(ctx, instr, &tmp_dst1); |
||
909 | add_src_reg(ctx, instr, &inst->Src[0].Register); |
||
910 | add_src_reg(ctx, instr, &inst->Src[1].Register); |
||
911 | |||
912 | /* tmp2 = (1 - a) */ |
||
913 | instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); |
||
914 | add_dst_reg(ctx, instr, &tmp_dst2); |
||
915 | add_src_reg(ctx, instr, &tmp_const); |
||
916 | add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; |
||
917 | |||
918 | /* tmp2 = tmp2 * c */ |
||
919 | instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0); |
||
920 | add_dst_reg(ctx, instr, &tmp_dst2); |
||
921 | add_src_reg(ctx, instr, &tmp_src2); |
||
922 | add_src_reg(ctx, instr, &inst->Src[2].Register); |
||
923 | |||
924 | /* dst = tmp1 + tmp2 */ |
||
925 | instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0); |
||
926 | add_dst_reg(ctx, instr, &inst->Dst[0].Register); |
||
927 | add_src_reg(ctx, instr, &tmp_src1); |
||
928 | add_src_reg(ctx, instr, &tmp_src2); |
||
929 | } |
||
930 | |||
931 | static void |
||
932 | translate_trig(struct fd2_compile_context *ctx, |
||
933 | struct tgsi_full_instruction *inst, |
||
934 | unsigned opc) |
||
935 | { |
||
936 | struct ir2_instruction *instr; |
||
937 | struct tgsi_dst_register tmp_dst; |
||
938 | struct tgsi_src_register tmp_src; |
||
939 | struct tgsi_src_register tmp_const; |
||
940 | instr_scalar_opc_t op; |
||
941 | |||
942 | switch (opc) { |
||
943 | default: |
||
944 | assert(0); |
||
945 | case TGSI_OPCODE_SIN: |
||
946 | op = SIN; |
||
947 | break; |
||
948 | case TGSI_OPCODE_COS: |
||
949 | op = COS; |
||
950 | break; |
||
951 | } |
||
952 | |||
953 | get_internal_temp(ctx, &tmp_dst, &tmp_src); |
||
954 | |||
955 | tmp_dst.WriteMask = TGSI_WRITEMASK_X; |
||
956 | tmp_src.SwizzleX = tmp_src.SwizzleY = |
||
957 | tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X; |
||
958 | |||
959 | /* maybe should re-arrange the syntax some day, but |
||
960 | * in assembler/disassembler and what ir.c expects |
||
961 | * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rscr1 |
||
962 | */ |
||
963 | instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); |
||
964 | add_dst_reg(ctx, instr, &tmp_dst); |
||
965 | get_immediate(ctx, &tmp_const, fui(0.5)); |
||
966 | add_src_reg(ctx, instr, &tmp_const); |
||
967 | add_src_reg(ctx, instr, &inst->Src[0].Register); |
||
968 | get_immediate(ctx, &tmp_const, fui(0.159155)); |
||
969 | add_src_reg(ctx, instr, &tmp_const); |
||
970 | |||
971 | instr = ir2_instr_create_alu(next_exec_cf(ctx), FRACv, ~0); |
||
972 | add_dst_reg(ctx, instr, &tmp_dst); |
||
973 | add_src_reg(ctx, instr, &tmp_src); |
||
974 | add_src_reg(ctx, instr, &tmp_src); |
||
975 | |||
976 | instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0); |
||
977 | add_dst_reg(ctx, instr, &tmp_dst); |
||
978 | get_immediate(ctx, &tmp_const, fui(-3.141593)); |
||
979 | add_src_reg(ctx, instr, &tmp_const); |
||
980 | add_src_reg(ctx, instr, &tmp_src); |
||
981 | get_immediate(ctx, &tmp_const, fui(6.283185)); |
||
982 | add_src_reg(ctx, instr, &tmp_const); |
||
983 | |||
984 | instr = ir2_instr_create_alu(next_exec_cf(ctx), ~0, op); |
||
985 | add_regs_dummy_vector(instr); |
||
986 | add_dst_reg(ctx, instr, &inst->Dst[0].Register); |
||
987 | add_src_reg(ctx, instr, &tmp_src); |
||
988 | } |
||
989 | |||
990 | /* |
||
991 | * Main part of compiler/translator: |
||
992 | */ |
||
993 | |||
994 | static void |
||
995 | translate_instruction(struct fd2_compile_context *ctx, |
||
996 | struct tgsi_full_instruction *inst) |
||
997 | { |
||
998 | unsigned opc = inst->Instruction.Opcode; |
||
999 | struct ir2_instruction *instr; |
||
1000 | static struct ir2_cf *cf; |
||
1001 | |||
1002 | if (opc == TGSI_OPCODE_END) |
||
1003 | return; |
||
1004 | |||
1005 | if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { |
||
1006 | unsigned num = inst->Dst[0].Register.Index; |
||
1007 | /* seems like we need to ensure that position vs param/pixel |
||
1008 | * exports don't end up in the same EXEC clause.. easy way |
||
1009 | * to do this is force a new EXEC clause on first appearance |
||
1010 | * of an position or param/pixel export. |
||
1011 | */ |
||
1012 | if ((num == ctx->position) || (num == ctx->psize)) { |
||
1013 | if (ctx->num_position > 0) { |
||
1014 | ctx->cf = NULL; |
||
1015 | ir2_cf_create_alloc(ctx->so->ir, SQ_POSITION, |
||
1016 | ctx->num_position - 1); |
||
1017 | ctx->num_position = 0; |
||
1018 | } |
||
1019 | } else { |
||
1020 | if (ctx->num_param > 0) { |
||
1021 | ctx->cf = NULL; |
||
1022 | ir2_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL, |
||
1023 | ctx->num_param - 1); |
||
1024 | ctx->num_param = 0; |
||
1025 | } |
||
1026 | } |
||
1027 | } |
||
1028 | |||
1029 | cf = next_exec_cf(ctx); |
||
1030 | |||
1031 | /* TODO turn this into a table: */ |
||
1032 | switch (opc) { |
||
1033 | case TGSI_OPCODE_MOV: |
||
1034 | instr = ir2_instr_create_alu(cf, MAXv, ~0); |
||
1035 | add_regs_vector_1(ctx, inst, instr); |
||
1036 | break; |
||
1037 | case TGSI_OPCODE_RCP: |
||
1038 | instr = ir2_instr_create_alu(cf, ~0, RECIP_IEEE); |
||
1039 | add_regs_scalar_1(ctx, inst, instr); |
||
1040 | break; |
||
1041 | case TGSI_OPCODE_RSQ: |
||
1042 | instr = ir2_instr_create_alu(cf, ~0, RECIPSQ_IEEE); |
||
1043 | add_regs_scalar_1(ctx, inst, instr); |
||
1044 | break; |
||
1045 | case TGSI_OPCODE_SQRT: |
||
1046 | instr = ir2_instr_create_alu(cf, ~0, SQRT_IEEE); |
||
1047 | add_regs_scalar_1(ctx, inst, instr); |
||
1048 | break; |
||
1049 | case TGSI_OPCODE_MUL: |
||
1050 | instr = ir2_instr_create_alu(cf, MULv, ~0); |
||
1051 | add_regs_vector_2(ctx, inst, instr); |
||
1052 | break; |
||
1053 | case TGSI_OPCODE_ADD: |
||
1054 | instr = ir2_instr_create_alu(cf, ADDv, ~0); |
||
1055 | add_regs_vector_2(ctx, inst, instr); |
||
1056 | break; |
||
1057 | case TGSI_OPCODE_DP3: |
||
1058 | instr = ir2_instr_create_alu(cf, DOT3v, ~0); |
||
1059 | add_regs_vector_2(ctx, inst, instr); |
||
1060 | break; |
||
1061 | case TGSI_OPCODE_DP4: |
||
1062 | instr = ir2_instr_create_alu(cf, DOT4v, ~0); |
||
1063 | add_regs_vector_2(ctx, inst, instr); |
||
1064 | break; |
||
1065 | case TGSI_OPCODE_MIN: |
||
1066 | instr = ir2_instr_create_alu(cf, MINv, ~0); |
||
1067 | add_regs_vector_2(ctx, inst, instr); |
||
1068 | break; |
||
1069 | case TGSI_OPCODE_MAX: |
||
1070 | instr = ir2_instr_create_alu(cf, MAXv, ~0); |
||
1071 | add_regs_vector_2(ctx, inst, instr); |
||
1072 | break; |
||
1073 | case TGSI_OPCODE_SLT: |
||
1074 | case TGSI_OPCODE_SGE: |
||
1075 | translate_sge_slt(ctx, inst, opc); |
||
1076 | break; |
||
1077 | case TGSI_OPCODE_MAD: |
||
1078 | instr = ir2_instr_create_alu(cf, MULADDv, ~0); |
||
1079 | add_regs_vector_3(ctx, inst, instr); |
||
1080 | break; |
||
1081 | case TGSI_OPCODE_LRP: |
||
1082 | translate_lrp(ctx, inst, opc); |
||
1083 | break; |
||
1084 | case TGSI_OPCODE_FRC: |
||
1085 | instr = ir2_instr_create_alu(cf, FRACv, ~0); |
||
1086 | add_regs_vector_1(ctx, inst, instr); |
||
1087 | break; |
||
1088 | case TGSI_OPCODE_FLR: |
||
1089 | instr = ir2_instr_create_alu(cf, FLOORv, ~0); |
||
1090 | add_regs_vector_1(ctx, inst, instr); |
||
1091 | break; |
||
1092 | case TGSI_OPCODE_EX2: |
||
1093 | instr = ir2_instr_create_alu(cf, ~0, EXP_IEEE); |
||
1094 | add_regs_scalar_1(ctx, inst, instr); |
||
1095 | break; |
||
1096 | case TGSI_OPCODE_POW: |
||
1097 | translate_pow(ctx, inst); |
||
1098 | break; |
||
1099 | case TGSI_OPCODE_ABS: |
||
1100 | instr = ir2_instr_create_alu(cf, MAXv, ~0); |
||
1101 | add_regs_vector_1(ctx, inst, instr); |
||
1102 | instr->regs[1]->flags |= IR2_REG_NEGATE; /* src0 */ |
||
1103 | break; |
||
1104 | case TGSI_OPCODE_COS: |
||
1105 | case TGSI_OPCODE_SIN: |
||
1106 | translate_trig(ctx, inst, opc); |
||
1107 | break; |
||
1108 | case TGSI_OPCODE_TEX: |
||
1109 | case TGSI_OPCODE_TXP: |
||
1110 | translate_tex(ctx, inst, opc); |
||
1111 | break; |
||
1112 | case TGSI_OPCODE_CMP: |
||
1113 | instr = ir2_instr_create_alu(cf, CNDGTEv, ~0); |
||
1114 | add_regs_vector_3(ctx, inst, instr); |
||
1115 | // TODO this should be src0 if regs where in sane order.. |
||
1116 | instr->regs[2]->flags ^= IR2_REG_NEGATE; /* src1 */ |
||
1117 | break; |
||
1118 | case TGSI_OPCODE_IF: |
||
1119 | push_predicate(ctx, &inst->Src[0].Register); |
||
1120 | ctx->so->ir->pred = IR2_PRED_EQ; |
||
1121 | break; |
||
1122 | case TGSI_OPCODE_ELSE: |
||
1123 | ctx->so->ir->pred = IR2_PRED_NE; |
||
1124 | /* not sure if this is required in all cases, but blob compiler |
||
1125 | * won't combine EQ and NE in same CF: |
||
1126 | */ |
||
1127 | ctx->cf = NULL; |
||
1128 | break; |
||
1129 | case TGSI_OPCODE_ENDIF: |
||
1130 | pop_predicate(ctx); |
||
1131 | break; |
||
1132 | case TGSI_OPCODE_F2I: |
||
1133 | instr = ir2_instr_create_alu(cf, TRUNCv, ~0); |
||
1134 | add_regs_vector_1(ctx, inst, instr); |
||
1135 | break; |
||
1136 | default: |
||
1137 | DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc)); |
||
1138 | tgsi_dump(ctx->so->tokens, 0); |
||
1139 | assert(0); |
||
1140 | break; |
||
1141 | } |
||
1142 | |||
1143 | /* internal temporaries are only valid for the duration of a single |
||
1144 | * TGSI instruction: |
||
1145 | */ |
||
1146 | ctx->num_internal_temps = 0; |
||
1147 | } |
||
1148 | |||
1149 | static void |
||
1150 | compile_instructions(struct fd2_compile_context *ctx) |
||
1151 | { |
||
1152 | while (!tgsi_parse_end_of_tokens(&ctx->parser)) { |
||
1153 | tgsi_parse_token(&ctx->parser); |
||
1154 | |||
1155 | switch (ctx->parser.FullToken.Token.Type) { |
||
1156 | case TGSI_TOKEN_TYPE_INSTRUCTION: |
||
1157 | translate_instruction(ctx, |
||
1158 | &ctx->parser.FullToken.FullInstruction); |
||
1159 | break; |
||
1160 | default: |
||
1161 | break; |
||
1162 | } |
||
1163 | } |
||
1164 | |||
1165 | ctx->cf->cf_type = EXEC_END; |
||
1166 | } |
||
1167 | |||
1168 | int |
||
1169 | fd2_compile_shader(struct fd_program_stateobj *prog, |
||
1170 | struct fd2_shader_stateobj *so) |
||
1171 | { |
||
1172 | struct fd2_compile_context ctx; |
||
1173 | |||
1174 | ir2_shader_destroy(so->ir); |
||
1175 | so->ir = ir2_shader_create(); |
||
1176 | so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0; |
||
1177 | |||
1178 | if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK) |
||
1179 | return -1; |
||
1180 | |||
1181 | if (ctx.type == TGSI_PROCESSOR_VERTEX) { |
||
1182 | compile_vtx_fetch(&ctx); |
||
1183 | } else if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { |
||
1184 | prog->num_exports = 0; |
||
1185 | memset(prog->export_linkage, 0xff, |
||
1186 | sizeof(prog->export_linkage)); |
||
1187 | } |
||
1188 | |||
1189 | compile_instructions(&ctx); |
||
1190 | |||
1191 | compile_free(&ctx); |
||
1192 | |||
1193 | return 0; |
||
1194 | }><>=>>><>><>><>> |
||
1195 |