Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5563 | serge | 1 | /* |
2 | * Copyright 2013 Vadim Girlin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Vadim Girlin |
||
25 | */ |
||
26 | |||
27 | #define FBC_DEBUG 0 |
||
28 | |||
29 | #if FBC_DEBUG |
||
30 | #define FBC_DUMP(q) do { q } while (0) |
||
31 | #else |
||
32 | #define FBC_DUMP(q) |
||
33 | #endif |
||
34 | |||
35 | #include "sb_bc.h" |
||
36 | #include "sb_shader.h" |
||
37 | #include "sb_pass.h" |
||
38 | |||
39 | namespace r600_sb { |
||
40 | |||
41 | int bc_finalizer::run() { |
||
42 | |||
43 | regions_vec &rv = sh.get_regions(); |
||
44 | |||
45 | for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E; |
||
46 | ++I) { |
||
47 | region_node *r = *I; |
||
48 | |||
49 | assert(r); |
||
50 | |||
51 | bool loop = r->is_loop(); |
||
52 | |||
53 | if (loop) |
||
54 | finalize_loop(r); |
||
55 | else |
||
56 | finalize_if(r); |
||
57 | |||
58 | r->expand(); |
||
59 | } |
||
60 | |||
61 | run_on(sh.root); |
||
62 | |||
63 | cf_peephole(); |
||
64 | |||
65 | // workaround for some problems on r6xx/7xx |
||
66 | // add ALU NOP to each vertex shader |
||
67 | if (!ctx.is_egcm() && sh.target == TARGET_VS) { |
||
68 | cf_node *c = sh.create_clause(NST_ALU_CLAUSE); |
||
69 | |||
70 | alu_group_node *g = sh.create_alu_group(); |
||
71 | |||
72 | alu_node *a = sh.create_alu(); |
||
73 | a->bc.set_op(ALU_OP0_NOP); |
||
74 | a->bc.last = 1; |
||
75 | |||
76 | g->push_back(a); |
||
77 | c->push_back(g); |
||
78 | |||
79 | sh.root->push_back(c); |
||
80 | |||
81 | c = sh.create_cf(CF_OP_NOP); |
||
82 | sh.root->push_back(c); |
||
83 | |||
84 | last_cf = c; |
||
85 | } |
||
86 | |||
87 | if (last_cf->bc.op_ptr->flags & CF_ALU) { |
||
88 | last_cf = sh.create_cf(CF_OP_NOP); |
||
89 | sh.root->push_back(last_cf); |
||
90 | } |
||
91 | |||
92 | if (ctx.is_cayman()) |
||
93 | last_cf->insert_after(sh.create_cf(CF_OP_CF_END)); |
||
94 | else |
||
95 | last_cf->bc.end_of_program = 1; |
||
96 | |||
97 | for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) { |
||
98 | cf_node *le = last_export[t]; |
||
99 | if (le) |
||
100 | le->bc.set_op(CF_OP_EXPORT_DONE); |
||
101 | } |
||
102 | |||
103 | sh.ngpr = ngpr; |
||
104 | sh.nstack = nstack; |
||
105 | return 0; |
||
106 | } |
||
107 | |||
108 | void bc_finalizer::finalize_loop(region_node* r) { |
||
109 | |||
110 | cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10); |
||
111 | cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END); |
||
112 | |||
113 | loop_start->jump_after(loop_end); |
||
114 | loop_end->jump_after(loop_start); |
||
115 | |||
116 | for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end(); |
||
117 | I != E; ++I) { |
||
118 | depart_node *dep = *I; |
||
119 | cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK); |
||
120 | loop_break->jump(loop_end); |
||
121 | dep->push_back(loop_break); |
||
122 | dep->expand(); |
||
123 | } |
||
124 | |||
125 | // FIXME produces unnecessary LOOP_CONTINUE |
||
126 | for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end(); |
||
127 | I != E; ++I) { |
||
128 | repeat_node *rep = *I; |
||
129 | if (!(rep->parent == r && rep->prev == NULL)) { |
||
130 | cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE); |
||
131 | loop_cont->jump(loop_end); |
||
132 | rep->push_back(loop_cont); |
||
133 | } |
||
134 | rep->expand(); |
||
135 | } |
||
136 | |||
137 | r->push_front(loop_start); |
||
138 | r->push_back(loop_end); |
||
139 | } |
||
140 | |||
141 | void bc_finalizer::finalize_if(region_node* r) { |
||
142 | |||
143 | update_nstack(r); |
||
144 | |||
145 | // expecting the following control flow structure here: |
||
146 | // - region |
||
147 | // { |
||
148 | // - depart/repeat 1 (it may be depart/repeat for some outer region) |
||
149 | // { |
||
150 | // - if |
||
151 | // { |
||
152 | // - depart/repeat 2 (possibly for outer region) |
||
153 | // { |
||
154 | // - some optional code |
||
155 | // } |
||
156 | // } |
||
157 | // - optional |
||
158 | // } |
||
159 | // } |
||
160 | |||
161 | container_node *repdep1 = static_cast |
||
162 | assert(repdep1->is_depart() || repdep1->is_repeat()); |
||
163 | |||
164 | if_node *n_if = static_cast |
||
165 | |||
166 | if (n_if) { |
||
167 | |||
168 | |||
169 | assert(n_if->is_if()); |
||
170 | |||
171 | container_node *repdep2 = static_cast |
||
172 | assert(repdep2->is_depart() || repdep2->is_repeat()); |
||
173 | |||
174 | cf_node *if_jump = sh.create_cf(CF_OP_JUMP); |
||
175 | cf_node *if_pop = sh.create_cf(CF_OP_POP); |
||
176 | |||
177 | if_pop->bc.pop_count = 1; |
||
178 | if_pop->jump_after(if_pop); |
||
179 | |||
180 | r->push_front(if_jump); |
||
181 | r->push_back(if_pop); |
||
182 | |||
183 | bool has_else = n_if->next; |
||
184 | |||
185 | if (has_else) { |
||
186 | cf_node *nelse = sh.create_cf(CF_OP_ELSE); |
||
187 | n_if->insert_after(nelse); |
||
188 | if_jump->jump(nelse); |
||
189 | nelse->jump_after(if_pop); |
||
190 | nelse->bc.pop_count = 1; |
||
191 | |||
192 | } else { |
||
193 | if_jump->jump_after(if_pop); |
||
194 | if_jump->bc.pop_count = 1; |
||
195 | } |
||
196 | |||
197 | n_if->expand(); |
||
198 | } |
||
199 | |||
200 | for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end(); |
||
201 | I != E; ++I) { |
||
202 | (*I)->expand(); |
||
203 | } |
||
204 | r->departs.clear(); |
||
205 | assert(r->repeats.empty()); |
||
206 | } |
||
207 | |||
208 | void bc_finalizer::run_on(container_node* c) { |
||
209 | |||
210 | for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { |
||
211 | node *n = *I; |
||
212 | |||
213 | if (n->is_alu_group()) { |
||
214 | finalize_alu_group(static_cast |
||
215 | } else { |
||
216 | if (n->is_fetch_inst()) { |
||
217 | finalize_fetch(static_cast |
||
218 | } else if (n->is_cf_inst()) { |
||
219 | finalize_cf(static_cast |
||
220 | } else if (n->is_alu_clause()) { |
||
221 | |||
222 | } else if (n->is_fetch_clause()) { |
||
223 | |||
224 | } else { |
||
225 | assert(!"unexpected node"); |
||
226 | } |
||
227 | |||
228 | if (n->is_container()) |
||
229 | run_on(static_cast |
||
230 | } |
||
231 | } |
||
232 | } |
||
233 | |||
234 | void bc_finalizer::finalize_alu_group(alu_group_node* g) { |
||
235 | |||
236 | alu_node *last = NULL; |
||
237 | |||
238 | for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { |
||
239 | alu_node *n = static_cast |
||
240 | unsigned slot = n->bc.slot; |
||
241 | |||
242 | value *d = n->dst.empty() ? NULL : n->dst[0]; |
||
243 | |||
244 | if (d && d->is_special_reg()) { |
||
245 | assert(n->bc.op_ptr->flags & AF_MOVA); |
||
246 | d = NULL; |
||
247 | } |
||
248 | |||
249 | sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0); |
||
250 | |||
251 | if (d) { |
||
252 | assert(fdst.chan() == slot || slot == SLOT_TRANS); |
||
253 | } |
||
254 | |||
255 | n->bc.dst_gpr = fdst.sel(); |
||
256 | n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0; |
||
257 | |||
258 | |||
259 | if (d && d->is_rel() && d->rel && !d->rel->is_const()) { |
||
260 | n->bc.dst_rel = 1; |
||
261 | update_ngpr(d->array->gpr.sel() + d->array->array_size -1); |
||
262 | } else { |
||
263 | n->bc.dst_rel = 0; |
||
264 | } |
||
265 | |||
266 | n->bc.write_mask = d != NULL; |
||
267 | n->bc.last = 0; |
||
268 | |||
269 | if (n->bc.op_ptr->flags & AF_PRED) { |
||
270 | n->bc.update_pred = (n->dst[1] != NULL); |
||
271 | n->bc.update_exec_mask = (n->dst[2] != NULL); |
||
272 | } |
||
273 | |||
274 | // FIXME handle predication here |
||
275 | n->bc.pred_sel = PRED_SEL_OFF; |
||
276 | |||
277 | update_ngpr(n->bc.dst_gpr); |
||
278 | |||
279 | finalize_alu_src(g, n); |
||
280 | |||
281 | last = n; |
||
282 | } |
||
283 | |||
284 | last->bc.last = 1; |
||
285 | } |
||
286 | |||
287 | void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { |
||
288 | vvec &sv = a->src; |
||
289 | |||
290 | FBC_DUMP( |
||
291 | sblog << "finalize_alu_src: "; |
||
292 | dump::dump_op(a); |
||
293 | sblog << "\n"; |
||
294 | ); |
||
295 | |||
296 | unsigned si = 0; |
||
297 | |||
298 | for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) { |
||
299 | value *v = *I; |
||
300 | assert(v); |
||
301 | |||
302 | bc_alu_src &src = a->bc.src[si]; |
||
303 | sel_chan sc; |
||
304 | src.rel = 0; |
||
305 | |||
306 | sel_chan gpr; |
||
307 | |||
308 | switch (v->kind) { |
||
309 | case VLK_REL_REG: |
||
310 | sc = v->get_final_gpr(); |
||
311 | src.sel = sc.sel(); |
||
312 | src.chan = sc.chan(); |
||
313 | if (!v->rel->is_const()) { |
||
314 | src.rel = 1; |
||
315 | update_ngpr(v->array->gpr.sel() + v->array->array_size -1); |
||
316 | } else |
||
317 | src.rel = 0; |
||
318 | |||
319 | break; |
||
320 | case VLK_REG: |
||
321 | gpr = v->get_final_gpr(); |
||
322 | src.sel = gpr.sel(); |
||
323 | src.chan = gpr.chan(); |
||
324 | update_ngpr(src.sel); |
||
325 | break; |
||
326 | case VLK_TEMP: |
||
327 | src.sel = v->gpr.sel(); |
||
328 | src.chan = v->gpr.chan(); |
||
329 | update_ngpr(src.sel); |
||
330 | break; |
||
331 | case VLK_UNDEF: |
||
332 | case VLK_CONST: { |
||
333 | literal lv = v->literal_value; |
||
334 | src.chan = 0; |
||
335 | |||
336 | if (lv == literal(0)) |
||
337 | src.sel = ALU_SRC_0; |
||
338 | else if (lv == literal(0.5f)) |
||
339 | src.sel = ALU_SRC_0_5; |
||
340 | else if (lv == literal(1.0f)) |
||
341 | src.sel = ALU_SRC_1; |
||
342 | else if (lv == literal(1)) |
||
343 | src.sel = ALU_SRC_1_INT; |
||
344 | else if (lv == literal(-1)) |
||
345 | src.sel = ALU_SRC_M_1_INT; |
||
346 | else { |
||
347 | src.sel = ALU_SRC_LITERAL; |
||
348 | src.chan = g->literal_chan(lv); |
||
349 | src.value = lv; |
||
350 | } |
||
351 | break; |
||
352 | } |
||
353 | case VLK_KCACHE: { |
||
354 | cf_node *clause = static_cast |
||
355 | assert(clause->is_alu_clause()); |
||
356 | sel_chan k = translate_kcache(clause, v); |
||
357 | |||
358 | assert(k && "kcache translation failed"); |
||
359 | |||
360 | src.sel = k.sel(); |
||
361 | src.chan = k.chan(); |
||
362 | break; |
||
363 | } |
||
364 | case VLK_PARAM: |
||
365 | case VLK_SPECIAL_CONST: |
||
366 | src.sel = v->select.sel(); |
||
367 | src.chan = v->select.chan(); |
||
368 | break; |
||
369 | default: |
||
370 | assert(!"unknown value kind"); |
||
371 | break; |
||
372 | } |
||
373 | } |
||
374 | |||
375 | while (si < 3) { |
||
376 | a->bc.src[si++].sel = 0; |
||
377 | } |
||
378 | } |
||
379 | |||
380 | void bc_finalizer::emit_set_grad(fetch_node* f) { |
||
381 | |||
382 | assert(f->src.size() == 12); |
||
383 | unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H }; |
||
384 | |||
385 | unsigned arg_start = 0; |
||
386 | |||
387 | for (unsigned op = 0; op < 2; ++op) { |
||
388 | fetch_node *n = sh.create_fetch(); |
||
389 | n->bc.set_op(ops[op]); |
||
390 | |||
391 | // FIXME extract this loop into a separate method and reuse it |
||
392 | |||
393 | int reg = -1; |
||
394 | |||
395 | arg_start += 4; |
||
396 | |||
397 | for (unsigned chan = 0; chan < 4; ++chan) { |
||
398 | |||
399 | n->bc.dst_sel[chan] = SEL_MASK; |
||
400 | |||
401 | unsigned sel = SEL_MASK; |
||
402 | |||
403 | value *v = f->src[arg_start + chan]; |
||
404 | |||
405 | if (!v || v->is_undef()) { |
||
406 | sel = SEL_MASK; |
||
407 | } else if (v->is_const()) { |
||
408 | literal l = v->literal_value; |
||
409 | if (l == literal(0)) |
||
410 | sel = SEL_0; |
||
411 | else if (l == literal(1.0f)) |
||
412 | sel = SEL_1; |
||
413 | else { |
||
414 | sblog << "invalid fetch constant operand " << chan << " "; |
||
415 | dump::dump_op(f); |
||
416 | sblog << "\n"; |
||
417 | abort(); |
||
418 | } |
||
419 | |||
420 | } else if (v->is_any_gpr()) { |
||
421 | unsigned vreg = v->gpr.sel(); |
||
422 | unsigned vchan = v->gpr.chan(); |
||
423 | |||
424 | if (reg == -1) |
||
425 | reg = vreg; |
||
426 | else if ((unsigned)reg != vreg) { |
||
427 | sblog << "invalid fetch source operand " << chan << " "; |
||
428 | dump::dump_op(f); |
||
429 | sblog << "\n"; |
||
430 | abort(); |
||
431 | } |
||
432 | |||
433 | sel = vchan; |
||
434 | |||
435 | } else { |
||
436 | sblog << "invalid fetch source operand " << chan << " "; |
||
437 | dump::dump_op(f); |
||
438 | sblog << "\n"; |
||
439 | abort(); |
||
440 | } |
||
441 | |||
442 | n->bc.src_sel[chan] = sel; |
||
443 | } |
||
444 | |||
445 | if (reg >= 0) |
||
446 | update_ngpr(reg); |
||
447 | |||
448 | n->bc.src_gpr = reg >= 0 ? reg : 0; |
||
449 | |||
450 | f->insert_before(n); |
||
451 | } |
||
452 | |||
453 | } |
||
454 | |||
455 | void bc_finalizer::finalize_fetch(fetch_node* f) { |
||
456 | |||
457 | int reg = -1; |
||
458 | |||
459 | // src |
||
460 | |||
461 | unsigned src_count = 4; |
||
462 | |||
463 | unsigned flags = f->bc.op_ptr->flags; |
||
464 | |||
465 | if (flags & FF_VTX) { |
||
466 | src_count = 1; |
||
467 | } else if (flags & FF_USEGRAD) { |
||
468 | emit_set_grad(f); |
||
469 | } |
||
470 | |||
471 | for (unsigned chan = 0; chan < src_count; ++chan) { |
||
472 | |||
473 | unsigned sel = f->bc.src_sel[chan]; |
||
474 | |||
475 | if (sel > SEL_W) |
||
476 | continue; |
||
477 | |||
478 | value *v = f->src[chan]; |
||
479 | |||
480 | if (v->is_undef()) { |
||
481 | sel = SEL_MASK; |
||
482 | } else if (v->is_const()) { |
||
483 | literal l = v->literal_value; |
||
484 | if (l == literal(0)) |
||
485 | sel = SEL_0; |
||
486 | else if (l == literal(1.0f)) |
||
487 | sel = SEL_1; |
||
488 | else { |
||
489 | sblog << "invalid fetch constant operand " << chan << " "; |
||
490 | dump::dump_op(f); |
||
491 | sblog << "\n"; |
||
492 | abort(); |
||
493 | } |
||
494 | |||
495 | } else if (v->is_any_gpr()) { |
||
496 | unsigned vreg = v->gpr.sel(); |
||
497 | unsigned vchan = v->gpr.chan(); |
||
498 | |||
499 | if (reg == -1) |
||
500 | reg = vreg; |
||
501 | else if ((unsigned)reg != vreg) { |
||
502 | sblog << "invalid fetch source operand " << chan << " "; |
||
503 | dump::dump_op(f); |
||
504 | sblog << "\n"; |
||
505 | abort(); |
||
506 | } |
||
507 | |||
508 | sel = vchan; |
||
509 | |||
510 | } else { |
||
511 | sblog << "invalid fetch source operand " << chan << " "; |
||
512 | dump::dump_op(f); |
||
513 | sblog << "\n"; |
||
514 | abort(); |
||
515 | } |
||
516 | |||
517 | f->bc.src_sel[chan] = sel; |
||
518 | } |
||
519 | |||
520 | if (reg >= 0) |
||
521 | update_ngpr(reg); |
||
522 | |||
523 | f->bc.src_gpr = reg >= 0 ? reg : 0; |
||
524 | |||
525 | // dst |
||
526 | |||
527 | reg = -1; |
||
528 | |||
529 | unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK}; |
||
530 | |||
531 | for (unsigned chan = 0; chan < 4; ++chan) { |
||
532 | |||
533 | unsigned sel = f->bc.dst_sel[chan]; |
||
534 | |||
535 | if (sel == SEL_MASK) |
||
536 | continue; |
||
537 | |||
538 | value *v = f->dst[chan]; |
||
539 | if (!v) |
||
540 | continue; |
||
541 | |||
542 | if (v->is_any_gpr()) { |
||
543 | unsigned vreg = v->gpr.sel(); |
||
544 | unsigned vchan = v->gpr.chan(); |
||
545 | |||
546 | if (reg == -1) |
||
547 | reg = vreg; |
||
548 | else if ((unsigned)reg != vreg) { |
||
549 | sblog << "invalid fetch dst operand " << chan << " "; |
||
550 | dump::dump_op(f); |
||
551 | sblog << "\n"; |
||
552 | abort(); |
||
553 | } |
||
554 | |||
555 | dst_swz[vchan] = sel; |
||
556 | |||
557 | } else { |
||
558 | sblog << "invalid fetch dst operand " << chan << " "; |
||
559 | dump::dump_op(f); |
||
560 | sblog << "\n"; |
||
561 | abort(); |
||
562 | } |
||
563 | |||
564 | } |
||
565 | |||
566 | for (unsigned i = 0; i < 4; ++i) |
||
567 | f->bc.dst_sel[i] = dst_swz[i]; |
||
568 | |||
569 | assert(reg >= 0); |
||
570 | |||
571 | if (reg >= 0) |
||
572 | update_ngpr(reg); |
||
573 | |||
574 | f->bc.dst_gpr = reg >= 0 ? reg : 0; |
||
575 | } |
||
576 | |||
577 | void bc_finalizer::finalize_cf(cf_node* c) { |
||
578 | |||
579 | unsigned flags = c->bc.op_ptr->flags; |
||
580 | |||
581 | if (flags & CF_CALL) { |
||
582 | update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2); |
||
583 | } |
||
584 | |||
585 | c->bc.end_of_program = 0; |
||
586 | last_cf = c; |
||
587 | |||
588 | if (flags & CF_EXP) { |
||
589 | c->bc.set_op(CF_OP_EXPORT); |
||
590 | last_export[c->bc.type] = c; |
||
591 | |||
592 | int reg = -1; |
||
593 | |||
594 | for (unsigned chan = 0; chan < 4; ++chan) { |
||
595 | |||
596 | unsigned sel = c->bc.sel[chan]; |
||
597 | |||
598 | if (sel > SEL_W) |
||
599 | continue; |
||
600 | |||
601 | value *v = c->src[chan]; |
||
602 | |||
603 | if (v->is_undef()) { |
||
604 | sel = SEL_MASK; |
||
605 | } else if (v->is_const()) { |
||
606 | literal l = v->literal_value; |
||
607 | if (l == literal(0)) |
||
608 | sel = SEL_0; |
||
609 | else if (l == literal(1.0f)) |
||
610 | sel = SEL_1; |
||
611 | else { |
||
612 | sblog << "invalid export constant operand " << chan << " "; |
||
613 | dump::dump_op(c); |
||
614 | sblog << "\n"; |
||
615 | abort(); |
||
616 | } |
||
617 | |||
618 | } else if (v->is_any_gpr()) { |
||
619 | unsigned vreg = v->gpr.sel(); |
||
620 | unsigned vchan = v->gpr.chan(); |
||
621 | |||
622 | if (reg == -1) |
||
623 | reg = vreg; |
||
624 | else if ((unsigned)reg != vreg) { |
||
625 | sblog << "invalid export source operand " << chan << " "; |
||
626 | dump::dump_op(c); |
||
627 | sblog << "\n"; |
||
628 | abort(); |
||
629 | } |
||
630 | |||
631 | sel = vchan; |
||
632 | |||
633 | } else { |
||
634 | sblog << "invalid export source operand " << chan << " "; |
||
635 | dump::dump_op(c); |
||
636 | sblog << "\n"; |
||
637 | abort(); |
||
638 | } |
||
639 | |||
640 | c->bc.sel[chan] = sel; |
||
641 | } |
||
642 | |||
643 | if (reg >= 0) |
||
644 | update_ngpr(reg); |
||
645 | |||
646 | c->bc.rw_gpr = reg >= 0 ? reg : 0; |
||
647 | |||
648 | } else if (flags & CF_MEM) { |
||
649 | |||
650 | int reg = -1; |
||
651 | unsigned mask = 0; |
||
652 | |||
653 | for (unsigned chan = 0; chan < 4; ++chan) { |
||
654 | value *v = c->src[chan]; |
||
655 | if (!v || v->is_undef()) |
||
656 | continue; |
||
657 | |||
658 | if (!v->is_any_gpr() || v->gpr.chan() != chan) { |
||
659 | sblog << "invalid source operand " << chan << " "; |
||
660 | dump::dump_op(c); |
||
661 | sblog << "\n"; |
||
662 | abort(); |
||
663 | } |
||
664 | unsigned vreg = v->gpr.sel(); |
||
665 | if (reg == -1) |
||
666 | reg = vreg; |
||
667 | else if ((unsigned)reg != vreg) { |
||
668 | sblog << "invalid source operand " << chan << " "; |
||
669 | dump::dump_op(c); |
||
670 | sblog << "\n"; |
||
671 | abort(); |
||
672 | } |
||
673 | |||
674 | mask |= (1 << chan); |
||
675 | } |
||
676 | |||
677 | assert(reg >= 0 && mask); |
||
678 | |||
679 | if (reg >= 0) |
||
680 | update_ngpr(reg); |
||
681 | |||
682 | c->bc.rw_gpr = reg >= 0 ? reg : 0; |
||
683 | c->bc.comp_mask = mask; |
||
684 | |||
685 | if ((flags & CF_RAT) && (c->bc.type & 1)) { |
||
686 | |||
687 | reg = -1; |
||
688 | |||
689 | for (unsigned chan = 0; chan < 4; ++chan) { |
||
690 | value *v = c->src[4 + chan]; |
||
691 | if (!v || v->is_undef()) |
||
692 | continue; |
||
693 | |||
694 | if (!v->is_any_gpr() || v->gpr.chan() != chan) { |
||
695 | sblog << "invalid source operand " << chan << " "; |
||
696 | dump::dump_op(c); |
||
697 | sblog << "\n"; |
||
698 | abort(); |
||
699 | } |
||
700 | unsigned vreg = v->gpr.sel(); |
||
701 | if (reg == -1) |
||
702 | reg = vreg; |
||
703 | else if ((unsigned)reg != vreg) { |
||
704 | sblog << "invalid source operand " << chan << " "; |
||
705 | dump::dump_op(c); |
||
706 | sblog << "\n"; |
||
707 | abort(); |
||
708 | } |
||
709 | } |
||
710 | |||
711 | assert(reg >= 0); |
||
712 | |||
713 | if (reg >= 0) |
||
714 | update_ngpr(reg); |
||
715 | |||
716 | c->bc.index_gpr = reg >= 0 ? reg : 0; |
||
717 | } |
||
718 | |||
719 | |||
720 | |||
721 | } else { |
||
722 | |||
723 | #if 0 |
||
724 | if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) { |
||
725 | c->bc.valid_pixel_mode = 1; |
||
726 | } |
||
727 | #endif |
||
728 | |||
729 | } |
||
730 | } |
||
731 | |||
732 | sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) { |
||
733 | unsigned sel = v->select.sel(); |
||
734 | unsigned bank = sel >> 12; |
||
735 | unsigned chan = v->select.chan(); |
||
736 | static const unsigned kc_base[] = {128, 160, 256, 288}; |
||
737 | |||
738 | sel &= 4095; |
||
739 | |||
740 | unsigned line = sel >> 4; |
||
741 | |||
742 | for (unsigned k = 0; k < 4; ++k) { |
||
743 | bc_kcache &kc = alu->bc.kc[k]; |
||
744 | |||
745 | if (kc.mode == KC_LOCK_NONE) |
||
746 | break; |
||
747 | |||
748 | if (kc.bank == bank && (kc.addr == line || |
||
749 | (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) { |
||
750 | |||
751 | sel = kc_base[k] + (sel - (kc.addr << 4)); |
||
752 | |||
753 | return sel_chan(sel, chan); |
||
754 | } |
||
755 | } |
||
756 | |||
757 | assert(!"kcache translation error"); |
||
758 | return 0; |
||
759 | } |
||
760 | |||
761 | void bc_finalizer::update_ngpr(unsigned gpr) { |
||
762 | if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr) |
||
763 | ngpr = gpr + 1; |
||
764 | } |
||
765 | |||
766 | void bc_finalizer::update_nstack(region_node* r, unsigned add) { |
||
767 | unsigned loops = 0; |
||
768 | unsigned ifs = 0; |
||
769 | |||
770 | while (r) { |
||
771 | if (r->is_loop()) |
||
772 | ++loops; |
||
773 | else |
||
774 | ++ifs; |
||
775 | |||
776 | r = r->get_parent_region(); |
||
777 | } |
||
778 | |||
779 | unsigned stack_elements = (loops * ctx.stack_entry_size) + ifs + add; |
||
780 | |||
781 | // FIXME calculate more precisely |
||
782 | if (ctx.is_evergreen()) { |
||
783 | ++stack_elements; |
||
784 | } else { |
||
785 | stack_elements += 2; |
||
786 | if (ctx.is_cayman()) |
||
787 | ++stack_elements; |
||
788 | } |
||
789 | |||
790 | unsigned stack_entries = (stack_elements + 3) >> 2; |
||
791 | |||
792 | if (nstack < stack_entries) |
||
793 | nstack = stack_entries; |
||
794 | } |
||
795 | |||
796 | void bc_finalizer::cf_peephole() { |
||
797 | |||
798 | for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E; |
||
799 | I = N) { |
||
800 | N = I; ++N; |
||
801 | |||
802 | cf_node *c = static_cast |
||
803 | |||
804 | if (c->jump_after_target) { |
||
805 | c->jump_target = static_cast |
||
806 | c->jump_after_target = false; |
||
807 | } |
||
808 | |||
809 | if (c->is_cf_op(CF_OP_POP)) { |
||
810 | node *p = c->prev; |
||
811 | if (p->is_alu_clause()) { |
||
812 | cf_node *a = static_cast |
||
813 | |||
814 | if (a->bc.op == CF_OP_ALU) { |
||
815 | a->bc.set_op(CF_OP_ALU_POP_AFTER); |
||
816 | c->remove(); |
||
817 | } |
||
818 | } |
||
819 | } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) { |
||
820 | // if JUMP is immediately followed by its jump target, |
||
821 | // then JUMP is useless and we can eliminate it |
||
822 | c->remove(); |
||
823 | } |
||
824 | } |
||
825 | } |
||
826 | |||
827 | } // namespace r600_sb>>><>>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>>>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>>>>><>><>>> |