/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */
|
// Compile-time switch for this file's debug output: set to 1 to enable the
// statements passed to FBC_DUMP().
#define FBC_DEBUG 0

// FBC_DUMP(q) expands to nothing when FBC_DEBUG is 0; otherwise it expands to
// the statements `q` wrapped in a do { ... } while (0) block.
#if !FBC_DEBUG
#define FBC_DUMP(q)
#else
#define FBC_DUMP(q) do { q } while (0)
#endif
|
#include "sb_bc.h" |
#include "sb_shader.h" |
#include "sb_pass.h" |
|
namespace r600_sb { |
|
int bc_finalizer::run() { |
|
regions_vec &rv = sh.get_regions(); |
|
for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E; |
++I) { |
region_node *r = *I; |
|
assert(r); |
|
bool loop = r->is_loop(); |
|
if (loop) |
finalize_loop(r); |
else |
finalize_if(r); |
|
r->expand(); |
} |
|
run_on(sh.root); |
|
cf_peephole(); |
|
// workaround for some problems on r6xx/7xx |
// add ALU NOP to each vertex shader |
if (!ctx.is_egcm() && sh.target == TARGET_VS) { |
cf_node *c = sh.create_clause(NST_ALU_CLAUSE); |
|
alu_group_node *g = sh.create_alu_group(); |
|
alu_node *a = sh.create_alu(); |
a->bc.set_op(ALU_OP0_NOP); |
a->bc.last = 1; |
|
g->push_back(a); |
c->push_back(g); |
|
sh.root->push_back(c); |
|
c = sh.create_cf(CF_OP_NOP); |
sh.root->push_back(c); |
|
last_cf = c; |
} |
|
if (last_cf->bc.op_ptr->flags & CF_ALU) { |
last_cf = sh.create_cf(CF_OP_NOP); |
sh.root->push_back(last_cf); |
} |
|
if (ctx.is_cayman()) |
last_cf->insert_after(sh.create_cf(CF_OP_CF_END)); |
else |
last_cf->bc.end_of_program = 1; |
|
for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) { |
cf_node *le = last_export[t]; |
if (le) |
le->bc.set_op(CF_OP_EXPORT_DONE); |
} |
|
sh.ngpr = ngpr; |
sh.nstack = nstack; |
return 0; |
} |
|
/**
 * Lowers a loop region to explicit CF instructions.
 *
 * LOOP_START_DX10 is pushed to the front of the region and LOOP_END to its
 * back, each jumping after the other. Every depart of the region receives a
 * LOOP_BREAK and every repeat (with one exception, see below) receives a
 * LOOP_CONTINUE; both target LOOP_END. Departs and repeats are expanded.
 *
 * @param r the loop region to lower.
 */
void bc_finalizer::finalize_loop(region_node* r) {

	cf_node *head = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *tail = sh.create_cf(CF_OP_LOOP_END);

	head->jump_after(tail);
	tail->jump_after(head);

	for (depart_vec::iterator d = r->departs.begin(),
			d_end = r->departs.end(); d != d_end; ++d) {
		depart_node *depart = *d;

		cf_node *brk = sh.create_cf(CF_OP_LOOP_BREAK);
		brk->jump(tail);

		depart->push_back(brk);
		depart->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator p = r->repeats.begin(),
			p_end = r->repeats.end(); p != p_end; ++p) {
		repeat_node *repeat = *p;

		// a repeat that is the first child of this very region gets no
		// LOOP_CONTINUE
		const bool needs_continue =
				repeat->parent != r || repeat->prev != NULL;

		if (needs_continue) {
			cf_node *cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			cont->jump(tail);
			repeat->push_back(cont);
		}

		repeat->expand();
	}

	r->push_front(head);
	r->push_back(tail);
}
|
/**
 * Lowers a non-loop region to JUMP / ELSE / POP CF instructions.
 *
 * The stack size estimate is updated for the region first. The region is
 * expected to have this shape:
 *
 *   region
 *     depart/repeat 1 (may belong to some outer region)
 *       if
 *         depart/repeat 2 (possibly for an outer region)
 *           optional code
 *       optional "else" code
 *
 * If the if-node is present, JUMP is pushed to the front of the region and
 * POP to its back; an ELSE is inserted right after the if-node when anything
 * follows it. Finally all departs of the region are expanded and the depart
 * list is cleared.
 *
 * @param r the region to lower; it must not have repeats left.
 */
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	container_node *outer = static_cast<container_node*>(r->first);
	assert(outer->is_depart() || outer->is_repeat());

	if_node *cond = static_cast<if_node*>(outer->first);

	if (cond) {
		assert(cond->is_if());

		container_node *inner = static_cast<container_node*>(cond->first);
		assert(inner->is_depart() || inner->is_repeat());

		cf_node *jump = sh.create_cf(CF_OP_JUMP);
		cf_node *pop = sh.create_cf(CF_OP_POP);

		pop->bc.pop_count = 1;
		pop->jump_after(pop);

		r->push_front(jump);
		r->push_back(pop);

		if (!cond->next) {
			// no else part: JUMP goes past the POP and pops by itself
			jump->jump_after(pop);
			jump->bc.pop_count = 1;
		} else {
			// else part present: JUMP targets ELSE, ELSE goes past the POP
			cf_node *else_cf = sh.create_cf(CF_OP_ELSE);
			cond->insert_after(else_cf);
			jump->jump(else_cf);
			else_cf->jump_after(pop);
			else_cf->bc.pop_count = 1;
		}

		cond->expand();
	}

	for (depart_vec::iterator d = r->departs.begin(),
			d_end = r->departs.end(); d != d_end; ++d) {
		depart_node *depart = *d;
		depart->expand();
	}

	r->departs.clear();
	assert(r->repeats.empty());
}
|
/**
 * Recursively finalizes all nodes inside a container.
 *
 * ALU groups are finalized as a whole and not descended into. Fetch and CF
 * instructions are finalized individually; ALU and fetch clauses need no
 * work of their own. Any other node kind trips an assertion. Every child
 * that is a container (other than an ALU group) is then visited recursively.
 *
 * @param c the container whose children are processed.
 */
void bc_finalizer::run_on(container_node* c) {

	for (node_iterator it = c->begin(), stop = c->end(); it != stop; ++it) {
		node *child = *it;

		if (child->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(child));
			continue;
		}

		if (child->is_fetch_inst())
			finalize_fetch(static_cast<fetch_node*>(child));
		else if (child->is_cf_inst())
			finalize_cf(static_cast<cf_node*>(child));
		else if (!child->is_alu_clause() && !child->is_fetch_clause())
			assert(!"unexpected node");

		if (child->is_container())
			run_on(static_cast<container_node*>(child));
	}
}
|
/**
 * Fills in the bytecode destination fields of every instruction in an ALU
 * group, finalizes their sources, and sets the "last" bit on the final
 * instruction of the group only.
 *
 * @param g the ALU group; it is expected to contain at least one
 *          instruction, since the last visited one is dereferenced
 *          unconditionally.
 */
void bc_finalizer::finalize_alu_group(alu_group_node* g) {

	alu_node *tail = NULL;

	for (node_iterator it = g->begin(), stop = g->end(); it != stop; ++it) {
		alu_node *alu = static_cast<alu_node*>(*it);
		const unsigned slot = alu->bc.slot;

		value *dst = NULL;
		if (!alu->dst.empty())
			dst = alu->dst[0];

		// a special-register destination is only expected on MOVA ops and
		// is handled as "no destination"
		if (dst && dst->is_special_reg()) {
			assert(alu->bc.op_ptr->flags & AF_MOVA);
			dst = NULL;
		}

		if (dst) {
			sel_chan final_dst = dst->get_final_gpr();

			assert(final_dst.chan() == slot || slot == SLOT_TRANS);

			alu->bc.dst_gpr = final_dst.sel();
			alu->bc.dst_chan = final_dst.chan();
		} else {
			// no destination: use the slot as channel (0 for the trans
			// slot) and disable the write below
			alu->bc.dst_gpr = sel_chan(0, 0).sel();
			alu->bc.dst_chan = slot < SLOT_TRANS ? slot : 0;
		}

		const bool indirect_dst = dst && dst->is_rel() && dst->rel &&
				!dst->rel->is_const();

		alu->bc.dst_rel = indirect_dst ? 1 : 0;

		// a relative write with non-constant index may hit any register
		// of the array, so account for the array's last register
		if (indirect_dst)
			update_ngpr(dst->array->gpr.sel() + dst->array->array_size - 1);

		alu->bc.write_mask = dst ? 1 : 0;
		alu->bc.last = 0;

		if (alu->bc.op_ptr->flags & AF_PRED) {
			alu->bc.update_pred = (alu->dst[1] != NULL);
			alu->bc.update_exec_mask = (alu->dst[2] != NULL);
		}

		// FIXME handle predication here
		alu->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(alu->bc.dst_gpr);

		finalize_alu_src(g, alu);

		tail = alu;
	}

	tail->bc.last = 1;
}
|
/**
 * Translates the source operands of an ALU instruction into bytecode
 * sel/chan/rel fields, according to the kind of each source value.
 *
 * Source slots beyond the instruction's actual operands (up to three slots)
 * get sel = 0.
 *
 * @param g the group containing the instruction; used for literal channel
 *          lookup and to find the parent clause for kcache translation.
 * @param a the ALU instruction whose sources are finalized.
 */
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
	vvec &operands = a->src;

	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned idx = 0;

	for (vvec::iterator it = operands.begin(), stop = operands.end();
			it != stop; ++it, ++idx) {
		value *v = *it;
		assert(v);

		bc_alu_src &src = a->bc.src[idx];
		src.rel = 0;

		switch (v->kind) {
		case VLK_REL_REG: {
			sel_chan final_gpr = v->get_final_gpr();
			src.sel = final_gpr.sel();
			src.chan = final_gpr.chan();

			// non-constant index: mark the operand relative and account
			// for the last register of the array
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
			}
			break;
		}
		case VLK_REG: {
			sel_chan final_gpr = v->get_final_gpr();
			src.sel = final_gpr.sel();
			src.chan = final_gpr.chan();
			update_ngpr(src.sel);
			break;
		}
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lit = v->literal_value;
			src.chan = 0;

			// use the dedicated inline constant selects where one
			// matches, otherwise fall back to a literal of the group
			if (lit == literal(0)) {
				src.sel = ALU_SRC_0;
			} else if (lit == literal(0.5f)) {
				src.sel = ALU_SRC_0_5;
			} else if (lit == literal(1.0f)) {
				src.sel = ALU_SRC_1;
			} else if (lit == literal(1)) {
				src.sel = ALU_SRC_1_INT;
			} else if (lit == literal(-1)) {
				src.sel = ALU_SRC_M_1_INT;
			} else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lit);
				src.value = lit;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());

			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
	}

	// clear the select of the remaining, unused source slots
	for (; idx < 3; ++idx)
		a->bc.src[idx].sel = 0;
}
|
/**
 * Emits the two gradient-setting fetch instructions in front of a fetch
 * instruction that has 12 sources.
 *
 * SET_GRADIENTS_V is built from sources 4..7 and SET_GRADIENTS_H from
 * sources 8..11; both are inserted before `f`, in that order. All GPR
 * sources of one instruction must come from the same register, and constant
 * sources must be 0 or 1.0f; otherwise the operand is logged and the
 * process is aborted.
 *
 * @param f the fetch instruction the gradients belong to.
 */
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12);

	unsigned grad_ops[2] = { FETCH_OP_SET_GRADIENTS_V,
			FETCH_OP_SET_GRADIENTS_H };

	for (unsigned i = 0; i < 2; ++i) {
		fetch_node *grad = sh.create_fetch();
		grad->bc.set_op(grad_ops[i]);

		// FIXME extract this loop into a separate method and reuse it

		// sources 0..3 are skipped: the first op starts at source 4
		const unsigned first_arg = 4 * (i + 1);
		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			grad->bc.dst_sel[chan] = SEL_MASK;

			unsigned sel = SEL_MASK;
			value *v = f->src[first_arg + chan];

			if (!v || v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal lit = v->literal_value;

				if (lit == literal(0)) {
					sel = SEL_0;
				} else if (lit == literal(1.0f)) {
					sel = SEL_1;
				} else {
					sblog << "invalid fetch constant operand " << chan << " ";
					dump::dump_op(f);
					sblog << "\n";
					abort();
				}
			} else if (v->is_any_gpr()) {
				const unsigned vreg = v->gpr.sel();

				// all GPR operands have to share a single register
				if (reg != -1 && static_cast<unsigned>(reg) != vreg) {
					sblog << "invalid fetch source operand " << chan << " ";
					dump::dump_op(f);
					sblog << "\n";
					abort();
				}

				reg = vreg;
				sel = v->gpr.chan();
			} else {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			grad->bc.src_sel[chan] = sel;
		}

		if (reg < 0) {
			grad->bc.src_gpr = 0;
		} else {
			update_ngpr(reg);
			grad->bc.src_gpr = reg;
		}

		f->insert_before(grad);
	}
}
|
/**
 * Fills in the source and destination register and swizzle fields of a
 * fetch instruction.
 *
 * Instructions with the FF_VTX flag have one source channel, all others
 * four; for non-VTX instructions with FF_USEGRAD the gradient instructions
 * are emitted first. All GPR sources must share one register, and so must
 * all GPR destinations; constant sources must be 0 or 1.0f. Violations are
 * logged and abort the process.
 *
 * @param f the fetch instruction to finalize.
 */
void bc_finalizer::finalize_fetch(fetch_node* f) {

	const unsigned flags = f->bc.op_ptr->flags;
	const bool is_vtx = (flags & FF_VTX) != 0;

	if (!is_vtx && (flags & FF_USEGRAD))
		emit_set_grad(f);

	// src

	const unsigned src_count = is_vtx ? 1 : 4;
	int src_reg = -1;

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		// selects above SEL_W are left untouched
		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal lit = v->literal_value;

			if (lit == literal(0)) {
				sel = SEL_0;
			} else if (lit == literal(1.0f)) {
				sel = SEL_1;
			} else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}
		} else if (v->is_any_gpr()) {
			const unsigned vreg = v->gpr.sel();

			// all GPR sources have to share a single register
			if (src_reg != -1 && static_cast<unsigned>(src_reg) != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			src_reg = vreg;
			sel = v->gpr.chan();
		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (src_reg < 0) {
		f->bc.src_gpr = 0;
	} else {
		update_ngpr(src_reg);
		f->bc.src_gpr = src_reg;
	}

	// dst

	int dst_reg = -1;
	unsigned swizzle[4] = { SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK };

	for (unsigned chan = 0; chan < 4; ++chan) {

		const unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];

		if (!v)
			continue;

		// a destination must be a GPR, and all of them must share a
		// single register
		if (!v->is_any_gpr()) {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		const unsigned vreg = v->gpr.sel();

		if (dst_reg != -1 && static_cast<unsigned>(dst_reg) != vreg) {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		dst_reg = vreg;

		// the select is stored at the channel the value was allocated to
		swizzle[v->gpr.chan()] = sel;
	}

	for (unsigned chan = 0; chan < 4; ++chan)
		f->bc.dst_sel[chan] = swizzle[chan];

	assert(dst_reg >= 0);

	if (dst_reg < 0) {
		f->bc.dst_gpr = 0;
	} else {
		update_ngpr(dst_reg);
		f->bc.dst_gpr = dst_reg;
	}
}
|
/**
 * Finalizes a CF instruction.
 *
 * Every instruction visited has its end_of_program bit cleared and becomes
 * the current last_cf. For instructions with CF_CALL the stack size
 * estimate is updated. Exports (CF_EXP) are turned into CF_OP_EXPORT,
 * recorded as the last export of their type and get their source register
 * and per-channel selects resolved. Memory instructions (CF_MEM) get their
 * source register and component mask resolved, plus the index register for
 * CF_RAT instructions whose type has bit 0 set. Invalid operands are logged
 * and abort the process.
 *
 * @param c the CF instruction to finalize.
 */
void bc_finalizer::finalize_cf(cf_node* c) {

	const unsigned flags = c->bc.op_ptr->flags;

	if (flags & CF_CALL)
		update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2);

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			// selects above SEL_W are left untouched
			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal lit = v->literal_value;

				if (lit == literal(0)) {
					sel = SEL_0;
				} else if (lit == literal(1.0f)) {
					sel = SEL_1;
				} else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			} else if (v->is_any_gpr()) {
				const unsigned vreg = v->gpr.sel();

				// all GPR sources have to share a single register
				if (reg != -1 && static_cast<unsigned>(reg) != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				reg = vreg;
				sel = v->gpr.chan();
			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg < 0) {
			c->bc.rw_gpr = 0;
		} else {
			update_ngpr(reg);
			c->bc.rw_gpr = reg;
		}

		return;
	}

	if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];

			if (!v || v->is_undef())
				continue;

			// each source must be a GPR sitting in its own channel, and
			// all of them must share a single register
			const bool invalid = !v->is_any_gpr() ||
					v->gpr.chan() != chan ||
					(reg != -1 &&
					 static_cast<unsigned>(reg) != v->gpr.sel());

			if (invalid) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			reg = v->gpr.sel();
			mask |= (1 << chan);
		}

		assert(reg >= 0 && mask);

		if (reg < 0) {
			c->bc.rw_gpr = 0;
		} else {
			update_ngpr(reg);
			c->bc.rw_gpr = reg;
		}

		c->bc.comp_mask = mask;

		if ((flags & CF_RAT) && (c->bc.type & 1)) {

			// the index register is taken from sources 4..7, under the
			// same constraints as the data sources above
			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];

				if (!v || v->is_undef())
					continue;

				const bool invalid = !v->is_any_gpr() ||
						v->gpr.chan() != chan ||
						(reg != -1 &&
						 static_cast<unsigned>(reg) != v->gpr.sel());

				if (invalid) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				reg = v->gpr.sel();
			}

			assert(reg >= 0);

			if (reg < 0) {
				c->bc.index_gpr = 0;
			} else {
				update_ngpr(reg);
				c->bc.index_gpr = reg;
			}
		}

		return;
	}

	// all other CF instructions need no further work; the code below is
	// disabled
#if 0
	if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) {
		c->bc.valid_pixel_mode = 1;
	}
#endif
}
|
/**
 * Maps a kcache value to the ALU source select of the kcache set that has
 * its line locked in the given ALU clause.
 *
 * The value's select is split into a bank (bits 12 and up) and an address
 * (low 12 bits); the address divided by 16 is the line. The clause's kcache
 * sets are searched in order, stopping at the first set with KC_LOCK_NONE.
 * A set matches if the bank is equal and the line is the set's line, or the
 * following line when the set's mode is KC_LOCK_2.
 *
 * @param alu the ALU clause whose kcache sets are searched.
 * @param v   the kcache value to translate.
 * @return the translated select with the value's channel; if no set
 *         matches, an assertion fires and 0 is returned.
 */
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	// base select of each of the four kcache sets
	static const unsigned kc_base[] = {128, 160, 256, 288};

	const unsigned full_sel = v->select.sel();
	const unsigned chan = v->select.chan();

	const unsigned bank = full_sel >> 12;
	const unsigned addr = full_sel & 4095;
	const unsigned line = addr >> 4;

	for (unsigned set = 0; set < 4; ++set) {
		bc_kcache &kc = alu->bc.kc[set];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank != bank)
			continue;

		const bool in_first_line = kc.addr == line;
		const bool in_second_line =
				kc.mode == KC_LOCK_2 && kc.addr + 1 == line;

		if (in_first_line || in_second_line) {
			// offset of the constant from the start of the locked lines
			return sel_chan(kc_base[set] + (addr - (kc.addr << 4)), chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}
|
void bc_finalizer::update_ngpr(unsigned gpr) { |
if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr) |
ngpr = gpr + 1; |
} |
|
/**
 * Raises the stack size estimate for code nested inside region `r`.
 *
 * Counts the loop and non-loop regions on the chain from `r` up through its
 * parent regions. Each loop contributes ctx.stack_entry_size elements, each
 * other region one element, plus `add` and a chip-dependent reserve. The
 * element total is rounded up to entries of four elements, and nstack is
 * raised to that entry count if it is currently lower.
 *
 * @param r   innermost region to start counting from (may be NULL).
 * @param add extra stack elements to include.
 */
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;

	for (region_node *cur = r; cur; cur = cur->get_parent_region()) {
		if (cur->is_loop())
			++loops;
		else
			++ifs;
	}

	unsigned elements = loops * ctx.stack_entry_size + ifs + add;

	// FIXME calculate more precisely
	if (!ctx.is_evergreen()) {
		elements += 2;
		if (ctx.is_cayman())
			++elements;
	} else {
		++elements;
	}

	const unsigned entries = (elements + 3) / 4;

	if (entries > nstack)
		nstack = entries;
}
|
void bc_finalizer::cf_peephole() { |
|
for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E; |
I = N) { |
N = I; ++N; |
|
cf_node *c = static_cast<cf_node*>(*I); |
|
if (c->jump_after_target) { |
c->jump_target = static_cast<cf_node*>(c->jump_target->next); |
c->jump_after_target = false; |
} |
|
if (c->is_cf_op(CF_OP_POP)) { |
node *p = c->prev; |
if (p->is_alu_clause()) { |
cf_node *a = static_cast<cf_node*>(p); |
|
if (a->bc.op == CF_OP_ALU) { |
a->bc.set_op(CF_OP_ALU_POP_AFTER); |
c->remove(); |
} |
} |
} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) { |
// if JUMP is immediately followed by its jump target, |
// then JUMP is useless and we can eliminate it |
c->remove(); |
} |
} |
} |
|
} // namespace r600_sb |