/*
* Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Vadim Girlin
*/
#define FBC_DEBUG 0
#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif
#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
namespace r600_sb {
int bc_finalizer::run() {
regions_vec &rv = sh.get_regions();
for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
++I) {
region_node *r = *I;
assert(r);
bool loop = r->is_loop();
if (loop)
finalize_loop(r);
else
finalize_if(r);
r->expand();
}
run_on(sh.root);
cf_peephole();
// workaround for some problems on r6xx/7xx
// add ALU NOP to each vertex shader
if (!ctx.is_egcm() && sh.target == TARGET_VS) {
cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
alu_group_node *g = sh.create_alu_group();
alu_node *a = sh.create_alu();
a->bc.set_op(ALU_OP0_NOP);
a->bc.last = 1;
g->push_back(a);
c->push_back(g);
sh.root->push_back(c);
c = sh.create_cf(CF_OP_NOP);
sh.root->push_back(c);
last_cf = c;
}
if (last_cf->bc.op_ptr->flags & CF_ALU) {
last_cf = sh.create_cf(CF_OP_NOP);
sh.root->push_back(last_cf);
}
if (ctx.is_cayman())
last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
else
last_cf->bc.end_of_program = 1;
for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
cf_node *le = last_export[t];
if (le)
le->bc.set_op(CF_OP_EXPORT_DONE);
}
sh.ngpr = ngpr;
sh.nstack = nstack;
return 0;
}
void bc_finalizer::finalize_loop(region_node* r) {
cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
loop_start->jump_after(loop_end);
loop_end->jump_after(loop_start);
for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
I != E; ++I) {
depart_node *dep = *I;
cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
loop_break->jump(loop_end);
dep->push_back(loop_break);
dep->expand();
}
// FIXME produces unnecessary LOOP_CONTINUE
for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
I != E; ++I) {
repeat_node *rep = *I;
if (!(rep->parent == r && rep->prev == NULL)) {
cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
loop_cont->jump(loop_end);
rep->push_back(loop_cont);
}
rep->expand();
}
r->push_front(loop_start);
r->push_back(loop_end);
}
void bc_finalizer::finalize_if(region_node* r) {
update_nstack(r);
// expecting the following control flow structure here:
// - region
// {
// - depart/repeat 1 (it may be depart/repeat for some outer region)
// {
// - if
// {
// - depart/repeat 2 (possibly for outer region)
// {
// - some optional code
// }
// }
// - optional <else> code> ...
// }
// }
container_node *repdep1 = static_cast<container_node*>(r->first);
assert(repdep1->is_depart() || repdep1->is_repeat());
if_node *n_if = static_cast<if_node*>(repdep1->first);
if (n_if) {
assert(n_if->is_if());
container_node *repdep2 = static_cast<container_node*>(n_if->first);
assert(repdep2->is_depart() || repdep2->is_repeat());
cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
cf_node *if_pop = sh.create_cf(CF_OP_POP);
if_pop->bc.pop_count = 1;
if_pop->jump_after(if_pop);
r->push_front(if_jump);
r->push_back(if_pop);
bool has_else = n_if->next;
if (has_else) {
cf_node *nelse = sh.create_cf(CF_OP_ELSE);
n_if->insert_after(nelse);
if_jump->jump(nelse);
nelse->jump_after(if_pop);
nelse->bc.pop_count = 1;
} else {
if_jump->jump_after(if_pop);
if_jump->bc.pop_count = 1;
}
n_if->expand();
}
for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
I != E; ++I) {
(*I)->expand();
}
r->departs.clear();
assert(r->repeats.empty());
}
void bc_finalizer::run_on(container_node* c) {
for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
node *n = *I;
if (n->is_alu_group()) {
finalize_alu_group(static_cast<alu_group_node*>(n));
} else {
if (n->is_fetch_inst()) {
finalize_fetch(static_cast<fetch_node*>(n));
} else if (n->is_cf_inst()) {
finalize_cf(static_cast<cf_node*>(n));
} else if (n->is_alu_clause()) {
} else if (n->is_fetch_clause()) {
} else {
assert(!"unexpected node");
}
if (n->is_container())
run_on(static_cast<container_node*>(n));
}
}
}
void bc_finalizer::finalize_alu_group(alu_group_node* g) {
alu_node *last = NULL;
for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
alu_node *n = static_cast<alu_node*>(*I);
unsigned slot = n->bc.slot;
value *d = n->dst.empty() ? NULL : n->dst[0];
if (d && d->is_special_reg()) {
assert(n->bc.op_ptr->flags & AF_MOVA);
d = NULL;
}
sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);
if (d) {
assert(fdst.chan() == slot || slot == SLOT_TRANS);
}
n->bc.dst_gpr = fdst.sel();
n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
n->bc.dst_rel = 1;
update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
} else {
n->bc.dst_rel = 0;
}
n->bc.write_mask = d != NULL;
n->bc.last = 0;
if (n->bc.op_ptr->flags & AF_PRED) {
n->bc.update_pred = (n->dst[1] != NULL);
n->bc.update_exec_mask = (n->dst[2] != NULL);
}
// FIXME handle predication here
n->bc.pred_sel = PRED_SEL_OFF;
update_ngpr(n->bc.dst_gpr);
finalize_alu_src(g, n);
last = n;
}
last->bc.last = 1;
}
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
vvec &sv = a->src;
FBC_DUMP(
sblog << "finalize_alu_src: ";
dump::dump_op(a);
sblog << "\n";
);
unsigned si = 0;
for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
value *v = *I;
assert(v);
bc_alu_src &src = a->bc.src[si];
sel_chan sc;
src.rel = 0;
sel_chan gpr;
switch (v->kind) {
case VLK_REL_REG:
sc = v->get_final_gpr();
src.sel = sc.sel();
src.chan = sc.chan();
if (!v->rel->is_const()) {
src.rel = 1;
update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
} else
src.rel = 0;
break;
case VLK_REG:
gpr = v->get_final_gpr();
src.sel = gpr.sel();
src.chan = gpr.chan();
update_ngpr(src.sel);
break;
case VLK_TEMP:
src.sel = v->gpr.sel();
src.chan = v->gpr.chan();
update_ngpr(src.sel);
break;
case VLK_UNDEF:
case VLK_CONST: {
literal lv = v->literal_value;
src.chan = 0;
if (lv == literal(0))
src.sel = ALU_SRC_0;
else if (lv == literal(0.5f))
src.sel = ALU_SRC_0_5;
else if (lv == literal(1.0f))
src.sel = ALU_SRC_1;
else if (lv == literal(1))
src.sel = ALU_SRC_1_INT;
else if (lv == literal(-1))
src.sel = ALU_SRC_M_1_INT;
else {
src.sel = ALU_SRC_LITERAL;
src.chan = g->literal_chan(lv);
src.value = lv;
}
break;
}
case VLK_KCACHE: {
cf_node *clause = static_cast<cf_node*>(g->parent);
assert(clause->is_alu_clause());
sel_chan k = translate_kcache(clause, v);
assert(k && "kcache translation failed");
src.sel = k.sel();
src.chan = k.chan();
break;
}
case VLK_PARAM:
case VLK_SPECIAL_CONST:
src.sel = v->select.sel();
src.chan = v->select.chan();
break;
default:
assert(!"unknown value kind");
break;
}
}
while (si < 3) {
a->bc.src[si++].sel = 0;
}
}
void bc_finalizer::emit_set_grad(fetch_node* f) {
assert(f->src.size() == 12);
unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
unsigned arg_start = 0;
for (unsigned op = 0; op < 2; ++op) {
fetch_node *n = sh.create_fetch();
n->bc.set_op(ops[op]);
// FIXME extract this loop into a separate method and reuse it
int reg = -1;
arg_start += 4;
for (unsigned chan = 0; chan < 4; ++chan) {
n->bc.dst_sel[chan] = SEL_MASK;
unsigned sel = SEL_MASK;
value *v = f->src[arg_start + chan];
if (!v || v->is_undef()) {
sel = SEL_MASK;
} else if (v->is_const()) {
literal l = v->literal_value;
if (l == literal(0))
sel = SEL_0;
else if (l == literal(1.0f))
sel = SEL_1;
else {
sblog << "invalid fetch constant operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
} else if (v->is_any_gpr()) {
unsigned vreg = v->gpr.sel();
unsigned vchan = v->gpr.chan();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
sel = vchan;
} else {
sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
n->bc.src_sel[chan] = sel;
}
if (reg >= 0)
update_ngpr(reg);
n->bc.src_gpr = reg >= 0 ? reg : 0;
f->insert_before(n);
}
}
void bc_finalizer::finalize_fetch(fetch_node* f) {
int reg = -1;
// src
unsigned src_count = 4;
unsigned flags = f->bc.op_ptr->flags;
if (flags & FF_VTX) {
src_count = 1;
} else if (flags & FF_USEGRAD) {
emit_set_grad(f);
}
for (unsigned chan = 0; chan < src_count; ++chan) {
unsigned sel = f->bc.src_sel[chan];
if (sel > SEL_W)
continue;
value *v = f->src[chan];
if (v->is_undef()) {
sel = SEL_MASK;
} else if (v->is_const()) {
literal l = v->literal_value;
if (l == literal(0))
sel = SEL_0;
else if (l == literal(1.0f))
sel = SEL_1;
else {
sblog << "invalid fetch constant operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
} else if (v->is_any_gpr()) {
unsigned vreg = v->gpr.sel();
unsigned vchan = v->gpr.chan();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
sel = vchan;
} else {
sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
f->bc.src_sel[chan] = sel;
}
if (reg >= 0)
update_ngpr(reg);
f->bc.src_gpr = reg >= 0 ? reg : 0;
// dst
reg = -1;
unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};
for (unsigned chan = 0; chan < 4; ++chan) {
unsigned sel = f->bc.dst_sel[chan];
if (sel == SEL_MASK)
continue;
value *v = f->dst[chan];
if (!v)
continue;
if (v->is_any_gpr()) {
unsigned vreg = v->gpr.sel();
unsigned vchan = v->gpr.chan();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid fetch dst operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
dst_swz[vchan] = sel;
} else {
sblog << "invalid fetch dst operand " << chan << " ";
dump::dump_op(f);
sblog << "\n";
abort();
}
}
for (unsigned i = 0; i < 4; ++i)
f->bc.dst_sel[i] = dst_swz[i];
assert(reg >= 0);
if (reg >= 0)
update_ngpr(reg);
f->bc.dst_gpr = reg >= 0 ? reg : 0;
}
void bc_finalizer::finalize_cf(cf_node* c) {
unsigned flags = c->bc.op_ptr->flags;
if (flags & CF_CALL) {
update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2);
}
c->bc.end_of_program = 0;
last_cf = c;
if (flags & CF_EXP) {
c->bc.set_op(CF_OP_EXPORT);
last_export[c->bc.type] = c;
int reg = -1;
for (unsigned chan = 0; chan < 4; ++chan) {
unsigned sel = c->bc.sel[chan];
if (sel > SEL_W)
continue;
value *v = c->src[chan];
if (v->is_undef()) {
sel = SEL_MASK;
} else if (v->is_const()) {
literal l = v->literal_value;
if (l == literal(0))
sel = SEL_0;
else if (l == literal(1.0f))
sel = SEL_1;
else {
sblog << "invalid export constant operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
} else if (v->is_any_gpr()) {
unsigned vreg = v->gpr.sel();
unsigned vchan = v->gpr.chan();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid export source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
sel = vchan;
} else {
sblog << "invalid export source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
c->bc.sel[chan] = sel;
}
if (reg >= 0)
update_ngpr(reg);
c->bc.rw_gpr = reg >= 0 ? reg : 0;
} else if (flags & CF_MEM) {
int reg = -1;
unsigned mask = 0;
for (unsigned chan = 0; chan < 4; ++chan) {
value *v = c->src[chan];
if (!v || v->is_undef())
continue;
if (!v->is_any_gpr() || v->gpr.chan() != chan) {
sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
unsigned vreg = v->gpr.sel();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
mask |= (1 << chan);
}
assert(reg >= 0 && mask);
if (reg >= 0)
update_ngpr(reg);
c->bc.rw_gpr = reg >= 0 ? reg : 0;
c->bc.comp_mask = mask;
if ((flags & CF_RAT) && (c->bc.type & 1)) {
reg = -1;
for (unsigned chan = 0; chan < 4; ++chan) {
value *v = c->src[4 + chan];
if (!v || v->is_undef())
continue;
if (!v->is_any_gpr() || v->gpr.chan() != chan) {
sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
unsigned vreg = v->gpr.sel();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
sblog << "\n";
abort();
}
}
assert(reg >= 0);
if (reg >= 0)
update_ngpr(reg);
c->bc.index_gpr = reg >= 0 ? reg : 0;
}
} else {
#if 0
if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) {
c->bc.valid_pixel_mode = 1;
}
#endif
}
}
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
unsigned sel = v->select.sel();
unsigned bank = sel >> 12;
unsigned chan = v->select.chan();
static const unsigned kc_base[] = {128, 160, 256, 288};
sel &= 4095;
unsigned line = sel >> 4;
for (unsigned k = 0; k < 4; ++k) {
bc_kcache &kc = alu->bc.kc[k];
if (kc.mode == KC_LOCK_NONE)
break;
if (kc.bank == bank && (kc.addr == line ||
(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {
sel = kc_base[k] + (sel - (kc.addr << 4));
return sel_chan(sel, chan);
}
}
assert(!"kcache translation error");
return 0;
}
void bc_finalizer::update_ngpr(unsigned gpr) {
if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
ngpr = gpr + 1;
}
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
unsigned loops = 0;
unsigned ifs = 0;
while (r) {
if (r->is_loop())
++loops;
else
++ifs;
r = r->get_parent_region();
}
unsigned stack_elements = (loops * ctx.stack_entry_size) + ifs + add;
// FIXME calculate more precisely
if (ctx.is_evergreen()) {
++stack_elements;
} else {
stack_elements += 2;
if (ctx.is_cayman())
++stack_elements;
}
unsigned stack_entries = (stack_elements + 3) >> 2;
if (nstack < stack_entries)
nstack = stack_entries;
}
void bc_finalizer::cf_peephole() {
for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
I = N) {
N = I; ++N;
cf_node *c = static_cast<cf_node*>(*I);
if (c->jump_after_target) {
c->jump_target = static_cast<cf_node*>(c->jump_target->next);
c->jump_after_target = false;
}
if (c->is_cf_op(CF_OP_POP)) {
node *p = c->prev;
if (p->is_alu_clause()) {
cf_node *a = static_cast<cf_node*>(p);
if (a->bc.op == CF_OP_ALU) {
a->bc.set_op(CF_OP_ALU_POP_AFTER);
c->remove();
}
}
} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
// if JUMP is immediately followed by its jump target,
// then JUMP is useless and we can eliminate it
c->remove();
}
}
}
} // namespace r600_sb