Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2011 Christoph Bumiller
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "codegen/nv50_ir.h"
  24. #include "codegen/nv50_ir_target.h"
  25. #include "codegen/nv50_ir_driver.h"
  26.  
  27. extern "C" {
  28. #include "nouveau_debug.h"
  29. #include "nv50/nv50_program.h"
  30. }
  31.  
  32. namespace nv50_ir {
  33.  
  34. Modifier::Modifier(operation op)
  35. {
  36.    switch (op) {
  37.    case OP_NEG: bits = NV50_IR_MOD_NEG; break;
  38.    case OP_ABS: bits = NV50_IR_MOD_ABS; break;
  39.    case OP_SAT: bits = NV50_IR_MOD_SAT; break;
  40.    case OP_NOT: bits = NV50_IR_MOD_NOT; break;
  41.    default:
  42.       bits = 0;
  43.       break;
  44.    }
  45. }
  46.  
  47. Modifier Modifier::operator*(const Modifier m) const
  48. {
  49.    unsigned int a, b, c;
  50.  
  51.    b = m.bits;
  52.    if (this->bits & NV50_IR_MOD_ABS)
  53.       b &= ~NV50_IR_MOD_NEG;
  54.  
  55.    a = (this->bits ^ b)      & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
  56.    c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
  57.  
  58.    return Modifier(a | c);
  59. }
  60.  
  61. ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
  62. {
  63.    indirect[0] = -1;
  64.    indirect[1] = -1;
  65.    usedAsPtr = false;
  66.    set(v);
  67. }
  68.  
  69. ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
  70. {
  71.    set(ref);
  72.    usedAsPtr = ref.usedAsPtr;
  73. }
  74.  
  75. ValueRef::~ValueRef()
  76. {
  77.    this->set(NULL);
  78. }
  79.  
  80. bool ValueRef::getImmediate(ImmediateValue &imm) const
  81. {
  82.    const ValueRef *src = this;
  83.    Modifier m;
  84.    DataType type = src->insn->sType;
  85.  
  86.    while (src) {
  87.       if (src->mod) {
  88.          if (src->insn->sType != type)
  89.             break;
  90.          m *= src->mod;
  91.       }
  92.       if (src->getFile() == FILE_IMMEDIATE) {
  93.          imm = *(src->value->asImm());
  94.          // The immediate's type isn't required to match its use, it's
  95.          // more of a hint; applying a modifier makes use of that hint.
  96.          imm.reg.type = type;
  97.          m.applyTo(imm);
  98.          return true;
  99.       }
  100.  
  101.       Instruction *insn = src->value->getUniqueInsn();
  102.  
  103.       if (insn && insn->op == OP_MOV) {
  104.          src = &insn->src(0);
  105.          if (src->mod)
  106.             WARN("OP_MOV with modifier encountered !\n");
  107.       } else {
  108.          src = NULL;
  109.       }
  110.    }
  111.    return false;
  112. }
  113.  
  114. ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
  115. {
  116.    set(v);
  117. }
  118.  
  119. ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
  120. {
  121.    set(def.get());
  122. }
  123.  
  124. ValueDef::~ValueDef()
  125. {
  126.    this->set(NULL);
  127. }
  128.  
  129. void
  130. ValueRef::set(const ValueRef &ref)
  131. {
  132.    this->set(ref.get());
  133.    mod = ref.mod;
  134.    indirect[0] = ref.indirect[0];
  135.    indirect[1] = ref.indirect[1];
  136. }
  137.  
  138. void
  139. ValueRef::set(Value *refVal)
  140. {
  141.    if (value == refVal)
  142.       return;
  143.    if (value)
  144.       value->uses.erase(this);
  145.    if (refVal)
  146.       refVal->uses.insert(this);
  147.  
  148.    value = refVal;
  149. }
  150.  
  151. void
  152. ValueDef::set(Value *defVal)
  153. {
  154.    if (value == defVal)
  155.       return;
  156.    if (value)
  157.       value->defs.remove(this);
  158.    if (defVal)
  159.       defVal->defs.push_back(this);
  160.  
  161.    value = defVal;
  162. }
  163.  
  164. // Check if we can replace this definition's value by the value in @rep,
  165. // including the source modifiers, i.e. make sure that all uses support
  166. // @rep.mod.
  167. bool
  168. ValueDef::mayReplace(const ValueRef &rep)
  169. {
  170.    if (!rep.mod)
  171.       return true;
  172.  
  173.    if (!insn || !insn->bb) // Unbound instruction ?
  174.       return false;
  175.  
  176.    const Target *target = insn->bb->getProgram()->getTarget();
  177.  
  178.    for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
  179.         ++it) {
  180.       Instruction *insn = (*it)->getInsn();
  181.       int s = -1;
  182.  
  183.       for (int i = 0; insn->srcExists(i); ++i) {
  184.          if (insn->src(i).get() == value) {
  185.             // If there are multiple references to us we'd have to check if the
  186.             // combination of mods is still supported, but just bail for now.
  187.             if (&insn->src(i) != (*it))
  188.                return false;
  189.             s = i;
  190.          }
  191.       }
  192.       assert(s >= 0); // integrity of uses list
  193.  
  194.       if (!target->isModSupported(insn, s, rep.mod))
  195.          return false;
  196.    }
  197.    return true;
  198. }
  199.  
  200. void
  201. ValueDef::replace(const ValueRef &repVal, bool doSet)
  202. {
  203.    assert(mayReplace(repVal));
  204.  
  205.    if (value == repVal.get())
  206.       return;
  207.  
  208.    while (!value->uses.empty()) {
  209.       ValueRef *ref = *value->uses.begin();
  210.       ref->set(repVal.get());
  211.       ref->mod *= repVal.mod;
  212.    }
  213.  
  214.    if (doSet)
  215.       set(repVal.get());
  216. }
  217.  
  218. Value::Value()
  219. {
  220.   join = this;
  221.   memset(&reg, 0, sizeof(reg));
  222.   reg.size = 4;
  223. }
  224.  
  225. LValue::LValue(Function *fn, DataFile file)
  226. {
  227.    reg.file = file;
  228.    reg.size = (file != FILE_PREDICATE) ? 4 : 1;
  229.    reg.data.id = -1;
  230.  
  231.    compMask = 0;
  232.    compound = 0;
  233.    ssa = 0;
  234.    fixedReg = 0;
  235.    noSpill = 0;
  236.  
  237.    fn->add(this, this->id);
  238. }
  239.  
  240. LValue::LValue(Function *fn, LValue *lval)
  241. {
  242.    assert(lval);
  243.  
  244.    reg.file = lval->reg.file;
  245.    reg.size = lval->reg.size;
  246.    reg.data.id = -1;
  247.  
  248.    compMask = 0;
  249.    compound = 0;
  250.    ssa = 0;
  251.    fixedReg = 0;
  252.    noSpill = 0;
  253.  
  254.    fn->add(this, this->id);
  255. }
  256.  
  257. LValue *
  258. LValue::clone(ClonePolicy<Function>& pol) const
  259. {
  260.    LValue *that = new_LValue(pol.context(), reg.file);
  261.  
  262.    pol.set<Value>(this, that);
  263.  
  264.    that->reg.size = this->reg.size;
  265.    that->reg.type = this->reg.type;
  266.    that->reg.data = this->reg.data;
  267.  
  268.    return that;
  269. }
  270.  
  271. bool
  272. LValue::isUniform() const
  273. {
  274.    if (defs.size() > 1)
  275.       return false;
  276.    Instruction *insn = getInsn();
  277.    // let's not try too hard here for now ...
  278.    return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
  279. }
  280.  
  281. Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
  282. {
  283.    baseSym = NULL;
  284.  
  285.    reg.file = f;
  286.    reg.fileIndex = fidx;
  287.    reg.data.offset = 0;
  288.  
  289.    prog->add(this, this->id);
  290. }
  291.  
  292. Symbol *
  293. Symbol::clone(ClonePolicy<Function>& pol) const
  294. {
  295.    Program *prog = pol.context()->getProgram();
  296.  
  297.    Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
  298.  
  299.    pol.set<Value>(this, that);
  300.  
  301.    that->reg.size = this->reg.size;
  302.    that->reg.type = this->reg.type;
  303.    that->reg.data = this->reg.data;
  304.  
  305.    that->baseSym = this->baseSym;
  306.  
  307.    return that;
  308. }
  309.  
  310. bool
  311. Symbol::isUniform() const
  312. {
  313.    return
  314.       reg.file != FILE_SYSTEM_VALUE &&
  315.       reg.file != FILE_MEMORY_LOCAL &&
  316.       reg.file != FILE_SHADER_INPUT;
  317. }
  318.  
  319. ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
  320. {
  321.    memset(&reg, 0, sizeof(reg));
  322.  
  323.    reg.file = FILE_IMMEDIATE;
  324.    reg.size = 4;
  325.    reg.type = TYPE_U32;
  326.  
  327.    reg.data.u32 = uval;
  328.  
  329.    prog->add(this, this->id);
  330. }
  331.  
  332. ImmediateValue::ImmediateValue(Program *prog, float fval)
  333. {
  334.    memset(&reg, 0, sizeof(reg));
  335.  
  336.    reg.file = FILE_IMMEDIATE;
  337.    reg.size = 4;
  338.    reg.type = TYPE_F32;
  339.  
  340.    reg.data.f32 = fval;
  341.  
  342.    prog->add(this, this->id);
  343. }
  344.  
  345. ImmediateValue::ImmediateValue(Program *prog, double dval)
  346. {
  347.    memset(&reg, 0, sizeof(reg));
  348.  
  349.    reg.file = FILE_IMMEDIATE;
  350.    reg.size = 8;
  351.    reg.type = TYPE_F64;
  352.  
  353.    reg.data.f64 = dval;
  354.  
  355.    prog->add(this, this->id);
  356. }
  357.  
  358. ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
  359. {
  360.    reg = proto->reg;
  361.  
  362.    reg.type = ty;
  363.    reg.size = typeSizeof(ty);
  364. }
  365.  
  366. ImmediateValue *
  367. ImmediateValue::clone(ClonePolicy<Function>& pol) const
  368. {
  369.    Program *prog = pol.context()->getProgram();
  370.    ImmediateValue *that = new_ImmediateValue(prog, 0u);
  371.  
  372.    pol.set<Value>(this, that);
  373.  
  374.    that->reg.size = this->reg.size;
  375.    that->reg.type = this->reg.type;
  376.    that->reg.data = this->reg.data;
  377.  
  378.    return that;
  379. }
  380.  
  381. bool
  382. ImmediateValue::isInteger(const int i) const
  383. {
  384.    switch (reg.type) {
  385.    case TYPE_S8:
  386.       return reg.data.s8 == i;
  387.    case TYPE_U8:
  388.       return reg.data.u8 == i;
  389.    case TYPE_S16:
  390.       return reg.data.s16 == i;
  391.    case TYPE_U16:
  392.       return reg.data.u16 == i;
  393.    case TYPE_S32:
  394.    case TYPE_U32:
  395.       return reg.data.s32 == i; // as if ...
  396.    case TYPE_F32:
  397.       return reg.data.f32 == static_cast<float>(i);
  398.    case TYPE_F64:
  399.       return reg.data.f64 == static_cast<double>(i);
  400.    default:
  401.       return false;
  402.    }
  403. }
  404.  
  405. bool
  406. ImmediateValue::isNegative() const
  407. {
  408.    switch (reg.type) {
  409.    case TYPE_S8:  return reg.data.s8 < 0;
  410.    case TYPE_S16: return reg.data.s16 < 0;
  411.    case TYPE_S32:
  412.    case TYPE_U32: return reg.data.s32 < 0;
  413.    case TYPE_F32: return reg.data.u32 & (1 << 31);
  414.    case TYPE_F64: return reg.data.u64 & (1ULL << 63);
  415.    default:
  416.       return false;
  417.    }
  418. }
  419.  
  420. bool
  421. ImmediateValue::isPow2() const
  422. {
  423.    switch (reg.type) {
  424.    case TYPE_U8:
  425.    case TYPE_U16:
  426.    case TYPE_U32: return util_is_power_of_two(reg.data.u32);
  427.    default:
  428.       return false;
  429.    }
  430. }
  431.  
  432. void
  433. ImmediateValue::applyLog2()
  434. {
  435.    switch (reg.type) {
  436.    case TYPE_S8:
  437.    case TYPE_S16:
  438.    case TYPE_S32:
  439.       assert(!this->isNegative());
  440.       // fall through
  441.    case TYPE_U8:
  442.    case TYPE_U16:
  443.    case TYPE_U32:
  444.       reg.data.u32 = util_logbase2(reg.data.u32);
  445.       break;
  446.    case TYPE_F32:
  447.       reg.data.f32 = log2f(reg.data.f32);
  448.       break;
  449.    case TYPE_F64:
  450.       reg.data.f64 = log2(reg.data.f64);
  451.       break;
  452.    default:
  453.       assert(0);
  454.       break;
  455.    }
  456. }
  457.  
  458. bool
  459. ImmediateValue::compare(CondCode cc, float fval) const
  460. {
  461.    if (reg.type != TYPE_F32)
  462.       ERROR("immediate value is not of type f32");
  463.  
  464.    switch (static_cast<CondCode>(cc & 7)) {
  465.    case CC_TR: return true;
  466.    case CC_FL: return false;
  467.    case CC_LT: return reg.data.f32 <  fval;
  468.    case CC_LE: return reg.data.f32 <= fval;
  469.    case CC_GT: return reg.data.f32 >  fval;
  470.    case CC_GE: return reg.data.f32 >= fval;
  471.    case CC_EQ: return reg.data.f32 == fval;
  472.    case CC_NE: return reg.data.f32 != fval;
  473.    default:
  474.       assert(0);
  475.       return false;
  476.    }
  477. }
  478.  
  479. ImmediateValue&
  480. ImmediateValue::operator=(const ImmediateValue &that)
  481. {
  482.    this->reg = that.reg;
  483.    return (*this);
  484. }
  485.  
  486. bool
  487. Value::interfers(const Value *that) const
  488. {
  489.    uint32_t idA, idB;
  490.  
  491.    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
  492.       return false;
  493.    if (this->asImm())
  494.       return false;
  495.  
  496.    if (this->asSym()) {
  497.       idA = this->join->reg.data.offset;
  498.       idB = that->join->reg.data.offset;
  499.    } else {
  500.       idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
  501.       idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
  502.    }
  503.  
  504.    if (idA < idB)
  505.       return (idA + this->reg.size > idB);
  506.    else
  507.    if (idA > idB)
  508.       return (idB + that->reg.size > idA);
  509.    else
  510.       return (idA == idB);
  511. }
  512.  
  513. bool
  514. Value::equals(const Value *that, bool strict) const
  515. {
  516.    if (strict)
  517.       return this == that;
  518.  
  519.    if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
  520.       return false;
  521.    if (that->reg.size != this->reg.size)
  522.       return false;
  523.  
  524.    if (that->reg.data.id != this->reg.data.id)
  525.       return false;
  526.  
  527.    return true;
  528. }
  529.  
  530. bool
  531. ImmediateValue::equals(const Value *that, bool strict) const
  532. {
  533.    const ImmediateValue *imm = that->asImm();
  534.    if (!imm)
  535.       return false;
  536.    return reg.data.u64 == imm->reg.data.u64;
  537. }
  538.  
  539. bool
  540. Symbol::equals(const Value *that, bool strict) const
  541. {
  542.    if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
  543.       return false;
  544.    assert(that->asSym());
  545.  
  546.    if (this->baseSym != that->asSym()->baseSym)
  547.       return false;
  548.  
  549.    if (reg.file == FILE_SYSTEM_VALUE)
  550.       return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
  551.               this->reg.data.sv.index == that->reg.data.sv.index);
  552.    return this->reg.data.offset == that->reg.data.offset;
  553. }
  554.  
  555. void Instruction::init()
  556. {
  557.    next = prev = 0;
  558.  
  559.    cc = CC_ALWAYS;
  560.    rnd = ROUND_N;
  561.    cache = CACHE_CA;
  562.    subOp = 0;
  563.  
  564.    saturate = 0;
  565.    join = 0;
  566.    exit = 0;
  567.    terminator = 0;
  568.    ftz = 0;
  569.    dnz = 0;
  570.    perPatch = 0;
  571.    fixed = 0;
  572.    encSize = 0;
  573.    ipa = 0;
  574.    mask = 0;
  575.  
  576.    lanes = 0xf;
  577.  
  578.    postFactor = 0;
  579.  
  580.    predSrc = -1;
  581.    flagsDef = -1;
  582.    flagsSrc = -1;
  583. }
  584.  
  585. Instruction::Instruction()
  586. {
  587.    init();
  588.  
  589.    op = OP_NOP;
  590.    dType = sType = TYPE_F32;
  591.  
  592.    id = -1;
  593.    bb = 0;
  594. }
  595.  
  596. Instruction::Instruction(Function *fn, operation opr, DataType ty)
  597. {
  598.    init();
  599.  
  600.    op = opr;
  601.    dType = sType = ty;
  602.  
  603.    fn->add(this, id);
  604. }
  605.  
  606. Instruction::~Instruction()
  607. {
  608.    if (bb) {
  609.       Function *fn = bb->getFunction();
  610.       bb->remove(this);
  611.       fn->allInsns.remove(id);
  612.    }
  613.  
  614.    for (int s = 0; srcExists(s); ++s)
  615.       setSrc(s, NULL);
  616.    // must unlink defs too since the list pointers will get deallocated
  617.    for (int d = 0; defExists(d); ++d)
  618.       setDef(d, NULL);
  619. }
  620.  
  621. void
  622. Instruction::setDef(int i, Value *val)
  623. {
  624.    int size = defs.size();
  625.    if (i >= size) {
  626.       defs.resize(i + 1);
  627.       while (size <= i)
  628.          defs[size++].setInsn(this);
  629.    }
  630.    defs[i].set(val);
  631. }
  632.  
  633. void
  634. Instruction::setSrc(int s, Value *val)
  635. {
  636.    int size = srcs.size();
  637.    if (s >= size) {
  638.       srcs.resize(s + 1);
  639.       while (size <= s)
  640.          srcs[size++].setInsn(this);
  641.    }
  642.    srcs[s].set(val);
  643. }
  644.  
  645. void
  646. Instruction::setSrc(int s, const ValueRef& ref)
  647. {
  648.    setSrc(s, ref.get());
  649.    srcs[s].mod = ref.mod;
  650. }
  651.  
  652. void
  653. Instruction::swapSources(int a, int b)
  654. {
  655.    Value *value = srcs[a].get();
  656.    Modifier m = srcs[a].mod;
  657.  
  658.    setSrc(a, srcs[b]);
  659.  
  660.    srcs[b].set(value);
  661.    srcs[b].mod = m;
  662. }
  663.  
  664. static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
  665. {
  666.    if (index >= s)
  667.       index += delta;
  668.    else
  669.    if ((delta < 0) && (index >= (s + delta)))
  670.       index = -1;
  671. }
  672.  
  673. // Moves sources [@s,last_source] by @delta.
  674. // If @delta < 0, sources [@s - abs(@delta), @s) are erased.
  675. void
  676. Instruction::moveSources(const int s, const int delta)
  677. {
  678.    if (delta == 0)
  679.       return;
  680.    assert(s + delta >= 0);
  681.  
  682.    int k;
  683.  
  684.    for (k = 0; srcExists(k); ++k) {
  685.       for (int i = 0; i < 2; ++i)
  686.          moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
  687.    }
  688.    moveSourcesAdjustIndex(predSrc, s, delta);
  689.    moveSourcesAdjustIndex(flagsSrc, s, delta);
  690.    if (asTex()) {
  691.       TexInstruction *tex = asTex();
  692.       moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
  693.       moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
  694.    }
  695.  
  696.    if (delta > 0) {
  697.       --k;
  698.       for (int p = k + delta; k >= s; --k, --p)
  699.          setSrc(p, src(k));
  700.    } else {
  701.       int p;
  702.       for (p = s; p < k; ++p)
  703.          setSrc(p + delta, src(p));
  704.       for (; (p + delta) < k; ++p)
  705.          setSrc(p + delta, NULL);
  706.    }
  707. }
  708.  
  709. void
  710. Instruction::takeExtraSources(int s, Value *values[3])
  711. {
  712.    values[0] = getIndirect(s, 0);
  713.    if (values[0])
  714.       setIndirect(s, 0, NULL);
  715.  
  716.    values[1] = getIndirect(s, 1);
  717.    if (values[1])
  718.       setIndirect(s, 1, NULL);
  719.  
  720.    values[2] = getPredicate();
  721.    if (values[2])
  722.       setPredicate(cc, NULL);
  723. }
  724.  
  725. void
  726. Instruction::putExtraSources(int s, Value *values[3])
  727. {
  728.    if (values[0])
  729.       setIndirect(s, 0, values[0]);
  730.    if (values[1])
  731.       setIndirect(s, 1, values[1]);
  732.    if (values[2])
  733.       setPredicate(cc, values[2]);
  734. }
  735.  
  736. Instruction *
  737. Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
  738. {
  739.    if (!i)
  740.       i = new_Instruction(pol.context(), op, dType);
  741. #ifndef NDEBUG // non-conformant assert, so this is required
  742.    assert(typeid(*i) == typeid(*this));
  743. #endif
  744.  
  745.    pol.set<Instruction>(this, i);
  746.  
  747.    i->sType = sType;
  748.  
  749.    i->rnd = rnd;
  750.    i->cache = cache;
  751.    i->subOp = subOp;
  752.  
  753.    i->saturate = saturate;
  754.    i->join = join;
  755.    i->exit = exit;
  756.    i->mask = mask;
  757.    i->ftz = ftz;
  758.    i->dnz = dnz;
  759.    i->ipa = ipa;
  760.    i->lanes = lanes;
  761.    i->perPatch = perPatch;
  762.  
  763.    i->postFactor = postFactor;
  764.  
  765.    for (int d = 0; defExists(d); ++d)
  766.       i->setDef(d, pol.get(getDef(d)));
  767.  
  768.    for (int s = 0; srcExists(s); ++s) {
  769.       i->setSrc(s, pol.get(getSrc(s)));
  770.       i->src(s).mod = src(s).mod;
  771.    }
  772.  
  773.    i->cc = cc;
  774.    i->predSrc = predSrc;
  775.    i->flagsDef = flagsDef;
  776.    i->flagsSrc = flagsSrc;
  777.  
  778.    return i;
  779. }
  780.  
  781. unsigned int
  782. Instruction::defCount(unsigned int mask, bool singleFile) const
  783. {
  784.    unsigned int i, n;
  785.  
  786.    if (singleFile) {
  787.       unsigned int d = ffs(mask);
  788.       if (!d)
  789.          return 0;
  790.       for (i = d--; defExists(i); ++i)
  791.          if (getDef(i)->reg.file != getDef(d)->reg.file)
  792.             mask &= ~(1 << i);
  793.    }
  794.  
  795.    for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
  796.       n += mask & 1;
  797.    return n;
  798. }
  799.  
  800. unsigned int
  801. Instruction::srcCount(unsigned int mask, bool singleFile) const
  802. {
  803.    unsigned int i, n;
  804.  
  805.    if (singleFile) {
  806.       unsigned int s = ffs(mask);
  807.       if (!s)
  808.          return 0;
  809.       for (i = s--; srcExists(i); ++i)
  810.          if (getSrc(i)->reg.file != getSrc(s)->reg.file)
  811.             mask &= ~(1 << i);
  812.    }
  813.  
  814.    for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
  815.       n += mask & 1;
  816.    return n;
  817. }
  818.  
  819. bool
  820. Instruction::setIndirect(int s, int dim, Value *value)
  821. {
  822.    assert(this->srcExists(s));
  823.  
  824.    int p = srcs[s].indirect[dim];
  825.    if (p < 0) {
  826.       if (!value)
  827.          return true;
  828.       p = srcs.size();
  829.       while (p > 0 && !srcExists(p - 1))
  830.          --p;
  831.    }
  832.    setSrc(p, value);
  833.    srcs[p].usedAsPtr = (value != 0);
  834.    srcs[s].indirect[dim] = value ? p : -1;
  835.    return true;
  836. }
  837.  
  838. bool
  839. Instruction::setPredicate(CondCode ccode, Value *value)
  840. {
  841.    cc = ccode;
  842.  
  843.    if (!value) {
  844.       if (predSrc >= 0) {
  845.          srcs[predSrc].set(NULL);
  846.          predSrc = -1;
  847.       }
  848.       return true;
  849.    }
  850.  
  851.    if (predSrc < 0) {
  852.       predSrc = srcs.size();
  853.       while (predSrc > 0 && !srcExists(predSrc - 1))
  854.          --predSrc;
  855.    }
  856.  
  857.    setSrc(predSrc, value);
  858.    return true;
  859. }
  860.  
  861. bool
  862. Instruction::writesPredicate() const
  863. {
  864.    for (int d = 0; defExists(d); ++d)
  865.       if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
  866.          return true;
  867.    return false;
  868. }
  869.  
  870. static bool
  871. insnCheckCommutationDefSrc(const Instruction *a, const Instruction *b)
  872. {
  873.    for (int d = 0; a->defExists(d); ++d)
  874.       for (int s = 0; b->srcExists(s); ++s)
  875.          if (a->getDef(d)->interfers(b->getSrc(s)))
  876.             return false;
  877.    return true;
  878. }
  879.  
  880. static bool
  881. insnCheckCommutationDefDef(const Instruction *a, const Instruction *b)
  882. {
  883.    for (int d = 0; a->defExists(d); ++d)
  884.       for (int c = 0; b->defExists(c); ++c)
  885.          if (a->getDef(d)->interfers(b->getDef(c)))
  886.             return false;
  887.    return true;
  888. }
  889.  
  890. bool
  891. Instruction::isCommutationLegal(const Instruction *i) const
  892. {
  893.    bool ret = insnCheckCommutationDefDef(this, i);
  894.    ret = ret && insnCheckCommutationDefSrc(this, i);
  895.    ret = ret && insnCheckCommutationDefSrc(i, this);
  896.    return ret;
  897. }
  898.  
  899. TexInstruction::TexInstruction(Function *fn, operation op)
  900.    : Instruction(fn, op, TYPE_F32)
  901. {
  902.    memset(&tex, 0, sizeof(tex));
  903.  
  904.    tex.rIndirectSrc = -1;
  905.    tex.sIndirectSrc = -1;
  906. }
  907.  
  908. TexInstruction::~TexInstruction()
  909. {
  910.    for (int c = 0; c < 3; ++c) {
  911.       dPdx[c].set(NULL);
  912.       dPdy[c].set(NULL);
  913.    }
  914.    for (int n = 0; n < 4; ++n)
  915.       for (int c = 0; c < 3; ++c)
  916.          offset[n][c].set(NULL);
  917. }
  918.  
  919. TexInstruction *
  920. TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
  921. {
  922.    TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
  923.                           new_TexInstruction(pol.context(), op));
  924.  
  925.    Instruction::clone(pol, tex);
  926.  
  927.    tex->tex = this->tex;
  928.  
  929.    if (op == OP_TXD) {
  930.       for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
  931.          tex->dPdx[c].set(dPdx[c]);
  932.          tex->dPdy[c].set(dPdy[c]);
  933.       }
  934.    }
  935.  
  936.    for (int n = 0; n < tex->tex.useOffsets; ++n)
  937.       for (int c = 0; c < 3; ++c)
  938.          tex->offset[n][c].set(offset[n][c]);
  939.  
  940.    return tex;
  941. }
  942.  
// Static description table with one entry per texture target.
// NOTE(review): the column meanings are defined by
// TexInstruction::Target::Desc, which is not visible in this file; they
// look like { name, dimension count, argument count, is-array, is-cube,
// is-shadow } -- confirm against the struct declaration. The row order
// presumably has to match the texture target enumeration -- verify before
// inserting or reordering entries.
  943. const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
  944. {
  945.    { "1D",                1, 1, false, false, false },
  946.    { "2D",                2, 2, false, false, false },
  947.    { "2D_MS",             2, 3, false, false, false },
  948.    { "3D",                3, 3, false, false, false },
  949.    { "CUBE",              2, 3, false, true,  false },
  950.    { "1D_SHADOW",         1, 1, false, false, true  },
  951.    { "2D_SHADOW",         2, 2, false, false, true  },
  952.    { "CUBE_SHADOW",       2, 3, false, true,  true  },
  953.    { "1D_ARRAY",          1, 2, true,  false, false },
  954.    { "2D_ARRAY",          2, 3, true,  false, false },
  955.    { "2D_MS_ARRAY",       2, 4, true,  false, false },
  956.    { "CUBE_ARRAY",        2, 4, true,  true,  false },
  957.    { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
  958.    { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
  959.    { "RECT",              2, 2, false, false, false },
  960.    { "RECT_SHADOW",       2, 2, false, false, true  },
  961.    { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
  962.    { "BUFFER",            1, 1, false, false, false },
  963. };
  964.  
  965. void
  966. TexInstruction::setIndirectR(Value *v)
  967. {
  968.    int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
  969.    if (p >= 0) {
  970.       tex.rIndirectSrc = p;
  971.       setSrc(p, v);
  972.       srcs[p].usedAsPtr = !!v;
  973.    }
  974. }
  975.  
  976. void
  977. TexInstruction::setIndirectS(Value *v)
  978. {
  979.    int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
  980.    if (p >= 0) {
  981.       tex.sIndirectSrc = p;
  982.       setSrc(p, v);
  983.       srcs[p].usedAsPtr = !!v;
  984.    }
  985. }
  986.  
  987. CmpInstruction::CmpInstruction(Function *fn, operation op)
  988.    : Instruction(fn, op, TYPE_F32)
  989. {
  990.    setCond = CC_ALWAYS;
  991. }
  992.  
  993. CmpInstruction *
  994. CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
  995. {
  996.    CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
  997.                           new_CmpInstruction(pol.context(), op));
  998.    cmp->dType = dType;
  999.    Instruction::clone(pol, cmp);
  1000.    cmp->setCond = setCond;
  1001.    return cmp;
  1002. }
  1003.  
  1004. FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
  1005.    : Instruction(fn, op, TYPE_NONE)
  1006. {
  1007.    if (op == OP_CALL)
  1008.       target.fn = reinterpret_cast<Function *>(targ);
  1009.    else
  1010.       target.bb = reinterpret_cast<BasicBlock *>(targ);
  1011.  
  1012.    if (op == OP_BRA ||
  1013.        op == OP_CONT || op == OP_BREAK ||
  1014.        op == OP_RET || op == OP_EXIT)
  1015.       terminator = 1;
  1016.    else
  1017.    if (op == OP_JOIN)
  1018.       terminator = targ ? 1 : 0;
  1019.  
  1020.    allWarp = absolute = limit = builtin = indirect = 0;
  1021. }
  1022.  
  1023. FlowInstruction *
  1024. FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
  1025. {
  1026.    FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
  1027.                             new_FlowInstruction(pol.context(), op, NULL));
  1028.  
  1029.    Instruction::clone(pol, flow);
  1030.    flow->allWarp = allWarp;
  1031.    flow->absolute = absolute;
  1032.    flow->limit = limit;
  1033.    flow->builtin = builtin;
  1034.  
  1035.    if (builtin)
  1036.       flow->target.builtin = target.builtin;
  1037.    else
  1038.    if (op == OP_CALL)
  1039.       flow->target.fn = target.fn;
  1040.    else
  1041.    if (target.bb)
  1042.       flow->target.bb = pol.get<BasicBlock>(target.bb);
  1043.  
  1044.    return flow;
  1045. }
  1046.  
  1047. Program::Program(Type type, Target *arch)
  1048.    : progType(type),
  1049.      target(arch),
  1050.      mem_Instruction(sizeof(Instruction), 6),
  1051.      mem_CmpInstruction(sizeof(CmpInstruction), 4),
  1052.      mem_TexInstruction(sizeof(TexInstruction), 4),
  1053.      mem_FlowInstruction(sizeof(FlowInstruction), 4),
  1054.      mem_LValue(sizeof(LValue), 8),
  1055.      mem_Symbol(sizeof(Symbol), 7),
  1056.      mem_ImmediateValue(sizeof(ImmediateValue), 7)
  1057. {
  1058.    code = NULL;
  1059.    binSize = 0;
  1060.  
  1061.    maxGPR = -1;
  1062.  
  1063.    main = new Function(this, "MAIN", ~0);
  1064.    calls.insert(&main->call);
  1065.  
  1066.    dbgFlags = 0;
  1067.    optLevel = 0;
  1068.  
  1069.    targetPriv = NULL;
  1070. }
  1071.  
  1072. Program::~Program()
  1073. {
  1074.    for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
  1075.       delete reinterpret_cast<Function *>(it.get());
  1076.  
  1077.    for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
  1078.       releaseValue(reinterpret_cast<Value *>(it.get()));
  1079. }
  1080.  
  1081. void Program::releaseInstruction(Instruction *insn)
  1082. {
  1083.    // TODO: make this not suck so much
  1084.  
  1085.    insn->~Instruction();
  1086.  
  1087.    if (insn->asCmp())
  1088.       mem_CmpInstruction.release(insn);
  1089.    else
  1090.    if (insn->asTex())
  1091.       mem_TexInstruction.release(insn);
  1092.    else
  1093.    if (insn->asFlow())
  1094.       mem_FlowInstruction.release(insn);
  1095.    else
  1096.       mem_Instruction.release(insn);
  1097. }
  1098.  
  1099. void Program::releaseValue(Value *value)
  1100. {
  1101.    value->~Value();
  1102.  
  1103.    if (value->asLValue())
  1104.       mem_LValue.release(value);
  1105.    else
  1106.    if (value->asImm())
  1107.       mem_ImmediateValue.release(value);
  1108.    else
  1109.    if (value->asSym())
  1110.       mem_Symbol.release(value);
  1111. }
  1112.  
  1113.  
  1114. } // namespace nv50_ir
  1115.  
  1116. extern "C" {
  1117.  
  1118. static void
  1119. nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
  1120. {
  1121. #if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN)
  1122.    if (info->type == PIPE_SHADER_HULL || info->type == PIPE_SHADER_DOMAIN) {
  1123.       info->prop.tp.domain = PIPE_PRIM_MAX;
  1124.       info->prop.tp.outputPrim = PIPE_PRIM_MAX;
  1125.    }
  1126. #endif
  1127.    if (info->type == PIPE_SHADER_GEOMETRY) {
  1128.       info->prop.gp.instanceCount = 1;
  1129.       info->prop.gp.maxVertices = 1;
  1130.    }
  1131.    info->io.clipDistance = 0xff;
  1132.    info->io.pointSize = 0xff;
  1133.    info->io.instanceId = 0xff;
  1134.    info->io.vertexId = 0xff;
  1135.    info->io.edgeFlagIn = 0xff;
  1136.    info->io.edgeFlagOut = 0xff;
  1137.    info->io.fragDepth = 0xff;
  1138.    info->io.sampleMask = 0xff;
  1139.    info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
  1140. }
  1141.  
  1142. int
  1143. nv50_ir_generate_code(struct nv50_ir_prog_info *info)
  1144. {
  1145.    int ret = 0;
  1146.  
  1147.    nv50_ir::Program::Type type;
  1148.  
  1149.    nv50_ir_init_prog_info(info);
  1150.  
  1151. #define PROG_TYPE_CASE(a, b)                                      \
  1152.    case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
  1153.  
  1154.    switch (info->type) {
  1155.    PROG_TYPE_CASE(VERTEX, VERTEX);
  1156. // PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
  1157. // PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
  1158.    PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
  1159.    PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
  1160.    PROG_TYPE_CASE(COMPUTE, COMPUTE);
  1161.    default:
  1162.       type = nv50_ir::Program::TYPE_COMPUTE;
  1163.       break;
  1164.    }
  1165.    INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
  1166.  
  1167.    nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
  1168.    if (!targ)
  1169.       return -1;
  1170.  
  1171.    nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
  1172.    if (!prog)
  1173.       return -1;
  1174.    prog->driver = info;
  1175.    prog->dbgFlags = info->dbgFlags;
  1176.    prog->optLevel = info->optLevel;
  1177.  
  1178.    switch (info->bin.sourceRep) {
  1179. #if 0
  1180.    case PIPE_IR_LLVM:
  1181.    case PIPE_IR_GLSL:
  1182.       return -1;
  1183.    case PIPE_IR_SM4:
  1184.       ret = prog->makeFromSM4(info) ? 0 : -2;
  1185.       break;
  1186.    case PIPE_IR_TGSI:
  1187. #endif
  1188.    default:
  1189.       ret = prog->makeFromTGSI(info) ? 0 : -2;
  1190.       break;
  1191.    }
  1192.    if (ret < 0)
  1193.       goto out;
  1194.    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
  1195.       prog->print();
  1196.  
  1197.    targ->parseDriverInfo(info);
  1198.    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
  1199.  
  1200.    prog->convertToSSA();
  1201.  
  1202.    if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
  1203.       prog->print();
  1204.  
  1205.    prog->optimizeSSA(info->optLevel);
  1206.    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
  1207.  
  1208.    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
  1209.       prog->print();
  1210.  
  1211.    if (!prog->registerAllocation()) {
  1212.       ret = -4;
  1213.       goto out;
  1214.    }
  1215.    prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
  1216.  
  1217.    prog->optimizePostRA(info->optLevel);
  1218.  
  1219.    if (!prog->emitBinary(info)) {
  1220.       ret = -5;
  1221.       goto out;
  1222.    }
  1223.  
  1224. out:
  1225.    INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
  1226.  
  1227.    info->bin.maxGPR = prog->maxGPR;
  1228.    info->bin.code = prog->code;
  1229.    info->bin.codeSize = prog->binSize;
  1230.    info->bin.tlsSpace = prog->tlsSize;
  1231.  
  1232.    delete prog;
  1233.    nv50_ir::Target::destroy(targ);
  1234.  
  1235.    return ret;
  1236. }
  1237.  
  1238. } // extern "C"
  1239.