Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2011 Christoph Bumiller
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "codegen/nv50_ir.h"
  24. #include "codegen/nv50_ir_target_nv50.h"
  25.  
  26. namespace nv50_ir {
  27.  
// Encoding-form selectors passed as the `enc` argument of setSrcFileBits().
// Each emitForm_*() helper passes the selector matching the layout it emits.
#define NV50_OP_ENC_LONG     0 // passed by emitForm_MAD()
#define NV50_OP_ENC_SHORT    1 // passed by emitForm_MUL()
#define NV50_OP_ENC_IMM      2 // passed by emitForm_IMM()
#define NV50_OP_ENC_LONG_ALT 3 // passed by emitForm_ADD()
  32.  
// Code emitter for the NV50 target. The emit*() methods build the encoding of
// one instruction by writing/OR-ing bits into code[0] and code[1]
// (code[] is not declared here; presumably inherited from CodeEmitter).
class CodeEmitterNV50 : public CodeEmitter
{
public:
   CodeEmitterNV50(const TargetNV50 *);

   virtual bool emitInstruction(Instruction *);

   virtual uint32_t getMinEncodingSize(const Instruction *) const;

   // Select the program type; several encoders check progType for
   // geometry- or compute-specific encodings.
   inline void setProgramType(Program::Type pType) { progType = pType; }

   virtual void prepareEmission(Function *);

private:
   Program::Type progType; // assigned through setProgramType()

   const TargetNV50 *targNV50;

private:
   // Operand field helpers: `pos` is a bit position across the instruction
   // words; pos / 32 selects the word and pos % 32 the bit within it.
   inline void defId(const ValueDef&, const int pos);
   inline void srcId(const ValueRef&, const int pos);
   inline void srcId(const ValueRef *, const int pos);

   inline void srcAddr16(const ValueRef&, bool adj, const int pos);
   inline void srcAddr8(const ValueRef&, const int pos);

   // flags (condition register) read and write fields in code[1]
   void emitFlagsRd(const Instruction *);
   void emitFlagsWr(const Instruction *);

   void emitCondCode(CondCode cc, DataType ty, int pos);

   // address register selection bits
   inline void setARegBits(unsigned int);

   void setAReg16(const Instruction *, int s);
   void setImmediate(const Instruction *, int s);

   // destination / source operand fields
   void setDst(const Value *);
   void setDst(const Instruction *, int d);
   void setSrcFileBits(const Instruction *, int enc);
   void setSrc(const Instruction *, unsigned int s, int slot);

   // common operand layouts (long, alternate long, short, immediate)
   void emitForm_MAD(const Instruction *);
   void emitForm_ADD(const Instruction *);
   void emitForm_MUL(const Instruction *);
   void emitForm_IMM(const Instruction *);

   // access size fields: LG is used for local/global memory,
   // CS for constant/shared memory
   void emitLoadStoreSizeLG(DataType ty, int pos);
   void emitLoadStoreSizeCS(DataType ty);

   void roundMode_MAD(const Instruction *);
   void roundMode_CVT(RoundMode);

   // negate modifier bits of sources 0 and 1
   void emitMNeg12(const Instruction *);

   // per-operation encoders
   void emitLOAD(const Instruction *);
   void emitSTORE(const Instruction *);
   void emitMOV(const Instruction *);
   void emitRDSV(const Instruction *);
   void emitNOP();
   void emitINTERP(const Instruction *);
   void emitPFETCH(const Instruction *);
   void emitOUT(const Instruction *);

   void emitUADD(const Instruction *);
   void emitAADD(const Instruction *);
   void emitFADD(const Instruction *);
   void emitIMUL(const Instruction *);
   void emitFMUL(const Instruction *);
   void emitFMAD(const Instruction *);
   void emitIMAD(const Instruction *);
   void emitISAD(const Instruction *);

   void emitMINMAX(const Instruction *);

   void emitPreOp(const Instruction *);
   void emitSFnOp(const Instruction *, uint8_t subOp);

   void emitShift(const Instruction *);
   void emitARL(const Instruction *, unsigned int shl);
   void emitLogicOp(const Instruction *);
   void emitNOT(const Instruction *);

   void emitCVT(const Instruction *);
   void emitSET(const Instruction *);

   void emitTEX(const TexInstruction *);
   void emitTXQ(const TexInstruction *);
   void emitTEXPREP(const TexInstruction *);

   void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);

   void emitFlow(const Instruction *, uint8_t flowOp);
   void emitPRERETEmu(const FlowInstruction *);
   void emitBAR(const Instruction *);

   void emitATOM(const Instruction *);
};
  130.  
// Shorthand for the `reg.data` member of the value returned by rep() on a
// reference. Both macros expand identically; SDATA is written at source
// operands and DDATA at definitions.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
  133.  
  134. void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
  135. {
  136.    assert(src.get());
  137.    code[pos / 32] |= SDATA(src).id << (pos % 32);
  138. }
  139.  
  140. void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
  141. {
  142.    assert(src->get());
  143.    code[pos / 32] |= SDATA(*src).id << (pos % 32);
  144. }
  145.  
  146. void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
  147. {
  148.    assert(src.get());
  149.  
  150.    int32_t offset = SDATA(src).offset;
  151.  
  152.    assert(!adj || src.get()->reg.size <= 4);
  153.    if (adj)
  154.       offset /= src.get()->reg.size;
  155.  
  156.    assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
  157.  
  158.    if (offset < 0)
  159.       offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
  160.  
  161.    code[pos / 32] |= offset << (pos % 32);
  162. }
  163.  
  164. void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
  165. {
  166.    assert(src.get());
  167.  
  168.    uint32_t offset = SDATA(src).offset;
  169.  
  170.    assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
  171.  
  172.    code[pos / 32] |= (offset >> 2) << (pos % 32);
  173. }
  174.  
  175. void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
  176. {
  177.    assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
  178.  
  179.    code[pos / 32] |= DDATA(def).id << (pos % 32);
  180. }
  181.  
  182. void
  183. CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
  184. {
  185.    switch (insn->rnd) {
  186.    case ROUND_M: code[1] |= 1 << 22; break;
  187.    case ROUND_P: code[1] |= 2 << 22; break;
  188.    case ROUND_Z: code[1] |= 3 << 22; break;
  189.    default:
  190.       assert(insn->rnd == ROUND_N);
  191.       break;
  192.    }
  193. }
  194.  
  195. void
  196. CodeEmitterNV50::emitMNeg12(const Instruction *i)
  197. {
  198.    code[1] |= i->src(0).mod.neg() << 26;
  199.    code[1] |= i->src(1).mod.neg() << 27;
  200. }
  201.  
  202. void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
  203. {
  204.    uint8_t enc;
  205.  
  206.    assert(pos >= 32 || pos <= 27);
  207.  
  208.    switch (cc) {
  209.    case CC_LT:  enc = 0x1; break;
  210.    case CC_LTU: enc = 0x9; break;
  211.    case CC_EQ:  enc = 0x2; break;
  212.    case CC_EQU: enc = 0xa; break;
  213.    case CC_LE:  enc = 0x3; break;
  214.    case CC_LEU: enc = 0xb; break;
  215.    case CC_GT:  enc = 0x4; break;
  216.    case CC_GTU: enc = 0xc; break;
  217.    case CC_NE:  enc = 0x5; break;
  218.    case CC_NEU: enc = 0xd; break;
  219.    case CC_GE:  enc = 0x6; break;
  220.    case CC_GEU: enc = 0xe; break;
  221.    case CC_TR:  enc = 0xf; break;
  222.    case CC_FL:  enc = 0x0; break;
  223.  
  224.    case CC_O:  enc = 0x10; break;
  225.    case CC_C:  enc = 0x11; break;
  226.    case CC_A:  enc = 0x12; break;
  227.    case CC_S:  enc = 0x13; break;
  228.    case CC_NS: enc = 0x1c; break;
  229.    case CC_NA: enc = 0x1d; break;
  230.    case CC_NC: enc = 0x1e; break;
  231.    case CC_NO: enc = 0x1f; break;
  232.  
  233.    default:
  234.       enc = 0;
  235.       assert(!"invalid condition code");
  236.       break;
  237.    }
  238.    if (ty != TYPE_NONE && !isFloatType(ty))
  239.       enc &= ~0x8; // unordered only exists for float types
  240.  
  241.    code[pos / 32] |= enc << (pos % 32);
  242. }
  243.  
  244. void
  245. CodeEmitterNV50::emitFlagsRd(const Instruction *i)
  246. {
  247.    int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
  248.  
  249.    assert(!(code[1] & 0x00003f80));
  250.  
  251.    if (s >= 0) {
  252.       assert(i->getSrc(s)->reg.file == FILE_FLAGS);
  253.       emitCondCode(i->cc, TYPE_NONE, 32 + 7);
  254.       srcId(i->src(s), 32 + 12);
  255.    } else {
  256.       code[1] |= 0x0780;
  257.    }
  258. }
  259.  
  260. void
  261. CodeEmitterNV50::emitFlagsWr(const Instruction *i)
  262. {
  263.    assert(!(code[1] & 0x70));
  264.  
  265.    int flagsDef = i->flagsDef;
  266.  
  267.    // find flags definition and check that it is the last def
  268.    if (flagsDef < 0) {
  269.       for (int d = 0; i->defExists(d); ++d)
  270.          if (i->def(d).getFile() == FILE_FLAGS)
  271.             flagsDef = d;
  272.       if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
  273.          WARN("Instruction::flagsDef was not set properly\n");
  274.    }
  275.    if (flagsDef == 0 && i->defExists(1))
  276.       WARN("flags def should not be the primary definition\n");
  277.  
  278.    if (flagsDef >= 0)
  279.       code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
  280.  
  281. }
  282.  
  283. void
  284. CodeEmitterNV50::setARegBits(unsigned int u)
  285. {
  286.    code[0] |= (u & 3) << 26;
  287.    code[1] |= (u & 4);
  288. }
  289.  
  290. void
  291. CodeEmitterNV50::setAReg16(const Instruction *i, int s)
  292. {
  293.    if (i->srcExists(s)) {
  294.       s = i->src(s).indirect[0];
  295.       if (s >= 0)
  296.          setARegBits(SDATA(i->src(s)).id + 1);
  297.    }
  298. }
  299.  
  300. void
  301. CodeEmitterNV50::setImmediate(const Instruction *i, int s)
  302. {
  303.    const ImmediateValue *imm = i->src(s).get()->asImm();
  304.    assert(imm);
  305.  
  306.    uint32_t u = imm->reg.data.u32;
  307.  
  308.    if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
  309.       u = ~u;
  310.  
  311.    code[1] |= 3;
  312.    code[0] |= (u & 0x3f) << 16;
  313.    code[1] |= (u >> 6) << 2;
  314. }
  315.  
  316. void
  317. CodeEmitterNV50::setDst(const Value *dst)
  318. {
  319.    const Storage *reg = &dst->join->reg;
  320.  
  321.    assert(reg->file != FILE_ADDRESS);
  322.  
  323.    if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
  324.       code[0] |= (127 << 2) | 1;
  325.       code[1] |= 8;
  326.    } else {
  327.       int id;
  328.       if (reg->file == FILE_SHADER_OUTPUT) {
  329.          code[1] |= 8;
  330.          id = reg->data.offset / 4;
  331.       } else {
  332.          id = reg->data.id;
  333.       }
  334.       code[0] |= id << 2;
  335.    }
  336. }
  337.  
  338. void
  339. CodeEmitterNV50::setDst(const Instruction *i, int d)
  340. {
  341.    if (i->defExists(d)) {
  342.       setDst(i->getDef(d));
  343.    } else
  344.    if (!d) {
  345.       code[0] |= 0x01fc; // bit bucket
  346.       code[1] |= 0x0008;
  347.    }
  348. }
  349.  
// Set the bits that select the register file of each source operand.
//
// A `mode` value is first built with 2 bits per source (source s occupies
// bits 2s..2s+1), i.e. 3 * 2 bits:
// 0: r   (FILE_GPR)
// 1: a/s (FILE_SHADER_INPUT / FILE_MEMORY_SHARED)
// 2: c   (FILE_MEMORY_CONST)
// 3: i   (FILE_IMMEDIATE)
// The combination is then translated into opcode bits, which depend on the
// encoding form @enc (one of NV50_OP_ENC_*) and on the program type.
// Unsupported files or combinations are reported via ERROR() and assert.
void
CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
{
   uint8_t mode = 0;

   // classify each source the operation takes
   for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
      switch (i->src(s).getFile()) {
      case FILE_GPR:
         break;
      case FILE_MEMORY_SHARED:
      case FILE_SHADER_INPUT:
         mode |= 1 << (s * 2);
         break;
      case FILE_MEMORY_CONST:
         mode |= 2 << (s * 2);
         break;
      case FILE_IMMEDIATE:
         mode |= 3 << (s * 2);
         break;
      default:
              ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
         assert(0);
         break;
      }
   }
   // In the case labels below, the letters list sources 0, 1, 2 in order;
   // 'g' stands for an indirectly addressed source 0 in a geometry program.
   switch (mode) {
   case 0x00: // rrr
      break;
   case 0x01: // arr/grr
      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
         code[0] |= 0x01800000;
         if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
            code[1] |= 0x00200000;
      } else {
         // the short form keeps the selector in code[0], all others in code[1]
         if (enc == NV50_OP_ENC_SHORT)
            code[0] |= 0x01000000;
         else
            code[1] |= 0x00200000;
      }
      break;
   case 0x03: // irr
      assert(i->op == OP_MOV);
      return; // note: also skips the compute-only handling below
   case 0x0c: // rir
      break;
   case 0x0d: // gir
      assert(progType == Program::TYPE_GEOMETRY ||
             progType == Program::TYPE_COMPUTE);
      code[0] |= 0x01000000;
      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
         // address register id + 1 goes to bit 26 of code[0]
         int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
         assert(reg < 3);
         code[0] |= (reg + 1) << 26;
      }
      break;
   case 0x08: // rcr
      code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
      code[1] |= (i->getSrc(1)->reg.fileIndex << 22); // file index of source 1
      break;
   case 0x09: // acr/gcr
      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
         code[0] |= 0x01800000;
      } else {
         code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
         code[1] |= 0x00200000;
      }
      code[1] |= (i->getSrc(1)->reg.fileIndex << 22); // file index of source 1
      break;
   case 0x20: // rrc
      code[0] |= 0x01000000;
      code[1] |= (i->getSrc(2)->reg.fileIndex << 22); // file index of source 2
      break;
   case 0x21: // arc
      code[0] |= 0x01000000;
      code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
      assert(progType != Program::TYPE_GEOMETRY);
      break;
   default:
      ERROR("not encodable: %x\n", mode);
      assert(0);
      break;
   }
   // everything below applies to compute programs only
   if (progType != Program::TYPE_COMPUTE)
      return;

   // source 0 is in the a/s class: encode its data type as a 2-bit field in
   // code[0], at bit 13 if source 1 is an immediate and at bit 14 otherwise
   if ((mode & 3) == 1) {
      const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;

      switch (i->getSrc(0)->reg.type) {
      case TYPE_U8:
         break;
      case TYPE_U16:
         code[0] |= 1 << pos;
         break;
      case TYPE_S16:
         code[0] |= 2 << pos;
         break;
      default:
         // every remaining type must be 4 bytes wide
         code[0] |= 3 << pos;
         assert(i->getSrc(0)->reg.size == 4);
         break;
      }
   }
}
  459.  
  460. void
  461. CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
  462. {
  463.    if (Target::operationSrcNr[i->op] <= s)
  464.       return;
  465.    const Storage *reg = &i->src(s).rep()->reg;
  466.  
  467.    unsigned int id = (reg->file == FILE_GPR) ?
  468.       reg->data.id :
  469.       reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
  470.  
  471.    switch (slot) {
  472.    case 0: code[0] |= id << 9; break;
  473.    case 1: code[0] |= id << 16; break;
  474.    case 2: code[1] |= id << 14; break;
  475.    default:
  476.       assert(0);
  477.       break;
  478.    }
  479. }
  480.  
  481. // the default form:
  482. //  - long instruction
  483. //  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
  484. //  - address & flags
  485. void
  486. CodeEmitterNV50::emitForm_MAD(const Instruction *i)
  487. {
  488.    assert(i->encSize == 8);
  489.    code[0] |= 1;
  490.  
  491.    emitFlagsRd(i);
  492.    emitFlagsWr(i);
  493.  
  494.    setDst(i, 0);
  495.  
  496.    setSrcFileBits(i, NV50_OP_ENC_LONG);
  497.    setSrc(i, 0, 0);
  498.    setSrc(i, 1, 1);
  499.    setSrc(i, 2, 2);
  500.  
  501.    if (i->getIndirect(0, 0)) {
  502.       assert(!i->getIndirect(1, 0));
  503.       setAReg16(i, 0);
  504.    } else {
  505.       setAReg16(i, 1);
  506.    }
  507. }
  508.  
  509. // like default form, but 2nd source in slot 2, and no 3rd source
  510. void
  511. CodeEmitterNV50::emitForm_ADD(const Instruction *i)
  512. {
  513.    assert(i->encSize == 8);
  514.    code[0] |= 1;
  515.  
  516.    emitFlagsRd(i);
  517.    emitFlagsWr(i);
  518.  
  519.    setDst(i, 0);
  520.  
  521.    setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
  522.    setSrc(i, 0, 0);
  523.    setSrc(i, 1, 2);
  524.  
  525.    if (i->getIndirect(0, 0)) {
  526.       assert(!i->getIndirect(1, 0));
  527.       setAReg16(i, 0);
  528.    } else {
  529.       setAReg16(i, 1);
  530.    }
  531. }
  532.  
  533. // default short form (rr, ar, rc, gr)
  534. void
  535. CodeEmitterNV50::emitForm_MUL(const Instruction *i)
  536. {
  537.    assert(i->encSize == 4 && !(code[0] & 1));
  538.    assert(i->defExists(0));
  539.    assert(!i->getPredicate());
  540.  
  541.    setDst(i, 0);
  542.  
  543.    setSrcFileBits(i, NV50_OP_ENC_SHORT);
  544.    setSrc(i, 0, 0);
  545.    setSrc(i, 1, 1);
  546. }
  547.  
  548. // usual immediate form
  549. // - 1 to 3 sources where last is immediate (rir, gir)
  550. // - no address or predicate possible
  551. void
  552. CodeEmitterNV50::emitForm_IMM(const Instruction *i)
  553. {
  554.    assert(i->encSize == 8);
  555.    code[0] |= 1;
  556.  
  557.    assert(i->defExists(0) && i->srcExists(0));
  558.  
  559.    setDst(i, 0);
  560.  
  561.    setSrcFileBits(i, NV50_OP_ENC_IMM);
  562.    if (Target::operationSrcNr[i->op] > 1) {
  563.       setSrc(i, 0, 0);
  564.       setImmediate(i, 1);
  565.       setSrc(i, 2, 1);
  566.    } else {
  567.       setImmediate(i, 0);
  568.    }
  569. }
  570.  
  571. void
  572. CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
  573. {
  574.    uint8_t enc;
  575.  
  576.    switch (ty) {
  577.    case TYPE_F32: // fall through
  578.    case TYPE_S32: // fall through
  579.    case TYPE_U32:  enc = 0x6; break;
  580.    case TYPE_B128: enc = 0x5; break;
  581.    case TYPE_F64: // fall through
  582.    case TYPE_S64: // fall through
  583.    case TYPE_U64:  enc = 0x4; break;
  584.    case TYPE_S16:  enc = 0x3; break;
  585.    case TYPE_U16:  enc = 0x2; break;
  586.    case TYPE_S8:   enc = 0x1; break;
  587.    case TYPE_U8:   enc = 0x0; break;
  588.    default:
  589.       enc = 0;
  590.       assert(!"invalid load/store type");
  591.       break;
  592.    }
  593.    code[pos / 32] |= enc << (pos % 32);
  594. }
  595.  
  596. void
  597. CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
  598. {
  599.    switch (ty) {
  600.    case TYPE_U8: break;
  601.    case TYPE_U16: code[1] |= 0x4000; break;
  602.    case TYPE_S16: code[1] |= 0x8000; break;
  603.    case TYPE_F32:
  604.    case TYPE_S32:
  605.    case TYPE_U32: code[1] |= 0xc000; break;
  606.    default:
  607.       assert(0);
  608.       break;
  609.    }
  610. }
  611.  
// Emit a load of source 0 into definition 0.
// The base opcode words are chosen by the register file of source 0; the
// access size is then encoded with emitLoadStoreSizeCS() (constant/shared)
// or emitLoadStoreSizeLG() (local/global). Afterwards the destination, the
// flags fields and the address operand are filled in.
void
CodeEmitterNV50::emitLOAD(const Instruction *i)
{
   DataFile sf = i->src(0).getFile();
   int32_t offset = i->getSrc(0)->reg.data.offset; // only used by the asserts

   switch (sf) {
   case FILE_SHADER_INPUT:
      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
         code[0] = 0x11800001;
      else
         // use 'mov' where we can
         code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
      code[1] = 0x00200000 | (i->lanes << 14);
      if (typeSizeof(i->dType) == 4)
         code[1] |= 0x04000000;
      break;
   case FILE_MEMORY_SHARED:
      // two encodings, selected by chipset
      if (targ->getChipset() >= 0x84) {
         assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
         code[0] = 0x10000001;
         code[1] = 0x40000000;

         if (typeSizeof(i->dType) == 4)
            code[1] |= 0x04000000;

         emitLoadStoreSizeCS(i->sType);
      } else {
         assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
         code[0] = 0x10000001;
         code[1] = 0x00200000 | (i->lanes << 14);
         emitLoadStoreSizeCS(i->sType);
      }
      break;
   case FILE_MEMORY_CONST:
      code[0] = 0x10000001;
      // the file index of source 0 goes to bit 22 of code[1]
      code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
      if (typeSizeof(i->dType) == 4)
         code[1] |= 0x04000000;
      emitLoadStoreSizeCS(i->sType);
      break;
   case FILE_MEMORY_LOCAL:
      code[0] = 0xd0000001;
      code[1] = 0x40000000;
      break;
   case FILE_MEMORY_GLOBAL:
      // the file index of source 0 goes to bit 16 of code[0]
      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
      code[1] = 0x80000000;
      break;
   default:
      assert(!"invalid load source file");
      break;
   }
   // local/global accesses carry their size field at bit 21 of code[1]
   if (sf == FILE_MEMORY_LOCAL ||
       sf == FILE_MEMORY_GLOBAL)
      emitLoadStoreSizeLG(i->sType, 21 + 32);

   setDst(i, 0);

   emitFlagsRd(i);
   emitFlagsWr(i);

   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
      // global: the indirect (address) register id is placed at bit 9
      srcId(*i->src(0).getIndirect(0), 9);
   } else {
      // others: optional address register plus a 16-bit offset at bit 9;
      // the offset is divided by the access size except for local memory
      setAReg16(i, 0);
      srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
   }
}
  681.  
// Emit a store of source 1 to the location described by source 0.
// The base opcode words and the position of the stored register's id depend
// on the register file of source 0; afterwards the address operand and the
// flags-read field are filled in.
void
CodeEmitterNV50::emitSTORE(const Instruction *i)
{
   DataFile f = i->getSrc(0)->reg.file;
   int32_t offset = i->getSrc(0)->reg.data.offset;

   switch (f) {
   case FILE_SHADER_OUTPUT:
      // offset / 4 at bit 9 of code[0], data register at bit 14 of code[1]
      code[0] = 0x00000001 | ((offset >> 2) << 9);
      code[1] = 0x80c00000;
      srcId(i->src(1), 32 + 14);
      break;
   case FILE_MEMORY_GLOBAL:
      // file index of source 0 at bit 16, data register at bit 2 of code[0]
      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
      code[1] = 0xa0000000;
      emitLoadStoreSizeLG(i->dType, 21 + 32);
      srcId(i->src(1), 2);
      break;
   case FILE_MEMORY_LOCAL:
      code[0] = 0xd0000001;
      code[1] = 0x60000000;
      emitLoadStoreSizeLG(i->dType, 21 + 32);
      srcId(i->src(1), 2);
      break;
   case FILE_MEMORY_SHARED:
      code[0] = 0x00000001;
      code[1] = 0xe0000000;
      // the offset is scaled down by the access size before being encoded
      switch (typeSizeof(i->dType)) {
      case 1:
         code[0] |= offset << 9;
         code[1] |= 0x00400000;
         break;
      case 2:
         code[0] |= (offset >> 1) << 9;
         break;
      case 4:
         code[0] |= (offset >> 2) << 9;
         code[1] |= 0x04200000;
         break;
      default:
         assert(0);
         break;
      }
      srcId(i->src(1), 32 + 14);
      break;
   default:
      assert(!"invalid store destination file");
      break;
   }

   // global: address register id at bit 9; others: optional address register
   if (f == FILE_MEMORY_GLOBAL)
      srcId(*i->src(0).getIndirect(0), 9);
   else
      setAReg16(i, 0);

   // local: unscaled 16-bit offset at bit 9
   if (f == FILE_MEMORY_LOCAL)
      srcAddr16(i->src(0), false, 9);

   emitFlagsRd(i);
}
  742.  
// Emit a move from source 0 to definition 0. The encoding is selected, in
// this order, by: flags source, address source, flags destination,
// immediate source, and finally the plain register move (short or long).
// Either the source or the destination must be a GPR.
void
CodeEmitterNV50::emitMOV(const Instruction *i)
{
   DataFile sf = i->getSrc(0)->reg.file;
   DataFile df = i->getDef(0)->reg.file;

   assert(sf == FILE_GPR || df == FILE_GPR);

   if (sf == FILE_FLAGS) {
      // source is a flags register
      code[0] = 0x00000001;
      code[1] = 0x20000000;
      defId(i->def(0), 2);
      srcId(i->src(0), 12);
      emitFlagsRd(i);
   } else
   if (sf == FILE_ADDRESS) {
      // source is an address register, selected via the address register bits
      code[0] = 0x00000001;
      code[1] = 0x40000000;
      defId(i->def(0), 2);
      setARegBits(SDATA(i->src(0)).id + 1);
      emitFlagsRd(i);
   } else
   if (df == FILE_FLAGS) {
      // destination is a flags register; its id goes to bit 4
      code[0] = 0x00000001;
      code[1] = 0xa0000000;
      defId(i->def(0), 4);
      srcId(i->src(0), 9);
      emitFlagsRd(i);
   } else
   if (sf == FILE_IMMEDIATE) {
      code[0] = 0x10008001;
      code[1] = 0x00000003;
      emitForm_IMM(i);
   } else {
      if (i->encSize == 4) {
         // short form: only code[0] is written
         code[0] = 0x10008000;
      } else {
         code[0] = 0x10000001;
         // bit 26 of code[1] is set unless the data type is 2 bytes wide
         code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
         code[1] |= (i->lanes << 14);
         emitFlagsRd(i);
      }
      defId(i->def(0), 2);
      srcId(i->src(0), 9);
   }
   if (df == FILE_SHADER_OUTPUT) {
      // writing a shader output needs the long form, with bit 3 of code[1] set
      assert(i->encSize == 8);
      code[1] |= 0x8;
   }
}
  793.  
  794. static inline uint8_t getSRegEncoding(const ValueRef &ref)
  795. {
  796.    switch (SDATA(ref).sv.sv) {
  797.    case SV_PHYSID:        return 0;
  798.    case SV_CLOCK:         return 1;
  799.    case SV_VERTEX_STRIDE: return 3;
  800. // case SV_PM_COUNTER:    return 4 + SDATA(ref).sv.index;
  801.    case SV_SAMPLE_INDEX:  return 8;
  802.    default:
  803.       assert(!"no sreg for system value");
  804.       return 0;
  805.    }
  806. }
  807.  
  808. void
  809. CodeEmitterNV50::emitRDSV(const Instruction *i)
  810. {
  811.    code[0] = 0x00000001;
  812.    code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
  813.    defId(i->def(0), 2);
  814.    emitFlagsRd(i);
  815. }
  816.  
  817. void
  818. CodeEmitterNV50::emitNOP()
  819. {
  820.    code[0] = 0xf0000001;
  821.    code[1] = 0xe0000000;
  822. }
  823.  
  824. void
  825. CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
  826. {
  827.    code[0] = 0xc0000000 | (lane << 16);
  828.    code[1] = 0x80000000;
  829.  
  830.    code[0] |= (quOp & 0x03) << 20;
  831.    code[1] |= (quOp & 0xfc) << 20;
  832.  
  833.    emitForm_ADD(i);
  834.  
  835.    if (!i->srcExists(1))
  836.       srcId(i->src(0), 32 + 14);
  837. }
  838.  
  839. /* NOTE: This returns the base address of a vertex inside the primitive.
  840.  * src0 is an immediate, the index (not offset) of the vertex
  841.  * inside the primitive. XXX: signed or unsigned ?
  842.  * src1 (may be NULL) should use whatever units the hardware requires
  843.  * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
  844.  */
  845. void
  846. CodeEmitterNV50::emitPFETCH(const Instruction *i)
  847. {
  848.    const uint32_t prim = i->src(0).get()->reg.data.u32;
  849.    assert(prim <= 127);
  850.  
  851.    if (i->def(0).getFile() == FILE_ADDRESS) {
  852.       // shl $aX a[] 0
  853.       code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
  854.       code[1] = 0xc0200000;
  855.       code[0] |= prim << 9;
  856.       assert(!i->srcExists(1));
  857.    } else
  858.    if (i->srcExists(1)) {
  859.       // ld b32 $rX a[$aX+base]
  860.       code[0] = 0x00000001;
  861.       code[1] = 0x04200000 | (0xf << 14);
  862.       defId(i->def(0), 2);
  863.       code[0] |= prim << 9;
  864.       setARegBits(SDATA(i->src(1)).id + 1);
  865.    } else {
  866.       // mov b32 $rX a[]
  867.       code[0] = 0x10000001;
  868.       code[1] = 0x04200000 | (0xf << 14);
  869.       defId(i->def(0), 2);
  870.       code[0] |= prim << 9;
  871.    }
  872.    emitFlagsRd(i);
  873. }
  874.  
// Emit an interpolation instruction. The short encoding is built first in
// code[0]; for the long (8 byte) encoding the mode bits are then moved over
// to code[1].
void
CodeEmitterNV50::emitINTERP(const Instruction *i)
{
   code[0] = 0x80000000;

   defId(i->def(0), 2);
   srcAddr8(i->src(0), 16); // offset of source 0, divided by 4, at bit 16

   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
      code[0] |= 1 << 8;
   } else {
      if (i->op == OP_PINTERP) {
         // OP_PINTERP takes a second source, encoded at bit 9
         code[0] |= 1 << 25;
         srcId(i->src(1), 9);
      }
      if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
         code[0] |= 1 << 24;
   }

   if (i->encSize == 8) {
      // long form: bits 24..25 of code[0] move to bits 16..17 of code[1] and
      // bit 8 (flat) moves to bit 18; they are then cleared from code[0]
      code[1] =
         (code[0] & (3 << 24)) >> (24 - 16) |
         (code[0] & (1 <<  8)) << (18 -  8);
      code[0] &= ~0x03000100;
      code[0] |= 1; // bit 0 marks the long form
      emitFlagsRd(i);
   }
}
  903.  
  904. void
  905. CodeEmitterNV50::emitMINMAX(const Instruction *i)
  906. {
  907.    if (i->dType == TYPE_F64) {
  908.       code[0] = 0xe0000000;
  909.       code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
  910.    } else {
  911.       code[0] = 0x30000000;
  912.       code[1] = 0x80000000;
  913.       if (i->op == OP_MIN)
  914.          code[1] |= 0x20000000;
  915.  
  916.       switch (i->dType) {
  917.       case TYPE_F32: code[0] |= 0x80000000; break;
  918.       case TYPE_S32: code[1] |= 0x8c000000; break;
  919.       case TYPE_U32: code[1] |= 0x84000000; break;
  920.       case TYPE_S16: code[1] |= 0x80000000; break;
  921.       case TYPE_U16: break;
  922.       default:
  923.          assert(0);
  924.          break;
  925.       }
  926.       code[1] |= i->src(0).mod.abs() << 20;
  927.       code[1] |= i->src(0).mod.neg() << 26;
  928.       code[1] |= i->src(1).mod.abs() << 19;
  929.       code[1] |= i->src(1).mod.neg() << 27;
  930.    }
  931.    emitForm_MAD(i);
  932. }
  933.  
  934. void
  935. CodeEmitterNV50::emitFMAD(const Instruction *i)
  936. {
  937.    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
  938.    const int neg_add = i->src(2).mod.neg();
  939.  
  940.    code[0] = 0xe0000000;
  941.  
  942.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  943.       code[1] = 0;
  944.       emitForm_IMM(i);
  945.       code[0] |= neg_mul << 15;
  946.       code[0] |= neg_add << 22;
  947.       if (i->saturate)
  948.          code[0] |= 1 << 8;
  949.    } else
  950.    if (i->encSize == 4) {
  951.       emitForm_MUL(i);
  952.       code[0] |= neg_mul << 15;
  953.       code[0] |= neg_add << 22;
  954.       if (i->saturate)
  955.          code[0] |= 1 << 8;
  956.    } else {
  957.       code[1]  = neg_mul << 26;
  958.       code[1] |= neg_add << 27;
  959.       if (i->saturate)
  960.          code[1] |= 1 << 29;
  961.       emitForm_MAD(i);
  962.    }
  963. }
  964.  
  965. void
  966. CodeEmitterNV50::emitFADD(const Instruction *i)
  967. {
  968.    const int neg0 = i->src(0).mod.neg();
  969.    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
  970.  
  971.    code[0] = 0xb0000000;
  972.  
  973.    assert(!(i->src(0).mod | i->src(1).mod).abs());
  974.  
  975.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  976.       code[1] = 0;
  977.       emitForm_IMM(i);
  978.       code[0] |= neg0 << 15;
  979.       code[0] |= neg1 << 22;
  980.       if (i->saturate)
  981.          code[0] |= 1 << 8;
  982.    } else
  983.    if (i->encSize == 8) {
  984.       code[1] = 0;
  985.       emitForm_ADD(i);
  986.       code[1] |= neg0 << 26;
  987.       code[1] |= neg1 << 27;
  988.       if (i->saturate)
  989.          code[1] |= 1 << 29;
  990.    } else {
  991.       emitForm_MUL(i);
  992.       code[0] |= neg0 << 15;
  993.       code[0] |= neg1 << 22;
  994.       if (i->saturate)
  995.          code[0] |= 1 << 8;
  996.    }
  997. }
  998.  
  999. void
  1000. CodeEmitterNV50::emitUADD(const Instruction *i)
  1001. {
  1002.    const int neg0 = i->src(0).mod.neg();
  1003.    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
  1004.  
  1005.    code[0] = 0x20008000;
  1006.  
  1007.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  1008.       code[1] = 0;
  1009.       emitForm_IMM(i);
  1010.    } else
  1011.    if (i->encSize == 8) {
  1012.       code[0] = 0x20000000;
  1013.       code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
  1014.       emitForm_ADD(i);
  1015.    } else {
  1016.       emitForm_MUL(i);
  1017.    }
  1018.    assert(!(neg0 && neg1));
  1019.    code[0] |= neg0 << 28;
  1020.    code[0] |= neg1 << 22;
  1021.  
  1022.    if (i->flagsSrc >= 0) {
  1023.       // addc == sub | subr
  1024.       assert(!(code[0] & 0x10400000) && !i->getPredicate());
  1025.       code[0] |= 0x10400000;
  1026.       srcId(i->src(i->flagsSrc), 32 + 12);
  1027.    }
  1028. }
  1029.  
  1030. void
  1031. CodeEmitterNV50::emitAADD(const Instruction *i)
  1032. {
  1033.    const int s = (i->op == OP_MOV) ? 0 : 1;
  1034.  
  1035.    code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
  1036.    code[1] = 0x20000000;
  1037.  
  1038.    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
  1039.  
  1040.    emitFlagsRd(i);
  1041.  
  1042.    if (s && i->srcExists(0))
  1043.       setARegBits(SDATA(i->src(0)).id + 1);
  1044. }
  1045.  
  1046. void
  1047. CodeEmitterNV50::emitIMUL(const Instruction *i)
  1048. {
  1049.    code[0] = 0x40000000;
  1050.  
  1051.    if (i->encSize == 8) {
  1052.       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
  1053.       emitForm_MAD(i);
  1054.    } else {
  1055.       if (i->sType == TYPE_S16)
  1056.          code[0] |= 0x8100;
  1057.       emitForm_MUL(i);
  1058.    }
  1059. }
  1060.  
  1061. void
  1062. CodeEmitterNV50::emitFMUL(const Instruction *i)
  1063. {
  1064.    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
  1065.  
  1066.    code[0] = 0xc0000000;
  1067.  
  1068.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  1069.       code[1] = 0;
  1070.       emitForm_IMM(i);
  1071.       if (neg)
  1072.          code[0] |= 0x8000;
  1073.       if (i->saturate)
  1074.          code[0] |= 1 << 8;
  1075.    } else
  1076.    if (i->encSize == 8) {
  1077.       code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
  1078.       if (neg)
  1079.          code[1] |= 0x08000000;
  1080.       if (i->saturate)
  1081.          code[1] |= 1 << 20;
  1082.       emitForm_MAD(i);
  1083.    } else {
  1084.       emitForm_MUL(i);
  1085.       if (neg)
  1086.          code[0] |= 0x8000;
  1087.       if (i->saturate)
  1088.          code[0] |= 1 << 8;
  1089.    }
  1090. }
  1091.  
  1092. void
  1093. CodeEmitterNV50::emitIMAD(const Instruction *i)
  1094. {
  1095.    code[0] = 0x60000000;
  1096.    if (isSignedType(i->sType))
  1097.       code[1] = i->saturate ? 0x40000000 : 0x20000000;
  1098.    else
  1099.       code[1] = 0x00000000;
  1100.  
  1101.    int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
  1102.    int neg2 = i->src(2).mod.neg();
  1103.  
  1104.    assert(!(neg1 & neg2));
  1105.    code[1] |= neg1 << 27;
  1106.    code[1] |= neg2 << 26;
  1107.  
  1108.    emitForm_MAD(i);
  1109.  
  1110.    if (i->flagsSrc >= 0) {
  1111.       // add with carry from $cX
  1112.       assert(!(code[1] & 0x0c000000) && !i->getPredicate());
  1113.       code[1] |= 0xc << 24;
  1114.       srcId(i->src(i->flagsSrc), 32 + 12);
  1115.    }
  1116. }
  1117.  
  1118. void
  1119. CodeEmitterNV50::emitISAD(const Instruction *i)
  1120. {
  1121.    if (i->encSize == 8) {
  1122.       code[0] = 0x50000000;
  1123.       switch (i->sType) {
  1124.       case TYPE_U32: code[1] = 0x04000000; break;
  1125.       case TYPE_S32: code[1] = 0x0c000000; break;
  1126.       case TYPE_U16: code[1] = 0x00000000; break;
  1127.       case TYPE_S16: code[1] = 0x08000000; break;
  1128.       default:
  1129.          assert(0);
  1130.          break;
  1131.       }
  1132.       emitForm_MAD(i);
  1133.    } else {
  1134.       switch (i->sType) {
  1135.       case TYPE_U32: code[0] = 0x50008000; break;
  1136.       case TYPE_S32: code[0] = 0x50008100; break;
  1137.       case TYPE_U16: code[0] = 0x50000000; break;
  1138.       case TYPE_S16: code[0] = 0x50000100; break;
  1139.       default:
  1140.          assert(0);
  1141.          break;
  1142.       }
  1143.       emitForm_MUL(i);
  1144.    }
  1145. }
  1146.  
  1147. void
  1148. CodeEmitterNV50::emitSET(const Instruction *i)
  1149. {
  1150.    code[0] = 0x30000000;
  1151.    code[1] = 0x60000000;
  1152.  
  1153.    emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
  1154.  
  1155.    switch (i->sType) {
  1156.    case TYPE_F32: code[0] |= 0x80000000; break;
  1157.    case TYPE_S32: code[1] |= 0x0c000000; break;
  1158.    case TYPE_U32: code[1] |= 0x04000000; break;
  1159.    case TYPE_S16: code[1] |= 0x08000000; break;
  1160.    case TYPE_U16: break;
  1161.    default:
  1162.       assert(0);
  1163.       break;
  1164.    }
  1165.    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
  1166.    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
  1167.    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
  1168.    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
  1169.  
  1170.    emitForm_MAD(i);
  1171. }
  1172.  
  1173. void
  1174. CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
  1175. {
  1176.    switch (rnd) {
  1177.    case ROUND_NI: code[1] |= 0x08000000; break;
  1178.    case ROUND_M:  code[1] |= 0x00020000; break;
  1179.    case ROUND_MI: code[1] |= 0x08020000; break;
  1180.    case ROUND_P:  code[1] |= 0x00040000; break;
  1181.    case ROUND_PI: code[1] |= 0x08040000; break;
  1182.    case ROUND_Z:  code[1] |= 0x00060000; break;
  1183.    case ROUND_ZI: code[1] |= 0x08060000; break;
  1184.    default:
  1185.       assert(rnd == ROUND_N);
  1186.       break;
  1187.    }
  1188. }
  1189.  
  1190. void
  1191. CodeEmitterNV50::emitCVT(const Instruction *i)
  1192. {
  1193.    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
  1194.    RoundMode rnd;
  1195.    DataType dType;
  1196.  
  1197.    switch (i->op) {
  1198.    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
  1199.    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
  1200.    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
  1201.    default:
  1202.       rnd = i->rnd;
  1203.       break;
  1204.    }
  1205.  
  1206.    if (i->op == OP_NEG && i->dType == TYPE_U32)
  1207.       dType = TYPE_S32;
  1208.    else
  1209.       dType = i->dType;
  1210.  
  1211.    code[0] = 0xa0000000;
  1212.  
  1213.    switch (dType) {
  1214.    case TYPE_F64:
  1215.       switch (i->sType) {
  1216.       case TYPE_F64: code[1] = 0xc4404000; break;
  1217.       case TYPE_S64: code[1] = 0x44414000; break;
  1218.       case TYPE_U64: code[1] = 0x44404000; break;
  1219.       case TYPE_F32: code[1] = 0xc4400000; break;
  1220.       case TYPE_S32: code[1] = 0x44410000; break;
  1221.       case TYPE_U32: code[1] = 0x44400000; break;
  1222.       default:
  1223.          assert(0);
  1224.          break;
  1225.       }
  1226.       break;
  1227.    case TYPE_S64:
  1228.       switch (i->sType) {
  1229.       case TYPE_F64: code[1] = 0x8c404000; break;
  1230.       case TYPE_F32: code[1] = 0x8c400000; break;
  1231.       default:
  1232.          assert(0);
  1233.          break;
  1234.       }
  1235.       break;
  1236.    case TYPE_U64:
  1237.       switch (i->sType) {
  1238.       case TYPE_F64: code[1] = 0x84404000; break;
  1239.       case TYPE_F32: code[1] = 0x84400000; break;
  1240.       default:
  1241.          assert(0);
  1242.          break;
  1243.       }
  1244.       break;
  1245.    case TYPE_F32:
  1246.       switch (i->sType) {
  1247.       case TYPE_F64: code[1] = 0xc0404000; break;
  1248.       case TYPE_S64: code[1] = 0x40414000; break;
  1249.       case TYPE_U64: code[1] = 0x40404000; break;
  1250.       case TYPE_F32: code[1] = 0xc4004000; break;
  1251.       case TYPE_S32: code[1] = 0x44014000; break;
  1252.       case TYPE_U32: code[1] = 0x44004000; break;
  1253.       case TYPE_F16: code[1] = 0xc4000000; break;
  1254.       case TYPE_U16: code[1] = 0x44000000; break;
  1255.       default:
  1256.          assert(0);
  1257.          break;
  1258.       }
  1259.       break;
  1260.    case TYPE_S32:
  1261.       switch (i->sType) {
  1262.       case TYPE_F64: code[1] = 0x88404000; break;
  1263.       case TYPE_F32: code[1] = 0x8c004000; break;
  1264.       case TYPE_S32: code[1] = 0x0c014000; break;
  1265.       case TYPE_U32: code[1] = 0x0c004000; break;
  1266.       case TYPE_F16: code[1] = 0x8c000000; break;
  1267.       case TYPE_S16: code[1] = 0x0c010000; break;
  1268.       case TYPE_U16: code[1] = 0x0c000000; break;
  1269.       case TYPE_S8:  code[1] = 0x0c018000; break;
  1270.       case TYPE_U8:  code[1] = 0x0c008000; break;
  1271.       default:
  1272.          assert(0);
  1273.          break;
  1274.       }
  1275.       break;
  1276.    case TYPE_U32:
  1277.       switch (i->sType) {
  1278.       case TYPE_F64: code[1] = 0x80404000; break;
  1279.       case TYPE_F32: code[1] = 0x84004000; break;
  1280.       case TYPE_S32: code[1] = 0x04014000; break;
  1281.       case TYPE_U32: code[1] = 0x04004000; break;
  1282.       case TYPE_F16: code[1] = 0x84000000; break;
  1283.       case TYPE_S16: code[1] = 0x04010000; break;
  1284.       case TYPE_U16: code[1] = 0x04000000; break;
  1285.       case TYPE_S8:  code[1] = 0x04018000; break;
  1286.       case TYPE_U8:  code[1] = 0x04008000; break;
  1287.       default:
  1288.          assert(0);
  1289.          break;
  1290.       }
  1291.       break;
  1292.    case TYPE_S16:
  1293.    case TYPE_U16:
  1294.    case TYPE_S8:
  1295.    case TYPE_U8:
  1296.    default:
  1297.       assert(0);
  1298.       break;
  1299.    }
  1300.    if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
  1301.       code[1] |= 0x00004000;
  1302.  
  1303.    roundMode_CVT(rnd);
  1304.  
  1305.    switch (i->op) {
  1306.    case OP_ABS: code[1] |= 1 << 20; break;
  1307.    case OP_SAT: code[1] |= 1 << 19; break;
  1308.    case OP_NEG: code[1] |= 1 << 29; break;
  1309.    default:
  1310.       break;
  1311.    }
  1312.    code[1] ^= i->src(0).mod.neg() << 29;
  1313.    code[1] |= i->src(0).mod.abs() << 20;
  1314.    if (i->saturate)
  1315.       code[1] |= 1 << 19;
  1316.  
  1317.    assert(i->op != OP_ABS || !i->src(0).mod.neg());
  1318.  
  1319.    emitForm_MAD(i);
  1320. }
  1321.  
  1322. void
  1323. CodeEmitterNV50::emitPreOp(const Instruction *i)
  1324. {
  1325.    code[0] = 0xb0000000;
  1326.    code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
  1327.  
  1328.    code[1] |= i->src(0).mod.abs() << 20;
  1329.    code[1] |= i->src(0).mod.neg() << 26;
  1330.  
  1331.    emitForm_MAD(i);
  1332. }
  1333.  
  1334. void
  1335. CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
  1336. {
  1337.    code[0] = 0x90000000;
  1338.  
  1339.    if (i->encSize == 4) {
  1340.       assert(i->op == OP_RCP);
  1341.       code[0] |= i->src(0).mod.abs() << 15;
  1342.       code[0] |= i->src(0).mod.neg() << 22;
  1343.       emitForm_MUL(i);
  1344.    } else {
  1345.       code[1] = subOp << 29;
  1346.       code[1] |= i->src(0).mod.abs() << 20;
  1347.       code[1] |= i->src(0).mod.neg() << 26;
  1348.       emitForm_MAD(i);
  1349.    }
  1350. }
  1351.  
  1352. void
  1353. CodeEmitterNV50::emitNOT(const Instruction *i)
  1354. {
  1355.    code[0] = 0xd0000000;
  1356.    code[1] = 0x0002c000;
  1357.  
  1358.    switch (i->sType) {
  1359.    case TYPE_U32:
  1360.    case TYPE_S32:
  1361.       code[1] |= 0x04000000;
  1362.       break;
  1363.    default:
  1364.       break;
  1365.    }
  1366.    emitForm_MAD(i);
  1367.    setSrc(i, 0, 1);
  1368. }
  1369.  
  1370. void
  1371. CodeEmitterNV50::emitLogicOp(const Instruction *i)
  1372. {
  1373.    code[0] = 0xd0000000;
  1374.    code[1] = 0;
  1375.  
  1376.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  1377.       switch (i->op) {
  1378.       case OP_OR:  code[0] |= 0x0100; break;
  1379.       case OP_XOR: code[0] |= 0x8000; break;
  1380.       default:
  1381.          assert(i->op == OP_AND);
  1382.          break;
  1383.       }
  1384.       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
  1385.          code[0] |= 1 << 22;
  1386.  
  1387.       emitForm_IMM(i);
  1388.    } else {
  1389.       switch (i->op) {
  1390.       case OP_AND: code[1] = 0x04000000; break;
  1391.       case OP_OR:  code[1] = 0x04004000; break;
  1392.       case OP_XOR: code[1] = 0x04008000; break;
  1393.       default:
  1394.          assert(0);
  1395.          break;
  1396.       }
  1397.       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
  1398.          code[1] |= 1 << 16;
  1399.       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
  1400.          code[1] |= 1 << 17;
  1401.  
  1402.       emitForm_MAD(i);
  1403.    }
  1404. }
  1405.  
  1406. void
  1407. CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
  1408. {
  1409.    code[0] = 0x00000001 | (shl << 16);
  1410.    code[1] = 0xc0000000;
  1411.  
  1412.    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
  1413.  
  1414.    setSrcFileBits(i, NV50_OP_ENC_IMM);
  1415.    setSrc(i, 0, 0);
  1416.    emitFlagsRd(i);
  1417. }
  1418.  
  1419. void
  1420. CodeEmitterNV50::emitShift(const Instruction *i)
  1421. {
  1422.    if (i->def(0).getFile() == FILE_ADDRESS) {
  1423.       assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
  1424.       emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
  1425.    } else {
  1426.       code[0] = 0x30000001;
  1427.       code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
  1428.       if (i->op == OP_SHR && isSignedType(i->sType))
  1429.           code[1] |= 1 << 27;
  1430.  
  1431.       if (i->src(1).getFile() == FILE_IMMEDIATE) {
  1432.          code[1] |= 1 << 20;
  1433.          code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
  1434.          defId(i->def(0), 2);
  1435.          srcId(i->src(0), 9);
  1436.          emitFlagsRd(i);
  1437.       } else {
  1438.          emitForm_MAD(i);
  1439.       }
  1440.    }
  1441. }
  1442.  
  1443. void
  1444. CodeEmitterNV50::emitOUT(const Instruction *i)
  1445. {
  1446.    code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
  1447.    code[1] = 0xc0000000;
  1448.  
  1449.    emitFlagsRd(i);
  1450. }
  1451.  
  1452. void
  1453. CodeEmitterNV50::emitTEX(const TexInstruction *i)
  1454. {
  1455.    code[0] = 0xf0000001;
  1456.    code[1] = 0x00000000;
  1457.  
  1458.    switch (i->op) {
  1459.    case OP_TXB:
  1460.       code[1] = 0x20000000;
  1461.       break;
  1462.    case OP_TXL:
  1463.       code[1] = 0x40000000;
  1464.       break;
  1465.    case OP_TXF:
  1466.       code[0] |= 0x01000000;
  1467.       break;
  1468.    case OP_TXG:
  1469.       code[0] |= 0x01000000;
  1470.       code[1] = 0x80000000;
  1471.       break;
  1472.    case OP_TXLQ:
  1473.       code[1] = 0x60020000;
  1474.       break;
  1475.    default:
  1476.       assert(i->op == OP_TEX);
  1477.       break;
  1478.    }
  1479.  
  1480.    code[0] |= i->tex.r << 9;
  1481.    code[0] |= i->tex.s << 17;
  1482.  
  1483.    int argc = i->tex.target.getArgCount();
  1484.  
  1485.    if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
  1486.       argc += 1;
  1487.    if (i->tex.target.isShadow())
  1488.       argc += 1;
  1489.    assert(argc <= 4);
  1490.  
  1491.    code[0] |= (argc - 1) << 22;
  1492.  
  1493.    if (i->tex.target.isCube()) {
  1494.       code[0] |= 0x08000000;
  1495.    } else
  1496.    if (i->tex.useOffsets) {
  1497.       code[1] |= (i->tex.offset[0] & 0xf) << 24;
  1498.       code[1] |= (i->tex.offset[1] & 0xf) << 20;
  1499.       code[1] |= (i->tex.offset[2] & 0xf) << 16;
  1500.    }
  1501.  
  1502.    code[0] |= (i->tex.mask & 0x3) << 25;
  1503.    code[1] |= (i->tex.mask & 0xc) << 12;
  1504.  
  1505.    if (i->tex.liveOnly)
  1506.       code[1] |= 4;
  1507.  
  1508.    defId(i->def(0), 2);
  1509.  
  1510.    emitFlagsRd(i);
  1511. }
  1512.  
  1513. void
  1514. CodeEmitterNV50::emitTXQ(const TexInstruction *i)
  1515. {
  1516.    assert(i->tex.query == TXQ_DIMS);
  1517.  
  1518.    code[0] = 0xf0000001;
  1519.    code[1] = 0x60000000;
  1520.  
  1521.    code[0] |= i->tex.r << 9;
  1522.    code[0] |= i->tex.s << 17;
  1523.  
  1524.    code[0] |= (i->tex.mask & 0x3) << 25;
  1525.    code[1] |= (i->tex.mask & 0xc) << 12;
  1526.  
  1527.    defId(i->def(0), 2);
  1528.  
  1529.    emitFlagsRd(i);
  1530. }
  1531.  
  1532. void
  1533. CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
  1534. {
  1535.    code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
  1536.    code[1] = 0x60010000;
  1537.  
  1538.    code[0] |= (i->tex.mask & 0x3) << 25;
  1539.    code[1] |= (i->tex.mask & 0xc) << 12;
  1540.    defId(i->def(0), 2);
  1541.  
  1542.    emitFlagsRd(i);
  1543. }
  1544.  
  1545. void
  1546. CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
  1547. {
  1548.    uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
  1549.  
  1550.    code[0] = 0x10000003; // bra
  1551.    code[1] = 0x00000780; // always
  1552.  
  1553.    switch (i->subOp) {
  1554.    case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
  1555.       break;
  1556.    case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
  1557.       pos += 8;
  1558.       break;
  1559.    default:
  1560.       assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
  1561.       code[0] = 0x20000003; // call
  1562.       code[1] = 0x00000000; // no predicate
  1563.       break;
  1564.    }
  1565.    addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
  1566.    addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
  1567. }
  1568.  
  1569. void
  1570. CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
  1571. {
  1572.    const FlowInstruction *f = i->asFlow();
  1573.    bool hasPred = false;
  1574.    bool hasTarg = false;
  1575.  
  1576.    code[0] = 0x00000003 | (flowOp << 28);
  1577.    code[1] = 0x00000000;
  1578.  
  1579.    switch (i->op) {
  1580.    case OP_BRA:
  1581.       hasPred = true;
  1582.       hasTarg = true;
  1583.       break;
  1584.    case OP_BREAK:
  1585.    case OP_BRKPT:
  1586.    case OP_DISCARD:
  1587.    case OP_RET:
  1588.       hasPred = true;
  1589.       break;
  1590.    case OP_CALL:
  1591.    case OP_PREBREAK:
  1592.    case OP_JOINAT:
  1593.       hasTarg = true;
  1594.       break;
  1595.    case OP_PRERET:
  1596.       hasTarg = true;
  1597.       if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
  1598.          emitPRERETEmu(f);
  1599.          return;
  1600.       }
  1601.       break;
  1602.    default:
  1603.       break;
  1604.    }
  1605.  
  1606.    if (hasPred)
  1607.       emitFlagsRd(i);
  1608.  
  1609.    if (hasTarg && f) {
  1610.       uint32_t pos;
  1611.  
  1612.       if (f->op == OP_CALL) {
  1613.          if (f->builtin) {
  1614.             pos = targNV50->getBuiltinOffset(f->target.builtin);
  1615.          } else {
  1616.             pos = f->target.fn->binPos;
  1617.          }
  1618.       } else {
  1619.          pos = f->target.bb->binPos;
  1620.       }
  1621.  
  1622.       code[0] |= ((pos >>  2) & 0xffff) << 11;
  1623.       code[1] |= ((pos >> 18) & 0x003f) << 14;
  1624.  
  1625.       RelocEntry::Type relocTy;
  1626.  
  1627.       relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
  1628.  
  1629.       addReloc(relocTy, 0, pos, 0x07fff800, 9);
  1630.       addReloc(relocTy, 1, pos, 0x000fc000, -4);
  1631.    }
  1632. }
  1633.  
  1634. void
  1635. CodeEmitterNV50::emitBAR(const Instruction *i)
  1636. {
  1637.    ImmediateValue *barId = i->getSrc(0)->asImm();
  1638.    assert(barId);
  1639.  
  1640.    code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
  1641.    code[1] = 0x00004000;
  1642.  
  1643.    if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
  1644.       code[0] |= 1 << 26;
  1645. }
  1646.  
  1647. void
  1648. CodeEmitterNV50::emitATOM(const Instruction *i)
  1649. {
  1650.    uint8_t subOp;
  1651.    switch (i->subOp) {
  1652.    case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
  1653.    case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
  1654.    case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
  1655.    case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
  1656.    case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
  1657.    case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
  1658.    case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
  1659.    case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
  1660.    case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
  1661.    case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
  1662.    default:
  1663.       assert(!"invalid subop");
  1664.       return;
  1665.    }
  1666.    code[0] = 0xd0000001;
  1667.    code[1] = 0xe0c00000 | (subOp << 2);
  1668.    if (isSignedType(i->dType))
  1669.       code[1] |= 1 << 21;
  1670.  
  1671.    // args
  1672.    emitFlagsRd(i);
  1673.    setDst(i, 0);
  1674.    setSrc(i, 1, 1);
  1675.    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
  1676.       setSrc(i, 2, 2);
  1677.  
  1678.    // g[] pointer
  1679.    code[0] |= i->getSrc(0)->reg.fileIndex << 23;
  1680.    srcId(i->getIndirect(0, 0), 9);
  1681. }
  1682.  
  1683. bool
  1684. CodeEmitterNV50::emitInstruction(Instruction *insn)
  1685. {
  1686.    if (!insn->encSize) {
  1687.       ERROR("skipping unencodable instruction: "); insn->print();
  1688.       return false;
  1689.    } else
  1690.    if (codeSize + insn->encSize > codeSizeLimit) {
  1691.       ERROR("code emitter output buffer too small\n");
  1692.       return false;
  1693.    }
  1694.  
  1695.    if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
  1696.       INFO("EMIT: "); insn->print();
  1697.    }
  1698.  
  1699.    switch (insn->op) {
  1700.    case OP_MOV:
  1701.       emitMOV(insn);
  1702.       break;
  1703.    case OP_EXIT:
  1704.    case OP_NOP:
  1705.    case OP_JOIN:
  1706.       emitNOP();
  1707.       break;
  1708.    case OP_VFETCH:
  1709.    case OP_LOAD:
  1710.       emitLOAD(insn);
  1711.       break;
  1712.    case OP_EXPORT:
  1713.    case OP_STORE:
  1714.       emitSTORE(insn);
  1715.       break;
  1716.    case OP_PFETCH:
  1717.       emitPFETCH(insn);
  1718.       break;
  1719.    case OP_RDSV:
  1720.       emitRDSV(insn);
  1721.       break;
  1722.    case OP_LINTERP:
  1723.    case OP_PINTERP:
  1724.       emitINTERP(insn);
  1725.       break;
  1726.    case OP_ADD:
  1727.    case OP_SUB:
  1728.       if (isFloatType(insn->dType))
  1729.          emitFADD(insn);
  1730.       else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
  1731.          emitAADD(insn);
  1732.       else
  1733.          emitUADD(insn);
  1734.       break;
  1735.    case OP_MUL:
  1736.       if (isFloatType(insn->dType))
  1737.          emitFMUL(insn);
  1738.       else
  1739.          emitIMUL(insn);
  1740.       break;
  1741.    case OP_MAD:
  1742.    case OP_FMA:
  1743.       if (isFloatType(insn->dType))
  1744.          emitFMAD(insn);
  1745.       else
  1746.          emitIMAD(insn);
  1747.       break;
  1748.    case OP_SAD:
  1749.       emitISAD(insn);
  1750.       break;
  1751.    case OP_NOT:
  1752.       emitNOT(insn);
  1753.       break;
  1754.    case OP_AND:
  1755.    case OP_OR:
  1756.    case OP_XOR:
  1757.       emitLogicOp(insn);
  1758.       break;
  1759.    case OP_SHL:
  1760.    case OP_SHR:
  1761.       emitShift(insn);
  1762.       break;
  1763.    case OP_SET:
  1764.       emitSET(insn);
  1765.       break;
  1766.    case OP_MIN:
  1767.    case OP_MAX:
  1768.       emitMINMAX(insn);
  1769.       break;
  1770.    case OP_CEIL:
  1771.    case OP_FLOOR:
  1772.    case OP_TRUNC:
  1773.    case OP_ABS:
  1774.    case OP_NEG:
  1775.    case OP_SAT:
  1776.       emitCVT(insn);
  1777.       break;
  1778.    case OP_CVT:
  1779.       if (insn->def(0).getFile() == FILE_ADDRESS)
  1780.          emitARL(insn, 0);
  1781.       else
  1782.       if (insn->def(0).getFile() == FILE_FLAGS ||
  1783.           insn->src(0).getFile() == FILE_FLAGS ||
  1784.           insn->src(0).getFile() == FILE_ADDRESS)
  1785.          emitMOV(insn);
  1786.       else
  1787.          emitCVT(insn);
  1788.       break;
  1789.    case OP_RCP:
  1790.       emitSFnOp(insn, 0);
  1791.       break;
  1792.    case OP_RSQ:
  1793.       emitSFnOp(insn, 2);
  1794.       break;
  1795.    case OP_LG2:
  1796.       emitSFnOp(insn, 3);
  1797.       break;
  1798.    case OP_SIN:
  1799.       emitSFnOp(insn, 4);
  1800.       break;
  1801.    case OP_COS:
  1802.       emitSFnOp(insn, 5);
  1803.       break;
  1804.    case OP_EX2:
  1805.       emitSFnOp(insn, 6);
  1806.       break;
  1807.    case OP_PRESIN:
  1808.    case OP_PREEX2:
  1809.       emitPreOp(insn);
  1810.       break;
  1811.    case OP_TEX:
  1812.    case OP_TXB:
  1813.    case OP_TXL:
  1814.    case OP_TXF:
  1815.    case OP_TXG:
  1816.    case OP_TXLQ:
  1817.       emitTEX(insn->asTex());
  1818.       break;
  1819.    case OP_TXQ:
  1820.       emitTXQ(insn->asTex());
  1821.       break;
  1822.    case OP_TEXPREP:
  1823.       emitTEXPREP(insn->asTex());
  1824.       break;
  1825.    case OP_EMIT:
  1826.    case OP_RESTART:
  1827.       emitOUT(insn);
  1828.       break;
  1829.    case OP_DISCARD:
  1830.       emitFlow(insn, 0x0);
  1831.       break;
  1832.    case OP_BRA:
  1833.       emitFlow(insn, 0x1);
  1834.       break;
  1835.    case OP_CALL:
  1836.       emitFlow(insn, 0x2);
  1837.       break;
  1838.    case OP_RET:
  1839.       emitFlow(insn, 0x3);
  1840.       break;
  1841.    case OP_PREBREAK:
  1842.       emitFlow(insn, 0x4);
  1843.       break;
  1844.    case OP_BREAK:
  1845.       emitFlow(insn, 0x5);
  1846.       break;
  1847.    case OP_QUADON:
  1848.       emitFlow(insn, 0x6);
  1849.       break;
  1850.    case OP_QUADPOP:
  1851.       emitFlow(insn, 0x7);
  1852.       break;
  1853.    case OP_JOINAT:
  1854.       emitFlow(insn, 0xa);
  1855.       break;
  1856.    case OP_PRERET:
  1857.       emitFlow(insn, 0xd);
  1858.       break;
  1859.    case OP_QUADOP:
  1860.       emitQUADOP(insn, insn->lanes, insn->subOp);
  1861.       break;
  1862.    case OP_DFDX:
  1863.       emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
  1864.       break;
  1865.    case OP_DFDY:
  1866.       emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
  1867.       break;
  1868.    case OP_ATOM:
  1869.       emitATOM(insn);
  1870.       break;
  1871.    case OP_BAR:
  1872.       emitBAR(insn);
  1873.       break;
  1874.    case OP_PHI:
  1875.    case OP_UNION:
  1876.    case OP_CONSTRAINT:
  1877.       ERROR("operation should have been eliminated\n");
  1878.       return false;
  1879.    case OP_EXP:
  1880.    case OP_LOG:
  1881.    case OP_SQRT:
  1882.    case OP_POW:
  1883.    case OP_SELP:
  1884.    case OP_SLCT:
  1885.    case OP_TXD:
  1886.    case OP_PRECONT:
  1887.    case OP_CONT:
  1888.    case OP_POPCNT:
  1889.    case OP_INSBF:
  1890.    case OP_EXTBF:
  1891.       ERROR("operation should have been lowered\n");
  1892.       return false;
  1893.    default:
  1894.       ERROR("unknown op: %u\n", insn->op);
  1895.       return false;
  1896.    }
  1897.    if (insn->join || insn->op == OP_JOIN)
  1898.       code[1] |= 0x2;
  1899.    else
  1900.    if (insn->exit || insn->op == OP_EXIT)
  1901.       code[1] |= 0x1;
  1902.  
  1903.    assert((insn->encSize == 8) == (code[0] & 1));
  1904.  
  1905.    code += insn->encSize / 4;
  1906.    codeSize += insn->encSize;
  1907.    return true;
  1908. }
  1909.  
  1910. uint32_t
  1911. CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
  1912. {
  1913.    const Target::OpInfo &info = targ->getOpInfo(i);
  1914.  
  1915.    if (info.minEncSize > 4)
  1916.       return 8;
  1917.  
  1918.    // check constraints on dst and src operands
  1919.    for (int d = 0; i->defExists(d); ++d) {
  1920.       if (i->def(d).rep()->reg.data.id > 63 ||
  1921.           i->def(d).rep()->reg.file != FILE_GPR)
  1922.          return 8;
  1923.    }
  1924.  
  1925.    for (int s = 0; i->srcExists(s); ++s) {
  1926.       DataFile sf = i->src(s).getFile();
  1927.       if (sf != FILE_GPR)
  1928.          if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
  1929.             return 8;
  1930.       if (i->src(s).rep()->reg.data.id > 63)
  1931.          return 8;
  1932.    }
  1933.  
  1934.    // check modifiers & rounding
  1935.    if (i->join || i->lanes != 0xf || i->exit)
  1936.       return 8;
  1937.    if (i->op == OP_MUL && i->rnd != ROUND_N)
  1938.       return 8;
  1939.  
  1940.    if (i->asTex())
  1941.       return 8; // TODO: short tex encoding
  1942.  
  1943.    // check constraints on short MAD
  1944.    if (info.srcNr >= 2 && i->srcExists(2)) {
  1945.       if (!i->defExists(0) || !isFloatType(i->dType) ||
  1946.           i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
  1947.          return 8;
  1948.    }
  1949.  
  1950.    return info.minEncSize;
  1951. }
  1952.  
  1953. // Change the encoding size of an instruction after BBs have been scheduled.
  1954. static void
  1955. makeInstructionLong(Instruction *insn)
  1956. {
  1957.    if (insn->encSize == 8)
  1958.       return;
  1959.    Function *fn = insn->bb->getFunction();
  1960.    int n = 0;
  1961.    int adj = 4;
  1962.  
  1963.    for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
  1964.  
  1965.    if (n & 1) {
  1966.       adj = 8;
  1967.       insn->next->encSize = 8;
  1968.    } else
  1969.    if (insn->prev && insn->prev->encSize == 4) {
  1970.       adj = 8;
  1971.       insn->prev->encSize = 8;
  1972.    }
  1973.    insn->encSize = 8;
  1974.  
  1975.    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
  1976.       fn->bbArray[i]->binPos += 4;
  1977.    }
  1978.    fn->binSize += adj;
  1979.    insn->bb->binSize += adj;
  1980. }
  1981.  
  1982. static bool
  1983. trySetExitModifier(Instruction *insn)
  1984. {
  1985.    if (insn->op == OP_DISCARD ||
  1986.        insn->op == OP_QUADON ||
  1987.        insn->op == OP_QUADPOP)
  1988.       return false;
  1989.    for (int s = 0; insn->srcExists(s); ++s)
  1990.       if (insn->src(s).getFile() == FILE_IMMEDIATE)
  1991.          return false;
  1992.    if (insn->asFlow()) {
  1993.       if (insn->op == OP_CALL) // side effects !
  1994.          return false;
  1995.       if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
  1996.          return false;
  1997.       insn->op = OP_EXIT;
  1998.    }
  1999.    insn->exit = 1;
  2000.    makeInstructionLong(insn);
  2001.    return true;
  2002. }
  2003.  
  2004. static void
  2005. replaceExitWithModifier(Function *func)
  2006. {
  2007.    BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
  2008.  
  2009.    if (!epilogue->getExit() ||
  2010.        epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
  2011.       return;
  2012.  
  2013.    if (epilogue->getEntry()->op != OP_EXIT) {
  2014.       Instruction *insn = epilogue->getExit()->prev;
  2015.       if (!insn || !trySetExitModifier(insn))
  2016.          return;
  2017.       insn->exit = 1;
  2018.    } else {
  2019.       for (Graph::EdgeIterator ei = func->cfgExit->incident();
  2020.            !ei.end(); ei.next()) {
  2021.          BasicBlock *bb = BasicBlock::get(ei.getNode());
  2022.          Instruction *i = bb->getExit();
  2023.  
  2024.          if (!i || !trySetExitModifier(i))
  2025.             return;
  2026.       }
  2027.    }
  2028.    epilogue->binSize -= 8;
  2029.    func->binSize -= 8;
  2030.    delete_Instruction(func->getProgram(), epilogue->getExit());
  2031. }
  2032.  
  2033. void
  2034. CodeEmitterNV50::prepareEmission(Function *func)
  2035. {
  2036.    CodeEmitter::prepareEmission(func);
  2037.  
  2038.    replaceExitWithModifier(func);
  2039. }
  2040.  
  2041. CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
  2042.    CodeEmitter(target), targNV50(target)
  2043. {
  2044.    targ = target; // specialized
  2045.    code = NULL;
  2046.    codeSize = codeSizeLimit = 0;
  2047.    relocInfo = NULL;
  2048. }
  2049.  
  2050. CodeEmitter *
  2051. TargetNV50::getCodeEmitter(Program::Type type)
  2052. {
  2053.    CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
  2054.    emit->setProgramType(type);
  2055.    return emit;
  2056. }
  2057.  
  2058. } // namespace nv50_ir
  2059.