Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2011 Christoph Bumiller
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "nv50_ir.h"
  24. #include "nv50_ir_target_nv50.h"
  25.  
  26. namespace nv50_ir {
  27.  
  28. #define NV50_OP_ENC_LONG     0
  29. #define NV50_OP_ENC_SHORT    1
  30. #define NV50_OP_ENC_IMM      2
  31. #define NV50_OP_ENC_LONG_ALT 3
  32.  
  33. class CodeEmitterNV50 : public CodeEmitter
  34. {
  35. public:
  36.    CodeEmitterNV50(const TargetNV50 *);
  37.  
  38.    virtual bool emitInstruction(Instruction *);
  39.  
  40.    virtual uint32_t getMinEncodingSize(const Instruction *) const;
  41.  
  42.    inline void setProgramType(Program::Type pType) { progType = pType; }
  43.  
  44.    virtual void prepareEmission(Function *);
  45.  
  46. private:
  47.    Program::Type progType;
  48.  
  49.    const TargetNV50 *targNV50;
  50.  
  51. private:
  52.    inline void defId(const ValueDef&, const int pos);
  53.    inline void srcId(const ValueRef&, const int pos);
  54.    inline void srcId(const ValueRef *, const int pos);
  55.  
  56.    inline void srcAddr16(const ValueRef&, bool adj, const int pos);
  57.    inline void srcAddr8(const ValueRef&, const int pos);
  58.  
  59.    void emitFlagsRd(const Instruction *);
  60.    void emitFlagsWr(const Instruction *);
  61.  
  62.    void emitCondCode(CondCode cc, DataType ty, int pos);
  63.  
  64.    inline void setARegBits(unsigned int);
  65.  
  66.    void setAReg16(const Instruction *, int s);
  67.    void setImmediate(const Instruction *, int s);
  68.  
  69.    void setDst(const Value *);
  70.    void setDst(const Instruction *, int d);
  71.    void setSrcFileBits(const Instruction *, int enc);
  72.    void setSrc(const Instruction *, unsigned int s, int slot);
  73.  
  74.    void emitForm_MAD(const Instruction *);
  75.    void emitForm_ADD(const Instruction *);
  76.    void emitForm_MUL(const Instruction *);
  77.    void emitForm_IMM(const Instruction *);
  78.  
  79.    void emitLoadStoreSizeLG(DataType ty, int pos);
  80.    void emitLoadStoreSizeCS(DataType ty);
  81.  
  82.    void roundMode_MAD(const Instruction *);
  83.    void roundMode_CVT(RoundMode);
  84.  
  85.    void emitMNeg12(const Instruction *);
  86.  
  87.    void emitLOAD(const Instruction *);
  88.    void emitSTORE(const Instruction *);
  89.    void emitMOV(const Instruction *);
  90.    void emitNOP();
  91.    void emitINTERP(const Instruction *);
  92.    void emitPFETCH(const Instruction *);
  93.    void emitOUT(const Instruction *);
  94.  
  95.    void emitUADD(const Instruction *);
  96.    void emitAADD(const Instruction *);
  97.    void emitFADD(const Instruction *);
  98.    void emitIMUL(const Instruction *);
  99.    void emitFMUL(const Instruction *);
  100.    void emitFMAD(const Instruction *);
  101.    void emitIMAD(const Instruction *);
  102.    void emitISAD(const Instruction *);
  103.  
  104.    void emitMINMAX(const Instruction *);
  105.  
  106.    void emitPreOp(const Instruction *);
  107.    void emitSFnOp(const Instruction *, uint8_t subOp);
  108.  
  109.    void emitShift(const Instruction *);
  110.    void emitARL(const Instruction *, unsigned int shl);
  111.    void emitLogicOp(const Instruction *);
  112.    void emitNOT(const Instruction *);
  113.  
  114.    void emitCVT(const Instruction *);
  115.    void emitSET(const Instruction *);
  116.  
  117.    void emitTEX(const TexInstruction *);
  118.    void emitTXQ(const TexInstruction *);
  119.    void emitTEXPREP(const TexInstruction *);
  120.  
  121.    void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
  122.  
  123.    void emitFlow(const Instruction *, uint8_t flowOp);
  124.    void emitPRERETEmu(const FlowInstruction *);
  125.    void emitBAR(const Instruction *);
  126.  
  127.    void emitATOM(const Instruction *);
  128. };
  129.  
  130. #define SDATA(a) ((a).rep()->reg.data)
  131. #define DDATA(a) ((a).rep()->reg.data)
  132.  
  133. void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
  134. {
  135.    assert(src.get());
  136.    code[pos / 32] |= SDATA(src).id << (pos % 32);
  137. }
  138.  
  139. void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
  140. {
  141.    assert(src->get());
  142.    code[pos / 32] |= SDATA(*src).id << (pos % 32);
  143. }
  144.  
  145. void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
  146. {
  147.    assert(src.get());
  148.  
  149.    int32_t offset = SDATA(src).offset;
  150.  
  151.    assert(!adj || src.get()->reg.size <= 4);
  152.    if (adj)
  153.       offset /= src.get()->reg.size;
  154.  
  155.    assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
  156.  
  157.    if (offset < 0)
  158.       offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
  159.  
  160.    code[pos / 32] |= offset << (pos % 32);
  161. }
  162.  
  163. void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
  164. {
  165.    assert(src.get());
  166.  
  167.    uint32_t offset = SDATA(src).offset;
  168.  
  169.    assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
  170.  
  171.    code[pos / 32] |= (offset >> 2) << (pos % 32);
  172. }
  173.  
  174. void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
  175. {
  176.    assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
  177.  
  178.    code[pos / 32] |= DDATA(def).id << (pos % 32);
  179. }
  180.  
  181. void
  182. CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
  183. {
  184.    switch (insn->rnd) {
  185.    case ROUND_M: code[1] |= 1 << 22; break;
  186.    case ROUND_P: code[1] |= 2 << 22; break;
  187.    case ROUND_Z: code[1] |= 3 << 22; break;
  188.    default:
  189.       assert(insn->rnd == ROUND_N);
  190.       break;
  191.    }
  192. }
  193.  
  194. void
  195. CodeEmitterNV50::emitMNeg12(const Instruction *i)
  196. {
  197.    code[1] |= i->src(0).mod.neg() << 26;
  198.    code[1] |= i->src(1).mod.neg() << 27;
  199. }
  200.  
  201. void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
  202. {
  203.    uint8_t enc;
  204.  
  205.    assert(pos >= 32 || pos <= 27);
  206.  
  207.    switch (cc) {
  208.    case CC_LT:  enc = 0x1; break;
  209.    case CC_LTU: enc = 0x9; break;
  210.    case CC_EQ:  enc = 0x2; break;
  211.    case CC_EQU: enc = 0xa; break;
  212.    case CC_LE:  enc = 0x3; break;
  213.    case CC_LEU: enc = 0xb; break;
  214.    case CC_GT:  enc = 0x4; break;
  215.    case CC_GTU: enc = 0xc; break;
  216.    case CC_NE:  enc = 0x5; break;
  217.    case CC_NEU: enc = 0xd; break;
  218.    case CC_GE:  enc = 0x6; break;
  219.    case CC_GEU: enc = 0xe; break;
  220.    case CC_TR:  enc = 0xf; break;
  221.    case CC_FL:  enc = 0x0; break;
  222.  
  223.    case CC_O:  enc = 0x10; break;
  224.    case CC_C:  enc = 0x11; break;
  225.    case CC_A:  enc = 0x12; break;
  226.    case CC_S:  enc = 0x13; break;
  227.    case CC_NS: enc = 0x1c; break;
  228.    case CC_NA: enc = 0x1d; break;
  229.    case CC_NC: enc = 0x1e; break;
  230.    case CC_NO: enc = 0x1f; break;
  231.  
  232.    default:
  233.       enc = 0;
  234.       assert(!"invalid condition code");
  235.       break;
  236.    }
  237.    if (ty != TYPE_NONE && !isFloatType(ty))
  238.       enc &= ~0x8; // unordered only exists for float types
  239.  
  240.    code[pos / 32] |= enc << (pos % 32);
  241. }
  242.  
  243. void
  244. CodeEmitterNV50::emitFlagsRd(const Instruction *i)
  245. {
  246.    int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
  247.  
  248.    assert(!(code[1] & 0x00003f80));
  249.  
  250.    if (s >= 0) {
  251.       assert(i->getSrc(s)->reg.file == FILE_FLAGS);
  252.       emitCondCode(i->cc, TYPE_NONE, 32 + 7);
  253.       srcId(i->src(s), 32 + 12);
  254.    } else {
  255.       code[1] |= 0x0780;
  256.    }
  257. }
  258.  
  259. void
  260. CodeEmitterNV50::emitFlagsWr(const Instruction *i)
  261. {
  262.    assert(!(code[1] & 0x70));
  263.  
  264.    int flagsDef = i->flagsDef;
  265.  
  266.    // find flags definition and check that it is the last def
  267.    if (flagsDef < 0) {
  268.       for (int d = 0; i->defExists(d); ++d)
  269.          if (i->def(d).getFile() == FILE_FLAGS)
  270.             flagsDef = d;
  271.       if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
  272.          WARN("Instruction::flagsDef was not set properly\n");
  273.    }
  274.    if (flagsDef == 0 && i->defExists(1))
  275.       WARN("flags def should not be the primary definition\n");
  276.  
  277.    if (flagsDef >= 0)
  278.       code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
  279.  
  280. }
  281.  
  282. void
  283. CodeEmitterNV50::setARegBits(unsigned int u)
  284. {
  285.    code[0] |= (u & 3) << 26;
  286.    code[1] |= (u & 4);
  287. }
  288.  
  289. void
  290. CodeEmitterNV50::setAReg16(const Instruction *i, int s)
  291. {
  292.    if (i->srcExists(s)) {
  293.       s = i->src(s).indirect[0];
  294.       if (s >= 0)
  295.          setARegBits(SDATA(i->src(s)).id + 1);
  296.    }
  297. }
  298.  
  299. void
  300. CodeEmitterNV50::setImmediate(const Instruction *i, int s)
  301. {
  302.    const ImmediateValue *imm = i->src(s).get()->asImm();
  303.    assert(imm);
  304.  
  305.    uint32_t u = imm->reg.data.u32;
  306.  
  307.    if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
  308.       u = ~u;
  309.  
  310.    code[1] |= 3;
  311.    code[0] |= (u & 0x3f) << 16;
  312.    code[1] |= (u >> 6) << 2;
  313. }
  314.  
  315. void
  316. CodeEmitterNV50::setDst(const Value *dst)
  317. {
  318.    const Storage *reg = &dst->join->reg;
  319.  
  320.    assert(reg->file != FILE_ADDRESS);
  321.  
  322.    if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
  323.       code[0] |= (127 << 2) | 1;
  324.       code[1] |= 8;
  325.    } else {
  326.       int id;
  327.       if (reg->file == FILE_SHADER_OUTPUT) {
  328.          code[1] |= 8;
  329.          id = reg->data.offset / 4;
  330.       } else {
  331.          id = reg->data.id;
  332.       }
  333.       code[0] |= id << 2;
  334.    }
  335. }
  336.  
  337. void
  338. CodeEmitterNV50::setDst(const Instruction *i, int d)
  339. {
  340.    if (i->defExists(d)) {
  341.       setDst(i->getDef(d));
  342.    } else
  343.    if (!d) {
  344.       code[0] |= 0x01fc; // bit bucket
  345.       code[1] |= 0x0008;
  346.    }
  347. }
  348.  
  349. // 3 * 2 bits:
  350. // 0: r
  351. // 1: a/s
  352. // 2: c
  353. // 3: i
  354. void
  355. CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
  356. {
  357.    uint8_t mode = 0;
  358.  
  359.    for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
  360.       switch (i->src(s).getFile()) {
  361.       case FILE_GPR:
  362.          break;
  363.       case FILE_MEMORY_SHARED:
  364.       case FILE_SHADER_INPUT:
  365.          mode |= 1 << (s * 2);
  366.          break;
  367.       case FILE_MEMORY_CONST:
  368.          mode |= 2 << (s * 2);
  369.          break;
  370.       case FILE_IMMEDIATE:
  371.          mode |= 3 << (s * 2);
  372.          break;
  373.       default:
  374.               ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
  375.          assert(0);
  376.          break;
  377.       }
  378.    }
  379.    switch (mode) {
  380.    case 0x00: // rrr
  381.       break;
  382.    case 0x01: // arr/grr
  383.       if (progType == Program::TYPE_GEOMETRY) {
  384.          code[0] |= 0x01800000;
  385.          if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
  386.             code[1] |= 0x00200000;
  387.       } else {
  388.          if (enc == NV50_OP_ENC_SHORT)
  389.             code[0] |= 0x01000000;
  390.          else
  391.             code[1] |= 0x00200000;
  392.       }
  393.       break;
  394.    case 0x03: // irr
  395.       assert(i->op == OP_MOV);
  396.       return;
  397.    case 0x0c: // rir
  398.       break;
  399.    case 0x0d: // gir
  400.       code[0] |= 0x01000000;
  401.       assert(progType == Program::TYPE_GEOMETRY ||
  402.              progType == Program::TYPE_COMPUTE);
  403.       break;
  404.    case 0x08: // rcr
  405.       code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
  406.       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
  407.       break;
  408.    case 0x09: // acr/gcr
  409.       if (progType == Program::TYPE_GEOMETRY) {
  410.          code[0] |= 0x01800000;
  411.       } else {
  412.          code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
  413.          code[1] |= 0x00200000;
  414.       }
  415.       code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
  416.       break;
  417.    case 0x20: // rrc
  418.       code[0] |= 0x01000000;
  419.       code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
  420.       break;
  421.    case 0x21: // arc
  422.       code[0] |= 0x01000000;
  423.       code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
  424.       assert(progType != Program::TYPE_GEOMETRY);
  425.       break;
  426.    default:
  427.       ERROR("not encodable: %x\n", mode);
  428.       assert(0);
  429.       break;
  430.    }
  431.    if (progType != Program::TYPE_COMPUTE)
  432.       return;
  433.  
  434.    if ((mode & 3) == 1) {
  435.       const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
  436.  
  437.       switch (i->getSrc(0)->reg.type) {
  438.       case TYPE_U8:
  439.          break;
  440.       case TYPE_U16:
  441.          code[0] |= 1 << pos;
  442.          break;
  443.       case TYPE_S16:
  444.          code[0] |= 2 << pos;
  445.          break;
  446.       default:
  447.          code[0] |= 3 << pos;
  448.          assert(i->getSrc(0)->reg.size == 4);
  449.          break;
  450.       }
  451.    }
  452. }
  453.  
  454. void
  455. CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
  456. {
  457.    if (Target::operationSrcNr[i->op] <= s)
  458.       return;
  459.    const Storage *reg = &i->src(s).rep()->reg;
  460.  
  461.    unsigned int id = (reg->file == FILE_GPR) ?
  462.       reg->data.id :
  463.       reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
  464.  
  465.    switch (slot) {
  466.    case 0: code[0] |= id << 9; break;
  467.    case 1: code[0] |= id << 16; break;
  468.    case 2: code[1] |= id << 14; break;
  469.    default:
  470.       assert(0);
  471.       break;
  472.    }
  473. }
  474.  
  475. // the default form:
  476. //  - long instruction
  477. //  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
  478. //  - address & flags
  479. void
  480. CodeEmitterNV50::emitForm_MAD(const Instruction *i)
  481. {
  482.    assert(i->encSize == 8);
  483.    code[0] |= 1;
  484.  
  485.    emitFlagsRd(i);
  486.    emitFlagsWr(i);
  487.  
  488.    setDst(i, 0);
  489.  
  490.    setSrcFileBits(i, NV50_OP_ENC_LONG);
  491.    setSrc(i, 0, 0);
  492.    setSrc(i, 1, 1);
  493.    setSrc(i, 2, 2);
  494.  
  495.    setAReg16(i, 1);
  496. }
  497.  
  498. // like default form, but 2nd source in slot 2, and no 3rd source
  499. void
  500. CodeEmitterNV50::emitForm_ADD(const Instruction *i)
  501. {
  502.    assert(i->encSize == 8);
  503.    code[0] |= 1;
  504.  
  505.    emitFlagsRd(i);
  506.    emitFlagsWr(i);
  507.  
  508.    setDst(i, 0);
  509.  
  510.    setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
  511.    setSrc(i, 0, 0);
  512.    setSrc(i, 1, 2);
  513.  
  514.    setAReg16(i, 1);
  515. }
  516.  
  517. // default short form (rr, ar, rc, gr)
  518. void
  519. CodeEmitterNV50::emitForm_MUL(const Instruction *i)
  520. {
  521.    assert(i->encSize == 4 && !(code[0] & 1));
  522.    assert(i->defExists(0));
  523.    assert(!i->getPredicate());
  524.  
  525.    setDst(i, 0);
  526.  
  527.    setSrcFileBits(i, NV50_OP_ENC_SHORT);
  528.    setSrc(i, 0, 0);
  529.    setSrc(i, 1, 1);
  530. }
  531.  
  532. // usual immediate form
  533. // - 1 to 3 sources where last is immediate (rir, gir)
  534. // - no address or predicate possible
  535. void
  536. CodeEmitterNV50::emitForm_IMM(const Instruction *i)
  537. {
  538.    assert(i->encSize == 8);
  539.    code[0] |= 1;
  540.  
  541.    assert(i->defExists(0) && i->srcExists(0));
  542.  
  543.    setDst(i, 0);
  544.  
  545.    setSrcFileBits(i, NV50_OP_ENC_IMM);
  546.    if (Target::operationSrcNr[i->op] > 1) {
  547.       setSrc(i, 0, 0);
  548.       setImmediate(i, 1);
  549.       setSrc(i, 2, 1);
  550.    } else {
  551.       setImmediate(i, 0);
  552.    }
  553. }
  554.  
  555. void
  556. CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
  557. {
  558.    uint8_t enc;
  559.  
  560.    switch (ty) {
  561.    case TYPE_F32: // fall through
  562.    case TYPE_S32: // fall through
  563.    case TYPE_U32:  enc = 0x6; break;
  564.    case TYPE_B128: enc = 0x5; break;
  565.    case TYPE_F64: // fall through
  566.    case TYPE_S64: // fall through
  567.    case TYPE_U64:  enc = 0x4; break;
  568.    case TYPE_S16:  enc = 0x3; break;
  569.    case TYPE_U16:  enc = 0x2; break;
  570.    case TYPE_S8:   enc = 0x1; break;
  571.    case TYPE_U8:   enc = 0x0; break;
  572.    default:
  573.       enc = 0;
  574.       assert(!"invalid load/store type");
  575.       break;
  576.    }
  577.    code[pos / 32] |= enc << (pos % 32);
  578. }
  579.  
  580. void
  581. CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
  582. {
  583.    switch (ty) {
  584.    case TYPE_U8: break;
  585.    case TYPE_U16: code[1] |= 0x4000; break;
  586.    case TYPE_S16: code[1] |= 0x8000; break;
  587.    case TYPE_F32:
  588.    case TYPE_S32:
  589.    case TYPE_U32: code[1] |= 0xc000; break;
  590.    default:
  591.       assert(0);
  592.       break;
  593.    }
  594. }
  595.  
  596. void
  597. CodeEmitterNV50::emitLOAD(const Instruction *i)
  598. {
  599.    DataFile sf = i->src(0).getFile();
  600.    int32_t offset = i->getSrc(0)->reg.data.offset;
  601.  
  602.    switch (sf) {
  603.    case FILE_SHADER_INPUT:
  604.       // use 'mov' where we can
  605.       code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
  606.       code[1] = 0x00200000 | (i->lanes << 14);
  607.       if (typeSizeof(i->dType) == 4)
  608.          code[1] |= 0x04000000;
  609.       break;
  610.    case FILE_MEMORY_SHARED:
  611.       if (targ->getChipset() >= 0x84) {
  612.          assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
  613.          code[0] = 0x10000001;
  614.          code[1] = 0x40000000;
  615.  
  616.          if (typeSizeof(i->dType) == 4)
  617.             code[1] |= 0x04000000;
  618.  
  619.          emitLoadStoreSizeCS(i->sType);
  620.       } else {
  621.          assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
  622.          code[0] = 0x10000001;
  623.          code[1] = 0x00200000 | (i->lanes << 14);
  624.          emitLoadStoreSizeCS(i->sType);
  625.       }
  626.       break;
  627.    case FILE_MEMORY_CONST:
  628.       code[0] = 0x10000001;
  629.       code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
  630.       if (typeSizeof(i->dType) == 4)
  631.          code[1] |= 0x04000000;
  632.       emitLoadStoreSizeCS(i->sType);
  633.       break;
  634.    case FILE_MEMORY_LOCAL:
  635.       code[0] = 0xd0000001;
  636.       code[1] = 0x40000000;
  637.       break;
  638.    case FILE_MEMORY_GLOBAL:
  639.       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
  640.       code[1] = 0x80000000;
  641.       break;
  642.    default:
  643.       assert(!"invalid load source file");
  644.       break;
  645.    }
  646.    if (sf == FILE_MEMORY_LOCAL ||
  647.        sf == FILE_MEMORY_GLOBAL)
  648.       emitLoadStoreSizeLG(i->sType, 21 + 32);
  649.  
  650.    setDst(i, 0);
  651.  
  652.    emitFlagsRd(i);
  653.    emitFlagsWr(i);
  654.  
  655.    if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
  656.       srcId(*i->src(0).getIndirect(0), 9);
  657.    } else {
  658.       setAReg16(i, 0);
  659.       srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
  660.    }
  661. }
  662.  
  663. void
  664. CodeEmitterNV50::emitSTORE(const Instruction *i)
  665. {
  666.    DataFile f = i->getSrc(0)->reg.file;
  667.    int32_t offset = i->getSrc(0)->reg.data.offset;
  668.  
  669.    switch (f) {
  670.    case FILE_SHADER_OUTPUT:
  671.       code[0] = 0x00000001 | ((offset >> 2) << 9);
  672.       code[1] = 0x80c00000;
  673.       srcId(i->src(1), 32 + 14);
  674.       break;
  675.    case FILE_MEMORY_GLOBAL:
  676.       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
  677.       code[1] = 0xa0000000;
  678.       emitLoadStoreSizeLG(i->dType, 21 + 32);
  679.       srcId(i->src(1), 2);
  680.       break;
  681.    case FILE_MEMORY_LOCAL:
  682.       code[0] = 0xd0000001;
  683.       code[1] = 0x60000000;
  684.       emitLoadStoreSizeLG(i->dType, 21 + 32);
  685.       srcId(i->src(1), 2);
  686.       break;
  687.    case FILE_MEMORY_SHARED:
  688.       code[0] = 0x00000001;
  689.       code[1] = 0xe0000000;
  690.       switch (typeSizeof(i->dType)) {
  691.       case 1:
  692.          code[0] |= offset << 9;
  693.          code[1] |= 0x00400000;
  694.          break;
  695.       case 2:
  696.          code[0] |= (offset >> 1) << 9;
  697.          break;
  698.       case 4:
  699.          code[0] |= (offset >> 2) << 9;
  700.          code[1] |= 0x04200000;
  701.          break;
  702.       default:
  703.          assert(0);
  704.          break;
  705.       }
  706.       srcId(i->src(1), 32 + 14);
  707.       break;
  708.    default:
  709.       assert(!"invalid store destination file");
  710.       break;
  711.    }
  712.  
  713.    if (f == FILE_MEMORY_GLOBAL)
  714.       srcId(*i->src(0).getIndirect(0), 9);
  715.    else
  716.       setAReg16(i, 0);
  717.  
  718.    if (f == FILE_MEMORY_LOCAL)
  719.       srcAddr16(i->src(0), false, 9);
  720.  
  721.    emitFlagsRd(i);
  722. }
  723.  
  724. void
  725. CodeEmitterNV50::emitMOV(const Instruction *i)
  726. {
  727.    DataFile sf = i->getSrc(0)->reg.file;
  728.    DataFile df = i->getDef(0)->reg.file;
  729.  
  730.    assert(sf == FILE_GPR || df == FILE_GPR);
  731.  
  732.    if (sf == FILE_FLAGS) {
  733.       code[0] = 0x00000001;
  734.       code[1] = 0x20000000;
  735.       defId(i->def(0), 2);
  736.       srcId(i->src(0), 12);
  737.       emitFlagsRd(i);
  738.    } else
  739.    if (sf == FILE_ADDRESS) {
  740.       code[0] = 0x00000001;
  741.       code[1] = 0x40000000;
  742.       defId(i->def(0), 2);
  743.       setARegBits(SDATA(i->src(0)).id + 1);
  744.       emitFlagsRd(i);
  745.    } else
  746.    if (df == FILE_FLAGS) {
  747.       code[0] = 0x00000001;
  748.       code[1] = 0xa0000000;
  749.       defId(i->def(0), 4);
  750.       srcId(i->src(0), 9);
  751.       emitFlagsRd(i);
  752.    } else
  753.    if (sf == FILE_IMMEDIATE) {
  754.       code[0] = 0x10008001;
  755.       code[1] = 0x00000003;
  756.       emitForm_IMM(i);
  757.    } else {
  758.       if (i->encSize == 4) {
  759.          code[0] = 0x10008000;
  760.       } else {
  761.          code[0] = 0x10000001;
  762.          code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
  763.          code[1] |= (i->lanes << 14);
  764.          emitFlagsRd(i);
  765.       }
  766.       defId(i->def(0), 2);
  767.       srcId(i->src(0), 9);
  768.    }
  769.    if (df == FILE_SHADER_OUTPUT) {
  770.       assert(i->encSize == 8);
  771.       code[1] |= 0x8;
  772.    }
  773. }
  774.  
  775. void
  776. CodeEmitterNV50::emitNOP()
  777. {
  778.    code[0] = 0xf0000001;
  779.    code[1] = 0xe0000000;
  780. }
  781.  
  782. void
  783. CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
  784. {
  785.    code[0] = 0xc0000000 | (lane << 16);
  786.    code[1] = 0x80000000;
  787.  
  788.    code[0] |= (quOp & 0x03) << 20;
  789.    code[1] |= (quOp & 0xfc) << 20;
  790.  
  791.    emitForm_ADD(i);
  792.  
  793.    if (!i->srcExists(1))
  794.       srcId(i->src(0), 32 + 14);
  795. }
  796.  
  797. void
  798. CodeEmitterNV50::emitPFETCH(const Instruction *i)
  799. {
  800.    code[0] = 0x11800001;
  801.    code[1] = 0x04200000 | (0xf << 14);
  802.  
  803.    defId(i->def(0), 2);
  804.    srcAddr8(i->src(0), 9);
  805.    setAReg16(i, 0);
  806. }
  807.  
  808. void
  809. CodeEmitterNV50::emitINTERP(const Instruction *i)
  810. {
  811.    code[0] = 0x80000000;
  812.  
  813.    defId(i->def(0), 2);
  814.    srcAddr8(i->src(0), 16);
  815.  
  816.    if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
  817.       code[0] |= 1 << 8;
  818.    } else {
  819.       if (i->op == OP_PINTERP) {
  820.          code[0] |= 1 << 25;
  821.          srcId(i->src(1), 9);
  822.       }
  823.       if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
  824.          code[0] |= 1 << 24;
  825.    }
  826.  
  827.    if (i->encSize == 8) {
  828.       code[1] =
  829.          (code[0] & (3 << 24)) >> (24 - 16) |
  830.          (code[0] & (1 <<  8)) << (18 -  8);
  831.       code[0] &= ~0x03000100;
  832.       code[0] |= 1;
  833.       emitFlagsRd(i);
  834.    }
  835. }
  836.  
  837. void
  838. CodeEmitterNV50::emitMINMAX(const Instruction *i)
  839. {
  840.    if (i->dType == TYPE_F64) {
  841.       code[0] = 0xe0000000;
  842.       code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
  843.    } else {
  844.       code[0] = 0x30000000;
  845.       code[1] = 0x80000000;
  846.       if (i->op == OP_MIN)
  847.          code[1] |= 0x20000000;
  848.  
  849.       switch (i->dType) {
  850.       case TYPE_F32: code[0] |= 0x80000000; break;
  851.       case TYPE_S32: code[1] |= 0x8c000000; break;
  852.       case TYPE_U32: code[1] |= 0x84000000; break;
  853.       case TYPE_S16: code[1] |= 0x80000000; break;
  854.       case TYPE_U16: break;
  855.       default:
  856.          assert(0);
  857.          break;
  858.       }
  859.       code[1] |= i->src(0).mod.abs() << 20;
  860.       code[1] |= i->src(1).mod.abs() << 19;
  861.    }
  862.    emitForm_MAD(i);
  863. }
  864.  
  865. void
  866. CodeEmitterNV50::emitFMAD(const Instruction *i)
  867. {
  868.    const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
  869.    const int neg_add = i->src(2).mod.neg();
  870.  
  871.    code[0] = 0xe0000000;
  872.  
  873.    if (i->encSize == 4) {
  874.       emitForm_MUL(i);
  875.       assert(!neg_mul && !neg_add);
  876.    } else {
  877.       code[1]  = neg_mul << 26;
  878.       code[1] |= neg_add << 27;
  879.       if (i->saturate)
  880.          code[1] |= 1 << 29;
  881.       emitForm_MAD(i);
  882.    }
  883. }
  884.  
  885. void
  886. CodeEmitterNV50::emitFADD(const Instruction *i)
  887. {
  888.    const int neg0 = i->src(0).mod.neg();
  889.    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
  890.  
  891.    code[0] = 0xb0000000;
  892.  
  893.    assert(!(i->src(0).mod | i->src(1).mod).abs());
  894.  
  895.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  896.       code[1] = 0;
  897.       emitForm_IMM(i);
  898.       code[0] |= neg0 << 15;
  899.       code[0] |= neg1 << 22;
  900.       if (i->saturate)
  901.          code[0] |= 1 << 8;
  902.    } else
  903.    if (i->encSize == 8) {
  904.       code[1] = 0;
  905.       emitForm_ADD(i);
  906.       code[1] |= neg0 << 26;
  907.       code[1] |= neg1 << 27;
  908.       if (i->saturate)
  909.          code[1] |= 1 << 29;
  910.    } else {
  911.       emitForm_MUL(i);
  912.       code[0] |= neg0 << 15;
  913.       code[0] |= neg1 << 22;
  914.       if (i->saturate)
  915.          code[0] |= 1 << 8;
  916.    }
  917. }
  918.  
  919. void
  920. CodeEmitterNV50::emitUADD(const Instruction *i)
  921. {
  922.    const int neg0 = i->src(0).mod.neg();
  923.    const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
  924.  
  925.    code[0] = 0x20008000;
  926.  
  927.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  928.       code[1] = 0;
  929.       emitForm_IMM(i);
  930.    } else
  931.    if (i->encSize == 8) {
  932.       code[0] = 0x20000000;
  933.       code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
  934.       emitForm_ADD(i);
  935.    } else {
  936.       emitForm_MUL(i);
  937.    }
  938.    assert(!(neg0 && neg1));
  939.    code[0] |= neg0 << 28;
  940.    code[0] |= neg1 << 22;
  941.  
  942.    if (i->flagsSrc >= 0) {
  943.       // addc == sub | subr
  944.       assert(!(code[0] & 0x10400000) && !i->getPredicate());
  945.       code[0] |= 0x10400000;
  946.       srcId(i->src(i->flagsSrc), 32 + 12);
  947.    }
  948. }
  949.  
  950. void
  951. CodeEmitterNV50::emitAADD(const Instruction *i)
  952. {
  953.    const int s = (i->op == OP_MOV) ? 0 : 1;
  954.  
  955.    code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
  956.    code[1] = 0x20000000;
  957.  
  958.    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
  959.  
  960.    emitFlagsRd(i);
  961.  
  962.    if (s && i->srcExists(0))
  963.       setARegBits(SDATA(i->src(0)).id + 1);
  964. }
  965.  
  966. void
  967. CodeEmitterNV50::emitIMUL(const Instruction *i)
  968. {
  969.    code[0] = 0x40000000;
  970.  
  971.    if (i->encSize == 8) {
  972.       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
  973.       emitForm_MAD(i);
  974.    } else {
  975.       if (i->sType == TYPE_S16)
  976.          code[0] |= 0x8100;
  977.       emitForm_MUL(i);
  978.    }
  979. }
  980.  
  981. void
  982. CodeEmitterNV50::emitFMUL(const Instruction *i)
  983. {
  984.    const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
  985.  
  986.    code[0] = 0xc0000000;
  987.  
  988.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  989.       code[1] = 0;
  990.       emitForm_IMM(i);
  991.       if (neg)
  992.          code[0] |= 0x8000;
  993.    } else
  994.    if (i->encSize == 8) {
  995.       code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
  996.       if (neg)
  997.          code[1] |= 0x08000000;
  998.       emitForm_MAD(i);
  999.    } else {
  1000.       emitForm_MUL(i);
  1001.       if (neg)
  1002.          code[0] |= 0x8000;
  1003.    }
  1004. }
  1005.  
  1006. void
  1007. CodeEmitterNV50::emitIMAD(const Instruction *i)
  1008. {
  1009.    code[0] = 0x60000000;
  1010.    if (isSignedType(i->sType))
  1011.       code[1] = i->saturate ? 0x40000000 : 0x20000000;
  1012.    else
  1013.       code[1] = 0x00000000;
  1014.  
  1015.    int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
  1016.    int neg2 = i->src(2).mod.neg();
  1017.  
  1018.    assert(!(neg1 & neg2));
  1019.    code[1] |= neg1 << 27;
  1020.    code[1] |= neg2 << 26;
  1021.  
  1022.    emitForm_MAD(i);
  1023.  
  1024.    if (i->flagsSrc >= 0) {
  1025.       // add with carry from $cX
  1026.       assert(!(code[1] & 0x0c000000) && !i->getPredicate());
  1027.       code[1] |= 0xc << 24;
  1028.       srcId(i->src(i->flagsSrc), 32 + 12);
  1029.    }
  1030. }
  1031.  
  1032. void
  1033. CodeEmitterNV50::emitISAD(const Instruction *i)
  1034. {
  1035.    if (i->encSize == 8) {
  1036.       code[0] = 0x50000000;
  1037.       switch (i->sType) {
  1038.       case TYPE_U32: code[1] = 0x04000000; break;
  1039.       case TYPE_S32: code[1] = 0x0c000000; break;
  1040.       case TYPE_U16: code[1] = 0x00000000; break;
  1041.       case TYPE_S16: code[1] = 0x08000000; break;
  1042.       default:
  1043.          assert(0);
  1044.          break;
  1045.       }
  1046.       emitForm_MAD(i);
  1047.    } else {
  1048.       switch (i->sType) {
  1049.       case TYPE_U32: code[0] = 0x50008000; break;
  1050.       case TYPE_S32: code[0] = 0x50008100; break;
  1051.       case TYPE_U16: code[0] = 0x50000000; break;
  1052.       case TYPE_S16: code[0] = 0x50000100; break;
  1053.       default:
  1054.          assert(0);
  1055.          break;
  1056.       }
  1057.       emitForm_MUL(i);
  1058.    }
  1059. }
  1060.  
  1061. void
  1062. CodeEmitterNV50::emitSET(const Instruction *i)
  1063. {
  1064.    code[0] = 0x30000000;
  1065.    code[1] = 0x60000000;
  1066.  
  1067.    emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
  1068.  
  1069.    switch (i->sType) {
  1070.    case TYPE_F32: code[0] |= 0x80000000; break;
  1071.    case TYPE_S32: code[1] |= 0x0c000000; break;
  1072.    case TYPE_U32: code[1] |= 0x04000000; break;
  1073.    case TYPE_S16: code[1] |= 0x08000000; break;
  1074.    case TYPE_U16: break;
  1075.    default:
  1076.       assert(0);
  1077.       break;
  1078.    }
  1079.    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
  1080.    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
  1081.    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
  1082.    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
  1083.  
  1084.    emitForm_MAD(i);
  1085. }
  1086.  
  1087. void
  1088. CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
  1089. {
  1090.    switch (rnd) {
  1091.    case ROUND_NI: code[1] |= 0x08000000; break;
  1092.    case ROUND_M:  code[1] |= 0x00020000; break;
  1093.    case ROUND_MI: code[1] |= 0x08020000; break;
  1094.    case ROUND_P:  code[1] |= 0x00040000; break;
  1095.    case ROUND_PI: code[1] |= 0x08040000; break;
  1096.    case ROUND_Z:  code[1] |= 0x00060000; break;
  1097.    case ROUND_ZI: code[1] |= 0x08060000; break;
  1098.    default:
  1099.       assert(rnd == ROUND_N);
  1100.       break;
  1101.    }
  1102. }
  1103.  
  1104. void
  1105. CodeEmitterNV50::emitCVT(const Instruction *i)
  1106. {
  1107.    const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
  1108.    RoundMode rnd;
  1109.  
  1110.    switch (i->op) {
  1111.    case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
  1112.    case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
  1113.    case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
  1114.    default:
  1115.       rnd = i->rnd;
  1116.       break;
  1117.    }
  1118.  
  1119.    code[0] = 0xa0000000;
  1120.  
  1121.    switch (i->dType) {
  1122.    case TYPE_F64:
  1123.       switch (i->sType) {
  1124.       case TYPE_F64: code[1] = 0xc4404000; break;
  1125.       case TYPE_S64: code[1] = 0x44414000; break;
  1126.       case TYPE_U64: code[1] = 0x44404000; break;
  1127.       case TYPE_F32: code[1] = 0xc4400000; break;
  1128.       case TYPE_S32: code[1] = 0x44410000; break;
  1129.       case TYPE_U32: code[1] = 0x44400000; break;
  1130.       default:
  1131.          assert(0);
  1132.          break;
  1133.       }
  1134.       break;
  1135.    case TYPE_S64:
  1136.       switch (i->sType) {
  1137.       case TYPE_F64: code[1] = 0x8c404000; break;
  1138.       case TYPE_F32: code[1] = 0x8c400000; break;
  1139.       default:
  1140.          assert(0);
  1141.          break;
  1142.       }
  1143.       break;
  1144.    case TYPE_U64:
  1145.       switch (i->sType) {
  1146.       case TYPE_F64: code[1] = 0x84404000; break;
  1147.       case TYPE_F32: code[1] = 0x84400000; break;
  1148.       default:
  1149.          assert(0);
  1150.          break;
  1151.       }
  1152.       break;
  1153.    case TYPE_F32:
  1154.       switch (i->sType) {
  1155.       case TYPE_F64: code[1] = 0xc0404000; break;
  1156.       case TYPE_S64: code[1] = 0x40414000; break;
  1157.       case TYPE_U64: code[1] = 0x40404000; break;
  1158.       case TYPE_F32: code[1] = 0xc4004000; break;
  1159.       case TYPE_S32: code[1] = 0x44014000; break;
  1160.       case TYPE_U32: code[1] = 0x44004000; break;
  1161.       case TYPE_F16: code[1] = 0xc4000000; break;
  1162.       default:
  1163.          assert(0);
  1164.          break;
  1165.       }
  1166.       break;
  1167.    case TYPE_S32:
  1168.       switch (i->sType) {
  1169.       case TYPE_F64: code[1] = 0x88404000; break;
  1170.       case TYPE_F32: code[1] = 0x8c004000; break;
  1171.       case TYPE_S32: code[1] = 0x0c014000; break;
  1172.       case TYPE_U32: code[1] = 0x0c004000; break;
  1173.       case TYPE_F16: code[1] = 0x8c000000; break;
  1174.       case TYPE_S16: code[1] = 0x0c010000; break;
  1175.       case TYPE_U16: code[1] = 0x0c000000; break;
  1176.       case TYPE_S8:  code[1] = 0x0c018000; break;
  1177.       case TYPE_U8:  code[1] = 0x0c008000; break;
  1178.       default:
  1179.          assert(0);
  1180.          break;
  1181.       }
  1182.       break;
  1183.    case TYPE_U32:
  1184.       switch (i->sType) {
  1185.       case TYPE_F64: code[1] = 0x80404000; break;
  1186.       case TYPE_F32: code[1] = 0x84004000; break;
  1187.       case TYPE_S32: code[1] = 0x04014000; break;
  1188.       case TYPE_U32: code[1] = 0x04004000; break;
  1189.       case TYPE_F16: code[1] = 0x84000000; break;
  1190.       case TYPE_S16: code[1] = 0x04010000; break;
  1191.       case TYPE_U16: code[1] = 0x04000000; break;
  1192.       case TYPE_S8:  code[1] = 0x04018000; break;
  1193.       case TYPE_U8:  code[1] = 0x04008000; break;
  1194.       default:
  1195.          assert(0);
  1196.          break;
  1197.       }
  1198.       break;
  1199.    case TYPE_S16:
  1200.    case TYPE_U16:
  1201.    case TYPE_S8:
  1202.    case TYPE_U8:
  1203.    default:
  1204.       assert(0);
  1205.       break;
  1206.    }
  1207.    if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
  1208.       code[1] |= 0x00004000;
  1209.  
  1210.    roundMode_CVT(rnd);
  1211.  
  1212.    switch (i->op) {
  1213.    case OP_ABS: code[1] |= 1 << 20; break;
  1214.    case OP_SAT: code[1] |= 1 << 19; break;
  1215.    case OP_NEG: code[1] |= 1 << 29; break;
  1216.    default:
  1217.       break;
  1218.    }
  1219.    code[1] ^= i->src(0).mod.neg() << 29;
  1220.    code[1] |= i->src(0).mod.abs() << 20;
  1221.    if (i->saturate)
  1222.       code[1] |= 1 << 19;
  1223.  
  1224.    assert(i->op != OP_ABS || !i->src(0).mod.neg());
  1225.  
  1226.    emitForm_MAD(i);
  1227. }
  1228.  
  1229. void
  1230. CodeEmitterNV50::emitPreOp(const Instruction *i)
  1231. {
  1232.    code[0] = 0xb0000000;
  1233.    code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
  1234.  
  1235.    code[1] |= i->src(0).mod.abs() << 20;
  1236.    code[1] |= i->src(0).mod.neg() << 26;
  1237.  
  1238.    emitForm_MAD(i);
  1239. }
  1240.  
  1241. void
  1242. CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
  1243. {
  1244.    code[0] = 0x90000000;
  1245.  
  1246.    if (i->encSize == 4) {
  1247.       assert(i->op == OP_RCP);
  1248.       code[0] |= i->src(0).mod.abs() << 15;
  1249.       code[0] |= i->src(0).mod.neg() << 22;
  1250.       emitForm_MUL(i);
  1251.    } else {
  1252.       code[1] = subOp << 29;
  1253.       code[1] |= i->src(0).mod.abs() << 20;
  1254.       code[1] |= i->src(0).mod.neg() << 26;
  1255.       emitForm_MAD(i);
  1256.    }
  1257. }
  1258.  
  1259. void
  1260. CodeEmitterNV50::emitNOT(const Instruction *i)
  1261. {
  1262.    code[0] = 0xd0000000;
  1263.    code[1] = 0x0002c000;
  1264.  
  1265.    switch (i->sType) {
  1266.    case TYPE_U32:
  1267.    case TYPE_S32:
  1268.       code[1] |= 0x04000000;
  1269.       break;
  1270.    default:
  1271.       break;
  1272.    }
  1273.    emitForm_MAD(i);
  1274.    setSrc(i, 0, 1);
  1275. }
  1276.  
  1277. void
  1278. CodeEmitterNV50::emitLogicOp(const Instruction *i)
  1279. {
  1280.    code[0] = 0xd0000000;
  1281.    code[1] = 0;
  1282.  
  1283.    if (i->src(1).getFile() == FILE_IMMEDIATE) {
  1284.       switch (i->op) {
  1285.       case OP_OR:  code[0] |= 0x0100; break;
  1286.       case OP_XOR: code[0] |= 0x8000; break;
  1287.       default:
  1288.          assert(i->op == OP_AND);
  1289.          break;
  1290.       }
  1291.       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
  1292.          code[0] |= 1 << 22;
  1293.  
  1294.       emitForm_IMM(i);
  1295.    } else {
  1296.       switch (i->op) {
  1297.       case OP_AND: code[1] = 0x04000000; break;
  1298.       case OP_OR:  code[1] = 0x04004000; break;
  1299.       case OP_XOR: code[1] = 0x04008000; break;
  1300.       default:
  1301.          assert(0);
  1302.          break;
  1303.       }
  1304.       if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
  1305.          code[1] |= 1 << 16;
  1306.       if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
  1307.          code[1] |= 1 << 17;
  1308.  
  1309.       emitForm_MAD(i);
  1310.    }
  1311. }
  1312.  
  1313. void
  1314. CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
  1315. {
  1316.    code[0] = 0x00000001 | (shl << 16);
  1317.    code[1] = 0xc0000000;
  1318.  
  1319.    code[0] |= (DDATA(i->def(0)).id + 1) << 2;
  1320.  
  1321.    setSrcFileBits(i, NV50_OP_ENC_IMM);
  1322.    setSrc(i, 0, 0);
  1323.    emitFlagsRd(i);
  1324. }
  1325.  
  1326. void
  1327. CodeEmitterNV50::emitShift(const Instruction *i)
  1328. {
  1329.    if (i->def(0).getFile() == FILE_ADDRESS) {
  1330.       assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
  1331.       emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
  1332.    } else {
  1333.       code[0] = 0x30000001;
  1334.       code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
  1335.       if (i->op == OP_SHR && isSignedType(i->sType))
  1336.           code[1] |= 1 << 27;
  1337.  
  1338.       if (i->src(1).getFile() == FILE_IMMEDIATE) {
  1339.          code[1] |= 1 << 20;
  1340.          code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
  1341.          defId(i->def(0), 2);
  1342.          srcId(i->src(0), 9);
  1343.          emitFlagsRd(i);
  1344.       } else {
  1345.          emitForm_MAD(i);
  1346.       }
  1347.    }
  1348. }
  1349.  
  1350. void
  1351. CodeEmitterNV50::emitOUT(const Instruction *i)
  1352. {
  1353.    code[0] = (i->op == OP_EMIT) ? 0xf0000200 : 0xf0000400;
  1354.    code[1] = 0xc0000001;
  1355.  
  1356.    emitFlagsRd(i);
  1357. }
  1358.  
  1359. void
  1360. CodeEmitterNV50::emitTEX(const TexInstruction *i)
  1361. {
  1362.    code[0] = 0xf0000001;
  1363.    code[1] = 0x00000000;
  1364.  
  1365.    switch (i->op) {
  1366.    case OP_TXB:
  1367.       code[1] = 0x20000000;
  1368.       break;
  1369.    case OP_TXL:
  1370.       code[1] = 0x40000000;
  1371.       break;
  1372.    case OP_TXF:
  1373.       code[0] |= 0x01000000;
  1374.       break;
  1375.    case OP_TXG:
  1376.       code[0] = 0x01000000;
  1377.       code[1] = 0x80000000;
  1378.       break;
  1379.    default:
  1380.       assert(i->op == OP_TEX);
  1381.       break;
  1382.    }
  1383.  
  1384.    code[0] |= i->tex.r << 9;
  1385.    code[0] |= i->tex.s << 17;
  1386.  
  1387.    int argc = i->tex.target.getArgCount();
  1388.  
  1389.    if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
  1390.       argc += 1;
  1391.    if (i->tex.target.isShadow())
  1392.       argc += 1;
  1393.    assert(argc <= 4);
  1394.  
  1395.    code[0] |= (argc - 1) << 22;
  1396.  
  1397.    if (i->tex.target.isCube()) {
  1398.       code[0] |= 0x08000000;
  1399.    } else
  1400.    if (i->tex.useOffsets) {
  1401.       code[1] |= (i->tex.offset[0][0] & 0xf) << 24;
  1402.       code[1] |= (i->tex.offset[0][1] & 0xf) << 20;
  1403.       code[1] |= (i->tex.offset[0][2] & 0xf) << 16;
  1404.    }
  1405.  
  1406.    code[0] |= (i->tex.mask & 0x3) << 25;
  1407.    code[1] |= (i->tex.mask & 0xc) << 12;
  1408.  
  1409.    if (i->tex.liveOnly)
  1410.       code[1] |= 4;
  1411.  
  1412.    defId(i->def(0), 2);
  1413.  
  1414.    emitFlagsRd(i);
  1415. }
  1416.  
  1417. void
  1418. CodeEmitterNV50::emitTXQ(const TexInstruction *i)
  1419. {
  1420.    assert(i->tex.query == TXQ_DIMS);
  1421.  
  1422.    code[0] = 0xf0000001;
  1423.    code[1] = 0x60000000;
  1424.  
  1425.    code[0] |= i->tex.r << 9;
  1426.    code[0] |= i->tex.s << 17;
  1427.  
  1428.    code[0] |= (i->tex.mask & 0x3) << 25;
  1429.    code[1] |= (i->tex.mask & 0xc) << 12;
  1430.  
  1431.    defId(i->def(0), 2);
  1432.  
  1433.    emitFlagsRd(i);
  1434. }
  1435.  
  1436. void
  1437. CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
  1438. {
  1439.    code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
  1440.    code[1] = 0x60010000;
  1441.  
  1442.    code[0] |= (i->tex.mask & 0x3) << 25;
  1443.    code[1] |= (i->tex.mask & 0xc) << 12;
  1444.    defId(i->def(0), 2);
  1445.  
  1446.    emitFlagsRd(i);
  1447. }
  1448.  
  1449. void
  1450. CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
  1451. {
  1452.    uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
  1453.  
  1454.    code[0] = 0x10000003; // bra
  1455.    code[1] = 0x00000780; // always
  1456.  
  1457.    switch (i->subOp) {
  1458.    case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
  1459.       break;
  1460.    case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
  1461.       pos += 8;
  1462.       break;
  1463.    default:
  1464.       assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
  1465.       code[0] = 0x20000003; // call
  1466.       code[1] = 0x00000000; // no predicate
  1467.       break;
  1468.    }
  1469.    addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
  1470.    addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
  1471. }
  1472.  
  1473. void
  1474. CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
  1475. {
  1476.    const FlowInstruction *f = i->asFlow();
  1477.    bool hasPred = false;
  1478.    bool hasTarg = false;
  1479.  
  1480.    code[0] = 0x00000003 | (flowOp << 28);
  1481.    code[1] = 0x00000000;
  1482.  
  1483.    switch (i->op) {
  1484.    case OP_BRA:
  1485.       hasPred = true;
  1486.       hasTarg = true;
  1487.       break;
  1488.    case OP_BREAK:
  1489.    case OP_BRKPT:
  1490.    case OP_DISCARD:
  1491.    case OP_RET:
  1492.       hasPred = true;
  1493.       break;
  1494.    case OP_CALL:
  1495.    case OP_PREBREAK:
  1496.    case OP_JOINAT:
  1497.       hasTarg = true;
  1498.       break;
  1499.    case OP_PRERET:
  1500.       hasTarg = true;
  1501.       if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
  1502.          emitPRERETEmu(f);
  1503.          return;
  1504.       }
  1505.       break;
  1506.    default:
  1507.       break;
  1508.    }
  1509.  
  1510.    if (hasPred)
  1511.       emitFlagsRd(i);
  1512.  
  1513.    if (hasTarg && f) {
  1514.       uint32_t pos;
  1515.  
  1516.       if (f->op == OP_CALL) {
  1517.          if (f->builtin) {
  1518.             pos = targNV50->getBuiltinOffset(f->target.builtin);
  1519.          } else {
  1520.             pos = f->target.fn->binPos;
  1521.          }
  1522.       } else {
  1523.          pos = f->target.bb->binPos;
  1524.       }
  1525.  
  1526.       code[0] |= ((pos >>  2) & 0xffff) << 11;
  1527.       code[1] |= ((pos >> 18) & 0x003f) << 14;
  1528.  
  1529.       RelocEntry::Type relocTy;
  1530.  
  1531.       relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
  1532.  
  1533.       addReloc(relocTy, 0, pos, 0x07fff800, 9);
  1534.       addReloc(relocTy, 1, pos, 0x000fc000, -4);
  1535.    }
  1536. }
  1537.  
  1538. void
  1539. CodeEmitterNV50::emitBAR(const Instruction *i)
  1540. {
  1541.    ImmediateValue *barId = i->getSrc(0)->asImm();
  1542.    assert(barId);
  1543.  
  1544.    code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
  1545.    code[1] = 0x00004000;
  1546.  
  1547.    if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
  1548.       code[0] |= 1 << 26;
  1549. }
  1550.  
  1551. void
  1552. CodeEmitterNV50::emitATOM(const Instruction *i)
  1553. {
  1554.    uint8_t subOp;
  1555.    switch (i->subOp) {
  1556.    case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
  1557.    case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
  1558.    case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
  1559.    case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
  1560.    case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
  1561.    case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
  1562.    case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
  1563.    case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
  1564.    case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
  1565.    case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
  1566.    default:
  1567.       assert(!"invalid subop");
  1568.       return;
  1569.    }
  1570.    code[0] = 0xd0000001;
  1571.    code[1] = 0xe0c00000 | (subOp << 2);
  1572.    if (isSignedType(i->dType))
  1573.       code[1] |= 1 << 21;
  1574.  
  1575.    // args
  1576.    emitFlagsRd(i);
  1577.    setDst(i, 0);
  1578.    setSrc(i, 1, 1);
  1579.    if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
  1580.       setSrc(i, 2, 2);
  1581.  
  1582.    // g[] pointer
  1583.    code[0] |= i->getSrc(0)->reg.fileIndex << 23;
  1584.    srcId(i->getIndirect(0, 0), 9);
  1585. }
  1586.  
  1587. bool
  1588. CodeEmitterNV50::emitInstruction(Instruction *insn)
  1589. {
  1590.    if (!insn->encSize) {
  1591.       ERROR("skipping unencodable instruction: "); insn->print();
  1592.       return false;
  1593.    } else
  1594.    if (codeSize + insn->encSize > codeSizeLimit) {
  1595.       ERROR("code emitter output buffer too small\n");
  1596.       return false;
  1597.    }
  1598.  
  1599.    if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
  1600.       INFO("EMIT: "); insn->print();
  1601.    }
  1602.  
  1603.    switch (insn->op) {
  1604.    case OP_MOV:
  1605.       emitMOV(insn);
  1606.       break;
  1607.    case OP_EXIT:
  1608.    case OP_NOP:
  1609.    case OP_JOIN:
  1610.       emitNOP();
  1611.       break;
  1612.    case OP_VFETCH:
  1613.    case OP_LOAD:
  1614.       emitLOAD(insn);
  1615.       break;
  1616.    case OP_EXPORT:
  1617.    case OP_STORE:
  1618.       emitSTORE(insn);
  1619.       break;
  1620.    case OP_PFETCH:
  1621.       emitPFETCH(insn);
  1622.       break;
  1623.    case OP_LINTERP:
  1624.    case OP_PINTERP:
  1625.       emitINTERP(insn);
  1626.       break;
  1627.    case OP_ADD:
  1628.    case OP_SUB:
  1629.       if (isFloatType(insn->dType))
  1630.          emitFADD(insn);
  1631.       else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
  1632.          emitAADD(insn);
  1633.       else
  1634.          emitUADD(insn);
  1635.       break;
  1636.    case OP_MUL:
  1637.       if (isFloatType(insn->dType))
  1638.          emitFMUL(insn);
  1639.       else
  1640.          emitIMUL(insn);
  1641.       break;
  1642.    case OP_MAD:
  1643.    case OP_FMA:
  1644.       if (isFloatType(insn->dType))
  1645.          emitFMAD(insn);
  1646.       else
  1647.          emitIMAD(insn);
  1648.       break;
  1649.    case OP_SAD:
  1650.       emitISAD(insn);
  1651.       break;
  1652.    case OP_NOT:
  1653.       emitNOT(insn);
  1654.       break;
  1655.    case OP_AND:
  1656.    case OP_OR:
  1657.    case OP_XOR:
  1658.       emitLogicOp(insn);
  1659.       break;
  1660.    case OP_SHL:
  1661.    case OP_SHR:
  1662.       emitShift(insn);
  1663.       break;
  1664.    case OP_SET:
  1665.       emitSET(insn);
  1666.       break;
  1667.    case OP_MIN:
  1668.    case OP_MAX:
  1669.       emitMINMAX(insn);
  1670.       break;
  1671.    case OP_CEIL:
  1672.    case OP_FLOOR:
  1673.    case OP_TRUNC:
  1674.    case OP_ABS:
  1675.    case OP_NEG:
  1676.    case OP_SAT:
  1677.       emitCVT(insn);
  1678.       break;
  1679.    case OP_CVT:
  1680.       if (insn->def(0).getFile() == FILE_ADDRESS)
  1681.          emitARL(insn, 0);
  1682.       else
  1683.       if (insn->def(0).getFile() == FILE_FLAGS ||
  1684.           insn->src(0).getFile() == FILE_FLAGS ||
  1685.           insn->src(0).getFile() == FILE_ADDRESS)
  1686.          emitMOV(insn);
  1687.       else
  1688.          emitCVT(insn);
  1689.       break;
  1690.    case OP_RCP:
  1691.       emitSFnOp(insn, 0);
  1692.       break;
  1693.    case OP_RSQ:
  1694.       emitSFnOp(insn, 2);
  1695.       break;
  1696.    case OP_LG2:
  1697.       emitSFnOp(insn, 3);
  1698.       break;
  1699.    case OP_SIN:
  1700.       emitSFnOp(insn, 4);
  1701.       break;
  1702.    case OP_COS:
  1703.       emitSFnOp(insn, 5);
  1704.       break;
  1705.    case OP_EX2:
  1706.       emitSFnOp(insn, 6);
  1707.       break;
  1708.    case OP_PRESIN:
  1709.    case OP_PREEX2:
  1710.       emitPreOp(insn);
  1711.       break;
  1712.    case OP_TEX:
  1713.    case OP_TXB:
  1714.    case OP_TXL:
  1715.    case OP_TXF:
  1716.       emitTEX(insn->asTex());
  1717.       break;
  1718.    case OP_TXQ:
  1719.       emitTXQ(insn->asTex());
  1720.       break;
  1721.    case OP_TEXPREP:
  1722.       emitTEXPREP(insn->asTex());
  1723.       break;
  1724.    case OP_EMIT:
  1725.    case OP_RESTART:
  1726.       emitOUT(insn);
  1727.       break;
  1728.    case OP_DISCARD:
  1729.       emitFlow(insn, 0x0);
  1730.       break;
  1731.    case OP_BRA:
  1732.       emitFlow(insn, 0x1);
  1733.       break;
  1734.    case OP_CALL:
  1735.       emitFlow(insn, 0x2);
  1736.       break;
  1737.    case OP_RET:
  1738.       emitFlow(insn, 0x3);
  1739.       break;
  1740.    case OP_PREBREAK:
  1741.       emitFlow(insn, 0x4);
  1742.       break;
  1743.    case OP_BREAK:
  1744.       emitFlow(insn, 0x5);
  1745.       break;
  1746.    case OP_QUADON:
  1747.       emitFlow(insn, 0x6);
  1748.       break;
  1749.    case OP_QUADPOP:
  1750.       emitFlow(insn, 0x7);
  1751.       break;
  1752.    case OP_JOINAT:
  1753.       emitFlow(insn, 0xa);
  1754.       break;
  1755.    case OP_PRERET:
  1756.       emitFlow(insn, 0xd);
  1757.       break;
  1758.    case OP_QUADOP:
  1759.       emitQUADOP(insn, insn->lanes, insn->subOp);
  1760.       break;
  1761.    case OP_DFDX:
  1762.       emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
  1763.       break;
  1764.    case OP_DFDY:
  1765.       emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
  1766.       break;
  1767.    case OP_ATOM:
  1768.       emitATOM(insn);
  1769.       break;
  1770.    case OP_BAR:
  1771.       emitBAR(insn);
  1772.       break;
  1773.    case OP_PHI:
  1774.    case OP_UNION:
  1775.    case OP_CONSTRAINT:
  1776.       ERROR("operation should have been eliminated\n");
  1777.       return false;
  1778.    case OP_EXP:
  1779.    case OP_LOG:
  1780.    case OP_SQRT:
  1781.    case OP_POW:
  1782.    case OP_SELP:
  1783.    case OP_SLCT:
  1784.    case OP_TXD:
  1785.    case OP_PRECONT:
  1786.    case OP_CONT:
  1787.    case OP_POPCNT:
  1788.    case OP_INSBF:
  1789.    case OP_EXTBF:
  1790.       ERROR("operation should have been lowered\n");
  1791.       return false;
  1792.    default:
  1793.       ERROR("unknown op: %u\n", insn->op);
  1794.       return false;
  1795.    }
  1796.    if (insn->join || insn->op == OP_JOIN)
  1797.       code[1] |= 0x2;
  1798.    else
  1799.    if (insn->exit || insn->op == OP_EXIT)
  1800.       code[1] |= 0x1;
  1801.  
  1802.    assert((insn->encSize == 8) == (code[0] & 1));
  1803.  
  1804.    code += insn->encSize / 4;
  1805.    codeSize += insn->encSize;
  1806.    return true;
  1807. }
  1808.  
  1809. uint32_t
  1810. CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
  1811. {
  1812.    const Target::OpInfo &info = targ->getOpInfo(i);
  1813.  
  1814.    if (info.minEncSize > 4)
  1815.       return 8;
  1816.  
  1817.    // check constraints on dst and src operands
  1818.    for (int d = 0; i->defExists(d); ++d) {
  1819.       if (i->def(d).rep()->reg.data.id > 63 ||
  1820.           i->def(d).rep()->reg.file != FILE_GPR)
  1821.          return 8;
  1822.    }
  1823.  
  1824.    for (int s = 0; i->srcExists(s); ++s) {
  1825.       DataFile sf = i->src(s).getFile();
  1826.       if (sf != FILE_GPR)
  1827.          if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
  1828.             return 8;
  1829.       if (i->src(s).rep()->reg.data.id > 63)
  1830.          return 8;
  1831.    }
  1832.  
  1833.    // check modifiers & rounding
  1834.    if (i->join || i->lanes != 0xf || i->exit)
  1835.       return 8;
  1836.    if (i->op == OP_MUL && i->rnd != ROUND_N)
  1837.       return 8;
  1838.  
  1839.    if (i->asTex())
  1840.       return 8; // TODO: short tex encoding
  1841.  
  1842.    // check constraints on short MAD
  1843.    if (info.srcNr >= 2 && i->srcExists(2)) {
  1844.       if (i->saturate || i->src(2).mod)
  1845.          return 8;
  1846.       if ((i->src(0).mod ^ i->src(1).mod) ||
  1847.           (i->src(0).mod | i->src(1).mod).abs())
  1848.          return 8;
  1849.       if (!i->defExists(0) ||
  1850.           i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
  1851.          return 8;
  1852.    }
  1853.  
  1854.    return info.minEncSize;
  1855. }
  1856.  
  1857. // Change the encoding size of an instruction after BBs have been scheduled.
  1858. static void
  1859. makeInstructionLong(Instruction *insn)
  1860. {
  1861.    if (insn->encSize == 8)
  1862.       return;
  1863.    Function *fn = insn->bb->getFunction();
  1864.    int n = 0;
  1865.    int adj = 4;
  1866.  
  1867.    for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
  1868.  
  1869.    if (n & 1) {
  1870.       adj = 8;
  1871.       insn->next->encSize = 8;
  1872.    } else
  1873.    if (insn->prev && insn->prev->encSize == 4) {
  1874.       adj = 8;
  1875.       insn->prev->encSize = 8;
  1876.    }
  1877.    insn->encSize = 8;
  1878.  
  1879.    for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
  1880.       fn->bbArray[i]->binPos += 4;
  1881.    }
  1882.    fn->binSize += adj;
  1883.    insn->bb->binSize += adj;
  1884. }
  1885.  
  1886. static bool
  1887. trySetExitModifier(Instruction *insn)
  1888. {
  1889.    if (insn->op == OP_DISCARD ||
  1890.        insn->op == OP_QUADON ||
  1891.        insn->op == OP_QUADPOP)
  1892.       return false;
  1893.    for (int s = 0; insn->srcExists(s); ++s)
  1894.       if (insn->src(s).getFile() == FILE_IMMEDIATE)
  1895.          return false;
  1896.    if (insn->asFlow()) {
  1897.       if (insn->op == OP_CALL) // side effects !
  1898.          return false;
  1899.       if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
  1900.          return false;
  1901.       insn->op = OP_EXIT;
  1902.    }
  1903.    insn->exit = 1;
  1904.    makeInstructionLong(insn);
  1905.    return true;
  1906. }
  1907.  
  1908. static void
  1909. replaceExitWithModifier(Function *func)
  1910. {
  1911.    BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
  1912.  
  1913.    if (!epilogue->getExit() ||
  1914.        epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
  1915.       return;
  1916.  
  1917.    if (epilogue->getEntry()->op != OP_EXIT) {
  1918.       Instruction *insn = epilogue->getExit()->prev;
  1919.       if (!insn || !trySetExitModifier(insn))
  1920.          return;
  1921.       insn->exit = 1;
  1922.    } else {
  1923.       for (Graph::EdgeIterator ei = func->cfgExit->incident();
  1924.            !ei.end(); ei.next()) {
  1925.          BasicBlock *bb = BasicBlock::get(ei.getNode());
  1926.          Instruction *i = bb->getExit();
  1927.  
  1928.          if (!i || !trySetExitModifier(i))
  1929.             return;
  1930.       }
  1931.    }
  1932.    epilogue->binSize -= 8;
  1933.    func->binSize -= 8;
  1934.    delete_Instruction(func->getProgram(), epilogue->getExit());
  1935. }
  1936.  
  1937. void
  1938. CodeEmitterNV50::prepareEmission(Function *func)
  1939. {
  1940.    CodeEmitter::prepareEmission(func);
  1941.  
  1942.    replaceExitWithModifier(func);
  1943. }
  1944.  
  1945. CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
  1946.    CodeEmitter(target), targNV50(target)
  1947. {
  1948.    targ = target; // specialized
  1949.    code = NULL;
  1950.    codeSize = codeSizeLimit = 0;
  1951.    relocInfo = NULL;
  1952. }
  1953.  
  1954. CodeEmitter *
  1955. TargetNV50::getCodeEmitter(Program::Type type)
  1956. {
  1957.    CodeEmitterNV50 *emit = new CodeEmitterNV50(this);
  1958.    emit->setProgramType(type);
  1959.    return emit;
  1960. }
  1961.  
  1962. } // namespace nv50_ir
  1963.