Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2011 Christoph Bumiller
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "codegen/nv50_ir.h"
  24. #include "codegen/nv50_ir_target.h"
  25.  
  26. namespace nv50_ir {
  27.  
  28. const uint8_t Target::operationSrcNr[] =
  29. {
  30.    0, 0,                   // NOP, PHI
  31.    0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
  32.    1, 1, 2,                // MOV, LOAD, STORE
  33.    2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
  34.    1, 1, 1,                // ABS, NEG, NOT
  35.    2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
  36.    2, 2, 1,                // MAX, MIN, SAT
  37.    1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
  38.    3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
  39.    1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
  40.    1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
  41.    0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
  42.    0, 0, 0,                // PRERET,CONT,BREAK
  43.    0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
  44.    1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
  45.    1, 1,                   // EMIT, RESTART
  46.    1, 1, 1,                // TEX, TXB, TXL,
  47.    1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
  48.    1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
  49.    3, 3, 3, 3,             // SUBFM, SUCLAMP, SUEAU, MADSP
  50.    0,                      // TEXBAR
  51.    1, 1,                   // DFDX, DFDY
  52.    1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
  53.    2, 3, 2, 1, 3,          // POPCNT, INSBF, EXTBF, BFIND, PERMT
  54.    2, 2,                   // ATOM, BAR
  55.    2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
  56.    2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
  57.    3,                      // SHFL
  58.    0
  59. };
  60.  
  61. const OpClass Target::operationClass[] =
  62. {
  63.    // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
  64.    OPCLASS_OTHER,
  65.    OPCLASS_PSEUDO,
  66.    OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
  67.    // MOV; LOAD; STORE
  68.    OPCLASS_MOVE,
  69.    OPCLASS_LOAD,
  70.    OPCLASS_STORE,
  71.    // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
  72.    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
  73.    OPCLASS_ARITH, OPCLASS_ARITH,
  74.    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
  75.    // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
  76.    OPCLASS_CONVERT, OPCLASS_CONVERT,
  77.    OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
  78.    OPCLASS_SHIFT, OPCLASS_SHIFT,
  79.    // MAX, MIN
  80.    OPCLASS_COMPARE, OPCLASS_COMPARE,
  81.    // SAT, CEIL, FLOOR, TRUNC; CVT
  82.    OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
  83.    OPCLASS_CONVERT,
  84.    // SET(AND,OR,XOR); SELP, SLCT
  85.    OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
  86.    OPCLASS_COMPARE, OPCLASS_COMPARE,
  87.    // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
  88.    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
  89.    OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
  90.    OPCLASS_SFU, OPCLASS_SFU,
  91.    // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
  92.    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
  93.    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
  94.    OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
  95.    // DISCARD, EXIT
  96.    OPCLASS_FLOW, OPCLASS_FLOW,
  97.    // MEMBAR
  98.    OPCLASS_CONTROL,
  99.    // VFETCH, PFETCH, EXPORT
  100.    OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
  101.    // LINTERP, PINTERP
  102.    OPCLASS_SFU, OPCLASS_SFU,
  103.    // EMIT, RESTART
  104.    OPCLASS_CONTROL, OPCLASS_CONTROL,
  105.    // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
  106.    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
  107.    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
  108.    OPCLASS_TEXTURE, OPCLASS_TEXTURE,
  109.    // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
  110.    OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
  111.    OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
  112.    // SUBFM, SUCLAMP, SUEAU, MADSP
  113.    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
  114.    // TEXBAR
  115.    OPCLASS_OTHER,
  116.    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
  117.    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
  118.    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
  119.    // POPCNT, INSBF, EXTBF, BFIND; PERMT
  120.    OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
  121.    OPCLASS_BITFIELD,
  122.    // ATOM, BAR
  123.    OPCLASS_ATOMIC, OPCLASS_CONTROL,
  124.    // VADD, VAVG, VMIN, VMAX
  125.    OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
  126.    // VSAD, VSET, VSHR, VSHL
  127.    OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
  128.    // VSEL, CCTL
  129.    OPCLASS_VECTOR, OPCLASS_CONTROL,
  130.    // SHFL
  131.    OPCLASS_OTHER,
  132.    OPCLASS_PSEUDO // LAST
  133. };
  134.  
  135.  
  136. extern Target *getTargetGM107(unsigned int chipset);
  137. extern Target *getTargetNVC0(unsigned int chipset);
  138. extern Target *getTargetNV50(unsigned int chipset);
  139.  
  140. Target *Target::create(unsigned int chipset)
  141. {
  142.    STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1);
  143.    STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);
  144.    switch (chipset & ~0xf) {
  145.    case 0x110:
  146.       return getTargetGM107(chipset);
  147.    case 0xc0:
  148.    case 0xd0:
  149.    case 0xe0:
  150.    case 0xf0:
  151.    case 0x100:
  152.       return getTargetNVC0(chipset);
  153.    case 0x50:
  154.    case 0x80:
  155.    case 0x90:
  156.    case 0xa0:
  157.       return getTargetNV50(chipset);
  158.    default:
  159.       ERROR("unsupported target: NV%x\n", chipset);
  160.       return 0;
  161.    }
  162. }
  163.  
  164. void Target::destroy(Target *targ)
  165. {
  166.    delete targ;
  167. }
  168.  
  169. CodeEmitter::CodeEmitter(const Target *target) : targ(target)
  170. {
  171. }
  172.  
  173. void
  174. CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
  175. {
  176.    code = reinterpret_cast<uint32_t *>(ptr);
  177.    codeSize = 0;
  178.    codeSizeLimit = size;
  179. }
  180.  
  181. void
  182. CodeEmitter::printBinary() const
  183. {
  184.    uint32_t *bin = code - codeSize / 4;
  185.    INFO("program binary (%u bytes)", codeSize);
  186.    for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
  187.       if ((pos % 8) == 0)
  188.          INFO("\n");
  189.       INFO("%08x ", bin[pos]);
  190.    }
  191.    INFO("\n");
  192. }
  193.  
  194. static inline uint32_t sizeToBundlesNVE4(uint32_t size)
  195. {
  196.    return (size + 55) / 56;
  197. }
  198.  
  199. void
  200. CodeEmitter::prepareEmission(Program *prog)
  201. {
  202.    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
  203.         !fi.end(); fi.next()) {
  204.       Function *func = reinterpret_cast<Function *>(fi.get());
  205.       func->binPos = prog->binSize;
  206.       prepareEmission(func);
  207.  
  208.       // adjust sizes & positions for schedulding info:
  209.       if (prog->getTarget()->hasSWSched) {
  210.          uint32_t adjPos = func->binPos;
  211.          BasicBlock *bb = NULL;
  212.          for (int i = 0; i < func->bbCount; ++i) {
  213.             bb = func->bbArray[i];
  214.             int32_t adjSize = bb->binSize;
  215.             if (adjPos % 64) {
  216.                adjSize -= 64 - adjPos % 64;
  217.                if (adjSize < 0)
  218.                   adjSize = 0;
  219.             }
  220.             adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
  221.             bb->binPos = adjPos;
  222.             bb->binSize = adjSize;
  223.             adjPos += adjSize;
  224.          }
  225.          if (bb)
  226.             func->binSize = adjPos - func->binPos;
  227.       }
  228.  
  229.       prog->binSize += func->binSize;
  230.    }
  231. }
  232.  
  233. void
  234. CodeEmitter::prepareEmission(Function *func)
  235. {
  236.    func->bbCount = 0;
  237.    func->bbArray = new BasicBlock * [func->cfg.getSize()];
  238.  
  239.    BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
  240.  
  241.    for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
  242.       prepareEmission(BasicBlock::get(*it));
  243. }
  244.  
  245. void
  246. CodeEmitter::prepareEmission(BasicBlock *bb)
  247. {
  248.    Instruction *i, *next;
  249.    Function *func = bb->getFunction();
  250.    int j;
  251.    unsigned int nShort;
  252.  
  253.    for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
  254.  
  255.    for (; j >= 0; --j) {
  256.       BasicBlock *in = func->bbArray[j];
  257.       Instruction *exit = in->getExit();
  258.  
  259.       if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
  260.          in->binSize -= 8;
  261.          func->binSize -= 8;
  262.  
  263.          for (++j; j < func->bbCount; ++j)
  264.             func->bbArray[j]->binPos -= 8;
  265.  
  266.          in->remove(exit);
  267.       }
  268.       bb->binPos = in->binPos + in->binSize;
  269.       if (in->binSize) // no more no-op branches to bb
  270.          break;
  271.    }
  272.    func->bbArray[func->bbCount++] = bb;
  273.  
  274.    if (!bb->getExit())
  275.       return;
  276.  
  277.    // determine encoding size, try to group short instructions
  278.    nShort = 0;
  279.    for (i = bb->getEntry(); i; i = next) {
  280.       next = i->next;
  281.  
  282.       if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
  283.          bb->remove(i);
  284.          continue;
  285.       }
  286.  
  287.       i->encSize = getMinEncodingSize(i);
  288.       if (next && i->encSize < 8)
  289.          ++nShort;
  290.       else
  291.       if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
  292.          if (i->isCommutationLegal(i->next)) {
  293.             bb->permuteAdjacent(i, next);
  294.             next->encSize = 4;
  295.             next = i;
  296.             i = i->prev;
  297.             ++nShort;
  298.          } else
  299.          if (i->isCommutationLegal(i->prev) && next->next) {
  300.             bb->permuteAdjacent(i->prev, i);
  301.             next->encSize = 4;
  302.             next = next->next;
  303.             bb->binSize += 4;
  304.             ++nShort;
  305.          } else {
  306.             i->encSize = 8;
  307.             i->prev->encSize = 8;
  308.             bb->binSize += 4;
  309.             nShort = 0;
  310.          }
  311.       } else {
  312.          i->encSize = 8;
  313.          if (nShort & 1) {
  314.             i->prev->encSize = 8;
  315.             bb->binSize += 4;
  316.          }
  317.          nShort = 0;
  318.       }
  319.       bb->binSize += i->encSize;
  320.    }
  321.  
  322.    if (bb->getExit()->encSize == 4) {
  323.       assert(nShort);
  324.       bb->getExit()->encSize = 8;
  325.       bb->binSize += 4;
  326.  
  327.       if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
  328.          bb->binSize += 8;
  329.          bb->getExit()->prev->encSize = 8;
  330.       }
  331.    }
  332.    assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
  333.  
  334.    func->binSize += bb->binSize;
  335. }
  336.  
  337. void
  338. Program::emitSymbolTable(struct nv50_ir_prog_info *info)
  339. {
  340.    unsigned int n = 0, nMax = allFuncs.getSize();
  341.  
  342.    info->bin.syms =
  343.       (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
  344.  
  345.    for (ArrayList::Iterator fi = allFuncs.iterator();
  346.         !fi.end();
  347.         fi.next(), ++n) {
  348.       Function *f = (Function *)fi.get();
  349.       assert(n < nMax);
  350.  
  351.       info->bin.syms[n].label = f->getLabel();
  352.       info->bin.syms[n].offset = f->binPos;
  353.    }
  354.  
  355.    info->bin.numSyms = n;
  356. }
  357.  
  358. bool
  359. Program::emitBinary(struct nv50_ir_prog_info *info)
  360. {
  361.    CodeEmitter *emit = target->getCodeEmitter(progType);
  362.  
  363.    emit->prepareEmission(this);
  364.  
  365.    if (dbgFlags & NV50_IR_DEBUG_BASIC)
  366.       this->print();
  367.  
  368.    if (!binSize) {
  369.       code = NULL;
  370.       return false;
  371.    }
  372.    code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
  373.    if (!code)
  374.       return false;
  375.    emit->setCodeLocation(code, binSize);
  376.  
  377.    for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
  378.       Function *fn = reinterpret_cast<Function *>(fi.get());
  379.  
  380.       assert(emit->getCodeSize() == fn->binPos);
  381.  
  382.       for (int b = 0; b < fn->bbCount; ++b) {
  383.          for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
  384.             emit->emitInstruction(i);
  385.             if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
  386.                info->io.fp64 = true;
  387.          }
  388.       }
  389.    }
  390.    info->bin.relocData = emit->getRelocInfo();
  391.  
  392.    emitSymbolTable(info);
  393.  
  394.    // the nvc0 driver will print the binary iself together with the header
  395.    if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
  396.       emit->printBinary();
  397.  
  398.    delete emit;
  399.    return true;
  400. }
  401.  
  402. #define RELOC_ALLOC_INCREMENT 8
  403.  
  404. bool
  405. CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
  406.                       int s)
  407. {
  408.    unsigned int n = relocInfo ? relocInfo->count : 0;
  409.  
  410.    if (!(n % RELOC_ALLOC_INCREMENT)) {
  411.       size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
  412.       relocInfo = reinterpret_cast<RelocInfo *>(
  413.          REALLOC(relocInfo, n ? size : 0,
  414.                  size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
  415.       if (!relocInfo)
  416.          return false;
  417.       if (n == 0)
  418.          memset(relocInfo, 0, sizeof(RelocInfo));
  419.    }
  420.    ++relocInfo->count;
  421.  
  422.    relocInfo->entry[n].data = data;
  423.    relocInfo->entry[n].mask = m;
  424.    relocInfo->entry[n].offset = codeSize + w * 4;
  425.    relocInfo->entry[n].bitPos = s;
  426.    relocInfo->entry[n].type = ty;
  427.  
  428.    return true;
  429. }
  430.  
  431. void
  432. RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
  433. {
  434.    uint32_t value = 0;
  435.  
  436.    switch (type) {
  437.    case TYPE_CODE: value = info->codePos; break;
  438.    case TYPE_BUILTIN: value = info->libPos; break;
  439.    case TYPE_DATA: value = info->dataPos; break;
  440.    default:
  441.       assert(0);
  442.       break;
  443.    }
  444.    value += data;
  445.    value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
  446.  
  447.    binary[offset / 4] &= ~mask;
  448.    binary[offset / 4] |= value & mask;
  449. }
  450.  
  451. } // namespace nv50_ir
  452.  
  453.  
  454. #include "codegen/nv50_ir_driver.h"
  455.  
  456. extern "C" {
  457.  
  458. void
  459. nv50_ir_relocate_code(void *relocData, uint32_t *code,
  460.                       uint32_t codePos,
  461.                       uint32_t libPos,
  462.                       uint32_t dataPos)
  463. {
  464.    nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
  465.  
  466.    info->codePos = codePos;
  467.    info->libPos = libPos;
  468.    info->dataPos = dataPos;
  469.  
  470.    for (unsigned int i = 0; i < info->count; ++i)
  471.       info->entry[i].apply(code, info);
  472. }
  473.  
  474. void
  475. nv50_ir_get_target_library(uint32_t chipset,
  476.                            const uint32_t **code, uint32_t *size)
  477. {
  478.    nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
  479.    targ->getBuiltinCode(code, size);
  480.    nv50_ir::Target::destroy(targ);
  481. }
  482.  
  483. }
  484.