Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2011 Christoph Bumiller
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice shall be included in
  12.  * all copies or substantial portions of the Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20.  * OTHER DEALINGS IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "nv50/codegen/nv50_ir.h"
  24. #include "nv50/codegen/nv50_ir_target.h"
  25.  
  26. namespace nv50_ir {
  27.  
// Per-operation table with OP_LAST + 1 entries, one value per opcode.
// Judging by its name this is the number of source operands each operation
// takes; the entries are presumably listed in the order of the operation
// enum (declared elsewhere) -- verify against that enum when adding opcodes.
// The trailing comments name the opcodes covered by each row.
const uint8_t Target::operationSrcNr[OP_LAST + 1] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND, SET_OR, SET_XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK
   0, 0, 0,                // PRERET, PRECONT, PREBREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
   1, 1,                   // EMIT, RESTART
   1, 1, 1,                // TEX, TXB, TXL
   1, 1, 1, 1, 1, 2,       // TXF, TXQ, TXD, TXG, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 3,             // SUBFM, SUCLAMP, SUEAU, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 2, 0, 0,          // RDSV, WRSV, QUADOP, QUADON, QUADPOP
   2, 3, 2, 3,             // POPCNT, INSBF, EXTBF, PERMT
   2, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   0                       // LAST (sentinel entry)
};
  59.  
// Per-operation table with OP_LAST + 1 entries assigning an OpClass to every
// opcode. The comment above each group names the opcodes it covers; the
// entries are presumably in the order of the operation enum (declared
// elsewhere) and must stay parallel to Target::operationSrcNr -- verify when
// adding opcodes.
const OpClass Target::operationClass[OP_LAST + 1] =
{
   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
   OPCLASS_OTHER,
   OPCLASS_PSEUDO,
   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
   // MOV; LOAD; STORE
   OPCLASS_MOVE,
   OPCLASS_LOAD,
   OPCLASS_STORE,
   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
   OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
   OPCLASS_SHIFT, OPCLASS_SHIFT,
   // MAX, MIN
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // SAT, CEIL, FLOOR, TRUNC; CVT
   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_CONVERT,
   // SET_AND, SET_OR, SET_XOR, SET; SELP, SLCT
   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU,
   // BRA, CALL, RET; CONT, BREAK, PRERET, PRECONT, PREBREAK;
   // BRKPT, JOINAT, JOIN
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   // DISCARD, EXIT
   OPCLASS_FLOW, OPCLASS_FLOW,
   // MEMBAR
   OPCLASS_CONTROL,
   // VFETCH, PFETCH, EXPORT
   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
   // LINTERP, PINTERP
   OPCLASS_SFU, OPCLASS_SFU,
   // EMIT, RESTART
   OPCLASS_CONTROL, OPCLASS_CONTROL,
   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA; TEXPREP
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE,
   // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
   // NOTE(review): SUSTB is the only surface op classed as OPCLASS_ATOMIC
   // here; this looks inconsistent with its neighbours -- confirm whether it
   // is intentional before relying on it.
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
   // SUBFM, SUCLAMP, SUEAU, MADSP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
   // TEXBAR
   OPCLASS_OTHER,
   // DFDX, DFDY, RDSV, WRSV; QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // POPCNT, INSBF, EXTBF, PERMT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSAD, VSET, VSHR, VSHL
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSEL, CCTL
   OPCLASS_VECTOR, OPCLASS_CONTROL,
   OPCLASS_PSEUDO // LAST
};
  130.  
  131.  
  132. extern Target *getTargetNVC0(unsigned int chipset);
  133. extern Target *getTargetNV50(unsigned int chipset);
  134.  
  135. Target *Target::create(unsigned int chipset)
  136. {
  137.    switch (chipset & 0xf0) {
  138.    case 0xc0:
  139.    case 0xd0:
  140.    case 0xe0:
  141.    case NVISA_GK110_CHIPSET:
  142.       return getTargetNVC0(chipset);
  143.    case 0x50:
  144.    case 0x80:
  145.    case 0x90:
  146.    case 0xa0:
  147.       return getTargetNV50(chipset);
  148.    default:
  149.       ERROR("unsupported target: NV%x\n", chipset);
  150.       return 0;
  151.    }
  152. }
  153.  
  154. void Target::destroy(Target *targ)
  155. {
  156.    delete targ;
  157. }
  158.  
  159. CodeEmitter::CodeEmitter(const Target *target) : targ(target)
  160. {
  161. }
  162.  
  163. void
  164. CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
  165. {
  166.    code = reinterpret_cast<uint32_t *>(ptr);
  167.    codeSize = 0;
  168.    codeSizeLimit = size;
  169. }
  170.  
  171. void
  172. CodeEmitter::printBinary() const
  173. {
  174.    uint32_t *bin = code - codeSize / 4;
  175.    INFO("program binary (%u bytes)", codeSize);
  176.    for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
  177.       if ((pos % 8) == 0)
  178.          INFO("\n");
  179.       INFO("%08x ", bin[pos]);
  180.    }
  181.    INFO("\n");
  182. }
  183.  
  184. static inline uint32_t sizeToBundlesNVE4(uint32_t size)
  185. {
  186.    return (size + 55) / 56;
  187. }
  188.  
  189. void
  190. CodeEmitter::prepareEmission(Program *prog)
  191. {
  192.    for (ArrayList::Iterator fi = prog->allFuncs.iterator();
  193.         !fi.end(); fi.next()) {
  194.       Function *func = reinterpret_cast<Function *>(fi.get());
  195.       func->binPos = prog->binSize;
  196.       prepareEmission(func);
  197.  
  198.       // adjust sizes & positions for schedulding info:
  199.       if (prog->getTarget()->hasSWSched) {
  200.          uint32_t adjPos = func->binPos;
  201.          BasicBlock *bb = NULL;
  202.          for (int i = 0; i < func->bbCount; ++i) {
  203.             bb = func->bbArray[i];
  204.             int32_t adjSize = bb->binSize;
  205.             if (adjPos % 64) {
  206.                adjSize -= 64 - adjPos % 64;
  207.                if (adjSize < 0)
  208.                   adjSize = 0;
  209.             }
  210.             adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
  211.             bb->binPos = adjPos;
  212.             bb->binSize = adjSize;
  213.             adjPos += adjSize;
  214.          }
  215.          if (bb)
  216.             func->binSize = adjPos - func->binPos;
  217.       }
  218.  
  219.       prog->binSize += func->binSize;
  220.    }
  221. }
  222.  
  223. void
  224. CodeEmitter::prepareEmission(Function *func)
  225. {
  226.    func->bbCount = 0;
  227.    func->bbArray = new BasicBlock * [func->cfg.getSize()];
  228.  
  229.    BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
  230.  
  231.    for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
  232.       prepareEmission(BasicBlock::get(*it));
  233. }
  234.  
/**
 * Prepare one basic block for emission.
 *
 * Steps, in order:
 *  1. Remove OP_BRA exits of already prepared blocks that jump straight to
 *     @p bb, and set bb->binPos right behind the preceding block.
 *  2. Append @p bb to the function's bbArray.
 *  3. Assign an encoding size (encSize) to every instruction, trying to keep
 *     instructions that can be encoded in fewer than 8 bytes next to each
 *     other, and accumulate the total in bb->binSize.
 *  4. Add bb->binSize to the function's binSize.
 *
 * Blocks without an exit instruction only go through steps 1 and 2.
 */
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort;

   // Find the last already prepared block that has a non-zero size.
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();

      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         // The branch targets bb itself: drop it and take its 8 bytes off
         // the sizes, shifting every block recorded after `in` back by 8.
         in->binSize -= 8;
         func->binSize -= 8;

         // NOTE: this reuses the outer loop index; j == bbCount afterwards.
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;

         in->remove(exit);
      }
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   func->bbArray[func->bbCount++] = bb;

   // Nothing to size if the block has no exit instruction.
   if (!bb->getExit())
      return;

   // determine encoding size, try to group short instructions
   // nShort counts the current run of instructions with encSize < 8; an odd
   // count means the last short instruction is still unpaired.
   nShort = 0;
   for (i = bb->getEntry(); i; i = next) {
      next = i->next;

      // Drop memory barriers the target reports as unsupported.
      if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
         bb->remove(i);
         continue;
      }

      i->encSize = getMinEncodingSize(i);
      if (next && i->encSize < 8)
         ++nShort;
      else
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         // An unpaired short instruction precedes i and a 4-byte candidate
         // follows it: try to reorder so that the two short ones are adjacent.
         if (i->isCommutationLegal(i->next)) {
            // Swap i with next; the loop then revisits i (now after next).
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            // Swap i with its predecessor instead and account for next here.
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // No legal reordering: widen both i and the unpaired predecessor.
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         i->encSize = 8;
         if (nShort & 1) {
            // The unpaired predecessor grows from its short size to 8 bytes.
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }

   // The exit instruction must end up with an 8-byte encoding.
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;

      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

   func->binSize += bb->binSize;
}
  326.  
  327. void
  328. Program::emitSymbolTable(struct nv50_ir_prog_info *info)
  329. {
  330.    unsigned int n = 0, nMax = allFuncs.getSize();
  331.  
  332.    info->bin.syms =
  333.       (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
  334.  
  335.    for (ArrayList::Iterator fi = allFuncs.iterator();
  336.         !fi.end();
  337.         fi.next(), ++n) {
  338.       Function *f = (Function *)fi.get();
  339.       assert(n < nMax);
  340.  
  341.       info->bin.syms[n].label = f->getLabel();
  342.       info->bin.syms[n].offset = f->binPos;
  343.    }
  344.  
  345.    info->bin.numSyms = n;
  346. }
  347.  
  348. bool
  349. Program::emitBinary(struct nv50_ir_prog_info *info)
  350. {
  351.    CodeEmitter *emit = target->getCodeEmitter(progType);
  352.  
  353.    emit->prepareEmission(this);
  354.  
  355.    if (dbgFlags & NV50_IR_DEBUG_BASIC)
  356.       this->print();
  357.  
  358.    if (!binSize) {
  359.       code = NULL;
  360.       return false;
  361.    }
  362.    code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
  363.    if (!code)
  364.       return false;
  365.    emit->setCodeLocation(code, binSize);
  366.  
  367.    for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
  368.       Function *fn = reinterpret_cast<Function *>(fi.get());
  369.  
  370.       assert(emit->getCodeSize() == fn->binPos);
  371.  
  372.       for (int b = 0; b < fn->bbCount; ++b)
  373.          for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
  374.             emit->emitInstruction(i);
  375.    }
  376.    info->bin.relocData = emit->getRelocInfo();
  377.  
  378.    emitSymbolTable(info);
  379.  
  380.    // the nvc0 driver will print the binary iself together with the header
  381.    if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
  382.       emit->printBinary();
  383.  
  384.    delete emit;
  385.    return true;
  386. }
  387.  
  388. #define RELOC_ALLOC_INCREMENT 8
  389.  
  390. bool
  391. CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
  392.                       int s)
  393. {
  394.    unsigned int n = relocInfo ? relocInfo->count : 0;
  395.  
  396.    if (!(n % RELOC_ALLOC_INCREMENT)) {
  397.       size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
  398.       relocInfo = reinterpret_cast<RelocInfo *>(
  399.          REALLOC(relocInfo, n ? size : 0,
  400.                  size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
  401.       if (!relocInfo)
  402.          return false;
  403.       if (n == 0)
  404.          memset(relocInfo, 0, sizeof(RelocInfo));
  405.    }
  406.    ++relocInfo->count;
  407.  
  408.    relocInfo->entry[n].data = data;
  409.    relocInfo->entry[n].mask = m;
  410.    relocInfo->entry[n].offset = codeSize + w * 4;
  411.    relocInfo->entry[n].bitPos = s;
  412.    relocInfo->entry[n].type = ty;
  413.  
  414.    return true;
  415. }
  416.  
  417. void
  418. RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
  419. {
  420.    uint32_t value = 0;
  421.  
  422.    switch (type) {
  423.    case TYPE_CODE: value = info->codePos; break;
  424.    case TYPE_BUILTIN: value = info->libPos; break;
  425.    case TYPE_DATA: value = info->dataPos; break;
  426.    default:
  427.       assert(0);
  428.       break;
  429.    }
  430.    value += data;
  431.    value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
  432.  
  433.    binary[offset / 4] &= ~mask;
  434.    binary[offset / 4] |= value & mask;
  435. }
  436.  
  437. } // namespace nv50_ir
  438.  
  439.  
  440. #include "nv50/codegen/nv50_ir_driver.h"
  441.  
  442. extern "C" {
  443.  
  444. void
  445. nv50_ir_relocate_code(void *relocData, uint32_t *code,
  446.                       uint32_t codePos,
  447.                       uint32_t libPos,
  448.                       uint32_t dataPos)
  449. {
  450.    nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
  451.  
  452.    info->codePos = codePos;
  453.    info->libPos = libPos;
  454.    info->dataPos = dataPos;
  455.  
  456.    for (unsigned int i = 0; i < info->count; ++i)
  457.       info->entry[i].apply(code, info);
  458. }
  459.  
  460. void
  461. nv50_ir_get_target_library(uint32_t chipset,
  462.                            const uint32_t **code, uint32_t *size)
  463. {
  464.    nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
  465.    targ->getBuiltinCode(code, size);
  466.    nv50_ir::Target::destroy(targ);
  467. }
  468.  
  469. }
  470.