Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2011 Christoph Bumiller
  3.  *           2014 Red Hat Inc.
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the "Software"),
  7.  * to deal in the Software without restriction, including without limitation
  8.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9.  * and/or sell copies of the Software, and to permit persons to whom the
  10.  * Software is furnished to do so, subject to the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice shall be included in
  13.  * all copies or substantial portions of the Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19.  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20.  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21.  * OTHER DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24. #include "codegen/nv50_ir.h"
  25. #include "codegen/nv50_ir_build_util.h"
  26.  
  27. #include "codegen/nv50_ir_target_nvc0.h"
  28. #include "codegen/nv50_ir_lowering_gm107.h"
  29.  
  30. #include <limits>
  31.  
  32. namespace nv50_ir {
  33.  
  // Per-lane operation selectors used to build a quadop sub-opcode.
  // Each selector is a 2-bit value.
  #define QOP_ADD  0
  #define QOP_SUBR 1
  #define QOP_SUB  2
  #define QOP_MOV2 3

  // Packs four per-lane selectors into a single 8-bit value.
  // Arguments are selector names without the QOP_ prefix, given in the
  // lane order UL, UR, LL, LR:
  //    q (UL) -> bits 7:6
  //    r (UR) -> bits 5:4
  //    s (LL) -> bits 3:2
  //    t (LR) -> bits 1:0
  #define QUADOP(q, r, s, t)   \
     (((QOP_##q) << 6) |       \
      ((QOP_##r) << 4) |       \
      ((QOP_##s) << 2) |       \
      ((QOP_##t) << 0))
  43.  
  44. bool
  45. GM107LoweringPass::handleManualTXD(TexInstruction *i)
  46. {
  47.    static const uint8_t qOps[4][2] =
  48.    {
  49.       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
  50.       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
  51.       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
  52.       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
  53.    };
  54.    Value *def[4][4];
  55.    Value *crd[3];
  56.    Value *tmp;
  57.    Instruction *tex, *add;
  58.    Value *zero = bld.loadImm(bld.getSSA(), 0);
  59.    int l, c;
  60.    const int dim = i->tex.target.getDim();
  61.    const int array = i->tex.target.isArray();
  62.  
  63.    i->op = OP_TEX; // no need to clone dPdx/dPdy later
  64.  
  65.    for (c = 0; c < dim; ++c)
  66.       crd[c] = bld.getScratch();
  67.    tmp = bld.getScratch();
  68.  
  69.    for (l = 0; l < 4; ++l) {
  70.       // mov coordinates from lane l to all lanes
  71.       bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
  72.       for (c = 0; c < dim; ++c) {
  73.          bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l));
  74.          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
  75.          add->subOp = 0x00;
  76.          add->lanes = 1; /* abused for .ndv */
  77.       }
  78.  
  79.       // add dPdx from lane l to lanes dx
  80.       for (c = 0; c < dim; ++c) {
  81.          bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
  82.          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
  83.          add->subOp = qOps[l][0];
  84.          add->lanes = 1; /* abused for .ndv */
  85.       }
  86.  
  87.       // add dPdy from lane l to lanes dy
  88.       for (c = 0; c < dim; ++c) {
  89.          bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
  90.          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
  91.          add->subOp = qOps[l][1];
  92.          add->lanes = 1; /* abused for .ndv */
  93.       }
  94.  
  95.       // texture
  96.       bld.insert(tex = cloneForward(func, i));
  97.       for (c = 0; c < dim; ++c)
  98.          tex->setSrc(c + array, crd[c]);
  99.       bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
  100.  
  101.       // save results
  102.       for (c = 0; i->defExists(c); ++c) {
  103.          Instruction *mov;
  104.          def[c][l] = bld.getSSA();
  105.          mov = bld.mkMov(def[c][l], tex->getDef(c));
  106.          mov->fixed = 1;
  107.          mov->lanes = 1 << l;
  108.       }
  109.    }
  110.  
  111.    for (c = 0; i->defExists(c); ++c) {
  112.       Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
  113.       for (l = 0; l < 4; ++l)
  114.          u->setSrc(l, def[c][l]);
  115.    }
  116.  
  117.    i->bb->remove(i);
  118.    return true;
  119. }
  120.  
  121. bool
  122. GM107LoweringPass::handleDFDX(Instruction *insn)
  123. {
  124.    Instruction *shfl;
  125.    int qop = 0, xid = 0;
  126.  
  127.    switch (insn->op) {
  128.    case OP_DFDX:
  129.       qop = QUADOP(SUB, SUBR, SUB, SUBR);
  130.       xid = 1;
  131.       break;
  132.    case OP_DFDY:
  133.       qop = QUADOP(SUB, SUB, SUBR, SUBR);
  134.       xid = 2;
  135.       break;
  136.    default:
  137.       assert(!"invalid dfdx opcode");
  138.       break;
  139.    }
  140.  
  141.    shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
  142.                     insn->getSrc(0), bld.mkImm(xid));
  143.    shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
  144.    insn->op = OP_QUADOP;
  145.    insn->subOp = qop;
  146.    insn->lanes = 0; /* abused for !.ndv */
  147.    insn->setSrc(1, insn->getSrc(0));
  148.    insn->setSrc(0, shfl->getDef(0));
  149.    return true;
  150. }
  151.  
  152. bool
  153. GM107LoweringPass::handlePFETCH(Instruction *i)
  154. {
  155.    Value *tmp0 = bld.getScratch();
  156.    Value *tmp1 = bld.getScratch();
  157.    Value *tmp2 = bld.getScratch();
  158.    bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
  159.    bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
  160.    bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
  161.    bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
  162.    if (i->getSrc(1))
  163.       bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
  164.    else
  165.       bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0));
  166.    bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
  167.    i->setSrc(0, tmp0);
  168.    i->setSrc(1, NULL);
  169.    return true;
  170. }
  171.  
  172. bool
  173. GM107LoweringPass::handlePOPCNT(Instruction *i)
  174. {
  175.    Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
  176.                            i->getSrc(0), i->getSrc(1));
  177.    i->setSrc(0, tmp);
  178.    i->setSrc(1, NULL);
  179.    return TRUE;
  180. }
  181.  
  182. //
  183. // - add quadop dance for texturing
  184. // - put FP outputs in GPRs
  185. // - convert instruction sequences
  186. //
  187. bool
  188. GM107LoweringPass::visit(Instruction *i)
  189. {
  190.    bld.setPosition(i, false);
  191.  
  192.    if (i->cc != CC_ALWAYS)
  193.       checkPredicate(i);
  194.  
  195.    switch (i->op) {
  196.    case OP_TEX:
  197.    case OP_TXB:
  198.    case OP_TXL:
  199.    case OP_TXF:
  200.    case OP_TXG:
  201.       return handleTEX(i->asTex());
  202.    case OP_TXD:
  203.       return handleTXD(i->asTex());
  204.    case OP_TXLQ:
  205.       return handleTXLQ(i->asTex());
  206.    case OP_TXQ:
  207.       return handleTXQ(i->asTex());
  208.    case OP_EX2:
  209.       bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
  210.       i->setSrc(0, i->getDef(0));
  211.       break;
  212.    case OP_POW:
  213.       return handlePOW(i);
  214.    case OP_DIV:
  215.       return handleDIV(i);
  216.    case OP_MOD:
  217.       return handleMOD(i);
  218.    case OP_SQRT:
  219.       return handleSQRT(i);
  220.    case OP_EXPORT:
  221.       return handleEXPORT(i);
  222.    case OP_PFETCH:
  223.       return handlePFETCH(i);
  224.    case OP_EMIT:
  225.    case OP_RESTART:
  226.       return handleOUT(i);
  227.    case OP_RDSV:
  228.       return handleRDSV(i);
  229.    case OP_WRSV:
  230.       return handleWRSV(i);
  231.    case OP_LOAD:
  232.       if (i->src(0).getFile() == FILE_SHADER_INPUT) {
  233.          if (prog->getType() == Program::TYPE_COMPUTE) {
  234.             i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
  235.             i->getSrc(0)->reg.fileIndex = 0;
  236.          } else
  237.          if (prog->getType() == Program::TYPE_GEOMETRY &&
  238.              i->src(0).isIndirect(0)) {
  239.             // XXX: this assumes vec4 units
  240.             Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
  241.                                     i->getIndirect(0, 0), bld.mkImm(4));
  242.             i->setIndirect(0, 0, ptr);
  243.             i->op = OP_VFETCH;
  244.          } else {
  245.             i->op = OP_VFETCH;
  246.             assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
  247.          }
  248.       } else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
  249.          if (i->src(0).isIndirect(1)) {
  250.             Value *ptr;
  251.             if (i->src(0).isIndirect(0))
  252.                ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
  253.                                 i->getIndirect(0, 1), bld.mkImm(0x1010),
  254.                                 i->getIndirect(0, 0));
  255.             else
  256.                ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
  257.                                 i->getIndirect(0, 1), bld.mkImm(16));
  258.             i->setIndirect(0, 1, NULL);
  259.             i->setIndirect(0, 0, ptr);
  260.             i->subOp = NV50_IR_SUBOP_LDC_IS;
  261.          }
  262.       }
  263.       break;
  264.    case OP_ATOM:
  265.    {
  266.       const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
  267.       handleATOM(i);
  268.       handleCasExch(i, cctl);
  269.    }
  270.       break;
  271.    case OP_SULDB:
  272.    case OP_SULDP:
  273.    case OP_SUSTB:
  274.    case OP_SUSTP:
  275.    case OP_SUREDB:
  276.    case OP_SUREDP:
  277.       handleSurfaceOpNVE4(i->asTex());
  278.       break;
  279.    case OP_DFDX:
  280.    case OP_DFDY:
  281.       handleDFDX(i);
  282.       break;
  283.    case OP_POPCNT:
  284.       handlePOPCNT(i);
  285.       break;
  286.    default:
  287.       break;
  288.    }
  289.    return true;
  290. }
  291.  
  292. } // namespace nv50_ir
  293.