Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright 2011 Christoph Bumiller
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
 * OTHER DEALINGS IN THE SOFTWARE.
21
 */
22
 
23
#include "codegen/nv50_ir.h"
24
#include "codegen/nv50_ir_target.h"
25
 
26
namespace nv50_ir {
27
 
28
// Number of source operands of each operation, one entry per opcode in the
// order named by the trailing comments. Target::create() statically asserts
// that this table holds OP_LAST + 1 entries, so - like operationClass[] - it
// needs a final entry for OP_LAST itself.
const uint8_t Target::operationSrcNr[] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK
   0, 0, 0,                // PRERET, PRECONT, PREBREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
   1, 1,                   // EMIT, RESTART
   1, 1, 1,                // TEX, TXB, TXL,
   1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 3,             // SUBFM, SUCLAMP, SUEAU, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
   2, 3, 2, 1, 3,          // POPCNT, INSBF, EXTBF, BFIND, PERMT
   2, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   3,                      // SHFL
   0                       // LAST
};
60
 
61
// Coarse class of each operation, one entry per opcode in opcode order and
// terminated by an entry for OP_LAST. Target::create() statically asserts
// that the table holds OP_LAST + 1 entries.
const OpClass Target::operationClass[] =
{
   OPCLASS_OTHER,     // NOP
   OPCLASS_PSEUDO,    // PHI
   OPCLASS_PSEUDO,    // UNION
   OPCLASS_PSEUDO,    // SPLIT
   OPCLASS_PSEUDO,    // MERGE
   OPCLASS_PSEUDO,    // CONSTRAINT
   OPCLASS_MOVE,      // MOV
   OPCLASS_LOAD,      // LOAD
   OPCLASS_STORE,     // STORE
   OPCLASS_ARITH,     // ADD
   OPCLASS_ARITH,     // SUB
   OPCLASS_ARITH,     // MUL
   OPCLASS_ARITH,     // DIV
   OPCLASS_ARITH,     // MOD
   OPCLASS_ARITH,     // MAD
   OPCLASS_ARITH,     // FMA
   OPCLASS_ARITH,     // SAD
   OPCLASS_CONVERT,   // ABS
   OPCLASS_CONVERT,   // NEG
   OPCLASS_LOGIC,     // NOT
   OPCLASS_LOGIC,     // AND
   OPCLASS_LOGIC,     // OR
   OPCLASS_LOGIC,     // XOR
   OPCLASS_SHIFT,     // SHL
   OPCLASS_SHIFT,     // SHR
   OPCLASS_COMPARE,   // MAX
   OPCLASS_COMPARE,   // MIN
   OPCLASS_CONVERT,   // SAT
   OPCLASS_CONVERT,   // CEIL
   OPCLASS_CONVERT,   // FLOOR
   OPCLASS_CONVERT,   // TRUNC
   OPCLASS_CONVERT,   // CVT
   OPCLASS_COMPARE,   // SET_AND
   OPCLASS_COMPARE,   // SET_OR
   OPCLASS_COMPARE,   // SET_XOR
   OPCLASS_COMPARE,   // SET
   OPCLASS_COMPARE,   // SELP
   OPCLASS_COMPARE,   // SLCT
   OPCLASS_SFU,       // RCP
   OPCLASS_SFU,       // RSQ
   OPCLASS_SFU,       // LG2
   OPCLASS_SFU,       // SIN
   OPCLASS_SFU,       // COS
   OPCLASS_SFU,       // EX2
   OPCLASS_SFU,       // EXP
   OPCLASS_SFU,       // LOG
   OPCLASS_SFU,       // PRESIN
   OPCLASS_SFU,       // PREEX2
   OPCLASS_SFU,       // SQRT
   OPCLASS_SFU,       // POW
   OPCLASS_FLOW,      // BRA
   OPCLASS_FLOW,      // CALL
   OPCLASS_FLOW,      // RET
   OPCLASS_FLOW,      // CONT
   OPCLASS_FLOW,      // BREAK
   OPCLASS_FLOW,      // PRERET
   OPCLASS_FLOW,      // PRECONT
   OPCLASS_FLOW,      // PREBREAK
   OPCLASS_FLOW,      // BRKPT
   OPCLASS_FLOW,      // JOINAT
   OPCLASS_FLOW,      // JOIN
   OPCLASS_FLOW,      // DISCARD
   OPCLASS_FLOW,      // EXIT
   OPCLASS_CONTROL,   // MEMBAR
   OPCLASS_LOAD,      // VFETCH
   OPCLASS_OTHER,     // PFETCH
   OPCLASS_STORE,     // EXPORT
   OPCLASS_SFU,       // LINTERP
   OPCLASS_SFU,       // PINTERP
   OPCLASS_CONTROL,   // EMIT
   OPCLASS_CONTROL,   // RESTART
   OPCLASS_TEXTURE,   // TEX
   OPCLASS_TEXTURE,   // TXB
   OPCLASS_TEXTURE,   // TXL
   OPCLASS_TEXTURE,   // TXF
   OPCLASS_TEXTURE,   // TXQ
   OPCLASS_TEXTURE,   // TXD
   OPCLASS_TEXTURE,   // TXG
   OPCLASS_TEXTURE,   // TXLQ
   OPCLASS_TEXTURE,   // TEXCSAA
   OPCLASS_TEXTURE,   // TEXPREP
   OPCLASS_SURFACE,   // SULDB
   OPCLASS_SURFACE,   // SULDP
   // NOTE(review): SUSTB is the only surface op not classed as SURFACE;
   // looks like it may be unintended - confirm before changing.
   OPCLASS_ATOMIC,    // SUSTB
   OPCLASS_SURFACE,   // SUSTP
   OPCLASS_SURFACE,   // SUREDB
   OPCLASS_SURFACE,   // SUREDP
   OPCLASS_SURFACE,   // SULEA
   OPCLASS_OTHER,     // SUBFM
   OPCLASS_OTHER,     // SUCLAMP
   OPCLASS_OTHER,     // SUEAU
   OPCLASS_ARITH,     // MADSP
   OPCLASS_OTHER,     // TEXBAR
   OPCLASS_OTHER,     // DFDX
   OPCLASS_OTHER,     // DFDY
   OPCLASS_OTHER,     // RDSV
   OPCLASS_OTHER,     // WRSV
   OPCLASS_OTHER,     // PIXLD
   OPCLASS_OTHER,     // QUADOP
   OPCLASS_CONTROL,   // QUADON
   OPCLASS_CONTROL,   // QUADPOP
   OPCLASS_BITFIELD,  // POPCNT
   OPCLASS_BITFIELD,  // INSBF
   OPCLASS_BITFIELD,  // EXTBF
   OPCLASS_BITFIELD,  // BFIND
   OPCLASS_BITFIELD,  // PERMT
   OPCLASS_ATOMIC,    // ATOM
   OPCLASS_CONTROL,   // BAR
   OPCLASS_VECTOR,    // VADD
   OPCLASS_VECTOR,    // VAVG
   OPCLASS_VECTOR,    // VMIN
   OPCLASS_VECTOR,    // VMAX
   OPCLASS_VECTOR,    // VSAD
   OPCLASS_VECTOR,    // VSET
   OPCLASS_VECTOR,    // VSHR
   OPCLASS_VECTOR,    // VSHL
   OPCLASS_VECTOR,    // VSEL
   OPCLASS_CONTROL,   // CCTL
   OPCLASS_OTHER,     // SHFL
   OPCLASS_PSEUDO     // LAST
};
134
 
135
 
136
// Per-family target factories used by Target::create(); they are declared
// here and not defined in this file.
Target *getTargetGM107(unsigned chipset);
Target *getTargetNVC0(unsigned chipset);
Target *getTargetNV50(unsigned chipset);
139
 
140
// Creates the Target matching @chipset. Only the chipset id with its low
// four bits cleared is used to pick the family; the full id is passed on to
// the family factory. Logs an error and returns 0 for unknown families.
Target *Target::create(unsigned int chipset)
{
   // both per-opcode tables must cover every opcode up to and incl. OP_LAST
   STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1);
   STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);

   const unsigned int family = chipset & ~0xf;

   if (family == 0x110)
      return getTargetGM107(chipset);

   if (family == 0xc0 || family == 0xd0 || family == 0xe0 ||
       family == 0xf0 || family == 0x100)
      return getTargetNVC0(chipset);

   if (family == 0x50 || family == 0x80 || family == 0x90 || family == 0xa0)
      return getTargetNV50(chipset);

   ERROR("unsupported target: NV%x\n", chipset);
   return 0;
}
163
 
164
// Deletes @targ; counterpart of Target::create(). Passing NULL is harmless
// because deleting a null pointer is a no-op.
void
Target::destroy(Target *targ)
{
   delete targ;
}
168
 
169
// Binds the emitter to @target and starts with no output buffer and no
// relocation table.
CodeEmitter::CodeEmitter(const Target *target) : targ(target)
{
   // addReloc() tests relocInfo against NULL and printBinary() reads
   // code/codeSize, so none of these may be left indeterminate. They are
   // assigned in the body rather than the initializer list because the
   // member declaration order is not visible from this file.
   code = NULL;
   codeSize = 0;
   codeSizeLimit = 0;
   relocInfo = NULL;
}
172
 
173
void
174
CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
175
{
176
   code = reinterpret_cast(ptr);
177
   codeSize = 0;
178
   codeSizeLimit = size;
179
}
180
 
181
void
182
CodeEmitter::printBinary() const
183
{
184
   uint32_t *bin = code - codeSize / 4;
185
   INFO("program binary (%u bytes)", codeSize);
186
   for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
187
      if ((pos % 8) == 0)
188
         INFO("\n");
189
      INFO("%08x ", bin[pos]);
190
   }
191
   INFO("\n");
192
}
193
 
194
// Returns the number of 56-byte units needed to hold @size bytes, i.e.
// ceil(size / 56). Written without "size + 55" so that sizes close to
// UINT32_MAX do not wrap around to a tiny result.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   return size / 56 + ((size % 56) ? 1 : 0);
}
198
 
199
void
200
CodeEmitter::prepareEmission(Program *prog)
201
{
202
   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
203
        !fi.end(); fi.next()) {
204
      Function *func = reinterpret_cast(fi.get());
205
      func->binPos = prog->binSize;
206
      prepareEmission(func);
207
 
208
      // adjust sizes & positions for schedulding info:
209
      if (prog->getTarget()->hasSWSched) {
210
         uint32_t adjPos = func->binPos;
211
         BasicBlock *bb = NULL;
212
         for (int i = 0; i < func->bbCount; ++i) {
213
            bb = func->bbArray[i];
214
            int32_t adjSize = bb->binSize;
215
            if (adjPos % 64) {
216
               adjSize -= 64 - adjPos % 64;
217
               if (adjSize < 0)
218
                  adjSize = 0;
219
            }
220
            adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
221
            bb->binPos = adjPos;
222
            bb->binSize = adjSize;
223
            adjPos += adjSize;
224
         }
225
         if (bb)
226
            func->binSize = adjPos - func->binPos;
227
      }
228
 
229
      prog->binSize += func->binSize;
230
   }
231
}
232
 
233
void
234
CodeEmitter::prepareEmission(Function *func)
235
{
236
   func->bbCount = 0;
237
   func->bbArray = new BasicBlock * [func->cfg.getSize()];
238
 
239
   BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
240
 
241
   for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
242
      prepareEmission(BasicBlock::get(*it));
243
}
244
 
245
void
246
CodeEmitter::prepareEmission(BasicBlock *bb)
247
{
248
   Instruction *i, *next;
249
   Function *func = bb->getFunction();
250
   int j;
251
   unsigned int nShort;
252
 
253
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
254
 
255
   for (; j >= 0; --j) {
256
      BasicBlock *in = func->bbArray[j];
257
      Instruction *exit = in->getExit();
258
 
259
      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
260
         in->binSize -= 8;
261
         func->binSize -= 8;
262
 
263
         for (++j; j < func->bbCount; ++j)
264
            func->bbArray[j]->binPos -= 8;
265
 
266
         in->remove(exit);
267
      }
268
      bb->binPos = in->binPos + in->binSize;
269
      if (in->binSize) // no more no-op branches to bb
270
         break;
271
   }
272
   func->bbArray[func->bbCount++] = bb;
273
 
274
   if (!bb->getExit())
275
      return;
276
 
277
   // determine encoding size, try to group short instructions
278
   nShort = 0;
279
   for (i = bb->getEntry(); i; i = next) {
280
      next = i->next;
281
 
282
      if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
283
         bb->remove(i);
284
         continue;
285
      }
286
 
287
      i->encSize = getMinEncodingSize(i);
288
      if (next && i->encSize < 8)
289
         ++nShort;
290
      else
291
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
292
         if (i->isCommutationLegal(i->next)) {
293
            bb->permuteAdjacent(i, next);
294
            next->encSize = 4;
295
            next = i;
296
            i = i->prev;
297
            ++nShort;
298
         } else
299
         if (i->isCommutationLegal(i->prev) && next->next) {
300
            bb->permuteAdjacent(i->prev, i);
301
            next->encSize = 4;
302
            next = next->next;
303
            bb->binSize += 4;
304
            ++nShort;
305
         } else {
306
            i->encSize = 8;
307
            i->prev->encSize = 8;
308
            bb->binSize += 4;
309
            nShort = 0;
310
         }
311
      } else {
312
         i->encSize = 8;
313
         if (nShort & 1) {
314
            i->prev->encSize = 8;
315
            bb->binSize += 4;
316
         }
317
         nShort = 0;
318
      }
319
      bb->binSize += i->encSize;
320
   }
321
 
322
   if (bb->getExit()->encSize == 4) {
323
      assert(nShort);
324
      bb->getExit()->encSize = 8;
325
      bb->binSize += 4;
326
 
327
      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
328
         bb->binSize += 8;
329
         bb->getExit()->prev->encSize = 8;
330
      }
331
   }
332
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
333
 
334
   func->binSize += bb->binSize;
335
}
336
 
337
void
338
Program::emitSymbolTable(struct nv50_ir_prog_info *info)
339
{
340
   unsigned int n = 0, nMax = allFuncs.getSize();
341
 
342
   info->bin.syms =
343
      (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
344
 
345
   for (ArrayList::Iterator fi = allFuncs.iterator();
346
        !fi.end();
347
        fi.next(), ++n) {
348
      Function *f = (Function *)fi.get();
349
      assert(n < nMax);
350
 
351
      info->bin.syms[n].label = f->getLabel();
352
      info->bin.syms[n].offset = f->binPos;
353
   }
354
 
355
   info->bin.numSyms = n;
356
}
357
 
358
bool
359
Program::emitBinary(struct nv50_ir_prog_info *info)
360
{
361
   CodeEmitter *emit = target->getCodeEmitter(progType);
362
 
363
   emit->prepareEmission(this);
364
 
365
   if (dbgFlags & NV50_IR_DEBUG_BASIC)
366
      this->print();
367
 
368
   if (!binSize) {
369
      code = NULL;
370
      return false;
371
   }
372
   code = reinterpret_cast(MALLOC(binSize));
373
   if (!code)
374
      return false;
375
   emit->setCodeLocation(code, binSize);
376
 
377
   for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
378
      Function *fn = reinterpret_cast(fi.get());
379
 
380
      assert(emit->getCodeSize() == fn->binPos);
381
 
382
      for (int b = 0; b < fn->bbCount; ++b) {
383
         for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
384
            emit->emitInstruction(i);
385
            if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
386
               info->io.fp64 = true;
387
         }
388
      }
389
   }
390
   info->bin.relocData = emit->getRelocInfo();
391
 
392
   emitSymbolTable(info);
393
 
394
   // the nvc0 driver will print the binary iself together with the header
395
   if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
396
      emit->printBinary();
397
 
398
   delete emit;
399
   return true;
400
}
401
 
402
#define RELOC_ALLOC_INCREMENT 8
403
 
404
bool
405
CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
406
                      int s)
407
{
408
   unsigned int n = relocInfo ? relocInfo->count : 0;
409
 
410
   if (!(n % RELOC_ALLOC_INCREMENT)) {
411
      size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
412
      relocInfo = reinterpret_cast(
413
         REALLOC(relocInfo, n ? size : 0,
414
                 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
415
      if (!relocInfo)
416
         return false;
417
      if (n == 0)
418
         memset(relocInfo, 0, sizeof(RelocInfo));
419
   }
420
   ++relocInfo->count;
421
 
422
   relocInfo->entry[n].data = data;
423
   relocInfo->entry[n].mask = m;
424
   relocInfo->entry[n].offset = codeSize + w * 4;
425
   relocInfo->entry[n].bitPos = s;
426
   relocInfo->entry[n].type = ty;
427
 
428
   return true;
429
}
430
 
431
void
432
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
433
{
434
   uint32_t value = 0;
435
 
436
   switch (type) {
437
   case TYPE_CODE: value = info->codePos; break;
438
   case TYPE_BUILTIN: value = info->libPos; break;
439
   case TYPE_DATA: value = info->dataPos; break;
440
   default:
441
      assert(0);
442
      break;
443
   }
444
   value += data;
445
   value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
446
 
447
   binary[offset / 4] &= ~mask;
448
   binary[offset / 4] |= value & mask;
449
}
450
 
451
} // namespace nv50_ir
452
 
453
 
454
#include "codegen/nv50_ir_driver.h"
455
 
456
extern "C" {
457
 
458
void
459
nv50_ir_relocate_code(void *relocData, uint32_t *code,
460
                      uint32_t codePos,
461
                      uint32_t libPos,
462
                      uint32_t dataPos)
463
{
464
   nv50_ir::RelocInfo *info = reinterpret_cast(relocData);
465
 
466
   info->codePos = codePos;
467
   info->libPos = libPos;
468
   info->dataPos = dataPos;
469
 
470
   for (unsigned int i = 0; i < info->count; ++i)
471
      info->entry[i].apply(code, info);
472
}
473
 
474
void
475
nv50_ir_get_target_library(uint32_t chipset,
476
                           const uint32_t **code, uint32_t *size)
477
{
478
   nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
479
   targ->getBuiltinCode(code, size);
480
   nv50_ir::Target::destroy(targ);
481
}
482
 
483
}