Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
22
 
23
#include "nv50/codegen/nv50_ir.h"
24
#include "nv50/codegen/nv50_ir_target.h"
25
 
26
namespace nv50_ir {
27
 
28
// Per-opcode source operand count table with OP_LAST + 1 entries.
// The trailing comment on each row names the opcodes the values belong to,
// in table order.
// NOTE(review): presumably this is the default number of sources per opcode;
// confirm against the users of the table.
const uint8_t Target::operationSrcNr[OP_LAST + 1] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
   0, 0, 0,                // PRERET, PRECONT, PREBREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
   1, 1,                   // EMIT, RESTART
   1, 1, 1,                // TEX, TXB, TXL,
   1, 1, 1, 1, 1, 2,       // TXF, TXQ, TXD, TXG, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 3,             // SUBFM, SUCLAMP, SUEAU, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 2, 0, 0,          // RDSV, WRSV, QUADOP, QUADON, QUADPOP
   2, 3, 2, 3,             // POPCNT, INSBF, EXTBF, PERMT
   2, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   0                       // LAST (was implicitly zero-initialized)
};
59
 
60
// Per-opcode OpClass table with OP_LAST + 1 entries. The comment above each
// group names the opcodes the following entries belong to, in table order.
const OpClass Target::operationClass[OP_LAST + 1] =
{
   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
   OPCLASS_OTHER,
   OPCLASS_PSEUDO,
   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
   // MOV; LOAD; STORE
   OPCLASS_MOVE,
   OPCLASS_LOAD,
   OPCLASS_STORE,
   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
   OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
   OPCLASS_SHIFT, OPCLASS_SHIFT,
   // MAX, MIN
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // SAT, CEIL, FLOOR, TRUNC; CVT
   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_CONVERT,
   // SET(AND,OR,XOR); SELP, SLCT
   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU,
   // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   // DISCARD, EXIT
   OPCLASS_FLOW, OPCLASS_FLOW,
   // MEMBAR
   OPCLASS_CONTROL,
   // VFETCH, PFETCH, EXPORT
   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
   // LINTERP, PINTERP
   OPCLASS_SFU, OPCLASS_SFU,
   // EMIT, RESTART
   OPCLASS_CONTROL, OPCLASS_CONTROL,
   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA; TEXPREP
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE,
   // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
   // NOTE(review): SUSTB is classed as OPCLASS_ATOMIC while the other
   // surface ops use OPCLASS_SURFACE -- confirm this is intentional.
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
   // SUBFM, SUCLAMP, SUEAU, MADSP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
   // TEXBAR
   OPCLASS_OTHER,
   // DFDX, DFDY, RDSV, WRSV; QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // POPCNT, INSBF, EXTBF, PERMT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSAD, VSET, VSHR, VSHL
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSEL, CCTL
   OPCLASS_VECTOR, OPCLASS_CONTROL,
   OPCLASS_PSEUDO // LAST
};
130
 
131
 
132
// Factory functions for the concrete targets; only declared here.
extern Target *getTargetNVC0(unsigned int chipset);
extern Target *getTargetNV50(unsigned int chipset);
134
 
135
// Create the code generation target for @chipset.
//
// Only the bits selected by the 0xf0 mask choose the backend: 0xc0, 0xd0,
// 0xe0 and NVISA_GK110_CHIPSET go to getTargetNVC0(); 0x50, 0x80, 0x90 and
// 0xa0 go to getTargetNV50(). For any other value an error is logged and
// NULL is returned.
Target *Target::create(unsigned int chipset)
{
   switch (chipset & 0xf0) {
   case 0xc0:
   case 0xd0:
   case 0xe0:
   case NVISA_GK110_CHIPSET:
      return getTargetNVC0(chipset);
   case 0x50:
   case 0x80:
   case 0x90:
   case 0xa0:
      return getTargetNV50(chipset);
   default:
      ERROR("unsupported target: NV%x\n", chipset);
      return NULL;
   }
}
153
 
154
// Delete @targ. Passing a null pointer is harmless since `delete` ignores it.
void Target::destroy(Target *targ)
{
   delete targ;
}
158
 
159
// Remember the target this emitter generates code for; no other member is
// initialized by this constructor.
CodeEmitter::CodeEmitter(const Target *target) : targ(target)
{
}
162
 
163
void
164
CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
165
{
166
   code = reinterpret_cast(ptr);
167
   codeSize = 0;
168
   codeSizeLimit = size;
169
}
170
 
171
void
172
CodeEmitter::printBinary() const
173
{
174
   uint32_t *bin = code - codeSize / 4;
175
   INFO("program binary (%u bytes)", codeSize);
176
   for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
177
      if ((pos % 8) == 0)
178
         INFO("\n");
179
      INFO("%08x ", bin[pos]);
180
   }
181
   INFO("\n");
182
}
183
 
184
// Return the number of 56-byte units needed to hold @size bytes, rounded up.
// Written as quotient plus remainder test so that sizes close to UINT32_MAX
// do not wrap around the way `size + 55` would.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   return size / 56 + ((size % 56) ? 1 : 0);
}
188
 
189
void
190
CodeEmitter::prepareEmission(Program *prog)
191
{
192
   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
193
        !fi.end(); fi.next()) {
194
      Function *func = reinterpret_cast(fi.get());
195
      func->binPos = prog->binSize;
196
      prepareEmission(func);
197
 
198
      // adjust sizes & positions for schedulding info:
199
      if (prog->getTarget()->hasSWSched) {
200
         uint32_t adjPos = func->binPos;
201
         BasicBlock *bb = NULL;
202
         for (int i = 0; i < func->bbCount; ++i) {
203
            bb = func->bbArray[i];
204
            int32_t adjSize = bb->binSize;
205
            if (adjPos % 64) {
206
               adjSize -= 64 - adjPos % 64;
207
               if (adjSize < 0)
208
                  adjSize = 0;
209
            }
210
            adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
211
            bb->binPos = adjPos;
212
            bb->binSize = adjSize;
213
            adjPos += adjSize;
214
         }
215
         if (bb)
216
            func->binSize = adjPos - func->binPos;
217
      }
218
 
219
      prog->binSize += func->binSize;
220
   }
221
}
222
 
223
void
224
CodeEmitter::prepareEmission(Function *func)
225
{
226
   func->bbCount = 0;
227
   func->bbArray = new BasicBlock * [func->cfg.getSize()];
228
 
229
   BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
230
 
231
   for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
232
      prepareEmission(BasicBlock::get(*it));
233
}
234
 
235
void
236
CodeEmitter::prepareEmission(BasicBlock *bb)
237
{
238
   Instruction *i, *next;
239
   Function *func = bb->getFunction();
240
   int j;
241
   unsigned int nShort;
242
 
243
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
244
 
245
   for (; j >= 0; --j) {
246
      BasicBlock *in = func->bbArray[j];
247
      Instruction *exit = in->getExit();
248
 
249
      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
250
         in->binSize -= 8;
251
         func->binSize -= 8;
252
 
253
         for (++j; j < func->bbCount; ++j)
254
            func->bbArray[j]->binPos -= 8;
255
 
256
         in->remove(exit);
257
      }
258
      bb->binPos = in->binPos + in->binSize;
259
      if (in->binSize) // no more no-op branches to bb
260
         break;
261
   }
262
   func->bbArray[func->bbCount++] = bb;
263
 
264
   if (!bb->getExit())
265
      return;
266
 
267
   // determine encoding size, try to group short instructions
268
   nShort = 0;
269
   for (i = bb->getEntry(); i; i = next) {
270
      next = i->next;
271
 
272
      if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
273
         bb->remove(i);
274
         continue;
275
      }
276
 
277
      i->encSize = getMinEncodingSize(i);
278
      if (next && i->encSize < 8)
279
         ++nShort;
280
      else
281
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
282
         if (i->isCommutationLegal(i->next)) {
283
            bb->permuteAdjacent(i, next);
284
            next->encSize = 4;
285
            next = i;
286
            i = i->prev;
287
            ++nShort;
288
         } else
289
         if (i->isCommutationLegal(i->prev) && next->next) {
290
            bb->permuteAdjacent(i->prev, i);
291
            next->encSize = 4;
292
            next = next->next;
293
            bb->binSize += 4;
294
            ++nShort;
295
         } else {
296
            i->encSize = 8;
297
            i->prev->encSize = 8;
298
            bb->binSize += 4;
299
            nShort = 0;
300
         }
301
      } else {
302
         i->encSize = 8;
303
         if (nShort & 1) {
304
            i->prev->encSize = 8;
305
            bb->binSize += 4;
306
         }
307
         nShort = 0;
308
      }
309
      bb->binSize += i->encSize;
310
   }
311
 
312
   if (bb->getExit()->encSize == 4) {
313
      assert(nShort);
314
      bb->getExit()->encSize = 8;
315
      bb->binSize += 4;
316
 
317
      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
318
         bb->binSize += 8;
319
         bb->getExit()->prev->encSize = 8;
320
      }
321
   }
322
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));
323
 
324
   func->binSize += bb->binSize;
325
}
326
 
327
void
328
Program::emitSymbolTable(struct nv50_ir_prog_info *info)
329
{
330
   unsigned int n = 0, nMax = allFuncs.getSize();
331
 
332
   info->bin.syms =
333
      (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
334
 
335
   for (ArrayList::Iterator fi = allFuncs.iterator();
336
        !fi.end();
337
        fi.next(), ++n) {
338
      Function *f = (Function *)fi.get();
339
      assert(n < nMax);
340
 
341
      info->bin.syms[n].label = f->getLabel();
342
      info->bin.syms[n].offset = f->binPos;
343
   }
344
 
345
   info->bin.numSyms = n;
346
}
347
 
348
bool
349
Program::emitBinary(struct nv50_ir_prog_info *info)
350
{
351
   CodeEmitter *emit = target->getCodeEmitter(progType);
352
 
353
   emit->prepareEmission(this);
354
 
355
   if (dbgFlags & NV50_IR_DEBUG_BASIC)
356
      this->print();
357
 
358
   if (!binSize) {
359
      code = NULL;
360
      return false;
361
   }
362
   code = reinterpret_cast(MALLOC(binSize));
363
   if (!code)
364
      return false;
365
   emit->setCodeLocation(code, binSize);
366
 
367
   for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
368
      Function *fn = reinterpret_cast(fi.get());
369
 
370
      assert(emit->getCodeSize() == fn->binPos);
371
 
372
      for (int b = 0; b < fn->bbCount; ++b)
373
         for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
374
            emit->emitInstruction(i);
375
   }
376
   info->bin.relocData = emit->getRelocInfo();
377
 
378
   emitSymbolTable(info);
379
 
380
   // the nvc0 driver will print the binary iself together with the header
381
   if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
382
      emit->printBinary();
383
 
384
   delete emit;
385
   return true;
386
}
387
 
388
#define RELOC_ALLOC_INCREMENT 8
389
 
390
bool
391
CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
392
                      int s)
393
{
394
   unsigned int n = relocInfo ? relocInfo->count : 0;
395
 
396
   if (!(n % RELOC_ALLOC_INCREMENT)) {
397
      size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
398
      relocInfo = reinterpret_cast(
399
         REALLOC(relocInfo, n ? size : 0,
400
                 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
401
      if (!relocInfo)
402
         return false;
403
      if (n == 0)
404
         memset(relocInfo, 0, sizeof(RelocInfo));
405
   }
406
   ++relocInfo->count;
407
 
408
   relocInfo->entry[n].data = data;
409
   relocInfo->entry[n].mask = m;
410
   relocInfo->entry[n].offset = codeSize + w * 4;
411
   relocInfo->entry[n].bitPos = s;
412
   relocInfo->entry[n].type = ty;
413
 
414
   return true;
415
}
416
 
417
void
418
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
419
{
420
   uint32_t value = 0;
421
 
422
   switch (type) {
423
   case TYPE_CODE: value = info->codePos; break;
424
   case TYPE_BUILTIN: value = info->libPos; break;
425
   case TYPE_DATA: value = info->dataPos; break;
426
   default:
427
      assert(0);
428
      break;
429
   }
430
   value += data;
431
   value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
432
 
433
   binary[offset / 4] &= ~mask;
434
   binary[offset / 4] |= value & mask;
435
}
436
 
437
} // namespace nv50_ir
438
 
439
 
440
#include "nv50/codegen/nv50_ir_driver.h"
441
 
442
extern "C" {
443
 
444
void
445
nv50_ir_relocate_code(void *relocData, uint32_t *code,
446
                      uint32_t codePos,
447
                      uint32_t libPos,
448
                      uint32_t dataPos)
449
{
450
   nv50_ir::RelocInfo *info = reinterpret_cast(relocData);
451
 
452
   info->codePos = codePos;
453
   info->libPos = libPos;
454
   info->dataPos = dataPos;
455
 
456
   for (unsigned int i = 0; i < info->count; ++i)
457
      info->entry[i].apply(code, info);
458
}
459
 
460
void
461
nv50_ir_get_target_library(uint32_t chipset,
462
                           const uint32_t **code, uint32_t *size)
463
{
464
   nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
465
   targ->getBuiltinCode(code, size);
466
   nv50_ir::Target::destroy(targ);
467
}
468
 
469
}