Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright 2011 Christoph Bumiller
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
 * OTHER DEALINGS IN THE SOFTWARE.
21
 */
22
 
23
#include "codegen/nv50_ir_target_nvc0.h"
24
 
25
namespace nv50_ir {
26
 
27
// Argh, all these assertions ...
28
 
29
class CodeEmitterNVC0 : public CodeEmitter
30
{
31
public:
32
   CodeEmitterNVC0(const TargetNVC0 *);
33
 
34
   virtual bool emitInstruction(Instruction *);
35
   virtual uint32_t getMinEncodingSize(const Instruction *) const;
36
   virtual void prepareEmission(Function *);
37
 
38
   inline void setProgramType(Program::Type pType) { progType = pType; }
39
 
40
private:
41
   const TargetNVC0 *targNVC0;
42
 
43
   Program::Type progType;
44
 
45
   const bool writeIssueDelays;
46
 
47
private:
48
   void emitForm_A(const Instruction *, uint64_t);
49
   void emitForm_B(const Instruction *, uint64_t);
50
   void emitForm_S(const Instruction *, uint32_t, bool pred);
51
 
52
   void emitPredicate(const Instruction *);
53
 
54
   void setAddress16(const ValueRef&);
55
   void setAddress24(const ValueRef&);
56
   void setAddressByFile(const ValueRef&);
57
   void setImmediate(const Instruction *, const int s); // needs op already set
58
   void setImmediateS8(const ValueRef&);
59
   void setSUConst16(const Instruction *, const int s);
60
   void setSUPred(const Instruction *, const int s);
61
 
62
   void emitCondCode(CondCode cc, int pos);
63
   void emitInterpMode(const Instruction *);
64
   void emitLoadStoreType(DataType ty);
65
   void emitSUGType(DataType);
66
   void emitCachingMode(CacheMode c);
67
 
68
   void emitShortSrc2(const ValueRef&);
69
 
70
   inline uint8_t getSRegEncoding(const ValueRef&);
71
 
72
   void roundMode_A(const Instruction *);
73
   void roundMode_C(const Instruction *);
74
   void roundMode_CS(const Instruction *);
75
 
76
   void emitNegAbs12(const Instruction *);
77
 
78
   void emitNOP(const Instruction *);
79
 
80
   void emitLOAD(const Instruction *);
81
   void emitSTORE(const Instruction *);
82
   void emitMOV(const Instruction *);
83
   void emitATOM(const Instruction *);
84
   void emitMEMBAR(const Instruction *);
85
   void emitCCTL(const Instruction *);
86
 
87
   void emitINTERP(const Instruction *);
88
   void emitPFETCH(const Instruction *);
89
   void emitVFETCH(const Instruction *);
90
   void emitEXPORT(const Instruction *);
91
   void emitOUT(const Instruction *);
92
 
93
   void emitUADD(const Instruction *);
94
   void emitFADD(const Instruction *);
95
   void emitDADD(const Instruction *);
96
   void emitUMUL(const Instruction *);
97
   void emitFMUL(const Instruction *);
98
   void emitDMUL(const Instruction *);
99
   void emitIMAD(const Instruction *);
100
   void emitISAD(const Instruction *);
101
   void emitFMAD(const Instruction *);
102
   void emitDMAD(const Instruction *);
103
   void emitMADSP(const Instruction *);
104
 
105
   void emitNOT(Instruction *);
106
   void emitLogicOp(const Instruction *, uint8_t subOp);
107
   void emitPOPC(const Instruction *);
108
   void emitINSBF(const Instruction *);
109
   void emitEXTBF(const Instruction *);
110
   void emitBFIND(const Instruction *);
111
   void emitPERMT(const Instruction *);
112
   void emitShift(const Instruction *);
113
 
114
   void emitSFnOp(const Instruction *, uint8_t subOp);
115
 
116
   void emitCVT(Instruction *);
117
   void emitMINMAX(const Instruction *);
118
   void emitPreOp(const Instruction *);
119
 
120
   void emitSET(const CmpInstruction *);
121
   void emitSLCT(const CmpInstruction *);
122
   void emitSELP(const Instruction *);
123
 
124
   void emitTEXBAR(const Instruction *);
125
   void emitTEX(const TexInstruction *);
126
   void emitTEXCSAA(const TexInstruction *);
127
   void emitTXQ(const TexInstruction *);
128
 
129
   void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
130
 
131
   void emitFlow(const Instruction *);
132
   void emitBAR(const Instruction *);
133
 
134
   void emitSUCLAMPMode(uint16_t);
135
   void emitSUCalc(Instruction *);
136
   void emitSULDGB(const TexInstruction *);
137
   void emitSUSTGx(const TexInstruction *);
138
 
139
   void emitVSHL(const Instruction *);
140
   void emitVectorSubOp(const Instruction *);
141
 
142
   void emitPIXLD(const Instruction *);
143
 
144
   inline void defId(const ValueDef&, const int pos);
145
   inline void defId(const Instruction *, int d, const int pos);
146
   inline void srcId(const ValueRef&, const int pos);
147
   inline void srcId(const ValueRef *, const int pos);
148
   inline void srcId(const Instruction *, int s, const int pos);
149
   inline void srcAddr32(const ValueRef&, int pos, int shr);
150
 
151
   inline bool isLIMM(const ValueRef&, DataType ty);
152
};
153
 
154
// For better visibility: HEX64 pastes two hex digit groups into a single
// unsigned 64-bit literal, so an opcode can be written as a high and a low
// 32-bit half.
#define HEX64(h, l) 0x##h##l##ULL

// Shorthands for the reg.data of the representative value behind a source
// (SDATA) or a definition (DDATA); both expand to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
159
 
160
void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
161
{
162
   code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
163
}
164
 
165
void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
166
{
167
   code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
168
}
169
 
170
void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
171
{
172
   int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
173
   code[pos / 32] |= r << (pos % 32);
174
}
175
 
176
void
177
CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
178
{
179
   const uint32_t offset = SDATA(src).offset >> shr;
180
 
181
   code[pos / 32] |= offset << (pos % 32);
182
   if (pos && (pos < 32))
183
      code[1] |= offset >> (32 - pos);
184
}
185
 
186
void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
187
{
188
   code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
189
}
190
 
191
void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
192
{
193
   int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
194
   code[pos / 32] |= r << (pos % 32);
195
}
196
 
197
bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
198
{
199
   const ImmediateValue *imm = ref.get()->asImm();
200
 
201
   return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
202
}
203
 
204
void
205
CodeEmitterNVC0::roundMode_A(const Instruction *insn)
206
{
207
   switch (insn->rnd) {
208
   case ROUND_M: code[1] |= 1 << 23; break;
209
   case ROUND_P: code[1] |= 2 << 23; break;
210
   case ROUND_Z: code[1] |= 3 << 23; break;
211
   default:
212
      assert(insn->rnd == ROUND_N);
213
      break;
214
   }
215
}
216
 
217
void
218
CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
219
{
220
   if (i->src(1).mod.abs()) code[0] |= 1 << 6;
221
   if (i->src(0).mod.abs()) code[0] |= 1 << 7;
222
   if (i->src(1).mod.neg()) code[0] |= 1 << 8;
223
   if (i->src(0).mod.neg()) code[0] |= 1 << 9;
224
}
225
 
226
// Translates cc into its numeric encoding and ORs it into the instruction
// words at bit position pos. An unknown code asserts and encodes as 0.
void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
{
   uint8_t enc;

   switch (cc) {
   // comparison codes; the *U variants are the plain value with bit 3 set
   case CC_FL:  enc = 0x0; break;
   case CC_LT:  enc = 0x1; break;
   case CC_EQ:  enc = 0x2; break;
   case CC_LE:  enc = 0x3; break;
   case CC_GT:  enc = 0x4; break;
   case CC_NE:  enc = 0x5; break;
   case CC_GE:  enc = 0x6; break;
   case CC_LTU: enc = 0x9; break;
   case CC_EQU: enc = 0xa; break;
   case CC_LEU: enc = 0xb; break;
   case CC_GTU: enc = 0xc; break;
   case CC_NEU: enc = 0xd; break;
   case CC_GEU: enc = 0xe; break;
   case CC_TR:  enc = 0xf; break;

   // flag tests
   case CC_NO: enc = 0x10; break;
   case CC_NC: enc = 0x11; break;
   case CC_NS: enc = 0x12; break;
   case CC_NA: enc = 0x13; break;
   case CC_A:  enc = 0x14; break;
   case CC_S:  enc = 0x15; break;
   case CC_C:  enc = 0x16; break;
   case CC_O:  enc = 0x17; break;

   default:
      enc = 0;
      assert(!"invalid condition code");
      break;
   }

   code[pos / 32] |= enc << (pos % 32);
}
262
 
263
void
264
CodeEmitterNVC0::emitPredicate(const Instruction *i)
265
{
266
   if (i->predSrc >= 0) {
267
      assert(i->getPredicate()->reg.file == FILE_PREDICATE);
268
      srcId(i->src(i->predSrc), 10);
269
      if (i->cc == CC_NOT_P)
270
         code[0] |= 0x2000; // negate
271
   } else {
272
      code[0] |= 0x1c00;
273
   }
274
}
275
 
276
void
277
CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
278
{
279
   switch (src.getFile()) {
280
   case FILE_MEMORY_GLOBAL:
281
      srcAddr32(src, 26, 0);
282
      break;
283
   case FILE_MEMORY_LOCAL:
284
   case FILE_MEMORY_SHARED:
285
      setAddress24(src);
286
      break;
287
   default:
288
      assert(src.getFile() == FILE_MEMORY_CONST);
289
      setAddress16(src);
290
      break;
291
   }
292
}
293
 
294
void
295
CodeEmitterNVC0::setAddress16(const ValueRef& src)
296
{
297
   Symbol *sym = src.get()->asSym();
298
 
299
   assert(sym);
300
 
301
   code[0] |= (sym->reg.data.offset & 0x003f) << 26;
302
   code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
303
}
304
 
305
void
306
CodeEmitterNVC0::setAddress24(const ValueRef& src)
307
{
308
   Symbol *sym = src.get()->asSym();
309
 
310
   assert(sym);
311
 
312
   code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
313
   code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
314
}
315
 
316
void
317
CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
318
{
319
   const ImmediateValue *imm = i->src(s).get()->asImm();
320
   uint32_t u32;
321
 
322
   assert(imm);
323
   u32 = imm->reg.data.u32;
324
 
325
   if ((code[0] & 0xf) == 0x2) {
326
      // LIMM
327
      code[0] |= (u32 & 0x3f) << 26;
328
      code[1] |= u32 >> 6;
329
   } else
330
   if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
331
      // integer immediate
332
      assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
333
      assert(!(code[1] & 0xc000));
334
      u32 &= 0xfffff;
335
      code[0] |= (u32 & 0x3f) << 26;
336
      code[1] |= 0xc000 | (u32 >> 6);
337
   } else {
338
      // float immediate
339
      assert(!(u32 & 0x00000fff));
340
      assert(!(code[1] & 0xc000));
341
      code[0] |= ((u32 >> 12) & 0x3f) << 26;
342
      code[1] |= 0xc000 | (u32 >> 18);
343
   }
344
}
345
 
346
void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
347
{
348
   const ImmediateValue *imm = ref.get()->asImm();
349
 
350
   int8_t s8 = static_cast(imm->reg.data.s32);
351
 
352
   assert(s8 == imm->reg.data.s32);
353
 
354
   code[0] |= (s8 & 0x3f) << 26;
355
   code[0] |= (s8 >> 6) << 8;
356
}
357
 
358
void
359
CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
360
{
361
   code[0] = opc;
362
   code[1] = opc >> 32;
363
 
364
   emitPredicate(i);
365
 
366
   defId(i->def(0), 14);
367
 
368
   int s1 = 26;
369
   if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
370
      s1 = 49;
371
 
372
   for (int s = 0; s < 3 && i->srcExists(s); ++s) {
373
      switch (i->getSrc(s)->reg.file) {
374
      case FILE_MEMORY_CONST:
375
         assert(!(code[1] & 0xc000));
376
         code[1] |= (s == 2) ? 0x8000 : 0x4000;
377
         code[1] |= i->getSrc(s)->reg.fileIndex << 10;
378
         setAddress16(i->src(s));
379
         break;
380
      case FILE_IMMEDIATE:
381
         assert(s == 1 ||
382
                i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
383
         assert(!(code[1] & 0xc000));
384
         setImmediate(i, s);
385
         break;
386
      case FILE_GPR:
387
         if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
388
            break;
389
         srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
390
         break;
391
      default:
392
         // ignore here, can be predicate or flags, but must not be address
393
         break;
394
      }
395
   }
396
}
397
 
398
void
399
CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
400
{
401
   code[0] = opc;
402
   code[1] = opc >> 32;
403
 
404
   emitPredicate(i);
405
 
406
   defId(i->def(0), 14);
407
 
408
   switch (i->src(0).getFile()) {
409
   case FILE_MEMORY_CONST:
410
      assert(!(code[1] & 0xc000));
411
      code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
412
      setAddress16(i->src(0));
413
      break;
414
   case FILE_IMMEDIATE:
415
      assert(!(code[1] & 0xc000));
416
      setImmediate(i, 0);
417
      break;
418
   case FILE_GPR:
419
      srcId(i->src(0), 26);
420
      break;
421
   default:
422
      // ignore here, can be predicate or flags, but must not be address
423
      break;
424
   }
425
}
426
 
427
void
428
CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
429
{
430
   code[0] = opc;
431
 
432
   int ss2a = 0;
433
   if (opc == 0x0d || opc == 0x0e)
434
      ss2a = 2;
435
 
436
   defId(i->def(0), 14);
437
   srcId(i->src(0), 20);
438
 
439
   assert(pred || (i->predSrc < 0));
440
   if (pred)
441
      emitPredicate(i);
442
 
443
   for (int s = 1; s < 3 && i->srcExists(s); ++s) {
444
      if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
445
         assert(!(code[0] & (0x300 >> ss2a)));
446
         switch (i->src(s).get()->reg.fileIndex) {
447
         case 0:  code[0] |= 0x100 >> ss2a; break;
448
         case 1:  code[0] |= 0x200 >> ss2a; break;
449
         case 16: code[0] |= 0x300 >> ss2a; break;
450
         default:
451
            ERROR("invalid c[] space for short form\n");
452
            break;
453
         }
454
         if (s == 1)
455
            code[0] |= i->getSrc(s)->reg.data.offset << 24;
456
         else
457
            code[0] |= i->getSrc(s)->reg.data.offset << 6;
458
      } else
459
      if (i->src(s).getFile() == FILE_IMMEDIATE) {
460
         assert(s == 1);
461
         setImmediateS8(i->src(s));
462
      } else
463
      if (i->src(s).getFile() == FILE_GPR) {
464
         srcId(i->src(s), (s == 1) ? 26 : 8);
465
      }
466
   }
467
}
468
 
469
void
470
CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
471
{
472
   if (src.getFile() == FILE_MEMORY_CONST) {
473
      switch (src.get()->reg.fileIndex) {
474
      case 0:  code[0] |= 0x100; break;
475
      case 1:  code[0] |= 0x200; break;
476
      case 16: code[0] |= 0x300; break;
477
      default:
478
         assert(!"unsupported file index for short op");
479
         break;
480
      }
481
      srcAddr32(src, 20, 2);
482
   } else {
483
      srcId(src, 20);
484
      assert(src.getFile() == FILE_GPR);
485
   }
486
}
487
 
488
void
489
CodeEmitterNVC0::emitNOP(const Instruction *i)
490
{
491
   code[0] = 0x000001e4;
492
   code[1] = 0x40000000;
493
   emitPredicate(i);
494
}
495
 
496
void
497
CodeEmitterNVC0::emitFMAD(const Instruction *i)
498
{
499
   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
500
 
501
   if (i->encSize == 8) {
502
      if (isLIMM(i->src(1), TYPE_F32)) {
503
         emitForm_A(i, HEX64(20000000, 00000002));
504
      } else {
505
         emitForm_A(i, HEX64(30000000, 00000000));
506
 
507
         if (i->src(2).mod.neg())
508
            code[0] |= 1 << 8;
509
      }
510
      roundMode_A(i);
511
 
512
      if (neg1)
513
         code[0] |= 1 << 9;
514
 
515
      if (i->saturate)
516
         code[0] |= 1 << 5;
517
      if (i->ftz)
518
         code[0] |= 1 << 6;
519
   } else {
520
      assert(!i->saturate && !i->src(2).mod.neg());
521
      emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
522
                 false);
523
      if (neg1)
524
         code[0] |= 1 << 4;
525
   }
526
}
527
 
528
void
529
CodeEmitterNVC0::emitDMAD(const Instruction *i)
530
{
531
   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
532
 
533
   emitForm_A(i, HEX64(20000000, 00000001));
534
 
535
   if (i->src(2).mod.neg())
536
      code[0] |= 1 << 8;
537
 
538
   roundMode_A(i);
539
 
540
   if (neg1)
541
      code[0] |= 1 << 9;
542
 
543
   assert(!i->saturate);
544
   assert(!i->ftz);
545
}
546
 
547
void
548
CodeEmitterNVC0::emitFMUL(const Instruction *i)
549
{
550
   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
551
 
552
   assert(i->postFactor >= -3 && i->postFactor <= 3);
553
 
554
   if (i->encSize == 8) {
555
      if (isLIMM(i->src(1), TYPE_F32)) {
556
         assert(i->postFactor == 0); // constant folded, hopefully
557
         emitForm_A(i, HEX64(30000000, 00000002));
558
      } else {
559
         emitForm_A(i, HEX64(58000000, 00000000));
560
         roundMode_A(i);
561
         code[1] |= ((i->postFactor > 0) ?
562
                     (7 - i->postFactor) : (0 - i->postFactor)) << 17;
563
      }
564
      if (neg)
565
         code[1] ^= 1 << 25; // aliases with LIMM sign bit
566
 
567
      if (i->saturate)
568
         code[0] |= 1 << 5;
569
 
570
      if (i->dnz)
571
         code[0] |= 1 << 7;
572
      else
573
      if (i->ftz)
574
         code[0] |= 1 << 6;
575
   } else {
576
      assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
577
      emitForm_S(i, 0xa8, true);
578
   }
579
}
580
 
581
void
582
CodeEmitterNVC0::emitDMUL(const Instruction *i)
583
{
584
   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
585
 
586
   emitForm_A(i, HEX64(50000000, 00000001));
587
   roundMode_A(i);
588
 
589
   if (neg)
590
      code[0] |= 1 << 9;
591
 
592
   assert(!i->saturate);
593
   assert(!i->ftz);
594
   assert(!i->dnz);
595
   assert(!i->postFactor);
596
}
597
 
598
void
599
CodeEmitterNVC0::emitUMUL(const Instruction *i)
600
{
601
   if (i->encSize == 8) {
602
      if (i->src(1).getFile() == FILE_IMMEDIATE) {
603
         emitForm_A(i, HEX64(10000000, 00000002));
604
      } else {
605
         emitForm_A(i, HEX64(50000000, 00000003));
606
      }
607
      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
608
         code[0] |= 1 << 6;
609
      if (i->sType == TYPE_S32)
610
         code[0] |= 1 << 5;
611
      if (i->dType == TYPE_S32)
612
         code[0] |= 1 << 7;
613
   } else {
614
      emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
615
 
616
      if (i->sType == TYPE_S32)
617
         code[0] |= 1 << 6;
618
   }
619
}
620
 
621
void
622
CodeEmitterNVC0::emitFADD(const Instruction *i)
623
{
624
   if (i->encSize == 8) {
625
      if (isLIMM(i->src(1), TYPE_F32)) {
626
         assert(!i->saturate);
627
         emitForm_A(i, HEX64(28000000, 00000002));
628
 
629
         code[0] |= i->src(0).mod.abs() << 7;
630
         code[0] |= i->src(0).mod.neg() << 9;
631
 
632
         if (i->src(1).mod.abs())
633
            code[1] &= 0xfdffffff;
634
         if ((i->op == OP_SUB) != static_cast(i->src(1).mod.neg()))
635
            code[1] ^= 0x02000000;
636
      } else {
637
         emitForm_A(i, HEX64(50000000, 00000000));
638
 
639
         roundMode_A(i);
640
         if (i->saturate)
641
            code[1] |= 1 << 17;
642
 
643
         emitNegAbs12(i);
644
         if (i->op == OP_SUB) code[0] ^= 1 << 8;
645
      }
646
      if (i->ftz)
647
         code[0] |= 1 << 5;
648
   } else {
649
      assert(!i->saturate && i->op != OP_SUB &&
650
             !i->src(0).mod.abs() &&
651
             !i->src(1).mod.neg() && !i->src(1).mod.abs());
652
 
653
      emitForm_S(i, 0x49, true);
654
 
655
      if (i->src(0).mod.neg())
656
         code[0] |= 1 << 7;
657
   }
658
}
659
 
660
void
661
CodeEmitterNVC0::emitDADD(const Instruction *i)
662
{
663
   assert(i->encSize == 8);
664
   emitForm_A(i, HEX64(48000000, 00000001));
665
   roundMode_A(i);
666
   assert(!i->saturate);
667
   assert(!i->ftz);
668
   emitNegAbs12(i);
669
   if (i->op == OP_SUB)
670
      code[0] ^= 1 << 8;
671
}
672
 
673
void
674
CodeEmitterNVC0::emitUADD(const Instruction *i)
675
{
676
   uint32_t addOp = 0;
677
 
678
   assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
679
   assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
680
 
681
   if (i->src(0).mod.neg())
682
      addOp |= 0x200;
683
   if (i->src(1).mod.neg())
684
      addOp |= 0x100;
685
   if (i->op == OP_SUB) {
686
      addOp ^= 0x100;
687
      assert(addOp != 0x300); // would be add-plus-one
688
   }
689
 
690
   if (i->encSize == 8) {
691
      if (isLIMM(i->src(1), TYPE_U32)) {
692
         emitForm_A(i, HEX64(08000000, 00000002));
693
         if (i->defExists(1))
694
            code[1] |= 1 << 26; // write carry
695
      } else {
696
         emitForm_A(i, HEX64(48000000, 00000003));
697
         if (i->defExists(1))
698
            code[1] |= 1 << 16; // write carry
699
      }
700
      code[0] |= addOp;
701
 
702
      if (i->saturate)
703
         code[0] |= 1 << 5;
704
      if (i->flagsSrc >= 0) // add carry
705
         code[0] |= 1 << 6;
706
   } else {
707
      assert(!(addOp & 0x100));
708
      emitForm_S(i, (addOp >> 3) |
709
                 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
710
   }
711
}
712
 
713
// TODO: shl-add
714
void
715
CodeEmitterNVC0::emitIMAD(const Instruction *i)
716
{
717
   assert(i->encSize == 8);
718
   emitForm_A(i, HEX64(20000000, 00000003));
719
 
720
   if (isSignedType(i->dType))
721
      code[0] |= 1 << 7;
722
   if (isSignedType(i->sType))
723
      code[0] |= 1 << 5;
724
 
725
   code[1] |= i->saturate << 24;
726
 
727
   if (i->flagsDef >= 0) code[1] |= 1 << 16;
728
   if (i->flagsSrc >= 0) code[1] |= 1 << 23;
729
 
730
   if (i->src(2).mod.neg()) code[0] |= 0x10;
731
   if (i->src(1).mod.neg() ^
732
       i->src(0).mod.neg()) code[0] |= 0x20;
733
 
734
   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
735
      code[0] |= 1 << 6;
736
}
737
 
738
void
739
CodeEmitterNVC0::emitMADSP(const Instruction *i)
740
{
741
   assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
742
 
743
   emitForm_A(i, HEX64(00000000, 00000003));
744
 
745
   if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
746
      code[1] |= 0x01800000;
747
   } else {
748
      code[0] |= (i->subOp & 0x00f) << 7;
749
      code[0] |= (i->subOp & 0x0f0) << 1;
750
      code[0] |= (i->subOp & 0x100) >> 3;
751
      code[0] |= (i->subOp & 0x200) >> 2;
752
      code[1] |= (i->subOp & 0xc00) << 13;
753
   }
754
 
755
   if (i->flagsDef >= 0)
756
      code[1] |= 1 << 16;
757
}
758
 
759
void
760
CodeEmitterNVC0::emitISAD(const Instruction *i)
761
{
762
   assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
763
   assert(i->encSize == 8);
764
 
765
   emitForm_A(i, HEX64(38000000, 00000003));
766
 
767
   if (i->dType == TYPE_S32)
768
      code[0] |= 1 << 5;
769
}
770
 
771
void
772
CodeEmitterNVC0::emitNOT(Instruction *i)
773
{
774
   assert(i->encSize == 8);
775
   i->setSrc(1, i->src(0));
776
   emitForm_A(i, HEX64(68000000, 000001c3));
777
}
778
 
779
void
780
CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
781
{
782
   if (i->def(0).getFile() == FILE_PREDICATE) {
783
      code[0] = 0x00000004 | (subOp << 30);
784
      code[1] = 0x0c000000;
785
 
786
      emitPredicate(i);
787
 
788
      defId(i->def(0), 17);
789
      srcId(i->src(0), 20);
790
      if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
791
      srcId(i->src(1), 26);
792
      if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
793
 
794
      if (i->defExists(1)) {
795
         defId(i->def(1), 14);
796
      } else {
797
         code[0] |= 7 << 14;
798
      }
799
      // (a OP b) OP c
800
      if (i->predSrc != 2 && i->srcExists(2)) {
801
         code[1] |= subOp << 21;
802
         srcId(i->src(2), 17);
803
         if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
804
      } else {
805
         code[1] |= 0x000e0000;
806
      }
807
   } else
808
   if (i->encSize == 8) {
809
      if (isLIMM(i->src(1), TYPE_U32)) {
810
         emitForm_A(i, HEX64(38000000, 00000002));
811
 
812
         if (i->flagsDef >= 0)
813
            code[1] |= 1 << 26;
814
      } else {
815
         emitForm_A(i, HEX64(68000000, 00000003));
816
 
817
         if (i->flagsDef >= 0)
818
            code[1] |= 1 << 16;
819
      }
820
      code[0] |= subOp << 6;
821
 
822
      if (i->flagsSrc >= 0) // carry
823
         code[0] |= 1 << 5;
824
 
825
      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
826
      if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
827
   } else {
828
      emitForm_S(i, (subOp << 5) |
829
                 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
830
   }
831
}
832
 
833
void
834
CodeEmitterNVC0::emitPOPC(const Instruction *i)
835
{
836
   emitForm_A(i, HEX64(54000000, 00000004));
837
 
838
   if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
839
   if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
840
}
841
 
842
void
843
CodeEmitterNVC0::emitINSBF(const Instruction *i)
844
{
845
   emitForm_A(i, HEX64(28000000, 00000003));
846
}
847
 
848
void
849
CodeEmitterNVC0::emitEXTBF(const Instruction *i)
850
{
851
   emitForm_A(i, HEX64(70000000, 00000003));
852
 
853
   if (i->dType == TYPE_S32)
854
      code[0] |= 1 << 5;
855
   if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
856
      code[0] |= 1 << 8;
857
}
858
 
859
void
860
CodeEmitterNVC0::emitBFIND(const Instruction *i)
861
{
862
   emitForm_B(i, HEX64(78000000, 00000003));
863
 
864
   if (i->dType == TYPE_S32)
865
      code[0] |= 1 << 5;
866
   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT))
867
      code[0] |= 1 << 8;
868
   if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT)
869
      code[0] |= 1 << 6;
870
}
871
 
872
void
873
CodeEmitterNVC0::emitPERMT(const Instruction *i)
874
{
875
   emitForm_A(i, HEX64(24000000, 00000004));
876
 
877
   code[0] |= i->subOp << 5;
878
}
879
 
880
void
881
CodeEmitterNVC0::emitShift(const Instruction *i)
882
{
883
   if (i->op == OP_SHR) {
884
      emitForm_A(i, HEX64(58000000, 00000003)
885
                 | (isSignedType(i->dType) ? 0x20 : 0x00));
886
   } else {
887
      emitForm_A(i, HEX64(60000000, 00000003));
888
   }
889
 
890
   if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
891
      code[0] |= 1 << 9;
892
}
893
 
894
void
895
CodeEmitterNVC0::emitPreOp(const Instruction *i)
896
{
897
   if (i->encSize == 8) {
898
      emitForm_B(i, HEX64(60000000, 00000000));
899
 
900
      if (i->op == OP_PREEX2)
901
         code[0] |= 0x20;
902
 
903
      if (i->src(0).mod.abs()) code[0] |= 1 << 6;
904
      if (i->src(0).mod.neg()) code[0] |= 1 << 8;
905
   } else {
906
      emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
907
   }
908
}
909
 
910
void
911
CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
912
{
913
   if (i->encSize == 8) {
914
      code[0] = 0x00000000 | (subOp << 26);
915
      code[1] = 0xc8000000;
916
 
917
      emitPredicate(i);
918
 
919
      defId(i->def(0), 14);
920
      srcId(i->src(0), 20);
921
 
922
      assert(i->src(0).getFile() == FILE_GPR);
923
 
924
      if (i->saturate) code[0] |= 1 << 5;
925
 
926
      if (i->src(0).mod.abs()) code[0] |= 1 << 7;
927
      if (i->src(0).mod.neg()) code[0] |= 1 << 9;
928
   } else {
929
      emitForm_S(i, 0x80000008 | (subOp << 26), true);
930
 
931
      assert(!i->src(0).mod.neg());
932
      if (i->src(0).mod.abs()) code[0] |= 1 << 30;
933
   }
934
}
935
 
936
void
937
CodeEmitterNVC0::emitMINMAX(const Instruction *i)
938
{
939
   uint64_t op;
940
 
941
   assert(i->encSize == 8);
942
 
943
   op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
944
 
945
   if (i->ftz)
946
      op |= 1 << 5;
947
   else
948
   if (!isFloatType(i->dType))
949
      op |= isSignedType(i->dType) ? 0x23 : 0x03;
950
   if (i->dType == TYPE_F64)
951
      op |= 0x01;
952
 
953
   emitForm_A(i, op);
954
   emitNegAbs12(i);
955
}
956
 
957
void
958
CodeEmitterNVC0::roundMode_C(const Instruction *i)
959
{
960
   switch (i->rnd) {
961
   case ROUND_M:  code[1] |= 1 << 17; break;
962
   case ROUND_P:  code[1] |= 2 << 17; break;
963
   case ROUND_Z:  code[1] |= 3 << 17; break;
964
   case ROUND_NI: code[0] |= 1 << 7; break;
965
   case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
966
   case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
967
   case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
968
   case ROUND_N: break;
969
   default:
970
      assert(!"invalid round mode");
971
      break;
972
   }
973
}
974
 
975
void
976
CodeEmitterNVC0::roundMode_CS(const Instruction *i)
977
{
978
   switch (i->rnd) {
979
   case ROUND_M:
980
   case ROUND_MI: code[0] |= 1 << 16; break;
981
   case ROUND_P:
982
   case ROUND_PI: code[0] |= 2 << 16; break;
983
   case ROUND_Z:
984
   case ROUND_ZI: code[0] |= 3 << 16; break;
985
   default:
986
      break;
987
   }
988
}
989
 
990
void
991
CodeEmitterNVC0::emitCVT(Instruction *i)
992
{
993
   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
994
   DataType dType;
995
 
996
   switch (i->op) {
997
   case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
998
   case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
999
   case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1000
   default:
1001
      break;
1002
   }
1003
 
1004
   const bool sat = (i->op == OP_SAT) || i->saturate;
1005
   const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
1006
   const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
1007
 
1008
   if (i->op == OP_NEG && i->dType == TYPE_U32)
1009
      dType = TYPE_S32;
1010
   else
1011
      dType = i->dType;
1012
 
1013
   if (i->encSize == 8) {
1014
      emitForm_B(i, HEX64(10000000, 00000004));
1015
 
1016
      roundMode_C(i);
1017
 
1018
      // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
1019
      code[0] |= util_logbase2(typeSizeof(dType)) << 20;
1020
      code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
1021
 
1022
      if (sat)
1023
         code[0] |= 0x20;
1024
      if (abs)
1025
         code[0] |= 1 << 6;
1026
      if (neg && i->op != OP_ABS)
1027
         code[0] |= 1 << 8;
1028
 
1029
      if (i->ftz)
1030
         code[1] |= 1 << 23;
1031
 
1032
      if (isSignedIntType(dType))
1033
         code[0] |= 0x080;
1034
      if (isSignedIntType(i->sType))
1035
         code[0] |= 0x200;
1036
 
1037
      if (isFloatType(dType)) {
1038
         if (!isFloatType(i->sType))
1039
            code[1] |= 0x08000000;
1040
      } else {
1041
         if (isFloatType(i->sType))
1042
            code[1] |= 0x04000000;
1043
         else
1044
            code[1] |= 0x0c000000;
1045
      }
1046
   } else {
1047
      if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
1048
         code[0] = 0x298;
1049
      } else
1050
      if (isFloatType(dType)) {
1051
         if (isFloatType(i->sType))
1052
            code[0] = 0x098;
1053
         else
1054
            code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
1055
      } else {
1056
         assert(isFloatType(i->sType));
1057
 
1058
         code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
1059
      }
1060
 
1061
      if (neg) code[0] |= 1 << 16;
1062
      if (sat) code[0] |= 1 << 18;
1063
      if (abs) code[0] |= 1 << 19;
1064
 
1065
      roundMode_CS(i);
1066
   }
1067
}
1068
 
1069
void
1070
CodeEmitterNVC0::emitSET(const CmpInstruction *i)
1071
{
1072
   uint32_t hi;
1073
   uint32_t lo = 0;
1074
 
1075
   if (i->sType == TYPE_F64)
1076
      lo = 0x1;
1077
   else
1078
   if (!isFloatType(i->sType))
1079
      lo = 0x3;
1080
 
1081
   if (isFloatType(i->dType) || isSignedIntType(i->sType))
1082
      lo |= 0x20;
1083
 
1084
   switch (i->op) {
1085
   case OP_SET_AND: hi = 0x10000000; break;
1086
   case OP_SET_OR:  hi = 0x10200000; break;
1087
   case OP_SET_XOR: hi = 0x10400000; break;
1088
   default:
1089
      hi = 0x100e0000;
1090
      break;
1091
   }
1092
   emitForm_A(i, (static_cast(hi) << 32) | lo);
1093
 
1094
   if (i->op != OP_SET)
1095
      srcId(i->src(2), 32 + 17);
1096
 
1097
   if (i->def(0).getFile() == FILE_PREDICATE) {
1098
      if (i->sType == TYPE_F32)
1099
         code[1] += 0x10000000;
1100
      else
1101
         code[1] += 0x08000000;
1102
 
1103
      code[0] &= ~0xfc000;
1104
      defId(i->def(0), 17);
1105
      if (i->defExists(1))
1106
         defId(i->def(1), 14);
1107
      else
1108
         code[0] |= 0x1c000;
1109
   }
1110
 
1111
   if (i->ftz)
1112
      code[1] |= 1 << 27;
1113
 
1114
   emitCondCode(i->setCond, 32 + 23);
1115
   emitNegAbs12(i);
1116
}
1117
 
1118
void
1119
CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1120
{
1121
   uint64_t op;
1122
 
1123
   switch (i->dType) {
1124
   case TYPE_S32:
1125
      op = HEX64(30000000, 00000023);
1126
      break;
1127
   case TYPE_U32:
1128
      op = HEX64(30000000, 00000003);
1129
      break;
1130
   case TYPE_F32:
1131
      op = HEX64(38000000, 00000000);
1132
      break;
1133
   default:
1134
      assert(!"invalid type for SLCT");
1135
      op = 0;
1136
      break;
1137
   }
1138
   emitForm_A(i, op);
1139
 
1140
   CondCode cc = i->setCond;
1141
 
1142
   if (i->src(2).mod.neg())
1143
      cc = reverseCondCode(cc);
1144
 
1145
   emitCondCode(cc, 32 + 23);
1146
 
1147
   if (i->ftz)
1148
      code[0] |= 1 << 5;
1149
}
1150
 
1151
void CodeEmitterNVC0::emitSELP(const Instruction *i)
1152
{
1153
   emitForm_A(i, HEX64(20000000, 00000004));
1154
 
1155
   if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1156
      code[1] |= 1 << 20;
1157
}
1158
 
1159
void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1160
{
1161
   code[0] = 0x00000006 | (i->subOp << 26);
1162
   code[1] = 0xf0000000;
1163
   emitPredicate(i);
1164
   emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1165
}
1166
 
1167
void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1168
{
1169
   code[0] = 0x00000086;
1170
   code[1] = 0xd0000000;
1171
 
1172
   code[1] |= i->tex.r;
1173
   code[1] |= i->tex.s << 8;
1174
 
1175
   if (i->tex.liveOnly)
1176
      code[0] |= 1 << 9;
1177
 
1178
   defId(i->def(0), 14);
1179
   srcId(i->src(0), 20);
1180
}
1181
 
1182
static inline bool
1183
isNextIndependentTex(const TexInstruction *i)
1184
{
1185
   if (!i->next || !isTextureOp(i->next->op))
1186
      return false;
1187
   if (i->getDef(0)->interfers(i->next->getSrc(0)))
1188
      return false;
1189
   return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1190
}
1191
 
1192
void
1193
CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1194
{
1195
   code[0] = 0x00000006;
1196
 
1197
   if (isNextIndependentTex(i))
1198
      code[0] |= 0x080; // t mode
1199
   else
1200
      code[0] |= 0x100; // p mode
1201
 
1202
   if (i->tex.liveOnly)
1203
      code[0] |= 1 << 9;
1204
 
1205
   switch (i->op) {
1206
   case OP_TEX: code[1] = 0x80000000; break;
1207
   case OP_TXB: code[1] = 0x84000000; break;
1208
   case OP_TXL: code[1] = 0x86000000; break;
1209
   case OP_TXF: code[1] = 0x90000000; break;
1210
   case OP_TXG: code[1] = 0xa0000000; break;
1211
   case OP_TXLQ: code[1] = 0xb0000000; break;
1212
   case OP_TXD: code[1] = 0xe0000000; break;
1213
   default:
1214
      assert(!"invalid texture op");
1215
      break;
1216
   }
1217
   if (i->op == OP_TXF) {
1218
      if (!i->tex.levelZero)
1219
         code[1] |= 0x02000000;
1220
   } else
1221
   if (i->tex.levelZero) {
1222
      code[1] |= 0x02000000;
1223
   }
1224
 
1225
   if (i->op != OP_TXD && i->tex.derivAll)
1226
      code[1] |= 1 << 13;
1227
 
1228
   defId(i->def(0), 14);
1229
   srcId(i->src(0), 20);
1230
 
1231
   emitPredicate(i);
1232
 
1233
   if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1234
 
1235
   code[1] |= i->tex.mask << 14;
1236
 
1237
   code[1] |= i->tex.r;
1238
   code[1] |= i->tex.s << 8;
1239
   if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1240
      code[1] |= 1 << 18; // in 1st source (with array index)
1241
 
1242
   // texture target:
1243
   code[1] |= (i->tex.target.getDim() - 1) << 20;
1244
   if (i->tex.target.isCube())
1245
      code[1] += 2 << 20;
1246
   if (i->tex.target.isArray())
1247
      code[1] |= 1 << 19;
1248
   if (i->tex.target.isShadow())
1249
      code[1] |= 1 << 24;
1250
 
1251
   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1252
 
1253
   if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1254
      // lzero
1255
      if (i->op == OP_TXL)
1256
         code[1] &= ~(1 << 26);
1257
      else
1258
      if (i->op == OP_TXF)
1259
         code[1] &= ~(1 << 25);
1260
   }
1261
   if (i->tex.target == TEX_TARGET_2D_MS ||
1262
       i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1263
      code[1] |= 1 << 23;
1264
 
1265
   if (i->tex.useOffsets == 1)
1266
      code[1] |= 1 << 22;
1267
   if (i->tex.useOffsets == 4)
1268
      code[1] |= 1 << 23;
1269
 
1270
   srcId(i, src1, 26);
1271
}
1272
 
1273
void
1274
CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1275
{
1276
   code[0] = 0x00000086;
1277
   code[1] = 0xc0000000;
1278
 
1279
   switch (i->tex.query) {
1280
   case TXQ_DIMS:            code[1] |= 0 << 22; break;
1281
   case TXQ_TYPE:            code[1] |= 1 << 22; break;
1282
   case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1283
   case TXQ_FILTER:          code[1] |= 3 << 22; break;
1284
   case TXQ_LOD:             code[1] |= 4 << 22; break;
1285
   case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
1286
   default:
1287
      assert(!"invalid texture query");
1288
      break;
1289
   }
1290
 
1291
   code[1] |= i->tex.mask << 14;
1292
 
1293
   code[1] |= i->tex.r;
1294
   code[1] |= i->tex.s << 8;
1295
   if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1296
      code[1] |= 1 << 18;
1297
 
1298
   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1299
 
1300
   defId(i->def(0), 14);
1301
   srcId(i->src(0), 20);
1302
   srcId(i, src1, 26);
1303
 
1304
   emitPredicate(i);
1305
}
1306
 
1307
void
1308
CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1309
{
1310
   code[0] = 0x00000000 | (laneMask << 6);
1311
   code[1] = 0x48000000 | qOp;
1312
 
1313
   defId(i->def(0), 14);
1314
   srcId(i->src(0), 20);
1315
   srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1316
 
1317
   if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1318
      code[0] |= 1 << 9; // dall
1319
 
1320
   emitPredicate(i);
1321
}
1322
 
1323
void
1324
CodeEmitterNVC0::emitFlow(const Instruction *i)
1325
{
1326
   const FlowInstruction *f = i->asFlow();
1327
 
1328
   unsigned mask; // bit 0: predicate, bit 1: target
1329
 
1330
   code[0] = 0x00000007;
1331
 
1332
   switch (i->op) {
1333
   case OP_BRA:
1334
      code[1] = f->absolute ? 0x00000000 : 0x40000000;
1335
      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1336
         code[0] |= 0x4000;
1337
      mask = 3;
1338
      break;
1339
   case OP_CALL:
1340
      code[1] = f->absolute ? 0x10000000 : 0x50000000;
1341
      if (f->indirect)
1342
         code[0] |= 0x4000; // indirect calls always use c[] source
1343
      mask = 2;
1344
      break;
1345
 
1346
   case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
1347
   case OP_RET:     code[1] = 0x90000000; mask = 1; break;
1348
   case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1349
   case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
1350
   case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
1351
 
1352
   case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
1353
   case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1354
   case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
1355
   case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
1356
 
1357
   case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
1358
   case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1359
   case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
1360
   default:
1361
      assert(!"invalid flow operation");
1362
      return;
1363
   }
1364
 
1365
   if (mask & 1) {
1366
      emitPredicate(i);
1367
      if (i->flagsSrc < 0)
1368
         code[0] |= 0x1e0;
1369
   }
1370
 
1371
   if (!f)
1372
      return;
1373
 
1374
   if (f->allWarp)
1375
      code[0] |= 1 << 15;
1376
   if (f->limit)
1377
      code[0] |= 1 << 16;
1378
 
1379
   if (f->indirect) {
1380
      if (code[0] & 0x4000) {
1381
         assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1382
         setAddress16(i->src(0));
1383
         code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1384
         if (f->op == OP_BRA)
1385
            srcId(f->src(0).getIndirect(0), 20);
1386
      } else {
1387
         srcId(f, 0, 20);
1388
      }
1389
   }
1390
 
1391
   if (f->op == OP_CALL) {
1392
      if (f->indirect) {
1393
         // nothing
1394
      } else
1395
      if (f->builtin) {
1396
         assert(f->absolute);
1397
         uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1398
         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1399
         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1400
      } else {
1401
         assert(!f->absolute);
1402
         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1403
         code[0] |= (pcRel & 0x3f) << 26;
1404
         code[1] |= (pcRel >> 6) & 0x3ffff;
1405
      }
1406
   } else
1407
   if (mask & 2) {
1408
      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1409
      // currently we don't want absolute branches
1410
      assert(!f->absolute);
1411
      code[0] |= (pcRel & 0x3f) << 26;
1412
      code[1] |= (pcRel >> 6) & 0x3ffff;
1413
   }
1414
}
1415
 
1416
void
1417
CodeEmitterNVC0::emitBAR(const Instruction *i)
1418
{
1419
   Value *rDef = NULL, *pDef = NULL;
1420
 
1421
   switch (i->subOp) {
1422
   case NV50_IR_SUBOP_BAR_ARRIVE:   code[0] = 0x84; break;
1423
   case NV50_IR_SUBOP_BAR_RED_AND:  code[0] = 0x24; break;
1424
   case NV50_IR_SUBOP_BAR_RED_OR:   code[0] = 0x44; break;
1425
   case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1426
   default:
1427
      code[0] = 0x04;
1428
      assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1429
      break;
1430
   }
1431
   code[1] = 0x50000000;
1432
 
1433
   code[0] |= 63 << 14;
1434
   code[1] |= 7 << 21;
1435
 
1436
   emitPredicate(i);
1437
 
1438
   // barrier id
1439
   if (i->src(0).getFile() == FILE_GPR) {
1440
      srcId(i->src(0), 20);
1441
   } else {
1442
      ImmediateValue *imm = i->getSrc(0)->asImm();
1443
      assert(imm);
1444
      code[0] |= imm->reg.data.u32 << 20;
1445
   }
1446
 
1447
   // thread count
1448
   if (i->src(1).getFile() == FILE_GPR) {
1449
      srcId(i->src(1), 26);
1450
   } else {
1451
      ImmediateValue *imm = i->getSrc(1)->asImm();
1452
      assert(imm);
1453
      code[0] |= imm->reg.data.u32 << 26;
1454
      code[1] |= imm->reg.data.u32 >> 6;
1455
   }
1456
 
1457
   if (i->srcExists(2) && (i->predSrc != 2)) {
1458
      srcId(i->src(2), 32 + 17);
1459
      if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1460
         code[1] |= 1 << 20;
1461
   } else {
1462
      code[1] |= 7 << 17;
1463
   }
1464
 
1465
   if (i->defExists(0)) {
1466
      if (i->def(0).getFile() == FILE_GPR)
1467
         rDef = i->getDef(0);
1468
      else
1469
         pDef = i->getDef(0);
1470
 
1471
      if (i->defExists(1)) {
1472
         if (i->def(1).getFile() == FILE_GPR)
1473
            rDef = i->getDef(1);
1474
         else
1475
            pDef = i->getDef(1);
1476
      }
1477
   }
1478
   if (rDef) {
1479
      code[0] &= ~(63 << 14);
1480
      defId(rDef, 14);
1481
   }
1482
   if (pDef) {
1483
      code[1] &= ~(7 << 21);
1484
      defId(pDef, 32 + 21);
1485
   }
1486
}
1487
 
1488
void
1489
CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1490
{
1491
   uint32_t prim = i->src(0).get()->reg.data.u32;
1492
 
1493
   code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1494
   code[1] = 0x00000000 | (prim >> 6);
1495
 
1496
   emitPredicate(i);
1497
 
1498
   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1499
 
1500
   defId(i->def(0), 14);
1501
   srcId(i, src1, 20);
1502
}
1503
 
1504
void
1505
CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1506
{
1507
   code[0] = 0x00000006;
1508
   code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1509
 
1510
   if (i->perPatch)
1511
      code[0] |= 0x100;
1512
   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1513
      code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1514
 
1515
   emitPredicate(i);
1516
 
1517
   code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1518
 
1519
   defId(i->def(0), 14);
1520
   srcId(i->src(0).getIndirect(0), 20);
1521
   srcId(i->src(0).getIndirect(1), 26); // vertex address
1522
}
1523
 
1524
void
1525
CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1526
{
1527
   unsigned int size = typeSizeof(i->dType);
1528
 
1529
   code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1530
   code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1531
 
1532
   assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1533
 
1534
   if (i->perPatch)
1535
      code[0] |= 0x100;
1536
 
1537
   emitPredicate(i);
1538
 
1539
   assert(i->src(1).getFile() == FILE_GPR);
1540
 
1541
   srcId(i->src(0).getIndirect(0), 20);
1542
   srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1543
   srcId(i->src(1), 26);
1544
}
1545
 
1546
void
1547
CodeEmitterNVC0::emitOUT(const Instruction *i)
1548
{
1549
   code[0] = 0x00000006;
1550
   code[1] = 0x1c000000;
1551
 
1552
   emitPredicate(i);
1553
 
1554
   defId(i->def(0), 14); // new secret address
1555
   srcId(i->src(0), 20); // old secret address, should be 0 initially
1556
 
1557
   assert(i->src(0).getFile() == FILE_GPR);
1558
 
1559
   if (i->op == OP_EMIT)
1560
      code[0] |= 1 << 5;
1561
   if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1562
      code[0] |= 1 << 6;
1563
 
1564
   // vertex stream
1565
   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1566
      unsigned int stream = SDATA(i->src(1)).u32;
1567
      assert(stream < 4);
1568
      if (stream) {
1569
         code[1] |= 0xc000;
1570
         code[0] |= stream << 26;
1571
      } else {
1572
         srcId(NULL, 26);
1573
      }
1574
   } else {
1575
      srcId(i->src(1), 26);
1576
   }
1577
}
1578
 
1579
void
1580
CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1581
{
1582
   if (i->encSize == 8) {
1583
      code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1584
   } else {
1585
      if (i->getInterpMode() == NV50_IR_INTERP_SC)
1586
         code[0] |= 0x80;
1587
      assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1588
   }
1589
}
1590
 
1591
void
1592
CodeEmitterNVC0::emitINTERP(const Instruction *i)
1593
{
1594
   const uint32_t base = i->getSrc(0)->reg.data.offset;
1595
 
1596
   if (i->encSize == 8) {
1597
      code[0] = 0x00000000;
1598
      code[1] = 0xc0000000 | (base & 0xffff);
1599
 
1600
      if (i->saturate)
1601
         code[0] |= 1 << 5;
1602
 
1603
      if (i->op == OP_PINTERP)
1604
         srcId(i->src(1), 26);
1605
      else
1606
         code[0] |= 0x3f << 26;
1607
 
1608
      srcId(i->src(0).getIndirect(0), 20);
1609
   } else {
1610
      assert(i->op == OP_PINTERP);
1611
      code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1612
      srcId(i->src(1), 20);
1613
   }
1614
   emitInterpMode(i);
1615
 
1616
   emitPredicate(i);
1617
   defId(i->def(0), 14);
1618
 
1619
   if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1620
      srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17);
1621
   else
1622
      code[1] |= 0x3f << 17;
1623
}
1624
 
1625
void
1626
CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1627
{
1628
   uint8_t val;
1629
 
1630
   switch (ty) {
1631
   case TYPE_U8:
1632
      val = 0x00;
1633
      break;
1634
   case TYPE_S8:
1635
      val = 0x20;
1636
      break;
1637
   case TYPE_F16:
1638
   case TYPE_U16:
1639
      val = 0x40;
1640
      break;
1641
   case TYPE_S16:
1642
      val = 0x60;
1643
      break;
1644
   case TYPE_F32:
1645
   case TYPE_U32:
1646
   case TYPE_S32:
1647
      val = 0x80;
1648
      break;
1649
   case TYPE_F64:
1650
   case TYPE_U64:
1651
   case TYPE_S64:
1652
      val = 0xa0;
1653
      break;
1654
   case TYPE_B128:
1655
      val = 0xc0;
1656
      break;
1657
   default:
1658
      val = 0x80;
1659
      assert(!"invalid type");
1660
      break;
1661
   }
1662
   code[0] |= val;
1663
}
1664
 
1665
void
1666
CodeEmitterNVC0::emitCachingMode(CacheMode c)
1667
{
1668
   uint32_t val;
1669
 
1670
   switch (c) {
1671
   case CACHE_CA:
1672
// case CACHE_WB:
1673
      val = 0x000;
1674
      break;
1675
   case CACHE_CG:
1676
      val = 0x100;
1677
      break;
1678
   case CACHE_CS:
1679
      val = 0x200;
1680
      break;
1681
   case CACHE_CV:
1682
// case CACHE_WT:
1683
      val = 0x300;
1684
      break;
1685
   default:
1686
      val = 0;
1687
      assert(!"invalid caching mode");
1688
      break;
1689
   }
1690
   code[0] |= val;
1691
}
1692
 
1693
static inline bool
1694
uses64bitAddress(const Instruction *ldst)
1695
{
1696
   return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1697
      ldst->src(0).isIndirect(0) &&
1698
      ldst->getIndirect(0, 0)->reg.size == 8;
1699
}
1700
 
1701
void
1702
CodeEmitterNVC0::emitSTORE(const Instruction *i)
1703
{
1704
   uint32_t opc;
1705
 
1706
   switch (i->src(0).getFile()) {
1707
   case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1708
   case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
1709
   case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1710
   default:
1711
      assert(!"invalid memory file");
1712
      opc = 0;
1713
      break;
1714
   }
1715
   code[0] = 0x00000005;
1716
   code[1] = opc;
1717
 
1718
   setAddressByFile(i->src(0));
1719
   srcId(i->src(1), 14);
1720
   srcId(i->src(0).getIndirect(0), 20);
1721
   if (uses64bitAddress(i))
1722
      code[1] |= 1 << 26;
1723
 
1724
   emitPredicate(i);
1725
 
1726
   emitLoadStoreType(i->dType);
1727
   emitCachingMode(i->cache);
1728
}
1729
 
1730
void
1731
CodeEmitterNVC0::emitLOAD(const Instruction *i)
1732
{
1733
   uint32_t opc;
1734
 
1735
   code[0] = 0x00000005;
1736
 
1737
   switch (i->src(0).getFile()) {
1738
   case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1739
   case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
1740
   case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1741
   case FILE_MEMORY_CONST:
1742
      if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1743
         emitMOV(i); // not sure if this is any better
1744
         return;
1745
      }
1746
      opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1747
      code[0] = 0x00000006 | (i->subOp << 8);
1748
      break;
1749
   default:
1750
      assert(!"invalid memory file");
1751
      opc = 0;
1752
      break;
1753
   }
1754
   code[1] = opc;
1755
 
1756
   defId(i->def(0), 14);
1757
 
1758
   setAddressByFile(i->src(0));
1759
   srcId(i->src(0).getIndirect(0), 20);
1760
   if (uses64bitAddress(i))
1761
      code[1] |= 1 << 26;
1762
 
1763
   emitPredicate(i);
1764
 
1765
   emitLoadStoreType(i->dType);
1766
   emitCachingMode(i->cache);
1767
}
1768
 
1769
// Map the system value referenced by @ref to its special register number.
// Indexed system values add their index to a base number. Unknown system
// values assert and yield 0.
uint8_t
CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
{
   uint8_t sreg = 0;

   switch (SDATA(ref).sv.sv) {
   case SV_LANEID:        sreg = 0x00; break;
   case SV_PHYSID:        sreg = 0x03; break;
   case SV_VERTEX_COUNT:  sreg = 0x10; break;
   case SV_INVOCATION_ID: sreg = 0x11; break;
   case SV_YDIR:          sreg = 0x12; break;
   case SV_TID:           sreg = 0x21 + SDATA(ref).sv.index; break;
   case SV_CTAID:         sreg = 0x25 + SDATA(ref).sv.index; break;
   case SV_NTID:          sreg = 0x29 + SDATA(ref).sv.index; break;
   case SV_GRIDID:        sreg = 0x2c; break;
   case SV_NCTAID:        sreg = 0x2d + SDATA(ref).sv.index; break;
   case SV_LBASE:         sreg = 0x34; break;
   case SV_SBASE:         sreg = 0x30; break;
   case SV_CLOCK:         sreg = 0x50 + SDATA(ref).sv.index; break;
   default:
      assert(!"no sreg for system value");
      break;
   }
   return sreg;
}
1791
 
1792
void
1793
CodeEmitterNVC0::emitMOV(const Instruction *i)
1794
{
1795
   if (i->def(0).getFile() == FILE_PREDICATE) {
1796
      if (i->src(0).getFile() == FILE_GPR) {
1797
         code[0] = 0xfc01c003;
1798
         code[1] = 0x1a8e0000;
1799
         srcId(i->src(0), 20);
1800
      } else {
1801
         code[0] = 0x0001c004;
1802
         code[1] = 0x0c0e0000;
1803
         if (i->src(0).getFile() == FILE_IMMEDIATE) {
1804
            code[0] |= 7 << 20;
1805
            if (!i->getSrc(0)->reg.data.u32)
1806
               code[0] |= 1 << 23;
1807
         } else {
1808
            srcId(i->src(0), 20);
1809
         }
1810
      }
1811
      defId(i->def(0), 17);
1812
      emitPredicate(i);
1813
   } else
1814
   if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1815
      uint8_t sr = getSRegEncoding(i->src(0));
1816
 
1817
      if (i->encSize == 8) {
1818
         code[0] = 0x00000004 | (sr << 26);
1819
         code[1] = 0x2c000000;
1820
      } else {
1821
         code[0] = 0x40000008 | (sr << 20);
1822
      }
1823
      defId(i->def(0), 14);
1824
 
1825
      emitPredicate(i);
1826
   } else
1827
   if (i->encSize == 8) {
1828
      uint64_t opc;
1829
 
1830
      if (i->src(0).getFile() == FILE_IMMEDIATE)
1831
         opc = HEX64(18000000, 000001e2);
1832
      else
1833
      if (i->src(0).getFile() == FILE_PREDICATE)
1834
         opc = HEX64(080e0000, 1c000004);
1835
      else
1836
         opc = HEX64(28000000, 00000004);
1837
 
1838
      opc |= i->lanes << 5;
1839
 
1840
      emitForm_B(i, opc);
1841
   } else {
1842
      uint32_t imm;
1843
 
1844
      if (i->src(0).getFile() == FILE_IMMEDIATE) {
1845
         imm = SDATA(i->src(0)).u32;
1846
         if (imm & 0xfff00000) {
1847
            assert(!(imm & 0x000fffff));
1848
            code[0] = 0x00000318 | imm;
1849
         } else {
1850
            assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1851
            code[0] = 0x00000118 | (imm << 20);
1852
         }
1853
      } else {
1854
         code[0] = 0x0028;
1855
         emitShortSrc2(i->src(0));
1856
      }
1857
      defId(i->def(0), 14);
1858
 
1859
      emitPredicate(i);
1860
   }
1861
}
1862
 
1863
void
1864
CodeEmitterNVC0::emitATOM(const Instruction *i)
1865
{
1866
   const bool hasDst = i->defExists(0);
1867
   const bool casOrExch =
1868
      i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1869
      i->subOp == NV50_IR_SUBOP_ATOM_CAS;
1870
 
1871
   if (i->dType == TYPE_U64) {
1872
      switch (i->subOp) {
1873
      case NV50_IR_SUBOP_ATOM_ADD:
1874
         code[0] = 0x205;
1875
         if (hasDst)
1876
            code[1] = 0x507e0000;
1877
         else
1878
            code[1] = 0x10000000;
1879
         break;
1880
      case NV50_IR_SUBOP_ATOM_EXCH:
1881
         code[0] = 0x305;
1882
         code[1] = 0x507e0000;
1883
         break;
1884
      case NV50_IR_SUBOP_ATOM_CAS:
1885
         code[0] = 0x325;
1886
         code[1] = 0x50000000;
1887
         break;
1888
      default:
1889
         assert(!"invalid u64 red op");
1890
         break;
1891
      }
1892
   } else
1893
   if (i->dType == TYPE_U32) {
1894
      switch (i->subOp) {
1895
      case NV50_IR_SUBOP_ATOM_EXCH:
1896
         code[0] = 0x105;
1897
         code[1] = 0x507e0000;
1898
         break;
1899
      case NV50_IR_SUBOP_ATOM_CAS:
1900
         code[0] = 0x125;
1901
         code[1] = 0x50000000;
1902
         break;
1903
      default:
1904
         code[0] = 0x5 | (i->subOp << 5);
1905
         if (hasDst)
1906
            code[1] = 0x507e0000;
1907
         else
1908
            code[1] = 0x10000000;
1909
         break;
1910
      }
1911
   } else
1912
   if (i->dType == TYPE_S32) {
1913
      assert(i->subOp <= 2);
1914
      code[0] = 0x205 | (i->subOp << 5);
1915
      if (hasDst)
1916
         code[1] = 0x587e0000;
1917
      else
1918
         code[1] = 0x18000000;
1919
   } else
1920
   if (i->dType == TYPE_F32) {
1921
      assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
1922
      code[0] = 0x205;
1923
      if (hasDst)
1924
         code[1] = 0x687e0000;
1925
      else
1926
         code[1] = 0x28000000;
1927
   }
1928
 
1929
   emitPredicate(i);
1930
 
1931
   srcId(i->src(1), 14);
1932
 
1933
   if (hasDst)
1934
      defId(i->def(0), 32 + 11);
1935
   else
1936
   if (casOrExch)
1937
      code[1] |= 63 << 11;
1938
 
1939
   if (hasDst || casOrExch) {
1940
      const int32_t offset = SDATA(i->src(0)).offset;
1941
      assert(offset < 0x80000 && offset >= -0x80000);
1942
      code[0] |= offset << 26;
1943
      code[1] |= (offset & 0x1ffc0) >> 6;
1944
      code[1] |= (offset & 0xe0000) << 6;
1945
   } else {
1946
      srcAddr32(i->src(0), 26, 0);
1947
   }
1948
   if (i->getIndirect(0, 0)) {
1949
      srcId(i->getIndirect(0, 0), 20);
1950
      if (i->getIndirect(0, 0)->reg.size == 8)
1951
         code[1] |= 1 << 26;
1952
   } else {
1953
      code[0] |= 63 << 20;
1954
   }
1955
 
1956
   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1957
      srcId(i->src(2), 32 + 17);
1958
}
1959
 
1960
void
1961
CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
1962
{
1963
   switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
1964
   case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
1965
   case NV50_IR_SUBOP_MEMBAR_GL:  code[0] = 0x25; break;
1966
   default:
1967
      code[0] = 0x45;
1968
      assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
1969
      break;
1970
   }
1971
   code[1] = 0xe0000000;
1972
 
1973
   emitPredicate(i);
1974
}
1975
 
1976
void
1977
CodeEmitterNVC0::emitCCTL(const Instruction *i)
1978
{
1979
   code[0] = 0x00000005 | (i->subOp << 5);
1980
 
1981
   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
1982
      code[1] = 0x98000000;
1983
      srcAddr32(i->src(0), 28, 2);
1984
   } else {
1985
      code[1] = 0xd0000000;
1986
      setAddress24(i->src(0));
1987
   }
1988
   if (uses64bitAddress(i))
1989
      code[1] |= 1 << 26;
1990
   srcId(i->src(0).getIndirect(0), 20);
1991
 
1992
   emitPredicate(i);
1993
 
1994
   defId(i, 0, 14);
1995
}
1996
 
1997
void
1998
CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
1999
{
2000
   uint8_t m;
2001
   switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
2002
   case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
2003
   case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
2004
   case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
2005
   case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
2006
   case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
2007
   case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
2008
   case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
2009
   case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
2010
   case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
2011
   case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
2012
   case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
2013
   case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
2014
   case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
2015
   case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
2016
   case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
2017
   default:
2018
      return;
2019
   }
2020
   code[0] |= m << 5;
2021
   if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
2022
      code[1] |= 1 << 16;
2023
}
2024
 
2025
void
2026
CodeEmitterNVC0::emitSUCalc(Instruction *i)
2027
{
2028
   ImmediateValue *imm = NULL;
2029
   uint64_t opc;
2030
 
2031
   if (i->srcExists(2)) {
2032
      imm = i->getSrc(2)->asImm();
2033
      if (imm)
2034
         i->setSrc(2, NULL); // special case, make emitForm_A not assert
2035
   }
2036
 
2037
   switch (i->op) {
2038
   case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
2039
   case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
2040
   case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
2041
   default:
2042
      assert(0);
2043
      return;
2044
   }
2045
   emitForm_A(i, opc);
2046
 
2047
   if (i->op == OP_SUCLAMP) {
2048
      if (i->dType == TYPE_S32)
2049
         code[0] |= 1 << 9;
2050
      emitSUCLAMPMode(i->subOp);
2051
   }
2052
 
2053
   if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
2054
         code[1] |= 1 << 16;
2055
 
2056
   if (i->op != OP_SUEAU) {
2057
      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
2058
         code[0] |= 63 << 14;
2059
         code[1] |= i->getDef(0)->reg.data.id << 23;
2060
      } else
2061
      if (i->defExists(1)) { // r, p
2062
         assert(i->def(1).getFile() == FILE_PREDICATE);
2063
         code[1] |= i->getDef(1)->reg.data.id << 23;
2064
      } else { // r, #
2065
         code[1] |= 7 << 23;
2066
      }
2067
   }
2068
   if (imm) {
2069
      assert(i->op == OP_SUCLAMP);
2070
      i->setSrc(2, imm);
2071
      code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
2072
   }
2073
}
2074
 
2075
void
2076
CodeEmitterNVC0::emitSUGType(DataType ty)
2077
{
2078
   switch (ty) {
2079
   case TYPE_S32: code[1] |= 1 << 13; break;
2080
   case TYPE_U8:  code[1] |= 2 << 13; break;
2081
   case TYPE_S8:  code[1] |= 3 << 13; break;
2082
   default:
2083
      assert(ty == TYPE_U32);
2084
      break;
2085
   }
2086
}
2087
 
2088
void
2089
CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2090
{
2091
   const uint32_t offset = i->getSrc(s)->reg.data.offset;
2092
 
2093
   assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2094
   assert(offset == (offset & 0xfffc));
2095
 
2096
   code[1] |= 1 << 21;
2097
   code[0] |= offset << 24;
2098
   code[1] |= offset >> 8;
2099
   code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2100
}
2101
 
2102
void
2103
CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2104
{
2105
   if (!i->srcExists(s) || (i->predSrc == s)) {
2106
      code[1] |= 0x7 << 17;
2107
   } else {
2108
      if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2109
         code[1] |= 1 << 20;
2110
      srcId(i->src(s), 32 + 17);
2111
   }
2112
}
2113
 
2114
void
2115
CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2116
{
2117
   code[0] = 0x5;
2118
   code[1] = 0xd4000000 | (i->subOp << 15);
2119
 
2120
   emitLoadStoreType(i->dType);
2121
   emitSUGType(i->sType);
2122
   emitCachingMode(i->cache);
2123
 
2124
   emitPredicate(i);
2125
   defId(i->def(0), 14); // destination
2126
   srcId(i->src(0), 20); // address
2127
   // format
2128
   if (i->src(1).getFile() == FILE_GPR)
2129
      srcId(i->src(1), 26);
2130
   else
2131
      setSUConst16(i, 1);
2132
   setSUPred(i, 2);
2133
}
2134
 
2135
void
2136
CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2137
{
2138
   code[0] = 0x5;
2139
   code[1] = 0xdc000000 | (i->subOp << 15);
2140
 
2141
   if (i->op == OP_SUSTP)
2142
      code[1] |= i->tex.mask << 22;
2143
   else
2144
      emitLoadStoreType(i->dType);
2145
   emitSUGType(i->sType);
2146
   emitCachingMode(i->cache);
2147
 
2148
   emitPredicate(i);
2149
   srcId(i->src(0), 20); // address
2150
   // format
2151
   if (i->src(1).getFile() == FILE_GPR)
2152
      srcId(i->src(1), 26);
2153
   else
2154
      setSUConst16(i, 1);
2155
   srcId(i->src(3), 14); // values
2156
   setSUPred(i, 2);
2157
}
2158
 
2159
void
2160
CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2161
{
2162
   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2163
   case 0:
2164
      code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2165
      code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
2166
      code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
2167
      code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2168
      break;
2169
   case 1:
2170
      code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
2171
      code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2172
      code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
2173
      code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
2174
      code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
2175
      code[1] |= (i->mask & 0x3) << 2;
2176
      break;
2177
   case 2:
2178
      code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2179
      code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2180
      code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2181
      code[1] |= (i->mask & 0x3) << 2;
2182
      code[1] |= (i->mask & 0xc) << 21;
2183
      break;
2184
   default:
2185
      assert(0);
2186
      break;
2187
   }
2188
}
2189
 
2190
void
2191
CodeEmitterNVC0::emitVSHL(const Instruction *i)
2192
{
2193
   uint64_t opc = 0x4;
2194
 
2195
   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2196
   case 0: opc |= 0xe8ULL << 56; break;
2197
   case 1: opc |= 0xb4ULL << 56; break;
2198
   case 2: opc |= 0x94ULL << 56; break;
2199
   default:
2200
      assert(0);
2201
      break;
2202
   }
2203
   if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2204
      if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2205
      if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2206
   } else {
2207
      if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2208
      if (isSignedType(i->sType)) opc |= 1 << 6;
2209
   }
2210
   emitForm_A(i, opc);
2211
   emitVectorSubOp(i);
2212
 
2213
   if (i->saturate)
2214
      code[0] |= 1 << 9;
2215
   if (i->flagsDef >= 0)
2216
      code[1] |= 1 << 16;
2217
}
2218
 
2219
void
2220
CodeEmitterNVC0::emitPIXLD(const Instruction *i)
2221
{
2222
   assert(i->encSize == 8);
2223
   emitForm_A(i, HEX64(10000000, 00000006));
2224
   code[0] |= i->subOp << 5;
2225
   code[1] |= 0x00e00000;
2226
}
2227
 
2228
bool
2229
CodeEmitterNVC0::emitInstruction(Instruction *insn)
2230
{
2231
   unsigned int size = insn->encSize;
2232
 
2233
   if (writeIssueDelays && !(codeSize & 0x3f))
2234
      size += 8;
2235
 
2236
   if (!insn->encSize) {
2237
      ERROR("skipping unencodable instruction: "); insn->print();
2238
      return false;
2239
   } else
2240
   if (codeSize + size > codeSizeLimit) {
2241
      ERROR("code emitter output buffer too small\n");
2242
      return false;
2243
   }
2244
 
2245
   if (writeIssueDelays) {
2246
      if (!(codeSize & 0x3f)) {
2247
         code[0] = 0x00000007; // cf issue delay "instruction"
2248
         code[1] = 0x20000000;
2249
         code += 2;
2250
         codeSize += 8;
2251
      }
2252
      const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2253
      uint32_t *data = code - (id * 2 + 2);
2254
      if (id <= 2) {
2255
         data[0] |= insn->sched << (id * 8 + 4);
2256
      } else
2257
      if (id == 3) {
2258
         data[0] |= insn->sched << 28;
2259
         data[1] |= insn->sched >> 4;
2260
      } else {
2261
         data[1] |= insn->sched << ((id - 4) * 8 + 4);
2262
      }
2263
   }
2264
 
2265
   // assert that instructions with multiple defs don't corrupt registers
2266
   for (int d = 0; insn->defExists(d); ++d)
2267
      assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2268
 
2269
   switch (insn->op) {
2270
   case OP_MOV:
2271
   case OP_RDSV:
2272
      emitMOV(insn);
2273
      break;
2274
   case OP_NOP:
2275
      break;
2276
   case OP_LOAD:
2277
      emitLOAD(insn);
2278
      break;
2279
   case OP_STORE:
2280
      emitSTORE(insn);
2281
      break;
2282
   case OP_LINTERP:
2283
   case OP_PINTERP:
2284
      emitINTERP(insn);
2285
      break;
2286
   case OP_VFETCH:
2287
      emitVFETCH(insn);
2288
      break;
2289
   case OP_EXPORT:
2290
      emitEXPORT(insn);
2291
      break;
2292
   case OP_PFETCH:
2293
      emitPFETCH(insn);
2294
      break;
2295
   case OP_EMIT:
2296
   case OP_RESTART:
2297
      emitOUT(insn);
2298
      break;
2299
   case OP_ADD:
2300
   case OP_SUB:
2301
      if (insn->dType == TYPE_F64)
2302
         emitDADD(insn);
2303
      else if (isFloatType(insn->dType))
2304
         emitFADD(insn);
2305
      else
2306
         emitUADD(insn);
2307
      break;
2308
   case OP_MUL:
2309
      if (insn->dType == TYPE_F64)
2310
         emitDMUL(insn);
2311
      else if (isFloatType(insn->dType))
2312
         emitFMUL(insn);
2313
      else
2314
         emitUMUL(insn);
2315
      break;
2316
   case OP_MAD:
2317
   case OP_FMA:
2318
      if (insn->dType == TYPE_F64)
2319
         emitDMAD(insn);
2320
      else if (isFloatType(insn->dType))
2321
         emitFMAD(insn);
2322
      else
2323
         emitIMAD(insn);
2324
      break;
2325
   case OP_SAD:
2326
      emitISAD(insn);
2327
      break;
2328
   case OP_NOT:
2329
      emitNOT(insn);
2330
      break;
2331
   case OP_AND:
2332
      emitLogicOp(insn, 0);
2333
      break;
2334
   case OP_OR:
2335
      emitLogicOp(insn, 1);
2336
      break;
2337
   case OP_XOR:
2338
      emitLogicOp(insn, 2);
2339
      break;
2340
   case OP_SHL:
2341
   case OP_SHR:
2342
      emitShift(insn);
2343
      break;
2344
   case OP_SET:
2345
   case OP_SET_AND:
2346
   case OP_SET_OR:
2347
   case OP_SET_XOR:
2348
      emitSET(insn->asCmp());
2349
      break;
2350
   case OP_SELP:
2351
      emitSELP(insn);
2352
      break;
2353
   case OP_SLCT:
2354
      emitSLCT(insn->asCmp());
2355
      break;
2356
   case OP_MIN:
2357
   case OP_MAX:
2358
      emitMINMAX(insn);
2359
      break;
2360
   case OP_ABS:
2361
   case OP_NEG:
2362
   case OP_CEIL:
2363
   case OP_FLOOR:
2364
   case OP_TRUNC:
2365
   case OP_CVT:
2366
   case OP_SAT:
2367
      emitCVT(insn);
2368
      break;
2369
   case OP_RSQ:
2370
      emitSFnOp(insn, 5 + 2 * insn->subOp);
2371
      break;
2372
   case OP_RCP:
2373
      emitSFnOp(insn, 4 + 2 * insn->subOp);
2374
      break;
2375
   case OP_LG2:
2376
      emitSFnOp(insn, 3);
2377
      break;
2378
   case OP_EX2:
2379
      emitSFnOp(insn, 2);
2380
      break;
2381
   case OP_SIN:
2382
      emitSFnOp(insn, 1);
2383
      break;
2384
   case OP_COS:
2385
      emitSFnOp(insn, 0);
2386
      break;
2387
   case OP_PRESIN:
2388
   case OP_PREEX2:
2389
      emitPreOp(insn);
2390
      break;
2391
   case OP_TEX:
2392
   case OP_TXB:
2393
   case OP_TXL:
2394
   case OP_TXD:
2395
   case OP_TXF:
2396
   case OP_TXG:
2397
   case OP_TXLQ:
2398
      emitTEX(insn->asTex());
2399
      break;
2400
   case OP_TXQ:
2401
      emitTXQ(insn->asTex());
2402
      break;
2403
   case OP_TEXBAR:
2404
      emitTEXBAR(insn);
2405
      break;
2406
   case OP_SUBFM:
2407
   case OP_SUCLAMP:
2408
   case OP_SUEAU:
2409
      emitSUCalc(insn);
2410
      break;
2411
   case OP_MADSP:
2412
      emitMADSP(insn);
2413
      break;
2414
   case OP_SULDB:
2415
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2416
         emitSULDGB(insn->asTex());
2417
      else
2418
         ERROR("SULDB not yet supported on < nve4\n");
2419
      break;
2420
   case OP_SUSTB:
2421
   case OP_SUSTP:
2422
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2423
         emitSUSTGx(insn->asTex());
2424
      else
2425
         ERROR("SUSTx not yet supported on < nve4\n");
2426
      break;
2427
   case OP_ATOM:
2428
      emitATOM(insn);
2429
      break;
2430
   case OP_BRA:
2431
   case OP_CALL:
2432
   case OP_PRERET:
2433
   case OP_RET:
2434
   case OP_DISCARD:
2435
   case OP_EXIT:
2436
   case OP_PRECONT:
2437
   case OP_CONT:
2438
   case OP_PREBREAK:
2439
   case OP_BREAK:
2440
   case OP_JOINAT:
2441
   case OP_BRKPT:
2442
   case OP_QUADON:
2443
   case OP_QUADPOP:
2444
      emitFlow(insn);
2445
      break;
2446
   case OP_QUADOP:
2447
      emitQUADOP(insn, insn->subOp, insn->lanes);
2448
      break;
2449
   case OP_DFDX:
2450
      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2451
      break;
2452
   case OP_DFDY:
2453
      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2454
      break;
2455
   case OP_POPCNT:
2456
      emitPOPC(insn);
2457
      break;
2458
   case OP_INSBF:
2459
      emitINSBF(insn);
2460
      break;
2461
   case OP_EXTBF:
2462
      emitEXTBF(insn);
2463
      break;
2464
   case OP_BFIND:
2465
      emitBFIND(insn);
2466
      break;
2467
   case OP_PERMT:
2468
      emitPERMT(insn);
2469
      break;
2470
   case OP_JOIN:
2471
      emitNOP(insn);
2472
      insn->join = 1;
2473
      break;
2474
   case OP_BAR:
2475
      emitBAR(insn);
2476
      break;
2477
   case OP_MEMBAR:
2478
      emitMEMBAR(insn);
2479
      break;
2480
   case OP_CCTL:
2481
      emitCCTL(insn);
2482
      break;
2483
   case OP_VSHL:
2484
      emitVSHL(insn);
2485
      break;
2486
   case OP_PIXLD:
2487
      emitPIXLD(insn);
2488
      break;
2489
   case OP_PHI:
2490
   case OP_UNION:
2491
   case OP_CONSTRAINT:
2492
      ERROR("operation should have been eliminated");
2493
      return false;
2494
   case OP_EXP:
2495
   case OP_LOG:
2496
   case OP_SQRT:
2497
   case OP_POW:
2498
      ERROR("operation should have been lowered\n");
2499
      return false;
2500
   default:
2501
      ERROR("unknow op\n");
2502
      return false;
2503
   }
2504
 
2505
   if (insn->join) {
2506
      code[0] |= 0x10;
2507
      assert(insn->encSize == 8);
2508
   }
2509
 
2510
   code += insn->encSize / 4;
2511
   codeSize += insn->encSize;
2512
   return true;
2513
}
2514
 
2515
// Return the smallest encoding size (in bytes) usable for instruction i.
//
// NOTE: the first test ends in "|| 1", so this function currently always
// returns 8 and the remaining checks are unreachable. They are kept as the
// conditions under which a 4-byte encoding would have to be rejected.
uint32_t
CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
{
   const Target::OpInfo &opInfo = targ->getOpInfo(i);

   if (writeIssueDelays || opInfo.minEncSize == 8 || 1)
      return 8;

   // Instruction-level modifiers that only the long form can express.
   if (i->ftz || i->saturate || i->join || i->rnd != ROUND_N)
      return 8;
   if (i->op == OP_MAD && i->predSrc >= 0)
      return 8;

   if (i->op == OP_PINTERP) {
      if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
         return 8;
   } else
   if (i->op == OP_MOV && i->lanes != 0xf) {
      return 8;
   }

   for (int s = 0; i->srcExists(s); ++s) {
      if (i->src(s).isIndirect(0))
         return 8;

      switch (i->src(s).getFile()) {
      case FILE_MEMORY_CONST:
         if (SDATA(i->src(s)).offset >= 0x100)
            return 8;
         if (i->getSrc(s)->reg.fileIndex > 1 &&
             i->getSrc(s)->reg.fileIndex != 16)
            return 8;
         break;
      case FILE_IMMEDIATE:
         if (i->dType == TYPE_F32) {
            if (SDATA(i->src(s)).u32 >= 0x100)
               return 8;
         } else {
            if (SDATA(i->src(s)).u32 > 0xff)
               return 8;
         }
         break;
      default:
         break;
      }

      // Source modifiers are not examined for OP_CVT.
      if (i->op != OP_CVT && i->src(s).mod != Modifier(0)) {
         const bool absOnly = i->src(s).mod == Modifier(NV50_IR_MOD_ABS);
         const bool negOnly = i->src(s).mod == Modifier(NV50_IR_MOD_NEG);

         if (absOnly && i->op != OP_RSQ)
            return 8;
         if (negOnly && (i->op != OP_ADD || s != 0))
            return 8;
      }
   }

   return 4;
}
2573
 
2574
// Simplified, erring on safe side.
2575
class SchedDataCalculator : public Pass
2576
{
2577
public:
2578
   SchedDataCalculator(const Target *targ) : targ(targ) { }
2579
 
2580
private:
2581
   struct RegScores
2582
   {
2583
      struct Resource {
2584
         int st[DATA_FILE_COUNT]; // LD to LD delay 3
2585
         int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2586
         int tex; // TEX to non-TEX delay 17 (0x11)
2587
         int sfu; // SFU to SFU delay 3 (except PRE-ops)
2588
         int imul; // integer MUL to MUL delay 3
2589
      } res;
2590
      struct ScoreData {
2591
         int r[64];
2592
         int p[8];
2593
         int c;
2594
      } rd, wr;
2595
      int base;
2596
 
2597
      void rebase(const int base)
2598
      {
2599
         const int delta = this->base - base;
2600
         if (!delta)
2601
            return;
2602
         this->base = 0;
2603
 
2604
         for (int i = 0; i < 64; ++i) {
2605
            rd.r[i] += delta;
2606
            wr.r[i] += delta;
2607
         }
2608
         for (int i = 0; i < 8; ++i) {
2609
            rd.p[i] += delta;
2610
            wr.p[i] += delta;
2611
         }
2612
         rd.c += delta;
2613
         wr.c += delta;
2614
 
2615
         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2616
            res.ld[f] += delta;
2617
            res.st[f] += delta;
2618
         }
2619
         res.sfu += delta;
2620
         res.imul += delta;
2621
         res.tex += delta;
2622
      }
2623
      void wipe()
2624
      {
2625
         memset(&rd, 0, sizeof(rd));
2626
         memset(&wr, 0, sizeof(wr));
2627
         memset(&res, 0, sizeof(res));
2628
      }
2629
      int getLatest(const ScoreData& d) const
2630
      {
2631
         int max = 0;
2632
         for (int i = 0; i < 64; ++i)
2633
            if (d.r[i] > max)
2634
               max = d.r[i];
2635
         for (int i = 0; i < 8; ++i)
2636
            if (d.p[i] > max)
2637
               max = d.p[i];
2638
         if (d.c > max)
2639
            max = d.c;
2640
         return max;
2641
      }
2642
      inline int getLatestRd() const
2643
      {
2644
         return getLatest(rd);
2645
      }
2646
      inline int getLatestWr() const
2647
      {
2648
         return getLatest(wr);
2649
      }
2650
      inline int getLatest() const
2651
      {
2652
         const int a = getLatestRd();
2653
         const int b = getLatestWr();
2654
 
2655
         int max = MAX2(a, b);
2656
         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2657
            max = MAX2(res.ld[f], max);
2658
            max = MAX2(res.st[f], max);
2659
         }
2660
         max = MAX2(res.sfu, max);
2661
         max = MAX2(res.imul, max);
2662
         max = MAX2(res.tex, max);
2663
         return max;
2664
      }
2665
      void setMax(const RegScores *that)
2666
      {
2667
         for (int i = 0; i < 64; ++i) {
2668
            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
2669
            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
2670
         }
2671
         for (int i = 0; i < 8; ++i) {
2672
            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
2673
            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
2674
         }
2675
         rd.c = MAX2(rd.c, that->rd.c);
2676
         wr.c = MAX2(wr.c, that->wr.c);
2677
 
2678
         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2679
            res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
2680
            res.st[f] = MAX2(res.st[f], that->res.st[f]);
2681
         }
2682
         res.sfu = MAX2(res.sfu, that->res.sfu);
2683
         res.imul = MAX2(res.imul, that->res.imul);
2684
         res.tex = MAX2(res.tex, that->res.tex);
2685
      }
2686
      void print(int cycle)
2687
      {
2688
         for (int i = 0; i < 64; ++i) {
2689
            if (rd.r[i] > cycle)
2690
               INFO("rd $r%i @ %i\n", i, rd.r[i]);
2691
            if (wr.r[i] > cycle)
2692
               INFO("wr $r%i @ %i\n", i, wr.r[i]);
2693
         }
2694
         for (int i = 0; i < 8; ++i) {
2695
            if (rd.p[i] > cycle)
2696
               INFO("rd $p%i @ %i\n", i, rd.p[i]);
2697
            if (wr.p[i] > cycle)
2698
               INFO("wr $p%i @ %i\n", i, wr.p[i]);
2699
         }
2700
         if (rd.c > cycle)
2701
            INFO("rd $c @ %i\n", rd.c);
2702
         if (wr.c > cycle)
2703
            INFO("wr $c @ %i\n", wr.c);
2704
         if (res.sfu > cycle)
2705
            INFO("sfu @ %i\n", res.sfu);
2706
         if (res.imul > cycle)
2707
            INFO("imul @ %i\n", res.imul);
2708
         if (res.tex > cycle)
2709
            INFO("tex @ %i\n", res.tex);
2710
      }
2711
   };
2712
 
2713
   RegScores *score; // for current BB
2714
   std::vector scoreBoards;
2715
   int prevData;
2716
   operation prevOp;
2717
 
2718
   const Target *targ;
2719
 
2720
   bool visit(Function *);
2721
   bool visit(BasicBlock *);
2722
 
2723
   void commitInsn(const Instruction *, int cycle);
2724
   int calcDelay(const Instruction *, int cycle) const;
2725
   void setDelay(Instruction *, int delay, Instruction *next);
2726
 
2727
   void recordRd(const Value *, const int ready);
2728
   void recordWr(const Value *, const int ready);
2729
   void checkRd(const Value *, int cycle, int& delay) const;
2730
   void checkWr(const Value *, int cycle, int& delay) const;
2731
 
2732
   int getCycles(const Instruction *, int origDelay) const;
2733
};
2734
 
2735
void
2736
SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
2737
{
2738
   if (insn->op == OP_EXIT || insn->op == OP_RET)
2739
      delay = MAX2(delay, 14);
2740
 
2741
   if (insn->op == OP_TEXBAR) {
2742
      // TODO: except if results not used before EXIT
2743
      insn->sched = 0xc2;
2744
   } else
2745
   if (insn->op == OP_JOIN || insn->join) {
2746
      insn->sched = 0x00;
2747
   } else
2748
   if (delay >= 0 || prevData == 0x04 ||
2749
       !next || !targ->canDualIssue(insn, next)) {
2750
      insn->sched = static_cast(MAX2(delay, 0));
2751
      if (prevOp == OP_EXPORT)
2752
         insn->sched |= 0x40;
2753
      else
2754
         insn->sched |= 0x20;
2755
   } else {
2756
      insn->sched = 0x04; // dual-issue
2757
   }
2758
 
2759
   if (prevData != 0x04 || prevOp != OP_EXPORT)
2760
      if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2761
         prevOp = insn->op;
2762
 
2763
   prevData = insn->sched;
2764
}
2765
 
2766
int
2767
SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2768
{
2769
   if (insn->sched & 0x80) {
2770
      int c = (insn->sched & 0x0f) * 2 + 1;
2771
      if (insn->op == OP_TEXBAR && origDelay > 0)
2772
         c += origDelay;
2773
      return c;
2774
   }
2775
   if (insn->sched & 0x60)
2776
      return (insn->sched & 0x1f) + 1;
2777
   return (insn->sched == 0x04) ? 0 : 32;
2778
}
2779
 
2780
bool
2781
SchedDataCalculator::visit(Function *func)
2782
{
2783
   scoreBoards.resize(func->cfg.getSize());
2784
   for (size_t i = 0; i < scoreBoards.size(); ++i)
2785
      scoreBoards[i].wipe();
2786
   return true;
2787
}
2788
 
2789
bool
2790
SchedDataCalculator::visit(BasicBlock *bb)
2791
{
2792
   Instruction *insn;
2793
   Instruction *next = NULL;
2794
 
2795
   int cycle = 0;
2796
 
2797
   prevData = 0x00;
2798
   prevOp = OP_NOP;
2799
   score = &scoreBoards.at(bb->getId());
2800
 
2801
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2802
      // back branches will wait until all target dependencies are satisfied
2803
      if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
2804
         continue;
2805
      BasicBlock *in = BasicBlock::get(ei.getNode());
2806
      if (in->getExit()) {
2807
         if (prevData != 0x04)
2808
            prevData = in->getExit()->sched;
2809
         prevOp = in->getExit()->op;
2810
      }
2811
      score->setMax(&scoreBoards.at(in->getId()));
2812
   }
2813
   if (bb->cfg.incidentCount() > 1)
2814
      prevOp = OP_NOP;
2815
 
2816
#ifdef NVC0_DEBUG_SCHED_DATA
2817
   INFO("=== BB:%i initial scores\n", bb->getId());
2818
   score->print(cycle);
2819
#endif
2820
 
2821
   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2822
      next = insn->next;
2823
 
2824
      commitInsn(insn, cycle);
2825
      int delay = calcDelay(next, cycle);
2826
      setDelay(insn, delay, next);
2827
      cycle += getCycles(insn, delay);
2828
 
2829
#ifdef NVC0_DEBUG_SCHED_DATA
2830
      INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2831
      insn->print();
2832
      next->print();
2833
#endif
2834
   }
2835
   if (!insn)
2836
      return true;
2837
   commitInsn(insn, cycle);
2838
 
2839
   int bbDelay = -1;
2840
 
2841
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2842
      BasicBlock *out = BasicBlock::get(ei.getNode());
2843
 
2844
      if (ei.getType() != Graph::Edge::BACK) {
2845
         // only test the first instruction of the outgoing block
2846
         next = out->getEntry();
2847
         if (next)
2848
            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2849
      } else {
2850
         // wait until all dependencies are satisfied
2851
         const int regsFree = score->getLatest();
2852
         next = out->getFirst();
2853
         for (int c = cycle; next && c < regsFree; next = next->next) {
2854
            bbDelay = MAX2(bbDelay, calcDelay(next, c));
2855
            c += getCycles(next, bbDelay);
2856
         }
2857
         next = NULL;
2858
      }
2859
   }
2860
   if (bb->cfg.outgoingCount() != 1)
2861
      next = NULL;
2862
   setDelay(insn, bbDelay, next);
2863
   cycle += getCycles(insn, bbDelay);
2864
 
2865
   score->rebase(cycle); // common base for initializing out blocks' scores
2866
   return true;
2867
}
2868
 
2869
#define NVE4_MAX_ISSUE_DELAY 0x1f
2870
int
2871
SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2872
{
2873
   int delay = 0, ready = cycle;
2874
 
2875
   for (int s = 0; insn->srcExists(s); ++s)
2876
      checkRd(insn->getSrc(s), cycle, delay);
2877
   // WAR & WAW don't seem to matter
2878
   // for (int s = 0; insn->srcExists(s); ++s)
2879
   //   recordRd(insn->getSrc(s), cycle);
2880
 
2881
   switch (Target::getOpClass(insn->op)) {
2882
   case OPCLASS_SFU:
2883
      ready = score->res.sfu;
2884
      break;
2885
   case OPCLASS_ARITH:
2886
      if (insn->op == OP_MUL && !isFloatType(insn->dType))
2887
         ready = score->res.imul;
2888
      break;
2889
   case OPCLASS_TEXTURE:
2890
      ready = score->res.tex;
2891
      break;
2892
   case OPCLASS_LOAD:
2893
      ready = score->res.ld[insn->src(0).getFile()];
2894
      break;
2895
   case OPCLASS_STORE:
2896
      ready = score->res.st[insn->src(0).getFile()];
2897
      break;
2898
   default:
2899
      break;
2900
   }
2901
   if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2902
      ready = MAX2(ready, score->res.tex);
2903
 
2904
   delay = MAX2(delay, ready - cycle);
2905
 
2906
   // if can issue next cycle, delay is 0, not 1
2907
   return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2908
}
2909
 
2910
void
2911
SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2912
{
2913
   const int ready = cycle + targ->getLatency(insn);
2914
 
2915
   for (int d = 0; insn->defExists(d); ++d)
2916
      recordWr(insn->getDef(d), ready);
2917
   // WAR & WAW don't seem to matter
2918
   // for (int s = 0; insn->srcExists(s); ++s)
2919
   //   recordRd(insn->getSrc(s), cycle);
2920
 
2921
   switch (Target::getOpClass(insn->op)) {
2922
   case OPCLASS_SFU:
2923
      score->res.sfu = cycle + 4;
2924
      break;
2925
   case OPCLASS_ARITH:
2926
      if (insn->op == OP_MUL && !isFloatType(insn->dType))
2927
         score->res.imul = cycle + 4;
2928
      break;
2929
   case OPCLASS_TEXTURE:
2930
      score->res.tex = cycle + 18;
2931
      break;
2932
   case OPCLASS_LOAD:
2933
      if (insn->src(0).getFile() == FILE_MEMORY_CONST)
2934
         break;
2935
      score->res.ld[insn->src(0).getFile()] = cycle + 4;
2936
      score->res.st[insn->src(0).getFile()] = ready;
2937
      break;
2938
   case OPCLASS_STORE:
2939
      score->res.st[insn->src(0).getFile()] = cycle + 4;
2940
      score->res.ld[insn->src(0).getFile()] = ready;
2941
      break;
2942
   case OPCLASS_OTHER:
2943
      if (insn->op == OP_TEXBAR)
2944
         score->res.tex = cycle;
2945
      break;
2946
   default:
2947
      break;
2948
   }
2949
 
2950
#ifdef NVC0_DEBUG_SCHED_DATA
2951
   score->print(cycle);
2952
#endif
2953
}
2954
 
2955
void
2956
SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
2957
{
2958
   int ready = cycle;
2959
   int a, b;
2960
 
2961
   switch (v->reg.file) {
2962
   case FILE_GPR:
2963
      a = v->reg.data.id;
2964
      b = a + v->reg.size / 4;
2965
      for (int r = a; r < b; ++r)
2966
         ready = MAX2(ready, score->rd.r[r]);
2967
      break;
2968
   case FILE_PREDICATE:
2969
      ready = MAX2(ready, score->rd.p[v->reg.data.id]);
2970
      break;
2971
   case FILE_FLAGS:
2972
      ready = MAX2(ready, score->rd.c);
2973
      break;
2974
   case FILE_SHADER_INPUT:
2975
   case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
2976
   case FILE_MEMORY_LOCAL:
2977
   case FILE_MEMORY_CONST:
2978
   case FILE_MEMORY_SHARED:
2979
   case FILE_MEMORY_GLOBAL:
2980
   case FILE_SYSTEM_VALUE:
2981
      // TODO: any restrictions here ?
2982
      break;
2983
   case FILE_IMMEDIATE:
2984
      break;
2985
   default:
2986
      assert(0);
2987
      break;
2988
   }
2989
   if (cycle < ready)
2990
      delay = MAX2(delay, ready - cycle);
2991
}
2992
 
2993
void
2994
SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
2995
{
2996
   int ready = cycle;
2997
   int a, b;
2998
 
2999
   switch (v->reg.file) {
3000
   case FILE_GPR:
3001
      a = v->reg.data.id;
3002
      b = a + v->reg.size / 4;
3003
      for (int r = a; r < b; ++r)
3004
         ready = MAX2(ready, score->wr.r[r]);
3005
      break;
3006
   case FILE_PREDICATE:
3007
      ready = MAX2(ready, score->wr.p[v->reg.data.id]);
3008
      break;
3009
   default:
3010
      assert(v->reg.file == FILE_FLAGS);
3011
      ready = MAX2(ready, score->wr.c);
3012
      break;
3013
   }
3014
   if (cycle < ready)
3015
      delay = MAX2(delay, ready - cycle);
3016
}
3017
 
3018
void
3019
SchedDataCalculator::recordWr(const Value *v, const int ready)
3020
{
3021
   int a = v->reg.data.id;
3022
 
3023
   if (v->reg.file == FILE_GPR) {
3024
      int b = a + v->reg.size / 4;
3025
      for (int r = a; r < b; ++r)
3026
         score->rd.r[r] = ready;
3027
   } else
3028
   // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
3029
   if (v->reg.file == FILE_PREDICATE) {
3030
      score->rd.p[a] = ready + 4;
3031
   } else {
3032
      assert(v->reg.file == FILE_FLAGS);
3033
      score->rd.c = ready + 4;
3034
   }
3035
}
3036
 
3037
void
3038
SchedDataCalculator::recordRd(const Value *v, const int ready)
3039
{
3040
   int a = v->reg.data.id;
3041
 
3042
   if (v->reg.file == FILE_GPR) {
3043
      int b = a + v->reg.size / 4;
3044
      for (int r = a; r < b; ++r)
3045
         score->wr.r[r] = ready;
3046
   } else
3047
   if (v->reg.file == FILE_PREDICATE) {
3048
      score->wr.p[a] = ready;
3049
   } else
3050
   if (v->reg.file == FILE_FLAGS) {
3051
      score->wr.c = ready;
3052
   }
3053
}
3054
 
3055
bool
3056
calculateSchedDataNVC0(const Target *targ, Function *func)
3057
{
3058
   SchedDataCalculator sched(targ);
3059
   return sched.run(func, true, true);
3060
}
3061
 
3062
void
3063
CodeEmitterNVC0::prepareEmission(Function *func)
3064
{
3065
   CodeEmitter::prepareEmission(func);
3066
 
3067
   if (targ->hasSWSched)
3068
      calculateSchedDataNVC0(targ, func);
3069
}
3070
 
3071
// Construct an emitter for the given target with no output buffer attached.
// Issue delays are written only if the target uses software scheduling.
// progType is not set here; it is assigned later via setProgramType().
CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
   : CodeEmitter(target),
     targNVC0(target),
     writeIssueDelays(target->hasSWSched)
{
   // start out without any output buffer or relocation info
   relocInfo = NULL;
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;
}
3080
 
3081
// Allocate a new NVC0 code emitter for this target, configured for the
// given program type. The returned object was created with new.
CodeEmitter *
TargetNVC0::createCodeEmitterNVC0(Program::Type type)
{
   CodeEmitterNVC0 *const emitter = new CodeEmitterNVC0(this);

   emitter->setProgramType(type);

   return emitter;
}
3088
 
3089
// Create the code emitter matching this target's chipset: the NVC0 emitter
// for chipsets below NVISA_GK20A_CHIPSET, the GK110 emitter otherwise.
CodeEmitter *
TargetNVC0::getCodeEmitter(Program::Type type)
{
   if (chipset < NVISA_GK20A_CHIPSET)
      return createCodeEmitterNVC0(type);

   return createCodeEmitterGK110(type);
}
3096
 
3097
} // namespace nv50_ir