Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright 2011 Christoph Bumiller
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
 * OTHER DEALINGS IN THE SOFTWARE.
21
 */
22
 
23
#include "nv50_ir_target_nvc0.h"
24
 
25
namespace nv50_ir {
26
 
27
// Argh, all these assertions ...
28
 
29
class CodeEmitterNVC0 : public CodeEmitter
30
{
31
public:
32
   CodeEmitterNVC0(const TargetNVC0 *);
33
 
34
   virtual bool emitInstruction(Instruction *);
35
   virtual uint32_t getMinEncodingSize(const Instruction *) const;
36
   virtual void prepareEmission(Function *);
37
 
38
   inline void setProgramType(Program::Type pType) { progType = pType; }
39
 
40
private:
41
   const TargetNVC0 *targNVC0;
42
 
43
   Program::Type progType;
44
 
45
   const bool writeIssueDelays;
46
 
47
private:
48
   void emitForm_A(const Instruction *, uint64_t);
49
   void emitForm_B(const Instruction *, uint64_t);
50
   void emitForm_S(const Instruction *, uint32_t, bool pred);
51
 
52
   void emitPredicate(const Instruction *);
53
 
54
   void setAddress16(const ValueRef&);
55
   void setAddress24(const ValueRef&);
56
   void setAddressByFile(const ValueRef&);
57
   void setImmediate(const Instruction *, const int s); // needs op already set
58
   void setImmediateS8(const ValueRef&);
59
   void setSUConst16(const Instruction *, const int s);
60
   void setSUPred(const Instruction *, const int s);
61
 
62
   void emitCondCode(CondCode cc, int pos);
63
   void emitInterpMode(const Instruction *);
64
   void emitLoadStoreType(DataType ty);
65
   void emitSUGType(DataType);
66
   void emitCachingMode(CacheMode c);
67
 
68
   void emitShortSrc2(const ValueRef&);
69
 
70
   inline uint8_t getSRegEncoding(const ValueRef&);
71
 
72
   void roundMode_A(const Instruction *);
73
   void roundMode_C(const Instruction *);
74
   void roundMode_CS(const Instruction *);
75
 
76
   void emitNegAbs12(const Instruction *);
77
 
78
   void emitNOP(const Instruction *);
79
 
80
   void emitLOAD(const Instruction *);
81
   void emitSTORE(const Instruction *);
82
   void emitMOV(const Instruction *);
83
   void emitATOM(const Instruction *);
84
   void emitMEMBAR(const Instruction *);
85
   void emitCCTL(const Instruction *);
86
 
87
   void emitINTERP(const Instruction *);
88
   void emitPFETCH(const Instruction *);
89
   void emitVFETCH(const Instruction *);
90
   void emitEXPORT(const Instruction *);
91
   void emitOUT(const Instruction *);
92
 
93
   void emitUADD(const Instruction *);
94
   void emitFADD(const Instruction *);
95
   void emitUMUL(const Instruction *);
96
   void emitFMUL(const Instruction *);
97
   void emitIMAD(const Instruction *);
98
   void emitISAD(const Instruction *);
99
   void emitFMAD(const Instruction *);
100
   void emitMADSP(const Instruction *);
101
 
102
   void emitNOT(Instruction *);
103
   void emitLogicOp(const Instruction *, uint8_t subOp);
104
   void emitPOPC(const Instruction *);
105
   void emitINSBF(const Instruction *);
106
   void emitEXTBF(const Instruction *);
107
   void emitPERMT(const Instruction *);
108
   void emitShift(const Instruction *);
109
 
110
   void emitSFnOp(const Instruction *, uint8_t subOp);
111
 
112
   void emitCVT(Instruction *);
113
   void emitMINMAX(const Instruction *);
114
   void emitPreOp(const Instruction *);
115
 
116
   void emitSET(const CmpInstruction *);
117
   void emitSLCT(const CmpInstruction *);
118
   void emitSELP(const Instruction *);
119
 
120
   void emitTEXBAR(const Instruction *);
121
   void emitTEX(const TexInstruction *);
122
   void emitTEXCSAA(const TexInstruction *);
123
   void emitTXQ(const TexInstruction *);
124
 
125
   void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask);
126
 
127
   void emitFlow(const Instruction *);
128
   void emitBAR(const Instruction *);
129
 
130
   void emitSUCLAMPMode(uint16_t);
131
   void emitSUCalc(Instruction *);
132
   void emitSULDGB(const TexInstruction *);
133
   void emitSUSTGx(const TexInstruction *);
134
 
135
   void emitVSHL(const Instruction *);
136
   void emitVectorSubOp(const Instruction *);
137
 
138
   inline void defId(const ValueDef&, const int pos);
139
   inline void defId(const Instruction *, int d, const int pos);
140
   inline void srcId(const ValueRef&, const int pos);
141
   inline void srcId(const ValueRef *, const int pos);
142
   inline void srcId(const Instruction *, int s, const int pos);
143
   inline void srcAddr32(const ValueRef&, int pos, int shr);
144
 
145
   inline bool isLIMM(const ValueRef&, DataType ty);
146
};
147
 
148
// HEX64(h, l): pastes two 8-digit hex halves into one 64-bit literal, so the
// high and low instruction words can be read separately at the call site.
#define HEX64(h, l) 0x##h##l##ULL

// Register data of the representative value behind a source (SDATA) or a
// definition (DDATA) reference; both expand to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
153
 
154
void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos)
155
{
156
   code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32);
157
}
158
 
159
void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos)
160
{
161
   code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32);
162
}
163
 
164
void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos)
165
{
166
   int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63;
167
   code[pos / 32] |= r << (pos % 32);
168
}
169
 
170
void
171
CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr)
172
{
173
   const uint32_t offset = SDATA(src).offset >> shr;
174
 
175
   code[pos / 32] |= offset << (pos % 32);
176
   if (pos && (pos < 32))
177
      code[1] |= offset >> (32 - pos);
178
}
179
 
180
void CodeEmitterNVC0::defId(const ValueDef& def, const int pos)
181
{
182
   code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32);
183
}
184
 
185
void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos)
186
{
187
   int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63;
188
   code[pos / 32] |= r << (pos % 32);
189
}
190
 
191
bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty)
192
{
193
   const ImmediateValue *imm = ref.get()->asImm();
194
 
195
   return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000));
196
}
197
 
198
void
199
CodeEmitterNVC0::roundMode_A(const Instruction *insn)
200
{
201
   switch (insn->rnd) {
202
   case ROUND_M: code[1] |= 1 << 23; break;
203
   case ROUND_P: code[1] |= 2 << 23; break;
204
   case ROUND_Z: code[1] |= 3 << 23; break;
205
   default:
206
      assert(insn->rnd == ROUND_N);
207
      break;
208
   }
209
}
210
 
211
void
212
CodeEmitterNVC0::emitNegAbs12(const Instruction *i)
213
{
214
   if (i->src(1).mod.abs()) code[0] |= 1 << 6;
215
   if (i->src(0).mod.abs()) code[0] |= 1 << 7;
216
   if (i->src(1).mod.neg()) code[0] |= 1 << 8;
217
   if (i->src(0).mod.neg()) code[0] |= 1 << 9;
218
}
219
 
220
// Translates `cc` into its numeric encoding and ORs it in at bit position
// `pos` (word pos / 32, bit pos % 32). An unknown code asserts and encodes 0.
void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
{
   uint8_t enc = 0;

   switch (cc) {
   // comparison results, listed in encoding order; the *U variants are the
   // plain code with bit 3 set
   case CC_FL:  enc = 0x0; break;
   case CC_LT:  enc = 0x1; break;
   case CC_EQ:  enc = 0x2; break;
   case CC_LE:  enc = 0x3; break;
   case CC_GT:  enc = 0x4; break;
   case CC_NE:  enc = 0x5; break;
   case CC_GE:  enc = 0x6; break;
   case CC_LTU: enc = 0x9; break;
   case CC_EQU: enc = 0xa; break;
   case CC_LEU: enc = 0xb; break;
   case CC_GTU: enc = 0xc; break;
   case CC_NEU: enc = 0xd; break;
   case CC_GEU: enc = 0xe; break;
   case CC_TR:  enc = 0xf; break;

   // flag tests, listed in encoding order
   case CC_NO: enc = 0x10; break;
   case CC_NC: enc = 0x11; break;
   case CC_NS: enc = 0x12; break;
   case CC_NA: enc = 0x13; break;
   case CC_A:  enc = 0x14; break;
   case CC_S:  enc = 0x15; break;
   case CC_C:  enc = 0x16; break;
   case CC_O:  enc = 0x17; break;

   default:
      assert(!"invalid condition code");
      break;
   }
   code[pos / 32] |= enc << (pos % 32);
}
256
 
257
void
258
CodeEmitterNVC0::emitPredicate(const Instruction *i)
259
{
260
   if (i->predSrc >= 0) {
261
      assert(i->getPredicate()->reg.file == FILE_PREDICATE);
262
      srcId(i->src(i->predSrc), 10);
263
      if (i->cc == CC_NOT_P)
264
         code[0] |= 0x2000; // negate
265
   } else {
266
      code[0] |= 0x1c00;
267
   }
268
}
269
 
270
void
271
CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
272
{
273
   switch (src.getFile()) {
274
   case FILE_MEMORY_GLOBAL:
275
      srcAddr32(src, 26, 0);
276
      break;
277
   case FILE_MEMORY_LOCAL:
278
   case FILE_MEMORY_SHARED:
279
      setAddress24(src);
280
      break;
281
   default:
282
      assert(src.getFile() == FILE_MEMORY_CONST);
283
      setAddress16(src);
284
      break;
285
   }
286
}
287
 
288
void
289
CodeEmitterNVC0::setAddress16(const ValueRef& src)
290
{
291
   Symbol *sym = src.get()->asSym();
292
 
293
   assert(sym);
294
 
295
   code[0] |= (sym->reg.data.offset & 0x003f) << 26;
296
   code[1] |= (sym->reg.data.offset & 0xffc0) >> 6;
297
}
298
 
299
void
300
CodeEmitterNVC0::setAddress24(const ValueRef& src)
301
{
302
   Symbol *sym = src.get()->asSym();
303
 
304
   assert(sym);
305
 
306
   code[0] |= (sym->reg.data.offset & 0x00003f) << 26;
307
   code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6;
308
}
309
 
310
void
311
CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
312
{
313
   const ImmediateValue *imm = i->src(s).get()->asImm();
314
   uint32_t u32;
315
 
316
   assert(imm);
317
   u32 = imm->reg.data.u32;
318
 
319
   if ((code[0] & 0xf) == 0x2) {
320
      // LIMM
321
      code[0] |= (u32 & 0x3f) << 26;
322
      code[1] |= u32 >> 6;
323
   } else
324
   if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) {
325
      // integer immediate
326
      assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000);
327
      assert(!(code[1] & 0xc000));
328
      u32 &= 0xfffff;
329
      code[0] |= (u32 & 0x3f) << 26;
330
      code[1] |= 0xc000 | (u32 >> 6);
331
   } else {
332
      // float immediate
333
      assert(!(u32 & 0x00000fff));
334
      assert(!(code[1] & 0xc000));
335
      code[0] |= ((u32 >> 12) & 0x3f) << 26;
336
      code[1] |= 0xc000 | (u32 >> 18);
337
   }
338
}
339
 
340
void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref)
341
{
342
   const ImmediateValue *imm = ref.get()->asImm();
343
 
344
   int8_t s8 = static_cast(imm->reg.data.s32);
345
 
346
   assert(s8 == imm->reg.data.s32);
347
 
348
   code[0] |= (s8 & 0x3f) << 26;
349
   code[0] |= (s8 >> 6) << 8;
350
}
351
 
352
void
353
CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc)
354
{
355
   code[0] = opc;
356
   code[1] = opc >> 32;
357
 
358
   emitPredicate(i);
359
 
360
   defId(i->def(0), 14);
361
 
362
   int s1 = 26;
363
   if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST)
364
      s1 = 49;
365
 
366
   for (int s = 0; s < 3 && i->srcExists(s); ++s) {
367
      switch (i->getSrc(s)->reg.file) {
368
      case FILE_MEMORY_CONST:
369
         assert(!(code[1] & 0xc000));
370
         code[1] |= (s == 2) ? 0x8000 : 0x4000;
371
         code[1] |= i->getSrc(s)->reg.fileIndex << 10;
372
         setAddress16(i->src(s));
373
         break;
374
      case FILE_IMMEDIATE:
375
         assert(s == 1 ||
376
                i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2);
377
         assert(!(code[1] & 0xc000));
378
         setImmediate(i, s);
379
         break;
380
      case FILE_GPR:
381
         if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst
382
            break;
383
         srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20);
384
         break;
385
      default:
386
         // ignore here, can be predicate or flags, but must not be address
387
         break;
388
      }
389
   }
390
}
391
 
392
void
393
CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc)
394
{
395
   code[0] = opc;
396
   code[1] = opc >> 32;
397
 
398
   emitPredicate(i);
399
 
400
   defId(i->def(0), 14);
401
 
402
   switch (i->src(0).getFile()) {
403
   case FILE_MEMORY_CONST:
404
      assert(!(code[1] & 0xc000));
405
      code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10);
406
      setAddress16(i->src(0));
407
      break;
408
   case FILE_IMMEDIATE:
409
      assert(!(code[1] & 0xc000));
410
      setImmediate(i, 0);
411
      break;
412
   case FILE_GPR:
413
      srcId(i->src(0), 26);
414
      break;
415
   default:
416
      // ignore here, can be predicate or flags, but must not be address
417
      break;
418
   }
419
}
420
 
421
void
422
CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred)
423
{
424
   code[0] = opc;
425
 
426
   int ss2a = 0;
427
   if (opc == 0x0d || opc == 0x0e)
428
      ss2a = 2;
429
 
430
   defId(i->def(0), 14);
431
   srcId(i->src(0), 20);
432
 
433
   assert(pred || (i->predSrc < 0));
434
   if (pred)
435
      emitPredicate(i);
436
 
437
   for (int s = 1; s < 3 && i->srcExists(s); ++s) {
438
      if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) {
439
         assert(!(code[0] & (0x300 >> ss2a)));
440
         switch (i->src(s).get()->reg.fileIndex) {
441
         case 0:  code[0] |= 0x100 >> ss2a; break;
442
         case 1:  code[0] |= 0x200 >> ss2a; break;
443
         case 16: code[0] |= 0x300 >> ss2a; break;
444
         default:
445
            ERROR("invalid c[] space for short form\n");
446
            break;
447
         }
448
         if (s == 1)
449
            code[0] |= i->getSrc(s)->reg.data.offset << 24;
450
         else
451
            code[0] |= i->getSrc(s)->reg.data.offset << 6;
452
      } else
453
      if (i->src(s).getFile() == FILE_IMMEDIATE) {
454
         assert(s == 1);
455
         setImmediateS8(i->src(s));
456
      } else
457
      if (i->src(s).getFile() == FILE_GPR) {
458
         srcId(i->src(s), (s == 1) ? 26 : 8);
459
      }
460
   }
461
}
462
 
463
void
464
CodeEmitterNVC0::emitShortSrc2(const ValueRef &src)
465
{
466
   if (src.getFile() == FILE_MEMORY_CONST) {
467
      switch (src.get()->reg.fileIndex) {
468
      case 0:  code[0] |= 0x100; break;
469
      case 1:  code[0] |= 0x200; break;
470
      case 16: code[0] |= 0x300; break;
471
      default:
472
         assert(!"unsupported file index for short op");
473
         break;
474
      }
475
      srcAddr32(src, 20, 2);
476
   } else {
477
      srcId(src, 20);
478
      assert(src.getFile() == FILE_GPR);
479
   }
480
}
481
 
482
void
483
CodeEmitterNVC0::emitNOP(const Instruction *i)
484
{
485
   code[0] = 0x000001e4;
486
   code[1] = 0x40000000;
487
   emitPredicate(i);
488
}
489
 
490
void
491
CodeEmitterNVC0::emitFMAD(const Instruction *i)
492
{
493
   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
494
 
495
   if (i->encSize == 8) {
496
      if (isLIMM(i->src(1), TYPE_F32)) {
497
         emitForm_A(i, HEX64(20000000, 00000002));
498
      } else {
499
         emitForm_A(i, HEX64(30000000, 00000000));
500
 
501
         if (i->src(2).mod.neg())
502
            code[0] |= 1 << 8;
503
      }
504
      roundMode_A(i);
505
 
506
      if (neg1)
507
         code[0] |= 1 << 9;
508
 
509
      if (i->saturate)
510
         code[0] |= 1 << 5;
511
      if (i->ftz)
512
         code[0] |= 1 << 6;
513
   } else {
514
      assert(!i->saturate && !i->src(2).mod.neg());
515
      emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e,
516
                 false);
517
      if (neg1)
518
         code[0] |= 1 << 4;
519
   }
520
}
521
 
522
void
523
CodeEmitterNVC0::emitFMUL(const Instruction *i)
524
{
525
   bool neg = (i->src(0).mod ^ i->src(1).mod).neg();
526
 
527
   assert(i->postFactor >= -3 && i->postFactor <= 3);
528
 
529
   if (i->encSize == 8) {
530
      if (isLIMM(i->src(1), TYPE_F32)) {
531
         assert(i->postFactor == 0); // constant folded, hopefully
532
         emitForm_A(i, HEX64(30000000, 00000002));
533
      } else {
534
         emitForm_A(i, HEX64(58000000, 00000000));
535
         roundMode_A(i);
536
         code[1] |= ((i->postFactor > 0) ?
537
                     (7 - i->postFactor) : (0 - i->postFactor)) << 17;
538
      }
539
      if (neg)
540
         code[1] ^= 1 << 25; // aliases with LIMM sign bit
541
 
542
      if (i->saturate)
543
         code[0] |= 1 << 5;
544
 
545
      if (i->dnz)
546
         code[0] |= 1 << 7;
547
      else
548
      if (i->ftz)
549
         code[0] |= 1 << 6;
550
   } else {
551
      assert(!neg && !i->saturate && !i->ftz && !i->postFactor);
552
      emitForm_S(i, 0xa8, true);
553
   }
554
}
555
 
556
void
557
CodeEmitterNVC0::emitUMUL(const Instruction *i)
558
{
559
   if (i->encSize == 8) {
560
      if (i->src(1).getFile() == FILE_IMMEDIATE) {
561
         emitForm_A(i, HEX64(10000000, 00000002));
562
      } else {
563
         emitForm_A(i, HEX64(50000000, 00000003));
564
      }
565
      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
566
         code[0] |= 1 << 6;
567
      if (i->sType == TYPE_S32)
568
         code[0] |= 1 << 5;
569
      if (i->dType == TYPE_S32)
570
         code[0] |= 1 << 7;
571
   } else {
572
      emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true);
573
 
574
      if (i->sType == TYPE_S32)
575
         code[0] |= 1 << 6;
576
   }
577
}
578
 
579
void
580
CodeEmitterNVC0::emitFADD(const Instruction *i)
581
{
582
   if (i->encSize == 8) {
583
      if (isLIMM(i->src(1), TYPE_F32)) {
584
         assert(!i->saturate);
585
         emitForm_A(i, HEX64(28000000, 00000002));
586
 
587
         code[0] |= i->src(0).mod.abs() << 7;
588
         code[0] |= i->src(0).mod.neg() << 9;
589
 
590
         if (i->src(1).mod.abs())
591
            code[1] &= 0xfdffffff;
592
         if ((i->op == OP_SUB) != static_cast(i->src(1).mod.neg()))
593
            code[1] ^= 0x02000000;
594
      } else {
595
         emitForm_A(i, HEX64(50000000, 00000000));
596
 
597
         roundMode_A(i);
598
         if (i->saturate)
599
            code[1] |= 1 << 17;
600
 
601
         emitNegAbs12(i);
602
         if (i->op == OP_SUB) code[0] ^= 1 << 8;
603
      }
604
      if (i->ftz)
605
         code[0] |= 1 << 5;
606
   } else {
607
      assert(!i->saturate && i->op != OP_SUB &&
608
             !i->src(0).mod.abs() &&
609
             !i->src(1).mod.neg() && !i->src(1).mod.abs());
610
 
611
      emitForm_S(i, 0x49, true);
612
 
613
      if (i->src(0).mod.neg())
614
         code[0] |= 1 << 7;
615
   }
616
}
617
 
618
void
619
CodeEmitterNVC0::emitUADD(const Instruction *i)
620
{
621
   uint32_t addOp = 0;
622
 
623
   assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
624
   assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
625
 
626
   if (i->src(0).mod.neg())
627
      addOp |= 0x200;
628
   if (i->src(1).mod.neg())
629
      addOp |= 0x100;
630
   if (i->op == OP_SUB) {
631
      addOp ^= 0x100;
632
      assert(addOp != 0x300); // would be add-plus-one
633
   }
634
 
635
   if (i->encSize == 8) {
636
      if (isLIMM(i->src(1), TYPE_U32)) {
637
         emitForm_A(i, HEX64(08000000, 00000002));
638
         if (i->defExists(1))
639
            code[1] |= 1 << 26; // write carry
640
      } else {
641
         emitForm_A(i, HEX64(48000000, 00000003));
642
         if (i->defExists(1))
643
            code[1] |= 1 << 16; // write carry
644
      }
645
      code[0] |= addOp;
646
 
647
      if (i->saturate)
648
         code[0] |= 1 << 5;
649
      if (i->flagsSrc >= 0) // add carry
650
         code[0] |= 1 << 6;
651
   } else {
652
      assert(!(addOp & 0x100));
653
      emitForm_S(i, (addOp >> 3) |
654
                 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true);
655
   }
656
}
657
 
658
// TODO: shl-add
659
void
660
CodeEmitterNVC0::emitIMAD(const Instruction *i)
661
{
662
   assert(i->encSize == 8);
663
   emitForm_A(i, HEX64(20000000, 00000003));
664
 
665
   if (isSignedType(i->dType))
666
      code[0] |= 1 << 7;
667
   if (isSignedType(i->sType))
668
      code[0] |= 1 << 5;
669
 
670
   code[1] |= i->saturate << 24;
671
 
672
   if (i->flagsDef >= 0) code[1] |= 1 << 16;
673
   if (i->flagsSrc >= 0) code[1] |= 1 << 23;
674
 
675
   if (i->src(2).mod.neg()) code[0] |= 0x10;
676
   if (i->src(1).mod.neg() ^
677
       i->src(0).mod.neg()) code[0] |= 0x20;
678
 
679
   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
680
      code[0] |= 1 << 6;
681
}
682
 
683
void
684
CodeEmitterNVC0::emitMADSP(const Instruction *i)
685
{
686
   assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
687
 
688
   emitForm_A(i, HEX64(00000000, 00000003));
689
 
690
   if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
691
      code[1] |= 0x01800000;
692
   } else {
693
      code[0] |= (i->subOp & 0x00f) << 7;
694
      code[0] |= (i->subOp & 0x0f0) << 1;
695
      code[0] |= (i->subOp & 0x100) >> 3;
696
      code[0] |= (i->subOp & 0x200) >> 2;
697
      code[1] |= (i->subOp & 0xc00) << 13;
698
   }
699
 
700
   if (i->flagsDef >= 0)
701
      code[1] |= 1 << 16;
702
}
703
 
704
void
705
CodeEmitterNVC0::emitISAD(const Instruction *i)
706
{
707
   assert(i->dType == TYPE_S32 || i->dType == TYPE_U32);
708
   assert(i->encSize == 8);
709
 
710
   emitForm_A(i, HEX64(38000000, 00000003));
711
 
712
   if (i->dType == TYPE_S32)
713
      code[0] |= 1 << 5;
714
}
715
 
716
void
717
CodeEmitterNVC0::emitNOT(Instruction *i)
718
{
719
   assert(i->encSize == 8);
720
   i->setSrc(1, i->src(0));
721
   emitForm_A(i, HEX64(68000000, 000001c3));
722
}
723
 
724
void
725
CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
726
{
727
   if (i->def(0).getFile() == FILE_PREDICATE) {
728
      code[0] = 0x00000004 | (subOp << 30);
729
      code[1] = 0x0c000000;
730
 
731
      emitPredicate(i);
732
 
733
      defId(i->def(0), 17);
734
      srcId(i->src(0), 20);
735
      if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23;
736
      srcId(i->src(1), 26);
737
      if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29;
738
 
739
      if (i->defExists(1)) {
740
         defId(i->def(1), 14);
741
      } else {
742
         code[0] |= 7 << 14;
743
      }
744
      // (a OP b) OP c
745
      if (i->predSrc != 2 && i->srcExists(2)) {
746
         code[1] |= subOp << 21;
747
         srcId(i->src(2), 17);
748
         if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
749
      } else {
750
         code[1] |= 0x000e0000;
751
      }
752
   } else
753
   if (i->encSize == 8) {
754
      if (isLIMM(i->src(1), TYPE_U32)) {
755
         emitForm_A(i, HEX64(38000000, 00000002));
756
 
757
         if (i->flagsDef >= 0)
758
            code[1] |= 1 << 26;
759
      } else {
760
         emitForm_A(i, HEX64(68000000, 00000003));
761
 
762
         if (i->flagsDef >= 0)
763
            code[1] |= 1 << 16;
764
      }
765
      code[0] |= subOp << 6;
766
 
767
      if (i->flagsSrc >= 0) // carry
768
         code[0] |= 1 << 5;
769
 
770
      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
771
      if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
772
   } else {
773
      emitForm_S(i, (subOp << 5) |
774
                 ((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true);
775
   }
776
}
777
 
778
void
779
CodeEmitterNVC0::emitPOPC(const Instruction *i)
780
{
781
   emitForm_A(i, HEX64(54000000, 00000004));
782
 
783
   if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9;
784
   if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8;
785
}
786
 
787
void
788
CodeEmitterNVC0::emitINSBF(const Instruction *i)
789
{
790
   emitForm_A(i, HEX64(28000000, 00000003));
791
}
792
 
793
void
794
CodeEmitterNVC0::emitEXTBF(const Instruction *i)
795
{
796
   emitForm_A(i, HEX64(70000000, 00000003));
797
 
798
   if (i->dType == TYPE_S32)
799
      code[0] |= 1 << 5;
800
   if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
801
      code[0] |= 1 << 8;
802
}
803
 
804
void
805
CodeEmitterNVC0::emitPERMT(const Instruction *i)
806
{
807
   emitForm_A(i, HEX64(24000000, 00000004));
808
 
809
   code[0] |= i->subOp << 5;
810
}
811
 
812
void
813
CodeEmitterNVC0::emitShift(const Instruction *i)
814
{
815
   if (i->op == OP_SHR) {
816
      emitForm_A(i, HEX64(58000000, 00000003)
817
                 | (isSignedType(i->dType) ? 0x20 : 0x00));
818
   } else {
819
      emitForm_A(i, HEX64(60000000, 00000003));
820
   }
821
 
822
   if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP)
823
      code[0] |= 1 << 9;
824
}
825
 
826
void
827
CodeEmitterNVC0::emitPreOp(const Instruction *i)
828
{
829
   if (i->encSize == 8) {
830
      emitForm_B(i, HEX64(60000000, 00000000));
831
 
832
      if (i->op == OP_PREEX2)
833
         code[0] |= 0x20;
834
 
835
      if (i->src(0).mod.abs()) code[0] |= 1 << 6;
836
      if (i->src(0).mod.neg()) code[0] |= 1 << 8;
837
   } else {
838
      emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true);
839
   }
840
}
841
 
842
void
843
CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp)
844
{
845
   if (i->encSize == 8) {
846
      code[0] = 0x00000000 | (subOp << 26);
847
      code[1] = 0xc8000000;
848
 
849
      emitPredicate(i);
850
 
851
      defId(i->def(0), 14);
852
      srcId(i->src(0), 20);
853
 
854
      assert(i->src(0).getFile() == FILE_GPR);
855
 
856
      if (i->saturate) code[0] |= 1 << 5;
857
 
858
      if (i->src(0).mod.abs()) code[0] |= 1 << 7;
859
      if (i->src(0).mod.neg()) code[0] |= 1 << 9;
860
   } else {
861
      emitForm_S(i, 0x80000008 | (subOp << 26), true);
862
 
863
      assert(!i->src(0).mod.neg());
864
      if (i->src(0).mod.abs()) code[0] |= 1 << 30;
865
   }
866
}
867
 
868
void
869
CodeEmitterNVC0::emitMINMAX(const Instruction *i)
870
{
871
   uint64_t op;
872
 
873
   assert(i->encSize == 8);
874
 
875
   op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL;
876
 
877
   if (i->ftz)
878
      op |= 1 << 5;
879
   else
880
   if (!isFloatType(i->dType))
881
      op |= isSignedType(i->dType) ? 0x23 : 0x03;
882
 
883
   emitForm_A(i, op);
884
   emitNegAbs12(i);
885
}
886
 
887
void
888
CodeEmitterNVC0::roundMode_C(const Instruction *i)
889
{
890
   switch (i->rnd) {
891
   case ROUND_M:  code[1] |= 1 << 17; break;
892
   case ROUND_P:  code[1] |= 2 << 17; break;
893
   case ROUND_Z:  code[1] |= 3 << 17; break;
894
   case ROUND_NI: code[0] |= 1 << 7; break;
895
   case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break;
896
   case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break;
897
   case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break;
898
   case ROUND_N: break;
899
   default:
900
      assert(!"invalid round mode");
901
      break;
902
   }
903
}
904
 
905
void
906
CodeEmitterNVC0::roundMode_CS(const Instruction *i)
907
{
908
   switch (i->rnd) {
909
   case ROUND_M:
910
   case ROUND_MI: code[0] |= 1 << 16; break;
911
   case ROUND_P:
912
   case ROUND_PI: code[0] |= 2 << 16; break;
913
   case ROUND_Z:
914
   case ROUND_ZI: code[0] |= 3 << 16; break;
915
   default:
916
      break;
917
   }
918
}
919
 
920
void
921
CodeEmitterNVC0::emitCVT(Instruction *i)
922
{
923
   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
924
 
925
   switch (i->op) {
926
   case OP_CEIL:  i->rnd = f2f ? ROUND_PI : ROUND_P; break;
927
   case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break;
928
   case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break;
929
   default:
930
      break;
931
   }
932
 
933
   const bool sat = (i->op == OP_SAT) || i->saturate;
934
   const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs();
935
   const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg();
936
 
937
   if (i->encSize == 8) {
938
      emitForm_B(i, HEX64(10000000, 00000004));
939
 
940
      roundMode_C(i);
941
 
942
      // cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size()
943
      code[0] |= util_logbase2(typeSizeof(i->dType)) << 20;
944
      code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
945
 
946
      if (sat)
947
         code[0] |= 0x20;
948
      if (abs)
949
         code[0] |= 1 << 6;
950
      if (neg && i->op != OP_ABS)
951
         code[0] |= 1 << 8;
952
 
953
      if (i->ftz)
954
         code[1] |= 1 << 23;
955
 
956
      if (isSignedIntType(i->dType))
957
         code[0] |= 0x080;
958
      if (isSignedIntType(i->sType))
959
         code[0] |= 0x200;
960
 
961
      if (isFloatType(i->dType)) {
962
         if (!isFloatType(i->sType))
963
            code[1] |= 0x08000000;
964
      } else {
965
         if (isFloatType(i->sType))
966
            code[1] |= 0x04000000;
967
         else
968
            code[1] |= 0x0c000000;
969
      }
970
   } else {
971
      if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) {
972
         code[0] = 0x298;
973
      } else
974
      if (isFloatType(i->dType)) {
975
         if (isFloatType(i->sType))
976
            code[0] = 0x098;
977
         else
978
            code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0);
979
      } else {
980
         assert(isFloatType(i->sType));
981
 
982
         code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0);
983
      }
984
 
985
      if (neg) code[0] |= 1 << 16;
986
      if (sat) code[0] |= 1 << 18;
987
      if (abs) code[0] |= 1 << 19;
988
 
989
      roundMode_CS(i);
990
   }
991
}
992
 
993
void
994
CodeEmitterNVC0::emitSET(const CmpInstruction *i)
995
{
996
   uint32_t hi;
997
   uint32_t lo = 0;
998
 
999
   if (i->sType == TYPE_F64)
1000
      lo = 0x1;
1001
   else
1002
   if (!isFloatType(i->sType))
1003
      lo = 0x3;
1004
 
1005
   if (isFloatType(i->dType) || isSignedIntType(i->sType))
1006
      lo |= 0x20;
1007
 
1008
   switch (i->op) {
1009
   case OP_SET_AND: hi = 0x10000000; break;
1010
   case OP_SET_OR:  hi = 0x10200000; break;
1011
   case OP_SET_XOR: hi = 0x10400000; break;
1012
   default:
1013
      hi = 0x100e0000;
1014
      break;
1015
   }
1016
   emitForm_A(i, (static_cast(hi) << 32) | lo);
1017
 
1018
   if (i->op != OP_SET)
1019
      srcId(i->src(2), 32 + 17);
1020
 
1021
   if (i->def(0).getFile() == FILE_PREDICATE) {
1022
      if (i->sType == TYPE_F32)
1023
         code[1] += 0x10000000;
1024
      else
1025
         code[1] += 0x08000000;
1026
 
1027
      code[0] &= ~0xfc000;
1028
      defId(i->def(0), 17);
1029
      if (i->defExists(1))
1030
         defId(i->def(1), 14);
1031
      else
1032
         code[0] |= 0x1c000;
1033
   }
1034
 
1035
   if (i->ftz)
1036
      code[1] |= 1 << 27;
1037
 
1038
   emitCondCode(i->setCond, 32 + 23);
1039
   emitNegAbs12(i);
1040
}
1041
 
1042
void
1043
CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
1044
{
1045
   uint64_t op;
1046
 
1047
   switch (i->dType) {
1048
   case TYPE_S32:
1049
      op = HEX64(30000000, 00000023);
1050
      break;
1051
   case TYPE_U32:
1052
      op = HEX64(30000000, 00000003);
1053
      break;
1054
   case TYPE_F32:
1055
      op = HEX64(38000000, 00000000);
1056
      break;
1057
   default:
1058
      assert(!"invalid type for SLCT");
1059
      op = 0;
1060
      break;
1061
   }
1062
   emitForm_A(i, op);
1063
 
1064
   CondCode cc = i->setCond;
1065
 
1066
   if (i->src(2).mod.neg())
1067
      cc = reverseCondCode(cc);
1068
 
1069
   emitCondCode(cc, 32 + 23);
1070
 
1071
   if (i->ftz)
1072
      code[0] |= 1 << 5;
1073
}
1074
 
1075
void CodeEmitterNVC0::emitSELP(const Instruction *i)
1076
{
1077
   emitForm_A(i, HEX64(20000000, 00000004));
1078
 
1079
   if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
1080
      code[1] |= 1 << 20;
1081
}
1082
 
1083
void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
1084
{
1085
   code[0] = 0x00000006 | (i->subOp << 26);
1086
   code[1] = 0xf0000000;
1087
   emitPredicate(i);
1088
   emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5);
1089
}
1090
 
1091
void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i)
1092
{
1093
   code[0] = 0x00000086;
1094
   code[1] = 0xd0000000;
1095
 
1096
   code[1] |= i->tex.r;
1097
   code[1] |= i->tex.s << 8;
1098
 
1099
   if (i->tex.liveOnly)
1100
      code[0] |= 1 << 9;
1101
 
1102
   defId(i->def(0), 14);
1103
   srcId(i->src(0), 20);
1104
}
1105
 
1106
static inline bool
1107
isNextIndependentTex(const TexInstruction *i)
1108
{
1109
   if (!i->next || !isTextureOp(i->next->op))
1110
      return false;
1111
   if (i->getDef(0)->interfers(i->next->getSrc(0)))
1112
      return false;
1113
   return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1));
1114
}
1115
 
1116
void
1117
CodeEmitterNVC0::emitTEX(const TexInstruction *i)
1118
{
1119
   code[0] = 0x00000006;
1120
 
1121
   if (isNextIndependentTex(i))
1122
      code[0] |= 0x080; // t mode
1123
   else
1124
      code[0] |= 0x100; // p mode
1125
 
1126
   if (i->tex.liveOnly)
1127
      code[0] |= 1 << 9;
1128
 
1129
   switch (i->op) {
1130
   case OP_TEX: code[1] = 0x80000000; break;
1131
   case OP_TXB: code[1] = 0x84000000; break;
1132
   case OP_TXL: code[1] = 0x86000000; break;
1133
   case OP_TXF: code[1] = 0x90000000; break;
1134
   case OP_TXG: code[1] = 0xa0000000; break;
1135
   case OP_TXD: code[1] = 0xe0000000; break;
1136
   default:
1137
      assert(!"invalid texture op");
1138
      break;
1139
   }
1140
   if (i->op == OP_TXF) {
1141
      if (!i->tex.levelZero)
1142
         code[1] |= 0x02000000;
1143
   } else
1144
   if (i->tex.levelZero) {
1145
      code[1] |= 0x02000000;
1146
   }
1147
 
1148
   if (i->op != OP_TXD && i->tex.derivAll)
1149
      code[1] |= 1 << 13;
1150
 
1151
   defId(i->def(0), 14);
1152
   srcId(i->src(0), 20);
1153
 
1154
   emitPredicate(i);
1155
 
1156
   if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5;
1157
 
1158
   code[1] |= i->tex.mask << 14;
1159
 
1160
   code[1] |= i->tex.r;
1161
   code[1] |= i->tex.s << 8;
1162
   if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0)
1163
      code[1] |= 1 << 18; // in 1st source (with array index)
1164
 
1165
   // texture target:
1166
   code[1] |= (i->tex.target.getDim() - 1) << 20;
1167
   if (i->tex.target.isCube())
1168
      code[1] += 2 << 20;
1169
   if (i->tex.target.isArray())
1170
      code[1] |= 1 << 19;
1171
   if (i->tex.target.isShadow())
1172
      code[1] |= 1 << 24;
1173
 
1174
   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1175
 
1176
   if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) {
1177
      // lzero
1178
      if (i->op == OP_TXL)
1179
         code[1] &= ~(1 << 26);
1180
      else
1181
      if (i->op == OP_TXF)
1182
         code[1] &= ~(1 << 25);
1183
   }
1184
   if (i->tex.target == TEX_TARGET_2D_MS ||
1185
       i->tex.target == TEX_TARGET_2D_MS_ARRAY)
1186
      code[1] |= 1 << 23;
1187
 
1188
   if (i->tex.useOffsets) // in vecSrc0.w
1189
      code[1] |= 1 << 22;
1190
 
1191
   srcId(i, src1, 26);
1192
}
1193
 
1194
void
1195
CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
1196
{
1197
   code[0] = 0x00000086;
1198
   code[1] = 0xc0000000;
1199
 
1200
   switch (i->tex.query) {
1201
   case TXQ_DIMS:            code[1] |= 0 << 22; break;
1202
   case TXQ_TYPE:            code[1] |= 1 << 22; break;
1203
   case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break;
1204
   case TXQ_FILTER:          code[1] |= 3 << 22; break;
1205
   case TXQ_LOD:             code[1] |= 4 << 22; break;
1206
   case TXQ_BORDER_COLOUR:   code[1] |= 5 << 22; break;
1207
   default:
1208
      assert(!"invalid texture query");
1209
      break;
1210
   }
1211
 
1212
   code[1] |= i->tex.mask << 14;
1213
 
1214
   code[1] |= i->tex.r;
1215
   code[1] |= i->tex.s << 8;
1216
   if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0)
1217
      code[1] |= 1 << 18;
1218
 
1219
   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
1220
 
1221
   defId(i->def(0), 14);
1222
   srcId(i->src(0), 20);
1223
   srcId(i, src1, 26);
1224
 
1225
   emitPredicate(i);
1226
}
1227
 
1228
void
1229
CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
1230
{
1231
   code[0] = 0x00000000 | (laneMask << 6);
1232
   code[1] = 0x48000000 | qOp;
1233
 
1234
   defId(i->def(0), 14);
1235
   srcId(i->src(0), 20);
1236
   srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26);
1237
 
1238
   if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
1239
      code[0] |= 1 << 9; // dall
1240
 
1241
   emitPredicate(i);
1242
}
1243
 
1244
void
1245
CodeEmitterNVC0::emitFlow(const Instruction *i)
1246
{
1247
   const FlowInstruction *f = i->asFlow();
1248
 
1249
   unsigned mask; // bit 0: predicate, bit 1: target
1250
 
1251
   code[0] = 0x00000007;
1252
 
1253
   switch (i->op) {
1254
   case OP_BRA:
1255
      code[1] = f->absolute ? 0x00000000 : 0x40000000;
1256
      if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST)
1257
         code[0] |= 0x4000;
1258
      mask = 3;
1259
      break;
1260
   case OP_CALL:
1261
      code[1] = f->absolute ? 0x10000000 : 0x50000000;
1262
      if (f->indirect)
1263
         code[0] |= 0x4000; // indirect calls always use c[] source
1264
      mask = 2;
1265
      break;
1266
 
1267
   case OP_EXIT:    code[1] = 0x80000000; mask = 1; break;
1268
   case OP_RET:     code[1] = 0x90000000; mask = 1; break;
1269
   case OP_DISCARD: code[1] = 0x98000000; mask = 1; break;
1270
   case OP_BREAK:   code[1] = 0xa8000000; mask = 1; break;
1271
   case OP_CONT:    code[1] = 0xb0000000; mask = 1; break;
1272
 
1273
   case OP_JOINAT:   code[1] = 0x60000000; mask = 2; break;
1274
   case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break;
1275
   case OP_PRECONT:  code[1] = 0x70000000; mask = 2; break;
1276
   case OP_PRERET:   code[1] = 0x78000000; mask = 2; break;
1277
 
1278
   case OP_QUADON:  code[1] = 0xc0000000; mask = 0; break;
1279
   case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break;
1280
   case OP_BRKPT:   code[1] = 0xd0000000; mask = 0; break;
1281
   default:
1282
      assert(!"invalid flow operation");
1283
      return;
1284
   }
1285
 
1286
   if (mask & 1) {
1287
      emitPredicate(i);
1288
      if (i->flagsSrc < 0)
1289
         code[0] |= 0x1e0;
1290
   }
1291
 
1292
   if (!f)
1293
      return;
1294
 
1295
   if (f->allWarp)
1296
      code[0] |= 1 << 15;
1297
   if (f->limit)
1298
      code[0] |= 1 << 16;
1299
 
1300
   if (f->indirect) {
1301
      if (code[0] & 0x4000) {
1302
         assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST);
1303
         setAddress16(i->src(0));
1304
         code[1] |= i->getSrc(0)->reg.fileIndex << 10;
1305
         if (f->op == OP_BRA)
1306
            srcId(f->src(0).getIndirect(0), 20);
1307
      } else {
1308
         srcId(f, 0, 20);
1309
      }
1310
   }
1311
 
1312
   if (f->op == OP_CALL) {
1313
      if (f->indirect) {
1314
         // nothing
1315
      } else
1316
      if (f->builtin) {
1317
         assert(f->absolute);
1318
         uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin);
1319
         addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26);
1320
         addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6);
1321
      } else {
1322
         assert(!f->absolute);
1323
         int32_t pcRel = f->target.fn->binPos - (codeSize + 8);
1324
         code[0] |= (pcRel & 0x3f) << 26;
1325
         code[1] |= (pcRel >> 6) & 0x3ffff;
1326
      }
1327
   } else
1328
   if (mask & 2) {
1329
      int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
1330
      // currently we don't want absolute branches
1331
      assert(!f->absolute);
1332
      code[0] |= (pcRel & 0x3f) << 26;
1333
      code[1] |= (pcRel >> 6) & 0x3ffff;
1334
   }
1335
}
1336
 
1337
void
1338
CodeEmitterNVC0::emitBAR(const Instruction *i)
1339
{
1340
   Value *rDef = NULL, *pDef = NULL;
1341
 
1342
   switch (i->subOp) {
1343
   case NV50_IR_SUBOP_BAR_ARRIVE:   code[0] = 0x84; break;
1344
   case NV50_IR_SUBOP_BAR_RED_AND:  code[0] = 0x24; break;
1345
   case NV50_IR_SUBOP_BAR_RED_OR:   code[0] = 0x44; break;
1346
   case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break;
1347
   default:
1348
      code[0] = 0x04;
1349
      assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC);
1350
      break;
1351
   }
1352
   code[1] = 0x50000000;
1353
 
1354
   code[0] |= 63 << 14;
1355
   code[1] |= 7 << 21;
1356
 
1357
   emitPredicate(i);
1358
 
1359
   // barrier id
1360
   if (i->src(0).getFile() == FILE_GPR) {
1361
      srcId(i->src(0), 20);
1362
   } else {
1363
      ImmediateValue *imm = i->getSrc(0)->asImm();
1364
      assert(imm);
1365
      code[0] |= imm->reg.data.u32 << 20;
1366
   }
1367
 
1368
   // thread count
1369
   if (i->src(1).getFile() == FILE_GPR) {
1370
      srcId(i->src(1), 26);
1371
   } else {
1372
      ImmediateValue *imm = i->getSrc(1)->asImm();
1373
      assert(imm);
1374
      code[0] |= imm->reg.data.u32 << 26;
1375
      code[1] |= imm->reg.data.u32 >> 6;
1376
   }
1377
 
1378
   if (i->srcExists(2) && (i->predSrc != 2)) {
1379
      srcId(i->src(2), 32 + 17);
1380
      if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT))
1381
         code[1] |= 1 << 20;
1382
   } else {
1383
      code[1] |= 7 << 17;
1384
   }
1385
 
1386
   if (i->defExists(0)) {
1387
      if (i->def(0).getFile() == FILE_GPR)
1388
         rDef = i->getDef(0);
1389
      else
1390
         pDef = i->getDef(0);
1391
 
1392
      if (i->defExists(1)) {
1393
         if (i->def(1).getFile() == FILE_GPR)
1394
            rDef = i->getDef(1);
1395
         else
1396
            pDef = i->getDef(1);
1397
      }
1398
   }
1399
   if (rDef) {
1400
      code[0] &= ~(63 << 14);
1401
      defId(rDef, 14);
1402
   }
1403
   if (pDef) {
1404
      code[1] &= ~(7 << 21);
1405
      defId(pDef, 32 + 21);
1406
   }
1407
}
1408
 
1409
void
1410
CodeEmitterNVC0::emitPFETCH(const Instruction *i)
1411
{
1412
   uint32_t prim = i->src(0).get()->reg.data.u32;
1413
 
1414
   code[0] = 0x00000006 | ((prim & 0x3f) << 26);
1415
   code[1] = 0x00000000 | (prim >> 6);
1416
 
1417
   emitPredicate(i);
1418
 
1419
   defId(i->def(0), 14);
1420
   srcId(i->src(1), 20);
1421
}
1422
 
1423
void
1424
CodeEmitterNVC0::emitVFETCH(const Instruction *i)
1425
{
1426
   code[0] = 0x00000006;
1427
   code[1] = 0x06000000 | i->src(0).get()->reg.data.offset;
1428
 
1429
   if (i->perPatch)
1430
      code[0] |= 0x100;
1431
   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
1432
      code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads
1433
 
1434
   emitPredicate(i);
1435
 
1436
   code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5;
1437
 
1438
   defId(i->def(0), 14);
1439
   srcId(i->src(0).getIndirect(0), 20);
1440
   srcId(i->src(0).getIndirect(1), 26); // vertex address
1441
}
1442
 
1443
void
1444
CodeEmitterNVC0::emitEXPORT(const Instruction *i)
1445
{
1446
   unsigned int size = typeSizeof(i->dType);
1447
 
1448
   code[0] = 0x00000006 | ((size / 4 - 1) << 5);
1449
   code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset;
1450
 
1451
   assert(!(code[1] & ((size == 12) ? 15 : (size - 1))));
1452
 
1453
   if (i->perPatch)
1454
      code[0] |= 0x100;
1455
 
1456
   emitPredicate(i);
1457
 
1458
   assert(i->src(1).getFile() == FILE_GPR);
1459
 
1460
   srcId(i->src(0).getIndirect(0), 20);
1461
   srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address
1462
   srcId(i->src(1), 26);
1463
}
1464
 
1465
void
1466
CodeEmitterNVC0::emitOUT(const Instruction *i)
1467
{
1468
   code[0] = 0x00000006;
1469
   code[1] = 0x1c000000;
1470
 
1471
   emitPredicate(i);
1472
 
1473
   defId(i->def(0), 14); // new secret address
1474
   srcId(i->src(0), 20); // old secret address, should be 0 initially
1475
 
1476
   assert(i->src(0).getFile() == FILE_GPR);
1477
 
1478
   if (i->op == OP_EMIT)
1479
      code[0] |= 1 << 5;
1480
   if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART)
1481
      code[0] |= 1 << 6;
1482
 
1483
   // vertex stream
1484
   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1485
      code[1] |= 0xc000;
1486
      code[0] |= SDATA(i->src(1)).u32 << 26;
1487
   } else {
1488
      srcId(i->src(1), 26);
1489
   }
1490
}
1491
 
1492
void
1493
CodeEmitterNVC0::emitInterpMode(const Instruction *i)
1494
{
1495
   if (i->encSize == 8) {
1496
      code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID
1497
   } else {
1498
      if (i->getInterpMode() == NV50_IR_INTERP_SC)
1499
         code[0] |= 0x80;
1500
      assert(i->op == OP_PINTERP && i->getSampleMode() == 0);
1501
   }
1502
}
1503
 
1504
void
1505
CodeEmitterNVC0::emitINTERP(const Instruction *i)
1506
{
1507
   const uint32_t base = i->getSrc(0)->reg.data.offset;
1508
 
1509
   if (i->encSize == 8) {
1510
      code[0] = 0x00000000;
1511
      code[1] = 0xc0000000 | (base & 0xffff);
1512
 
1513
      if (i->saturate)
1514
         code[0] |= 1 << 5;
1515
 
1516
      if (i->op == OP_PINTERP)
1517
         srcId(i->src(1), 26);
1518
      else
1519
         code[0] |= 0x3f << 26;
1520
 
1521
      srcId(i->src(0).getIndirect(0), 20);
1522
   } else {
1523
      assert(i->op == OP_PINTERP);
1524
      code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26);
1525
      srcId(i->src(1), 20);
1526
   }
1527
   emitInterpMode(i);
1528
 
1529
   emitPredicate(i);
1530
   defId(i->def(0), 14);
1531
 
1532
   if (i->getSampleMode() == NV50_IR_INTERP_OFFSET)
1533
      srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 17);
1534
   else
1535
      code[1] |= 0x3f << 17;
1536
}
1537
 
1538
void
1539
CodeEmitterNVC0::emitLoadStoreType(DataType ty)
1540
{
1541
   uint8_t val;
1542
 
1543
   switch (ty) {
1544
   case TYPE_U8:
1545
      val = 0x00;
1546
      break;
1547
   case TYPE_S8:
1548
      val = 0x20;
1549
      break;
1550
   case TYPE_F16:
1551
   case TYPE_U16:
1552
      val = 0x40;
1553
      break;
1554
   case TYPE_S16:
1555
      val = 0x60;
1556
      break;
1557
   case TYPE_F32:
1558
   case TYPE_U32:
1559
   case TYPE_S32:
1560
      val = 0x80;
1561
      break;
1562
   case TYPE_F64:
1563
   case TYPE_U64:
1564
   case TYPE_S64:
1565
      val = 0xa0;
1566
      break;
1567
   case TYPE_B128:
1568
      val = 0xc0;
1569
      break;
1570
   default:
1571
      val = 0x80;
1572
      assert(!"invalid type");
1573
      break;
1574
   }
1575
   code[0] |= val;
1576
}
1577
 
1578
void
1579
CodeEmitterNVC0::emitCachingMode(CacheMode c)
1580
{
1581
   uint32_t val;
1582
 
1583
   switch (c) {
1584
   case CACHE_CA:
1585
// case CACHE_WB:
1586
      val = 0x000;
1587
      break;
1588
   case CACHE_CG:
1589
      val = 0x100;
1590
      break;
1591
   case CACHE_CS:
1592
      val = 0x200;
1593
      break;
1594
   case CACHE_CV:
1595
// case CACHE_WT:
1596
      val = 0x300;
1597
      break;
1598
   default:
1599
      val = 0;
1600
      assert(!"invalid caching mode");
1601
      break;
1602
   }
1603
   code[0] |= val;
1604
}
1605
 
1606
static inline bool
1607
uses64bitAddress(const Instruction *ldst)
1608
{
1609
   return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL &&
1610
      ldst->src(0).isIndirect(0) &&
1611
      ldst->getIndirect(0, 0)->reg.size == 8;
1612
}
1613
 
1614
void
1615
CodeEmitterNVC0::emitSTORE(const Instruction *i)
1616
{
1617
   uint32_t opc;
1618
 
1619
   switch (i->src(0).getFile()) {
1620
   case FILE_MEMORY_GLOBAL: opc = 0x90000000; break;
1621
   case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
1622
   case FILE_MEMORY_SHARED: opc = 0xc9000000; break;
1623
   default:
1624
      assert(!"invalid memory file");
1625
      opc = 0;
1626
      break;
1627
   }
1628
   code[0] = 0x00000005;
1629
   code[1] = opc;
1630
 
1631
   setAddressByFile(i->src(0));
1632
   srcId(i->src(1), 14);
1633
   srcId(i->src(0).getIndirect(0), 20);
1634
   if (uses64bitAddress(i))
1635
      code[1] |= 1 << 26;
1636
 
1637
   emitPredicate(i);
1638
 
1639
   emitLoadStoreType(i->dType);
1640
   emitCachingMode(i->cache);
1641
}
1642
 
1643
void
1644
CodeEmitterNVC0::emitLOAD(const Instruction *i)
1645
{
1646
   uint32_t opc;
1647
 
1648
   code[0] = 0x00000005;
1649
 
1650
   switch (i->src(0).getFile()) {
1651
   case FILE_MEMORY_GLOBAL: opc = 0x80000000; break;
1652
   case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
1653
   case FILE_MEMORY_SHARED: opc = 0xc1000000; break;
1654
   case FILE_MEMORY_CONST:
1655
      if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) {
1656
         emitMOV(i); // not sure if this is any better
1657
         return;
1658
      }
1659
      opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10);
1660
      code[0] = 0x00000006 | (i->subOp << 8);
1661
      break;
1662
   default:
1663
      assert(!"invalid memory file");
1664
      opc = 0;
1665
      break;
1666
   }
1667
   code[1] = opc;
1668
 
1669
   defId(i->def(0), 14);
1670
 
1671
   setAddressByFile(i->src(0));
1672
   srcId(i->src(0).getIndirect(0), 20);
1673
   if (uses64bitAddress(i))
1674
      code[1] |= 1 << 26;
1675
 
1676
   emitPredicate(i);
1677
 
1678
   emitLoadStoreType(i->dType);
1679
   emitCachingMode(i->cache);
1680
}
1681
 
1682
// Map the system value referenced by ref to its special-register number.
// The indexed values (TID, CTAID, NTID, NCTAID, CLOCK) add sv.index to
// their base. Unsupported system values assert and yield 0.
uint8_t
CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
{
   uint8_t reg = 0;

   switch (SDATA(ref).sv.sv) {
   case SV_LANEID:        reg = 0x00; break;
   case SV_PHYSID:        reg = 0x03; break;
   case SV_VERTEX_COUNT:  reg = 0x10; break;
   case SV_INVOCATION_ID: reg = 0x11; break;
   case SV_YDIR:          reg = 0x12; break;
   case SV_TID:           reg = 0x21 + SDATA(ref).sv.index; break;
   case SV_CTAID:         reg = 0x25 + SDATA(ref).sv.index; break;
   case SV_NTID:          reg = 0x29 + SDATA(ref).sv.index; break;
   case SV_GRIDID:        reg = 0x2c; break;
   case SV_NCTAID:        reg = 0x2d + SDATA(ref).sv.index; break;
   case SV_LBASE:         reg = 0x34; break;
   case SV_SBASE:         reg = 0x30; break;
   case SV_CLOCK:         reg = 0x50 + SDATA(ref).sv.index; break;
   default:
      assert(!"no sreg for system value");
      reg = 0;
      break;
   }
   return reg;
}
1704
 
1705
void
1706
CodeEmitterNVC0::emitMOV(const Instruction *i)
1707
{
1708
   if (i->def(0).getFile() == FILE_PREDICATE) {
1709
      if (i->src(0).getFile() == FILE_GPR) {
1710
         code[0] = 0xfc01c003;
1711
         code[1] = 0x1a8e0000;
1712
         srcId(i->src(0), 20);
1713
      } else {
1714
         code[0] = 0x0001c004;
1715
         code[1] = 0x0c0e0000;
1716
         if (i->src(0).getFile() == FILE_IMMEDIATE) {
1717
            code[0] |= 7 << 20;
1718
            if (!i->getSrc(0)->reg.data.u32)
1719
               code[0] |= 1 << 23;
1720
         } else {
1721
            srcId(i->src(0), 20);
1722
         }
1723
      }
1724
      defId(i->def(0), 17);
1725
      emitPredicate(i);
1726
   } else
1727
   if (i->src(0).getFile() == FILE_SYSTEM_VALUE) {
1728
      uint8_t sr = getSRegEncoding(i->src(0));
1729
 
1730
      if (i->encSize == 8) {
1731
         code[0] = 0x00000004 | (sr << 26);
1732
         code[1] = 0x2c000000;
1733
      } else {
1734
         code[0] = 0x40000008 | (sr << 20);
1735
      }
1736
      defId(i->def(0), 14);
1737
 
1738
      emitPredicate(i);
1739
   } else
1740
   if (i->encSize == 8) {
1741
      uint64_t opc;
1742
 
1743
      if (i->src(0).getFile() == FILE_IMMEDIATE)
1744
         opc = HEX64(18000000, 000001e2);
1745
      else
1746
      if (i->src(0).getFile() == FILE_PREDICATE)
1747
         opc = HEX64(080e0000, 1c000004);
1748
      else
1749
         opc = HEX64(28000000, 00000004);
1750
 
1751
      opc |= i->lanes << 5;
1752
 
1753
      emitForm_B(i, opc);
1754
   } else {
1755
      uint32_t imm;
1756
 
1757
      if (i->src(0).getFile() == FILE_IMMEDIATE) {
1758
         imm = SDATA(i->src(0)).u32;
1759
         if (imm & 0xfff00000) {
1760
            assert(!(imm & 0x000fffff));
1761
            code[0] = 0x00000318 | imm;
1762
         } else {
1763
            assert(imm < 0x800 || ((int32_t)imm >= -0x800));
1764
            code[0] = 0x00000118 | (imm << 20);
1765
         }
1766
      } else {
1767
         code[0] = 0x0028;
1768
         emitShortSrc2(i->src(0));
1769
      }
1770
      defId(i->def(0), 14);
1771
 
1772
      emitPredicate(i);
1773
   }
1774
}
1775
 
1776
void
1777
CodeEmitterNVC0::emitATOM(const Instruction *i)
1778
{
1779
   const bool hasDst = i->defExists(0);
1780
   const bool casOrExch =
1781
      i->subOp == NV50_IR_SUBOP_ATOM_EXCH ||
1782
      i->subOp == NV50_IR_SUBOP_ATOM_CAS;
1783
 
1784
   if (i->dType == TYPE_U64) {
1785
      switch (i->subOp) {
1786
      case NV50_IR_SUBOP_ATOM_ADD:
1787
         code[0] = 0x205;
1788
         if (hasDst)
1789
            code[1] = 0x507e0000;
1790
         else
1791
            code[1] = 0x10000000;
1792
         break;
1793
      case NV50_IR_SUBOP_ATOM_EXCH:
1794
         code[0] = 0x305;
1795
         code[1] = 0x507e0000;
1796
         break;
1797
      case NV50_IR_SUBOP_ATOM_CAS:
1798
         code[0] = 0x325;
1799
         code[1] = 0x50000000;
1800
         break;
1801
      default:
1802
         assert(!"invalid u64 red op");
1803
         break;
1804
      }
1805
   } else
1806
   if (i->dType == TYPE_U32) {
1807
      switch (i->subOp) {
1808
      case NV50_IR_SUBOP_ATOM_EXCH:
1809
         code[0] = 0x105;
1810
         code[1] = 0x507e0000;
1811
         break;
1812
      case NV50_IR_SUBOP_ATOM_CAS:
1813
         code[0] = 0x125;
1814
         code[1] = 0x50000000;
1815
         break;
1816
      default:
1817
         code[0] = 0x5 | (i->subOp << 5);
1818
         if (hasDst)
1819
            code[1] = 0x507e0000;
1820
         else
1821
            code[1] = 0x10000000;
1822
         break;
1823
      }
1824
   } else
1825
   if (i->dType == TYPE_S32) {
1826
      assert(i->subOp <= 2);
1827
      code[0] = 0x205 | (i->subOp << 5);
1828
      if (hasDst)
1829
         code[1] = 0x587e0000;
1830
      else
1831
         code[1] = 0x18000000;
1832
   } else
1833
   if (i->dType == TYPE_F32) {
1834
      assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD);
1835
      code[0] = 0x205;
1836
      if (hasDst)
1837
         code[1] = 0x687e0000;
1838
      else
1839
         code[1] = 0x28000000;
1840
   }
1841
 
1842
   emitPredicate(i);
1843
 
1844
   srcId(i->src(1), 14);
1845
 
1846
   if (hasDst)
1847
      defId(i->def(0), 32 + 11);
1848
   else
1849
   if (casOrExch)
1850
      code[1] |= 63 << 11;
1851
 
1852
   if (hasDst || casOrExch) {
1853
      const int32_t offset = SDATA(i->src(0)).offset;
1854
      assert(offset < 0x80000 && offset >= -0x80000);
1855
      code[0] |= offset << 26;
1856
      code[1] |= (offset & 0x1ffc0) >> 6;
1857
      code[1] |= (offset & 0xe0000) << 6;
1858
   } else {
1859
      srcAddr32(i->src(0), 26, 0);
1860
   }
1861
   if (i->getIndirect(0, 0)) {
1862
      srcId(i->getIndirect(0, 0), 20);
1863
      if (i->getIndirect(0, 0)->reg.size == 8)
1864
         code[1] |= 1 << 26;
1865
   } else {
1866
      code[0] |= 63 << 20;
1867
   }
1868
 
1869
   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1870
      srcId(i->src(2), 32 + 17);
1871
}
1872
 
1873
void
1874
CodeEmitterNVC0::emitMEMBAR(const Instruction *i)
1875
{
1876
   switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) {
1877
   case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break;
1878
   case NV50_IR_SUBOP_MEMBAR_GL:  code[0] = 0x25; break;
1879
   default:
1880
      code[0] = 0x45;
1881
      assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS);
1882
      break;
1883
   }
1884
   code[1] = 0xe0000000;
1885
 
1886
   emitPredicate(i);
1887
}
1888
 
1889
void
1890
CodeEmitterNVC0::emitCCTL(const Instruction *i)
1891
{
1892
   code[0] = 0x00000005 | (i->subOp << 5);
1893
 
1894
   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
1895
      code[1] = 0x98000000;
1896
      srcAddr32(i->src(0), 28, 2);
1897
   } else {
1898
      code[1] = 0xd0000000;
1899
      setAddress24(i->src(0));
1900
   }
1901
   if (uses64bitAddress(i))
1902
      code[1] |= 1 << 26;
1903
   srcId(i->src(0).getIndirect(0), 20);
1904
 
1905
   emitPredicate(i);
1906
 
1907
   defId(i, 0, 14);
1908
}
1909
 
1910
void
1911
CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp)
1912
{
1913
   uint8_t m;
1914
   switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) {
1915
   case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break;
1916
   case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break;
1917
   case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break;
1918
   case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break;
1919
   case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break;
1920
   case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break;
1921
   case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break;
1922
   case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break;
1923
   case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break;
1924
   case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break;
1925
   case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break;
1926
   case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break;
1927
   case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break;
1928
   case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break;
1929
   case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break;
1930
   default:
1931
      return;
1932
   }
1933
   code[0] |= m << 5;
1934
   if (subOp & NV50_IR_SUBOP_SUCLAMP_2D)
1935
      code[1] |= 1 << 16;
1936
}
1937
 
1938
void
1939
CodeEmitterNVC0::emitSUCalc(Instruction *i)
1940
{
1941
   ImmediateValue *imm = NULL;
1942
   uint64_t opc;
1943
 
1944
   if (i->srcExists(2)) {
1945
      imm = i->getSrc(2)->asImm();
1946
      if (imm)
1947
         i->setSrc(2, NULL); // special case, make emitForm_A not assert
1948
   }
1949
 
1950
   switch (i->op) {
1951
   case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break;
1952
   case OP_SUBFM: opc = HEX64(5c000000, 00000004); break;
1953
   case OP_SUEAU: opc = HEX64(60000000, 00000004); break;
1954
   default:
1955
      assert(0);
1956
      return;
1957
   }
1958
   emitForm_A(i, opc);
1959
 
1960
   if (i->op == OP_SUCLAMP) {
1961
      if (i->dType == TYPE_S32)
1962
         code[0] |= 1 << 9;
1963
      emitSUCLAMPMode(i->subOp);
1964
   }
1965
 
1966
   if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D)
1967
         code[1] |= 1 << 16;
1968
 
1969
   if (i->op != OP_SUEAU) {
1970
      if (i->def(0).getFile() == FILE_PREDICATE) { // p, #
1971
         code[0] |= 63 << 14;
1972
         code[1] |= i->getDef(0)->reg.data.id << 23;
1973
      } else
1974
      if (i->defExists(1)) { // r, p
1975
         assert(i->def(1).getFile() == FILE_PREDICATE);
1976
         code[1] |= i->getDef(1)->reg.data.id << 23;
1977
      } else { // r, #
1978
         code[1] |= 7 << 23;
1979
      }
1980
   }
1981
   if (imm) {
1982
      assert(i->op == OP_SUCLAMP);
1983
      i->setSrc(2, imm);
1984
      code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6
1985
   }
1986
}
1987
 
1988
void
1989
CodeEmitterNVC0::emitSUGType(DataType ty)
1990
{
1991
   switch (ty) {
1992
   case TYPE_S32: code[1] |= 1 << 13; break;
1993
   case TYPE_U8:  code[1] |= 2 << 13; break;
1994
   case TYPE_S8:  code[1] |= 3 << 13; break;
1995
   default:
1996
      assert(ty == TYPE_U32);
1997
      break;
1998
   }
1999
}
2000
 
2001
void
2002
CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s)
2003
{
2004
   const uint32_t offset = i->getSrc(s)->reg.data.offset;
2005
 
2006
   assert(i->src(s).getFile() == FILE_MEMORY_CONST);
2007
   assert(offset == (offset & 0xfffc));
2008
 
2009
   code[1] |= 1 << 21;
2010
   code[0] |= offset << 24;
2011
   code[1] |= offset >> 8;
2012
   code[1] |= i->getSrc(s)->reg.fileIndex << 8;
2013
}
2014
 
2015
void
2016
CodeEmitterNVC0::setSUPred(const Instruction *i, const int s)
2017
{
2018
   if (!i->srcExists(s) || (i->predSrc == s)) {
2019
      code[1] |= 0x7 << 17;
2020
   } else {
2021
      if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT))
2022
         code[1] |= 1 << 20;
2023
      srcId(i->src(s), 32 + 17);
2024
   }
2025
}
2026
 
2027
void
2028
CodeEmitterNVC0::emitSULDGB(const TexInstruction *i)
2029
{
2030
   code[0] = 0x5;
2031
   code[1] = 0xd4000000 | (i->subOp << 15);
2032
 
2033
   emitLoadStoreType(i->dType);
2034
   emitSUGType(i->sType);
2035
   emitCachingMode(i->cache);
2036
 
2037
   emitPredicate(i);
2038
   defId(i->def(0), 14); // destination
2039
   srcId(i->src(0), 20); // address
2040
   // format
2041
   if (i->src(1).getFile() == FILE_GPR)
2042
      srcId(i->src(1), 26);
2043
   else
2044
      setSUConst16(i, 1);
2045
   setSUPred(i, 2);
2046
}
2047
 
2048
void
2049
CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i)
2050
{
2051
   code[0] = 0x5;
2052
   code[1] = 0xdc000000 | (i->subOp << 15);
2053
 
2054
   if (i->op == OP_SUSTP)
2055
      code[1] |= i->tex.mask << 22;
2056
   else
2057
      emitLoadStoreType(i->dType);
2058
   emitSUGType(i->sType);
2059
   emitCachingMode(i->cache);
2060
 
2061
   emitPredicate(i);
2062
   srcId(i->src(0), 20); // address
2063
   // format
2064
   if (i->src(1).getFile() == FILE_GPR)
2065
      srcId(i->src(1), 26);
2066
   else
2067
      setSUConst16(i, 1);
2068
   srcId(i->src(3), 14); // values
2069
   setSUPred(i, 2);
2070
}
2071
 
2072
void
2073
CodeEmitterNVC0::emitVectorSubOp(const Instruction *i)
2074
{
2075
   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2076
   case 0:
2077
      code[1] |= (i->subOp & 0x000f) << 12; // vsrc1
2078
      code[1] |= (i->subOp & 0x00e0) >> 5;  // vsrc2
2079
      code[1] |= (i->subOp & 0x0100) << 7;  // vsrc2
2080
      code[1] |= (i->subOp & 0x3c00) << 13; // vdst
2081
      break;
2082
   case 1:
2083
      code[1] |= (i->subOp & 0x000f) << 8;  // v2src1
2084
      code[1] |= (i->subOp & 0x0010) << 11; // v2src1
2085
      code[1] |= (i->subOp & 0x01e0) >> 1;  // v2src2
2086
      code[1] |= (i->subOp & 0x0200) << 6;  // v2src2
2087
      code[1] |= (i->subOp & 0x3c00) << 2;  // v4dst
2088
      code[1] |= (i->mask & 0x3) << 2;
2089
      break;
2090
   case 2:
2091
      code[1] |= (i->subOp & 0x000f) << 8; // v4src1
2092
      code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2
2093
      code[1] |= (i->subOp & 0x3c00) << 2; // v4dst
2094
      code[1] |= (i->mask & 0x3) << 2;
2095
      code[1] |= (i->mask & 0xc) << 21;
2096
      break;
2097
   default:
2098
      assert(0);
2099
      break;
2100
   }
2101
}
2102
 
2103
void
2104
CodeEmitterNVC0::emitVSHL(const Instruction *i)
2105
{
2106
   uint64_t opc = 0x4;
2107
 
2108
   switch (NV50_IR_SUBOP_Vn(i->subOp)) {
2109
   case 0: opc |= 0xe8ULL << 56; break;
2110
   case 1: opc |= 0xb4ULL << 56; break;
2111
   case 2: opc |= 0x94ULL << 56; break;
2112
   default:
2113
      assert(0);
2114
      break;
2115
   }
2116
   if (NV50_IR_SUBOP_Vn(i->subOp) == 1) {
2117
      if (isSignedType(i->dType)) opc |= 1ULL << 0x2a;
2118
      if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5);
2119
   } else {
2120
      if (isSignedType(i->dType)) opc |= 1ULL << 0x39;
2121
      if (isSignedType(i->sType)) opc |= 1 << 6;
2122
   }
2123
   emitForm_A(i, opc);
2124
   emitVectorSubOp(i);
2125
 
2126
   if (i->saturate)
2127
      code[0] |= 1 << 9;
2128
   if (i->flagsDef >= 0)
2129
      code[1] |= 1 << 16;
2130
}
2131
 
2132
bool
2133
CodeEmitterNVC0::emitInstruction(Instruction *insn)
2134
{
2135
   unsigned int size = insn->encSize;
2136
 
2137
   if (writeIssueDelays && !(codeSize & 0x3f))
2138
      size += 8;
2139
 
2140
   if (!insn->encSize) {
2141
      ERROR("skipping unencodable instruction: "); insn->print();
2142
      return false;
2143
   } else
2144
   if (codeSize + size > codeSizeLimit) {
2145
      ERROR("code emitter output buffer too small\n");
2146
      return false;
2147
   }
2148
 
2149
   if (writeIssueDelays) {
2150
      if (!(codeSize & 0x3f)) {
2151
         code[0] = 0x00000007; // cf issue delay "instruction"
2152
         code[1] = 0x20000000;
2153
         code += 2;
2154
         codeSize += 8;
2155
      }
2156
      const unsigned int id = (codeSize & 0x3f) / 8 - 1;
2157
      uint32_t *data = code - (id * 2 + 2);
2158
      if (id <= 2) {
2159
         data[0] |= insn->sched << (id * 8 + 4);
2160
      } else
2161
      if (id == 3) {
2162
         data[0] |= insn->sched << 28;
2163
         data[1] |= insn->sched >> 4;
2164
      } else {
2165
         data[1] |= insn->sched << ((id - 4) * 8 + 4);
2166
      }
2167
   }
2168
 
2169
   // assert that instructions with multiple defs don't corrupt registers
2170
   for (int d = 0; insn->defExists(d); ++d)
2171
      assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0);
2172
 
2173
   switch (insn->op) {
2174
   case OP_MOV:
2175
   case OP_RDSV:
2176
      emitMOV(insn);
2177
      break;
2178
   case OP_NOP:
2179
      break;
2180
   case OP_LOAD:
2181
      emitLOAD(insn);
2182
      break;
2183
   case OP_STORE:
2184
      emitSTORE(insn);
2185
      break;
2186
   case OP_LINTERP:
2187
   case OP_PINTERP:
2188
      emitINTERP(insn);
2189
      break;
2190
   case OP_VFETCH:
2191
      emitVFETCH(insn);
2192
      break;
2193
   case OP_EXPORT:
2194
      emitEXPORT(insn);
2195
      break;
2196
   case OP_PFETCH:
2197
      emitPFETCH(insn);
2198
      break;
2199
   case OP_EMIT:
2200
   case OP_RESTART:
2201
      emitOUT(insn);
2202
      break;
2203
   case OP_ADD:
2204
   case OP_SUB:
2205
      if (isFloatType(insn->dType))
2206
         emitFADD(insn);
2207
      else
2208
         emitUADD(insn);
2209
      break;
2210
   case OP_MUL:
2211
      if (isFloatType(insn->dType))
2212
         emitFMUL(insn);
2213
      else
2214
         emitUMUL(insn);
2215
      break;
2216
   case OP_MAD:
2217
   case OP_FMA:
2218
      if (isFloatType(insn->dType))
2219
         emitFMAD(insn);
2220
      else
2221
         emitIMAD(insn);
2222
      break;
2223
   case OP_SAD:
2224
      emitISAD(insn);
2225
      break;
2226
   case OP_NOT:
2227
      emitNOT(insn);
2228
      break;
2229
   case OP_AND:
2230
      emitLogicOp(insn, 0);
2231
      break;
2232
   case OP_OR:
2233
      emitLogicOp(insn, 1);
2234
      break;
2235
   case OP_XOR:
2236
      emitLogicOp(insn, 2);
2237
      break;
2238
   case OP_SHL:
2239
   case OP_SHR:
2240
      emitShift(insn);
2241
      break;
2242
   case OP_SET:
2243
   case OP_SET_AND:
2244
   case OP_SET_OR:
2245
   case OP_SET_XOR:
2246
      emitSET(insn->asCmp());
2247
      break;
2248
   case OP_SELP:
2249
      emitSELP(insn);
2250
      break;
2251
   case OP_SLCT:
2252
      emitSLCT(insn->asCmp());
2253
      break;
2254
   case OP_MIN:
2255
   case OP_MAX:
2256
      emitMINMAX(insn);
2257
      break;
2258
   case OP_ABS:
2259
   case OP_NEG:
2260
   case OP_CEIL:
2261
   case OP_FLOOR:
2262
   case OP_TRUNC:
2263
   case OP_CVT:
2264
   case OP_SAT:
2265
      emitCVT(insn);
2266
      break;
2267
   case OP_RSQ:
2268
      emitSFnOp(insn, 5);
2269
      break;
2270
   case OP_RCP:
2271
      emitSFnOp(insn, 4);
2272
      break;
2273
   case OP_LG2:
2274
      emitSFnOp(insn, 3);
2275
      break;
2276
   case OP_EX2:
2277
      emitSFnOp(insn, 2);
2278
      break;
2279
   case OP_SIN:
2280
      emitSFnOp(insn, 1);
2281
      break;
2282
   case OP_COS:
2283
      emitSFnOp(insn, 0);
2284
      break;
2285
   case OP_PRESIN:
2286
   case OP_PREEX2:
2287
      emitPreOp(insn);
2288
      break;
2289
   case OP_TEX:
2290
   case OP_TXB:
2291
   case OP_TXL:
2292
   case OP_TXD:
2293
   case OP_TXF:
2294
      emitTEX(insn->asTex());
2295
      break;
2296
   case OP_TXQ:
2297
      emitTXQ(insn->asTex());
2298
      break;
2299
   case OP_TEXBAR:
2300
      emitTEXBAR(insn);
2301
      break;
2302
   case OP_SUBFM:
2303
   case OP_SUCLAMP:
2304
   case OP_SUEAU:
2305
      emitSUCalc(insn);
2306
      break;
2307
   case OP_MADSP:
2308
      emitMADSP(insn);
2309
      break;
2310
   case OP_SULDB:
2311
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2312
         emitSULDGB(insn->asTex());
2313
      else
2314
         ERROR("SULDB not yet supported on < nve4\n");
2315
      break;
2316
   case OP_SUSTB:
2317
   case OP_SUSTP:
2318
      if (targ->getChipset() >= NVISA_GK104_CHIPSET)
2319
         emitSUSTGx(insn->asTex());
2320
      else
2321
         ERROR("SUSTx not yet supported on < nve4\n");
2322
      break;
2323
   case OP_ATOM:
2324
      emitATOM(insn);
2325
      break;
2326
   case OP_BRA:
2327
   case OP_CALL:
2328
   case OP_PRERET:
2329
   case OP_RET:
2330
   case OP_DISCARD:
2331
   case OP_EXIT:
2332
   case OP_PRECONT:
2333
   case OP_CONT:
2334
   case OP_PREBREAK:
2335
   case OP_BREAK:
2336
   case OP_JOINAT:
2337
   case OP_BRKPT:
2338
   case OP_QUADON:
2339
   case OP_QUADPOP:
2340
      emitFlow(insn);
2341
      break;
2342
   case OP_QUADOP:
2343
      emitQUADOP(insn, insn->subOp, insn->lanes);
2344
      break;
2345
   case OP_DFDX:
2346
      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4);
2347
      break;
2348
   case OP_DFDY:
2349
      emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5);
2350
      break;
2351
   case OP_POPCNT:
2352
      emitPOPC(insn);
2353
      break;
2354
   case OP_INSBF:
2355
      emitINSBF(insn);
2356
      break;
2357
   case OP_EXTBF:
2358
      emitEXTBF(insn);
2359
      break;
2360
   case OP_PERMT:
2361
      emitPERMT(insn);
2362
      break;
2363
   case OP_JOIN:
2364
      emitNOP(insn);
2365
      insn->join = 1;
2366
      break;
2367
   case OP_BAR:
2368
      emitBAR(insn);
2369
      break;
2370
   case OP_MEMBAR:
2371
      emitMEMBAR(insn);
2372
      break;
2373
   case OP_CCTL:
2374
      emitCCTL(insn);
2375
      break;
2376
   case OP_VSHL:
2377
      emitVSHL(insn);
2378
      break;
2379
   case OP_PHI:
2380
   case OP_UNION:
2381
   case OP_CONSTRAINT:
2382
      ERROR("operation should have been eliminated");
2383
      return false;
2384
   case OP_EXP:
2385
   case OP_LOG:
2386
   case OP_SQRT:
2387
   case OP_POW:
2388
      ERROR("operation should have been lowered\n");
2389
      return false;
2390
   default:
2391
      ERROR("unknow op\n");
2392
      return false;
2393
   }
2394
 
2395
   if (insn->join) {
2396
      code[0] |= 0x10;
2397
      assert(insn->encSize == 8);
2398
   }
2399
 
2400
   code += insn->encSize / 4;
2401
   codeSize += insn->encSize;
2402
   return true;
2403
}
2404
 
2405
uint32_t
2406
CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const
2407
{
2408
   const Target::OpInfo &info = targ->getOpInfo(i);
2409
 
2410
   if (writeIssueDelays || info.minEncSize == 8 || 1)
2411
      return 8;
2412
 
2413
   if (i->ftz || i->saturate || i->join)
2414
      return 8;
2415
   if (i->rnd != ROUND_N)
2416
      return 8;
2417
   if (i->predSrc >= 0 && i->op == OP_MAD)
2418
      return 8;
2419
 
2420
   if (i->op == OP_PINTERP) {
2421
      if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work
2422
         return 8;
2423
   } else
2424
   if (i->op == OP_MOV && i->lanes != 0xf) {
2425
      return 8;
2426
   }
2427
 
2428
   for (int s = 0; i->srcExists(s); ++s) {
2429
      if (i->src(s).isIndirect(0))
2430
         return 8;
2431
 
2432
      if (i->src(s).getFile() == FILE_MEMORY_CONST) {
2433
         if (SDATA(i->src(s)).offset >= 0x100)
2434
            return 8;
2435
         if (i->getSrc(s)->reg.fileIndex > 1 &&
2436
             i->getSrc(s)->reg.fileIndex != 16)
2437
             return 8;
2438
      } else
2439
      if (i->src(s).getFile() == FILE_IMMEDIATE) {
2440
         if (i->dType == TYPE_F32) {
2441
            if (SDATA(i->src(s)).u32 >= 0x100)
2442
               return 8;
2443
         } else {
2444
            if (SDATA(i->src(s)).u32 > 0xff)
2445
               return 8;
2446
         }
2447
      }
2448
 
2449
      if (i->op == OP_CVT)
2450
         continue;
2451
      if (i->src(s).mod != Modifier(0)) {
2452
         if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS))
2453
            if (i->op != OP_RSQ)
2454
               return 8;
2455
         if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG))
2456
            if (i->op != OP_ADD || s != 0)
2457
               return 8;
2458
      }
2459
   }
2460
 
2461
   return 4;
2462
}
2463
 
2464
// Simplified, erring on safe side.
2465
class SchedDataCalculator : public Pass
2466
{
2467
public:
2468
   SchedDataCalculator(const Target *targ) : targ(targ) { }
2469
 
2470
private:
2471
   struct RegScores
2472
   {
2473
      struct Resource {
2474
         int st[DATA_FILE_COUNT]; // LD to LD delay 3
2475
         int ld[DATA_FILE_COUNT]; // ST to ST delay 3
2476
         int tex; // TEX to non-TEX delay 17 (0x11)
2477
         int sfu; // SFU to SFU delay 3 (except PRE-ops)
2478
         int imul; // integer MUL to MUL delay 3
2479
      } res;
2480
      struct ScoreData {
2481
         int r[64];
2482
         int p[8];
2483
         int c;
2484
      } rd, wr;
2485
      int base;
2486
 
2487
      void rebase(const int base)
2488
      {
2489
         const int delta = this->base - base;
2490
         if (!delta)
2491
            return;
2492
         this->base = 0;
2493
 
2494
         for (int i = 0; i < 64; ++i) {
2495
            rd.r[i] += delta;
2496
            wr.r[i] += delta;
2497
         }
2498
         for (int i = 0; i < 8; ++i) {
2499
            rd.p[i] += delta;
2500
            wr.p[i] += delta;
2501
         }
2502
         rd.c += delta;
2503
         wr.c += delta;
2504
 
2505
         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2506
            res.ld[f] += delta;
2507
            res.st[f] += delta;
2508
         }
2509
         res.sfu += delta;
2510
         res.imul += delta;
2511
         res.tex += delta;
2512
      }
2513
      void wipe()
2514
      {
2515
         memset(&rd, 0, sizeof(rd));
2516
         memset(&wr, 0, sizeof(wr));
2517
         memset(&res, 0, sizeof(res));
2518
      }
2519
      int getLatest(const ScoreData& d) const
2520
      {
2521
         int max = 0;
2522
         for (int i = 0; i < 64; ++i)
2523
            if (d.r[i] > max)
2524
               max = d.r[i];
2525
         for (int i = 0; i < 8; ++i)
2526
            if (d.p[i] > max)
2527
               max = d.p[i];
2528
         if (d.c > max)
2529
            max = d.c;
2530
         return max;
2531
      }
2532
      inline int getLatestRd() const
2533
      {
2534
         return getLatest(rd);
2535
      }
2536
      inline int getLatestWr() const
2537
      {
2538
         return getLatest(wr);
2539
      }
2540
      inline int getLatest() const
2541
      {
2542
         const int a = getLatestRd();
2543
         const int b = getLatestWr();
2544
 
2545
         int max = MAX2(a, b);
2546
         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2547
            max = MAX2(res.ld[f], max);
2548
            max = MAX2(res.st[f], max);
2549
         }
2550
         max = MAX2(res.sfu, max);
2551
         max = MAX2(res.imul, max);
2552
         max = MAX2(res.tex, max);
2553
         return max;
2554
      }
2555
      void setMax(const RegScores *that)
2556
      {
2557
         for (int i = 0; i < 64; ++i) {
2558
            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
2559
            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
2560
         }
2561
         for (int i = 0; i < 8; ++i) {
2562
            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
2563
            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
2564
         }
2565
         rd.c = MAX2(rd.c, that->rd.c);
2566
         wr.c = MAX2(wr.c, that->wr.c);
2567
 
2568
         for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) {
2569
            res.ld[f] = MAX2(res.ld[f], that->res.ld[f]);
2570
            res.st[f] = MAX2(res.st[f], that->res.st[f]);
2571
         }
2572
         res.sfu = MAX2(res.sfu, that->res.sfu);
2573
         res.imul = MAX2(res.imul, that->res.imul);
2574
         res.tex = MAX2(res.tex, that->res.tex);
2575
      }
2576
      void print(int cycle)
2577
      {
2578
         for (int i = 0; i < 64; ++i) {
2579
            if (rd.r[i] > cycle)
2580
               INFO("rd $r%i @ %i\n", i, rd.r[i]);
2581
            if (wr.r[i] > cycle)
2582
               INFO("wr $r%i @ %i\n", i, wr.r[i]);
2583
         }
2584
         for (int i = 0; i < 8; ++i) {
2585
            if (rd.p[i] > cycle)
2586
               INFO("rd $p%i @ %i\n", i, rd.p[i]);
2587
            if (wr.p[i] > cycle)
2588
               INFO("wr $p%i @ %i\n", i, wr.p[i]);
2589
         }
2590
         if (rd.c > cycle)
2591
            INFO("rd $c @ %i\n", rd.c);
2592
         if (wr.c > cycle)
2593
            INFO("wr $c @ %i\n", wr.c);
2594
         if (res.sfu > cycle)
2595
            INFO("sfu @ %i\n", res.sfu);
2596
         if (res.imul > cycle)
2597
            INFO("imul @ %i\n", res.imul);
2598
         if (res.tex > cycle)
2599
            INFO("tex @ %i\n", res.tex);
2600
      }
2601
   };
2602
 
2603
   RegScores *score; // for current BB
2604
   std::vector scoreBoards;
2605
   int cycle;
2606
   int prevData;
2607
   operation prevOp;
2608
 
2609
   const Target *targ;
2610
 
2611
   bool visit(Function *);
2612
   bool visit(BasicBlock *);
2613
 
2614
   void commitInsn(const Instruction *, int cycle);
2615
   int calcDelay(const Instruction *, int cycle) const;
2616
   void setDelay(Instruction *, int delay, Instruction *next);
2617
 
2618
   void recordRd(const Value *, const int ready);
2619
   void recordWr(const Value *, const int ready);
2620
   void checkRd(const Value *, int cycle, int& delay) const;
2621
   void checkWr(const Value *, int cycle, int& delay) const;
2622
 
2623
   int getCycles(const Instruction *, int origDelay) const;
2624
};
2625
 
2626
void
2627
SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next)
2628
{
2629
   if (insn->op == OP_EXIT || insn->op == OP_RET)
2630
      delay = MAX2(delay, 14);
2631
 
2632
   if (insn->op == OP_TEXBAR) {
2633
      // TODO: except if results not used before EXIT
2634
      insn->sched = 0xc2;
2635
   } else
2636
   if (insn->op == OP_JOIN || insn->join) {
2637
      insn->sched = 0x00;
2638
   } else
2639
   if (delay >= 0 || prevData == 0x04 ||
2640
       !next || !targ->canDualIssue(insn, next)) {
2641
      insn->sched = static_cast(MAX2(delay, 0));
2642
      if (prevOp == OP_EXPORT)
2643
         insn->sched |= 0x40;
2644
      else
2645
         insn->sched |= 0x20;
2646
   } else {
2647
      insn->sched = 0x04; // dual-issue
2648
   }
2649
 
2650
   if (prevData != 0x04 || prevOp != OP_EXPORT)
2651
      if (insn->sched != 0x04 || insn->op == OP_EXPORT)
2652
         prevOp = insn->op;
2653
 
2654
   prevData = insn->sched;
2655
}
2656
 
2657
int
2658
SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const
2659
{
2660
   if (insn->sched & 0x80) {
2661
      int c = (insn->sched & 0x0f) * 2 + 1;
2662
      if (insn->op == OP_TEXBAR && origDelay > 0)
2663
         c += origDelay;
2664
      return c;
2665
   }
2666
   if (insn->sched & 0x60)
2667
      return (insn->sched & 0x1f) + 1;
2668
   return (insn->sched == 0x04) ? 0 : 32;
2669
}
2670
 
2671
bool
2672
SchedDataCalculator::visit(Function *func)
2673
{
2674
   scoreBoards.resize(func->cfg.getSize());
2675
   for (size_t i = 0; i < scoreBoards.size(); ++i)
2676
      scoreBoards[i].wipe();
2677
   return true;
2678
}
2679
 
2680
bool
2681
SchedDataCalculator::visit(BasicBlock *bb)
2682
{
2683
   Instruction *insn;
2684
   Instruction *next = NULL;
2685
 
2686
   int cycle = 0;
2687
 
2688
   prevData = 0x00;
2689
   prevOp = OP_NOP;
2690
   score = &scoreBoards.at(bb->getId());
2691
 
2692
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
2693
      // back branches will wait until all target dependencies are satisfied
2694
      if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
2695
         continue;
2696
      BasicBlock *in = BasicBlock::get(ei.getNode());
2697
      if (in->getExit()) {
2698
         if (prevData != 0x04)
2699
            prevData = in->getExit()->sched;
2700
         prevOp = in->getExit()->op;
2701
      }
2702
      score->setMax(&scoreBoards.at(in->getId()));
2703
   }
2704
   if (bb->cfg.incidentCount() > 1)
2705
      prevOp = OP_NOP;
2706
 
2707
#ifdef NVC0_DEBUG_SCHED_DATA
2708
   INFO("=== BB:%i initial scores\n", bb->getId());
2709
   score->print(cycle);
2710
#endif
2711
 
2712
   for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
2713
      next = insn->next;
2714
 
2715
      commitInsn(insn, cycle);
2716
      int delay = calcDelay(next, cycle);
2717
      setDelay(insn, delay, next);
2718
      cycle += getCycles(insn, delay);
2719
 
2720
#ifdef NVC0_DEBUG_SCHED_DATA
2721
      INFO("cycle %i, sched %02x\n", cycle, insn->sched);
2722
      insn->print();
2723
      next->print();
2724
#endif
2725
   }
2726
   if (!insn)
2727
      return true;
2728
   commitInsn(insn, cycle);
2729
 
2730
   int bbDelay = -1;
2731
 
2732
   for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
2733
      BasicBlock *out = BasicBlock::get(ei.getNode());
2734
 
2735
      if (ei.getType() != Graph::Edge::BACK) {
2736
         // only test the first instruction of the outgoing block
2737
         next = out->getEntry();
2738
         if (next)
2739
            bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
2740
      } else {
2741
         // wait until all dependencies are satisfied
2742
         const int regsFree = score->getLatest();
2743
         next = out->getFirst();
2744
         for (int c = cycle; next && c < regsFree; next = next->next) {
2745
            bbDelay = MAX2(bbDelay, calcDelay(next, c));
2746
            c += getCycles(next, bbDelay);
2747
         }
2748
         next = NULL;
2749
      }
2750
   }
2751
   if (bb->cfg.outgoingCount() != 1)
2752
      next = NULL;
2753
   setDelay(insn, bbDelay, next);
2754
   cycle += getCycles(insn, bbDelay);
2755
 
2756
   score->rebase(cycle); // common base for initializing out blocks' scores
2757
   return true;
2758
}
2759
 
2760
#define NVE4_MAX_ISSUE_DELAY 0x1f
2761
int
2762
SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const
2763
{
2764
   int delay = 0, ready = cycle;
2765
 
2766
   for (int s = 0; insn->srcExists(s); ++s)
2767
      checkRd(insn->getSrc(s), cycle, delay);
2768
   // WAR & WAW don't seem to matter
2769
   // for (int s = 0; insn->srcExists(s); ++s)
2770
   //   recordRd(insn->getSrc(s), cycle);
2771
 
2772
   switch (Target::getOpClass(insn->op)) {
2773
   case OPCLASS_SFU:
2774
      ready = score->res.sfu;
2775
      break;
2776
   case OPCLASS_ARITH:
2777
      if (insn->op == OP_MUL && !isFloatType(insn->dType))
2778
         ready = score->res.imul;
2779
      break;
2780
   case OPCLASS_TEXTURE:
2781
      ready = score->res.tex;
2782
      break;
2783
   case OPCLASS_LOAD:
2784
      ready = score->res.ld[insn->src(0).getFile()];
2785
      break;
2786
   case OPCLASS_STORE:
2787
      ready = score->res.st[insn->src(0).getFile()];
2788
      break;
2789
   default:
2790
      break;
2791
   }
2792
   if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE)
2793
      ready = MAX2(ready, score->res.tex);
2794
 
2795
   delay = MAX2(delay, ready - cycle);
2796
 
2797
   // if can issue next cycle, delay is 0, not 1
2798
   return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY);
2799
}
2800
 
2801
void
2802
SchedDataCalculator::commitInsn(const Instruction *insn, int cycle)
2803
{
2804
   const int ready = cycle + targ->getLatency(insn);
2805
 
2806
   for (int d = 0; insn->defExists(d); ++d)
2807
      recordWr(insn->getDef(d), ready);
2808
   // WAR & WAW don't seem to matter
2809
   // for (int s = 0; insn->srcExists(s); ++s)
2810
   //   recordRd(insn->getSrc(s), cycle);
2811
 
2812
   switch (Target::getOpClass(insn->op)) {
2813
   case OPCLASS_SFU:
2814
      score->res.sfu = cycle + 4;
2815
      break;
2816
   case OPCLASS_ARITH:
2817
      if (insn->op == OP_MUL && !isFloatType(insn->dType))
2818
         score->res.imul = cycle + 4;
2819
      break;
2820
   case OPCLASS_TEXTURE:
2821
      score->res.tex = cycle + 18;
2822
      break;
2823
   case OPCLASS_LOAD:
2824
      if (insn->src(0).getFile() == FILE_MEMORY_CONST)
2825
         break;
2826
      score->res.ld[insn->src(0).getFile()] = cycle + 4;
2827
      score->res.st[insn->src(0).getFile()] = ready;
2828
      break;
2829
   case OPCLASS_STORE:
2830
      score->res.st[insn->src(0).getFile()] = cycle + 4;
2831
      score->res.ld[insn->src(0).getFile()] = ready;
2832
      break;
2833
   case OPCLASS_OTHER:
2834
      if (insn->op == OP_TEXBAR)
2835
         score->res.tex = cycle;
2836
      break;
2837
   default:
2838
      break;
2839
   }
2840
 
2841
#ifdef NVC0_DEBUG_SCHED_DATA
2842
   score->print(cycle);
2843
#endif
2844
}
2845
 
2846
void
2847
SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
2848
{
2849
   int ready = cycle;
2850
   int a, b;
2851
 
2852
   switch (v->reg.file) {
2853
   case FILE_GPR:
2854
      a = v->reg.data.id;
2855
      b = a + v->reg.size / 4;
2856
      for (int r = a; r < b; ++r)
2857
         ready = MAX2(ready, score->rd.r[r]);
2858
      break;
2859
   case FILE_PREDICATE:
2860
      ready = MAX2(ready, score->rd.p[v->reg.data.id]);
2861
      break;
2862
   case FILE_FLAGS:
2863
      ready = MAX2(ready, score->rd.c);
2864
      break;
2865
   case FILE_SHADER_INPUT:
2866
   case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs
2867
   case FILE_MEMORY_LOCAL:
2868
   case FILE_MEMORY_CONST:
2869
   case FILE_MEMORY_SHARED:
2870
   case FILE_MEMORY_GLOBAL:
2871
   case FILE_SYSTEM_VALUE:
2872
      // TODO: any restrictions here ?
2873
      break;
2874
   case FILE_IMMEDIATE:
2875
      break;
2876
   default:
2877
      assert(0);
2878
      break;
2879
   }
2880
   if (cycle < ready)
2881
      delay = MAX2(delay, ready - cycle);
2882
}
2883
 
2884
void
2885
SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const
2886
{
2887
   int ready = cycle;
2888
   int a, b;
2889
 
2890
   switch (v->reg.file) {
2891
   case FILE_GPR:
2892
      a = v->reg.data.id;
2893
      b = a + v->reg.size / 4;
2894
      for (int r = a; r < b; ++r)
2895
         ready = MAX2(ready, score->wr.r[r]);
2896
      break;
2897
   case FILE_PREDICATE:
2898
      ready = MAX2(ready, score->wr.p[v->reg.data.id]);
2899
      break;
2900
   default:
2901
      assert(v->reg.file == FILE_FLAGS);
2902
      ready = MAX2(ready, score->wr.c);
2903
      break;
2904
   }
2905
   if (cycle < ready)
2906
      delay = MAX2(delay, ready - cycle);
2907
}
2908
 
2909
void
2910
SchedDataCalculator::recordWr(const Value *v, const int ready)
2911
{
2912
   int a = v->reg.data.id;
2913
 
2914
   if (v->reg.file == FILE_GPR) {
2915
      int b = a + v->reg.size / 4;
2916
      for (int r = a; r < b; ++r)
2917
         score->rd.r[r] = ready;
2918
   } else
2919
   // $c, $pX: shorter issue-to-read delay (at least as exec pred and carry)
2920
   if (v->reg.file == FILE_PREDICATE) {
2921
      score->rd.p[a] = ready + 4;
2922
   } else {
2923
      assert(v->reg.file == FILE_FLAGS);
2924
      score->rd.c = ready + 4;
2925
   }
2926
}
2927
 
2928
void
2929
SchedDataCalculator::recordRd(const Value *v, const int ready)
2930
{
2931
   int a = v->reg.data.id;
2932
 
2933
   if (v->reg.file == FILE_GPR) {
2934
      int b = a + v->reg.size / 4;
2935
      for (int r = a; r < b; ++r)
2936
         score->wr.r[r] = ready;
2937
   } else
2938
   if (v->reg.file == FILE_PREDICATE) {
2939
      score->wr.p[a] = ready;
2940
   } else
2941
   if (v->reg.file == FILE_FLAGS) {
2942
      score->wr.c = ready;
2943
   }
2944
}
2945
 
2946
bool
2947
calculateSchedDataNVC0(const Target *targ, Function *func)
2948
{
2949
   SchedDataCalculator sched(targ);
2950
   return sched.run(func, true, true);
2951
}
2952
 
2953
void
2954
CodeEmitterNVC0::prepareEmission(Function *func)
2955
{
2956
   CodeEmitter::prepareEmission(func);
2957
 
2958
   if (targ->hasSWSched)
2959
      calculateSchedDataNVC0(targ, func);
2960
}
2961
 
2962
/**
 * Construct an emitter for the given NVC0 target with no output buffer
 * attached (code == NULL, sizes 0) and no relocation info.
 *
 * writeIssueDelays is fixed at construction from target->hasSWSched.
 * progType is not initialised here; createCodeEmitterNVC0() sets it via
 * setProgramType() right after construction.
 */
CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
   : CodeEmitter(target),
     targNVC0(target),
     writeIssueDelays(target->hasSWSched)
{
   code = NULL;
   codeSize = codeSizeLimit = 0;
   relocInfo = NULL;
}
2971
 
2972
/**
 * Allocate a CodeEmitterNVC0 for this target and set its program type.
 *
 * @param type  program type stored in the new emitter.
 * @return the newly allocated emitter; it is created with `new` and not
 *         released here, so the caller is responsible for it.
 */
CodeEmitter *
TargetNVC0::createCodeEmitterNVC0(Program::Type type)
{
   CodeEmitterNVC0 *emit = new CodeEmitterNVC0(this);
   emit->setProgramType(type);
   return emit;
}
2979
 
2980
/**
 * Return a code emitter suited to this target's chipset: the GK110
 * emitter for chipsets >= NVISA_GK110_CHIPSET, the NVC0 emitter otherwise.
 *
 * @param type  program type passed through to the emitter factory.
 */
CodeEmitter *
TargetNVC0::getCodeEmitter(Program::Type type)
{
   if (chipset >= NVISA_GK110_CHIPSET)
      return createCodeEmitterGK110(type);
   return createCodeEmitterNVC0(type);
}
2987
 
2988
} // namespace nv50_ir