Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright 2011 Christoph Bumiller
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
 * OTHER DEALINGS IN THE SOFTWARE.
21
 */
22
 
23
#include "codegen/nv50_ir.h"
24
#include "codegen/nv50_ir_target_nv50.h"
25
 
26
namespace nv50_ir {
27
 
28
// Instruction encoding variants. One of these is passed as the 'enc'
// argument of CodeEmitterNV50::setSrcFileBits() by the emitForm_* helpers
// (emitForm_MAD: LONG, emitForm_MUL: SHORT, emitForm_IMM: IMM,
// emitForm_ADD: LONG_ALT).
#define NV50_OP_ENC_LONG     0
29
#define NV50_OP_ENC_SHORT    1
30
#define NV50_OP_ENC_IMM      2
31
#define NV50_OP_ENC_LONG_ALT 3
32
 
33
class CodeEmitterNV50 : public CodeEmitter
34
{
35
public:
36
   CodeEmitterNV50(const TargetNV50 *);
37
 
38
   virtual bool emitInstruction(Instruction *);
39
 
40
   virtual uint32_t getMinEncodingSize(const Instruction *) const;
41
 
42
   inline void setProgramType(Program::Type pType) { progType = pType; }
43
 
44
   virtual void prepareEmission(Function *);
45
 
46
private:
47
   Program::Type progType;
48
 
49
   const TargetNV50 *targNV50;
50
 
51
private:
52
   inline void defId(const ValueDef&, const int pos);
53
   inline void srcId(const ValueRef&, const int pos);
54
   inline void srcId(const ValueRef *, const int pos);
55
 
56
   inline void srcAddr16(const ValueRef&, bool adj, const int pos);
57
   inline void srcAddr8(const ValueRef&, const int pos);
58
 
59
   void emitFlagsRd(const Instruction *);
60
   void emitFlagsWr(const Instruction *);
61
 
62
   void emitCondCode(CondCode cc, DataType ty, int pos);
63
 
64
   inline void setARegBits(unsigned int);
65
 
66
   void setAReg16(const Instruction *, int s);
67
   void setImmediate(const Instruction *, int s);
68
 
69
   void setDst(const Value *);
70
   void setDst(const Instruction *, int d);
71
   void setSrcFileBits(const Instruction *, int enc);
72
   void setSrc(const Instruction *, unsigned int s, int slot);
73
 
74
   void emitForm_MAD(const Instruction *);
75
   void emitForm_ADD(const Instruction *);
76
   void emitForm_MUL(const Instruction *);
77
   void emitForm_IMM(const Instruction *);
78
 
79
   void emitLoadStoreSizeLG(DataType ty, int pos);
80
   void emitLoadStoreSizeCS(DataType ty);
81
 
82
   void roundMode_MAD(const Instruction *);
83
   void roundMode_CVT(RoundMode);
84
 
85
   void emitMNeg12(const Instruction *);
86
 
87
   void emitLOAD(const Instruction *);
88
   void emitSTORE(const Instruction *);
89
   void emitMOV(const Instruction *);
90
   void emitRDSV(const Instruction *);
91
   void emitNOP();
92
   void emitINTERP(const Instruction *);
93
   void emitPFETCH(const Instruction *);
94
   void emitOUT(const Instruction *);
95
 
96
   void emitUADD(const Instruction *);
97
   void emitAADD(const Instruction *);
98
   void emitFADD(const Instruction *);
99
   void emitIMUL(const Instruction *);
100
   void emitFMUL(const Instruction *);
101
   void emitFMAD(const Instruction *);
102
   void emitIMAD(const Instruction *);
103
   void emitISAD(const Instruction *);
104
 
105
   void emitMINMAX(const Instruction *);
106
 
107
   void emitPreOp(const Instruction *);
108
   void emitSFnOp(const Instruction *, uint8_t subOp);
109
 
110
   void emitShift(const Instruction *);
111
   void emitARL(const Instruction *, unsigned int shl);
112
   void emitLogicOp(const Instruction *);
113
   void emitNOT(const Instruction *);
114
 
115
   void emitCVT(const Instruction *);
116
   void emitSET(const Instruction *);
117
 
118
   void emitTEX(const TexInstruction *);
119
   void emitTXQ(const TexInstruction *);
120
   void emitTEXPREP(const TexInstruction *);
121
 
122
   void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp);
123
 
124
   void emitFlow(const Instruction *, uint8_t flowOp);
125
   void emitPRERETEmu(const FlowInstruction *);
126
   void emitBAR(const Instruction *);
127
 
128
   void emitATOM(const Instruction *);
129
};
130
 
131
// Shorthand for the register data (id, offset, ...) of the value returned by
// rep() on a source reference (SDATA) or on a definition (DDATA).
// Both macros expand to the same expression.
#define SDATA(a) ((a).rep()->reg.data)
132
#define DDATA(a) ((a).rep()->reg.data)
133
 
134
void CodeEmitterNV50::srcId(const ValueRef& src, const int pos)
135
{
136
   assert(src.get());
137
   code[pos / 32] |= SDATA(src).id << (pos % 32);
138
}
139
 
140
void CodeEmitterNV50::srcId(const ValueRef *src, const int pos)
141
{
142
   assert(src->get());
143
   code[pos / 32] |= SDATA(*src).id << (pos % 32);
144
}
145
 
146
void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos)
147
{
148
   assert(src.get());
149
 
150
   int32_t offset = SDATA(src).offset;
151
 
152
   assert(!adj || src.get()->reg.size <= 4);
153
   if (adj)
154
      offset /= src.get()->reg.size;
155
 
156
   assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16);
157
 
158
   if (offset < 0)
159
      offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff;
160
 
161
   code[pos / 32] |= offset << (pos % 32);
162
}
163
 
164
void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos)
165
{
166
   assert(src.get());
167
 
168
   uint32_t offset = SDATA(src).offset;
169
 
170
   assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3));
171
 
172
   code[pos / 32] |= (offset >> 2) << (pos % 32);
173
}
174
 
175
void CodeEmitterNV50::defId(const ValueDef& def, const int pos)
176
{
177
   assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT);
178
 
179
   code[pos / 32] |= DDATA(def).id << (pos % 32);
180
}
181
 
182
void
183
CodeEmitterNV50::roundMode_MAD(const Instruction *insn)
184
{
185
   switch (insn->rnd) {
186
   case ROUND_M: code[1] |= 1 << 22; break;
187
   case ROUND_P: code[1] |= 2 << 22; break;
188
   case ROUND_Z: code[1] |= 3 << 22; break;
189
   default:
190
      assert(insn->rnd == ROUND_N);
191
      break;
192
   }
193
}
194
 
195
void
196
CodeEmitterNV50::emitMNeg12(const Instruction *i)
197
{
198
   code[1] |= i->src(0).mod.neg() << 26;
199
   code[1] |= i->src(1).mod.neg() << 27;
200
}
201
 
202
// Encode condition code 'cc' as a field of up to 5 bits at bit position
// 'pos' (pos / 32 selects the code word, pos % 32 the bit within it).
//
// cc:  condition code to encode; unknown codes assert and encode as 0.
// ty:  type of the comparison. For a type that is neither TYPE_NONE nor a
//      float type, bit 3 (0x8, the "unordered" bit) of the encoding is
//      cleared.
// pos: bit position; the field must fit inside a single 32-bit word.
void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos)
{
   uint8_t enc;

   // The encoding needs up to 5 bits, so it may start no later than bit 27
   // of whichever word is addressed. (The previous check only covered the
   // first word and accepted any pos >= 32.)
   assert((pos % 32) <= 27);

   switch (cc) {
   case CC_LT:  enc = 0x1; break;
   case CC_LTU: enc = 0x9; break;
   case CC_EQ:  enc = 0x2; break;
   case CC_EQU: enc = 0xa; break;
   case CC_LE:  enc = 0x3; break;
   case CC_LEU: enc = 0xb; break;
   case CC_GT:  enc = 0x4; break;
   case CC_GTU: enc = 0xc; break;
   case CC_NE:  enc = 0x5; break;
   case CC_NEU: enc = 0xd; break;
   case CC_GE:  enc = 0x6; break;
   case CC_GEU: enc = 0xe; break;
   case CC_TR:  enc = 0xf; break;
   case CC_FL:  enc = 0x0; break;

   case CC_O:  enc = 0x10; break;
   case CC_C:  enc = 0x11; break;
   case CC_A:  enc = 0x12; break;
   case CC_S:  enc = 0x13; break;
   case CC_NS: enc = 0x1c; break;
   case CC_NA: enc = 0x1d; break;
   case CC_NC: enc = 0x1e; break;
   case CC_NO: enc = 0x1f; break;

   default:
      enc = 0;
      assert(!"invalid condition code");
      break;
   }
   if (ty != TYPE_NONE && !isFloatType(ty))
      enc &= ~0x8; // unordered only exists for float types

   code[pos / 32] |= enc << (pos % 32);
}
243
 
244
void
245
CodeEmitterNV50::emitFlagsRd(const Instruction *i)
246
{
247
   int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc;
248
 
249
   assert(!(code[1] & 0x00003f80));
250
 
251
   if (s >= 0) {
252
      assert(i->getSrc(s)->reg.file == FILE_FLAGS);
253
      emitCondCode(i->cc, TYPE_NONE, 32 + 7);
254
      srcId(i->src(s), 32 + 12);
255
   } else {
256
      code[1] |= 0x0780;
257
   }
258
}
259
 
260
void
261
CodeEmitterNV50::emitFlagsWr(const Instruction *i)
262
{
263
   assert(!(code[1] & 0x70));
264
 
265
   int flagsDef = i->flagsDef;
266
 
267
   // find flags definition and check that it is the last def
268
   if (flagsDef < 0) {
269
      for (int d = 0; i->defExists(d); ++d)
270
         if (i->def(d).getFile() == FILE_FLAGS)
271
            flagsDef = d;
272
      if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point
273
         WARN("Instruction::flagsDef was not set properly\n");
274
   }
275
   if (flagsDef == 0 && i->defExists(1))
276
      WARN("flags def should not be the primary definition\n");
277
 
278
   if (flagsDef >= 0)
279
      code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40;
280
 
281
}
282
 
283
void
284
CodeEmitterNV50::setARegBits(unsigned int u)
285
{
286
   code[0] |= (u & 3) << 26;
287
   code[1] |= (u & 4);
288
}
289
 
290
void
291
CodeEmitterNV50::setAReg16(const Instruction *i, int s)
292
{
293
   if (i->srcExists(s)) {
294
      s = i->src(s).indirect[0];
295
      if (s >= 0)
296
         setARegBits(SDATA(i->src(s)).id + 1);
297
   }
298
}
299
 
300
void
301
CodeEmitterNV50::setImmediate(const Instruction *i, int s)
302
{
303
   const ImmediateValue *imm = i->src(s).get()->asImm();
304
   assert(imm);
305
 
306
   uint32_t u = imm->reg.data.u32;
307
 
308
   if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT))
309
      u = ~u;
310
 
311
   code[1] |= 3;
312
   code[0] |= (u & 0x3f) << 16;
313
   code[1] |= (u >> 6) << 2;
314
}
315
 
316
void
317
CodeEmitterNV50::setDst(const Value *dst)
318
{
319
   const Storage *reg = &dst->join->reg;
320
 
321
   assert(reg->file != FILE_ADDRESS);
322
 
323
   if (reg->data.id < 0 || reg->file == FILE_FLAGS) {
324
      code[0] |= (127 << 2) | 1;
325
      code[1] |= 8;
326
   } else {
327
      int id;
328
      if (reg->file == FILE_SHADER_OUTPUT) {
329
         code[1] |= 8;
330
         id = reg->data.offset / 4;
331
      } else {
332
         id = reg->data.id;
333
      }
334
      code[0] |= id << 2;
335
   }
336
}
337
 
338
void
339
CodeEmitterNV50::setDst(const Instruction *i, int d)
340
{
341
   if (i->defExists(d)) {
342
      setDst(i->getDef(d));
343
   } else
344
   if (!d) {
345
      code[0] |= 0x01fc; // bit bucket
346
      code[1] |= 0x0008;
347
   }
348
}
349
 
350
// 3 * 2 bits:
351
// 0: r
352
// 1: a/s
353
// 2: c
354
// 3: i
355
void
356
CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
357
{
358
   uint8_t mode = 0;
359
 
360
   for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) {
361
      switch (i->src(s).getFile()) {
362
      case FILE_GPR:
363
         break;
364
      case FILE_MEMORY_SHARED:
365
      case FILE_SHADER_INPUT:
366
         mode |= 1 << (s * 2);
367
         break;
368
      case FILE_MEMORY_CONST:
369
         mode |= 2 << (s * 2);
370
         break;
371
      case FILE_IMMEDIATE:
372
         mode |= 3 << (s * 2);
373
         break;
374
      default:
375
	      ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile());
376
         assert(0);
377
         break;
378
      }
379
   }
380
   switch (mode) {
381
   case 0x00: // rrr
382
      break;
383
   case 0x01: // arr/grr
384
      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
385
         code[0] |= 0x01800000;
386
         if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT)
387
            code[1] |= 0x00200000;
388
      } else {
389
         if (enc == NV50_OP_ENC_SHORT)
390
            code[0] |= 0x01000000;
391
         else
392
            code[1] |= 0x00200000;
393
      }
394
      break;
395
   case 0x03: // irr
396
      assert(i->op == OP_MOV);
397
      return;
398
   case 0x0c: // rir
399
      break;
400
   case 0x0d: // gir
401
      assert(progType == Program::TYPE_GEOMETRY ||
402
             progType == Program::TYPE_COMPUTE);
403
      code[0] |= 0x01000000;
404
      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
405
         int reg = i->src(0).getIndirect(0)->rep()->reg.data.id;
406
         assert(reg < 3);
407
         code[0] |= (reg + 1) << 26;
408
      }
409
      break;
410
   case 0x08: // rcr
411
      code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
412
      code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
413
      break;
414
   case 0x09: // acr/gcr
415
      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) {
416
         code[0] |= 0x01800000;
417
      } else {
418
         code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000;
419
         code[1] |= 0x00200000;
420
      }
421
      code[1] |= (i->getSrc(1)->reg.fileIndex << 22);
422
      break;
423
   case 0x20: // rrc
424
      code[0] |= 0x01000000;
425
      code[1] |= (i->getSrc(2)->reg.fileIndex << 22);
426
      break;
427
   case 0x21: // arc
428
      code[0] |= 0x01000000;
429
      code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22);
430
      assert(progType != Program::TYPE_GEOMETRY);
431
      break;
432
   default:
433
      ERROR("not encodable: %x\n", mode);
434
      assert(0);
435
      break;
436
   }
437
   if (progType != Program::TYPE_COMPUTE)
438
      return;
439
 
440
   if ((mode & 3) == 1) {
441
      const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
442
 
443
      switch (i->getSrc(0)->reg.type) {
444
      case TYPE_U8:
445
         break;
446
      case TYPE_U16:
447
         code[0] |= 1 << pos;
448
         break;
449
      case TYPE_S16:
450
         code[0] |= 2 << pos;
451
         break;
452
      default:
453
         code[0] |= 3 << pos;
454
         assert(i->getSrc(0)->reg.size == 4);
455
         break;
456
      }
457
   }
458
}
459
 
460
void
461
CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot)
462
{
463
   if (Target::operationSrcNr[i->op] <= s)
464
      return;
465
   const Storage *reg = &i->src(s).rep()->reg;
466
 
467
   unsigned int id = (reg->file == FILE_GPR) ?
468
      reg->data.id :
469
      reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here
470
 
471
   switch (slot) {
472
   case 0: code[0] |= id << 9; break;
473
   case 1: code[0] |= id << 16; break;
474
   case 2: code[1] |= id << 14; break;
475
   default:
476
      assert(0);
477
      break;
478
   }
479
}
480
 
481
// the default form:
482
//  - long instruction
483
//  - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr)
484
//  - address & flags
485
void
486
CodeEmitterNV50::emitForm_MAD(const Instruction *i)
487
{
488
   assert(i->encSize == 8);
489
   code[0] |= 1;
490
 
491
   emitFlagsRd(i);
492
   emitFlagsWr(i);
493
 
494
   setDst(i, 0);
495
 
496
   setSrcFileBits(i, NV50_OP_ENC_LONG);
497
   setSrc(i, 0, 0);
498
   setSrc(i, 1, 1);
499
   setSrc(i, 2, 2);
500
 
501
   if (i->getIndirect(0, 0)) {
502
      assert(!i->getIndirect(1, 0));
503
      setAReg16(i, 0);
504
   } else {
505
      setAReg16(i, 1);
506
   }
507
}
508
 
509
// like default form, but 2nd source in slot 2, and no 3rd source
510
void
511
CodeEmitterNV50::emitForm_ADD(const Instruction *i)
512
{
513
   assert(i->encSize == 8);
514
   code[0] |= 1;
515
 
516
   emitFlagsRd(i);
517
   emitFlagsWr(i);
518
 
519
   setDst(i, 0);
520
 
521
   setSrcFileBits(i, NV50_OP_ENC_LONG_ALT);
522
   setSrc(i, 0, 0);
523
   setSrc(i, 1, 2);
524
 
525
   if (i->getIndirect(0, 0)) {
526
      assert(!i->getIndirect(1, 0));
527
      setAReg16(i, 0);
528
   } else {
529
      setAReg16(i, 1);
530
   }
531
}
532
 
533
// default short form (rr, ar, rc, gr)
534
void
535
CodeEmitterNV50::emitForm_MUL(const Instruction *i)
536
{
537
   assert(i->encSize == 4 && !(code[0] & 1));
538
   assert(i->defExists(0));
539
   assert(!i->getPredicate());
540
 
541
   setDst(i, 0);
542
 
543
   setSrcFileBits(i, NV50_OP_ENC_SHORT);
544
   setSrc(i, 0, 0);
545
   setSrc(i, 1, 1);
546
}
547
 
548
// usual immediate form
549
// - 1 to 3 sources where last is immediate (rir, gir)
550
// - no address or predicate possible
551
void
552
CodeEmitterNV50::emitForm_IMM(const Instruction *i)
553
{
554
   assert(i->encSize == 8);
555
   code[0] |= 1;
556
 
557
   assert(i->defExists(0) && i->srcExists(0));
558
 
559
   setDst(i, 0);
560
 
561
   setSrcFileBits(i, NV50_OP_ENC_IMM);
562
   if (Target::operationSrcNr[i->op] > 1) {
563
      setSrc(i, 0, 0);
564
      setImmediate(i, 1);
565
      setSrc(i, 2, 1);
566
   } else {
567
      setImmediate(i, 0);
568
   }
569
}
570
 
571
void
572
CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos)
573
{
574
   uint8_t enc;
575
 
576
   switch (ty) {
577
   case TYPE_F32: // fall through
578
   case TYPE_S32: // fall through
579
   case TYPE_U32:  enc = 0x6; break;
580
   case TYPE_B128: enc = 0x5; break;
581
   case TYPE_F64: // fall through
582
   case TYPE_S64: // fall through
583
   case TYPE_U64:  enc = 0x4; break;
584
   case TYPE_S16:  enc = 0x3; break;
585
   case TYPE_U16:  enc = 0x2; break;
586
   case TYPE_S8:   enc = 0x1; break;
587
   case TYPE_U8:   enc = 0x0; break;
588
   default:
589
      enc = 0;
590
      assert(!"invalid load/store type");
591
      break;
592
   }
593
   code[pos / 32] |= enc << (pos % 32);
594
}
595
 
596
void
597
CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty)
598
{
599
   switch (ty) {
600
   case TYPE_U8: break;
601
   case TYPE_U16: code[1] |= 0x4000; break;
602
   case TYPE_S16: code[1] |= 0x8000; break;
603
   case TYPE_F32:
604
   case TYPE_S32:
605
   case TYPE_U32: code[1] |= 0xc000; break;
606
   default:
607
      assert(0);
608
      break;
609
   }
610
}
611
 
612
void
613
CodeEmitterNV50::emitLOAD(const Instruction *i)
614
{
615
   DataFile sf = i->src(0).getFile();
616
   int32_t offset = i->getSrc(0)->reg.data.offset;
617
 
618
   switch (sf) {
619
   case FILE_SHADER_INPUT:
620
      if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0))
621
         code[0] = 0x11800001;
622
      else
623
         // use 'mov' where we can
624
         code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001;
625
      code[1] = 0x00200000 | (i->lanes << 14);
626
      if (typeSizeof(i->dType) == 4)
627
         code[1] |= 0x04000000;
628
      break;
629
   case FILE_MEMORY_SHARED:
630
      if (targ->getChipset() >= 0x84) {
631
         assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType)));
632
         code[0] = 0x10000001;
633
         code[1] = 0x40000000;
634
 
635
         if (typeSizeof(i->dType) == 4)
636
            code[1] |= 0x04000000;
637
 
638
         emitLoadStoreSizeCS(i->sType);
639
      } else {
640
         assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType)));
641
         code[0] = 0x10000001;
642
         code[1] = 0x00200000 | (i->lanes << 14);
643
         emitLoadStoreSizeCS(i->sType);
644
      }
645
      break;
646
   case FILE_MEMORY_CONST:
647
      code[0] = 0x10000001;
648
      code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22);
649
      if (typeSizeof(i->dType) == 4)
650
         code[1] |= 0x04000000;
651
      emitLoadStoreSizeCS(i->sType);
652
      break;
653
   case FILE_MEMORY_LOCAL:
654
      code[0] = 0xd0000001;
655
      code[1] = 0x40000000;
656
      break;
657
   case FILE_MEMORY_GLOBAL:
658
      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
659
      code[1] = 0x80000000;
660
      break;
661
   default:
662
      assert(!"invalid load source file");
663
      break;
664
   }
665
   if (sf == FILE_MEMORY_LOCAL ||
666
       sf == FILE_MEMORY_GLOBAL)
667
      emitLoadStoreSizeLG(i->sType, 21 + 32);
668
 
669
   setDst(i, 0);
670
 
671
   emitFlagsRd(i);
672
   emitFlagsWr(i);
673
 
674
   if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
675
      srcId(*i->src(0).getIndirect(0), 9);
676
   } else {
677
      setAReg16(i, 0);
678
      srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9);
679
   }
680
}
681
 
682
void
683
CodeEmitterNV50::emitSTORE(const Instruction *i)
684
{
685
   DataFile f = i->getSrc(0)->reg.file;
686
   int32_t offset = i->getSrc(0)->reg.data.offset;
687
 
688
   switch (f) {
689
   case FILE_SHADER_OUTPUT:
690
      code[0] = 0x00000001 | ((offset >> 2) << 9);
691
      code[1] = 0x80c00000;
692
      srcId(i->src(1), 32 + 14);
693
      break;
694
   case FILE_MEMORY_GLOBAL:
695
      code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
696
      code[1] = 0xa0000000;
697
      emitLoadStoreSizeLG(i->dType, 21 + 32);
698
      srcId(i->src(1), 2);
699
      break;
700
   case FILE_MEMORY_LOCAL:
701
      code[0] = 0xd0000001;
702
      code[1] = 0x60000000;
703
      emitLoadStoreSizeLG(i->dType, 21 + 32);
704
      srcId(i->src(1), 2);
705
      break;
706
   case FILE_MEMORY_SHARED:
707
      code[0] = 0x00000001;
708
      code[1] = 0xe0000000;
709
      switch (typeSizeof(i->dType)) {
710
      case 1:
711
         code[0] |= offset << 9;
712
         code[1] |= 0x00400000;
713
         break;
714
      case 2:
715
         code[0] |= (offset >> 1) << 9;
716
         break;
717
      case 4:
718
         code[0] |= (offset >> 2) << 9;
719
         code[1] |= 0x04200000;
720
         break;
721
      default:
722
         assert(0);
723
         break;
724
      }
725
      srcId(i->src(1), 32 + 14);
726
      break;
727
   default:
728
      assert(!"invalid store destination file");
729
      break;
730
   }
731
 
732
   if (f == FILE_MEMORY_GLOBAL)
733
      srcId(*i->src(0).getIndirect(0), 9);
734
   else
735
      setAReg16(i, 0);
736
 
737
   if (f == FILE_MEMORY_LOCAL)
738
      srcAddr16(i->src(0), false, 9);
739
 
740
   emitFlagsRd(i);
741
}
742
 
743
void
744
CodeEmitterNV50::emitMOV(const Instruction *i)
745
{
746
   DataFile sf = i->getSrc(0)->reg.file;
747
   DataFile df = i->getDef(0)->reg.file;
748
 
749
   assert(sf == FILE_GPR || df == FILE_GPR);
750
 
751
   if (sf == FILE_FLAGS) {
752
      code[0] = 0x00000001;
753
      code[1] = 0x20000000;
754
      defId(i->def(0), 2);
755
      srcId(i->src(0), 12);
756
      emitFlagsRd(i);
757
   } else
758
   if (sf == FILE_ADDRESS) {
759
      code[0] = 0x00000001;
760
      code[1] = 0x40000000;
761
      defId(i->def(0), 2);
762
      setARegBits(SDATA(i->src(0)).id + 1);
763
      emitFlagsRd(i);
764
   } else
765
   if (df == FILE_FLAGS) {
766
      code[0] = 0x00000001;
767
      code[1] = 0xa0000000;
768
      defId(i->def(0), 4);
769
      srcId(i->src(0), 9);
770
      emitFlagsRd(i);
771
   } else
772
   if (sf == FILE_IMMEDIATE) {
773
      code[0] = 0x10008001;
774
      code[1] = 0x00000003;
775
      emitForm_IMM(i);
776
   } else {
777
      if (i->encSize == 4) {
778
         code[0] = 0x10008000;
779
      } else {
780
         code[0] = 0x10000001;
781
         code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
782
         code[1] |= (i->lanes << 14);
783
         emitFlagsRd(i);
784
      }
785
      defId(i->def(0), 2);
786
      srcId(i->src(0), 9);
787
   }
788
   if (df == FILE_SHADER_OUTPUT) {
789
      assert(i->encSize == 8);
790
      code[1] |= 0x8;
791
   }
792
}
793
 
794
static inline uint8_t getSRegEncoding(const ValueRef &ref)
795
{
796
   switch (SDATA(ref).sv.sv) {
797
   case SV_PHYSID:        return 0;
798
   case SV_CLOCK:         return 1;
799
   case SV_VERTEX_STRIDE: return 3;
800
// case SV_PM_COUNTER:    return 4 + SDATA(ref).sv.index;
801
   case SV_SAMPLE_INDEX:  return 8;
802
   default:
803
      assert(!"no sreg for system value");
804
      return 0;
805
   }
806
}
807
 
808
void
809
CodeEmitterNV50::emitRDSV(const Instruction *i)
810
{
811
   code[0] = 0x00000001;
812
   code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14);
813
   defId(i->def(0), 2);
814
   emitFlagsRd(i);
815
}
816
 
817
void
818
CodeEmitterNV50::emitNOP()
819
{
820
   code[0] = 0xf0000001;
821
   code[1] = 0xe0000000;
822
}
823
 
824
void
825
CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp)
826
{
827
   code[0] = 0xc0000000 | (lane << 16);
828
   code[1] = 0x80000000;
829
 
830
   code[0] |= (quOp & 0x03) << 20;
831
   code[1] |= (quOp & 0xfc) << 20;
832
 
833
   emitForm_ADD(i);
834
 
835
   if (!i->srcExists(1))
836
      srcId(i->src(0), 32 + 14);
837
}
838
 
839
/* NOTE: This returns the base address of a vertex inside the primitive.
840
 * src0 is an immediate, the index (not offset) of the vertex
841
 * inside the primitive. XXX: signed or unsigned ?
842
 * src1 (may be NULL) should use whatever units the hardware requires
843
 * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value).
844
 */
845
void
846
CodeEmitterNV50::emitPFETCH(const Instruction *i)
847
{
848
   const uint32_t prim = i->src(0).get()->reg.data.u32;
849
   assert(prim <= 127);
850
 
851
   if (i->def(0).getFile() == FILE_ADDRESS) {
852
      // shl $aX a[] 0
853
      code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2);
854
      code[1] = 0xc0200000;
855
      code[0] |= prim << 9;
856
      assert(!i->srcExists(1));
857
   } else
858
   if (i->srcExists(1)) {
859
      // ld b32 $rX a[$aX+base]
860
      code[0] = 0x00000001;
861
      code[1] = 0x04200000 | (0xf << 14);
862
      defId(i->def(0), 2);
863
      code[0] |= prim << 9;
864
      setARegBits(SDATA(i->src(1)).id + 1);
865
   } else {
866
      // mov b32 $rX a[]
867
      code[0] = 0x10000001;
868
      code[1] = 0x04200000 | (0xf << 14);
869
      defId(i->def(0), 2);
870
      code[0] |= prim << 9;
871
   }
872
   emitFlagsRd(i);
873
}
874
 
875
void
876
CodeEmitterNV50::emitINTERP(const Instruction *i)
877
{
878
   code[0] = 0x80000000;
879
 
880
   defId(i->def(0), 2);
881
   srcAddr8(i->src(0), 16);
882
 
883
   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
884
      code[0] |= 1 << 8;
885
   } else {
886
      if (i->op == OP_PINTERP) {
887
         code[0] |= 1 << 25;
888
         srcId(i->src(1), 9);
889
      }
890
      if (i->getSampleMode() == NV50_IR_INTERP_CENTROID)
891
         code[0] |= 1 << 24;
892
   }
893
 
894
   if (i->encSize == 8) {
895
      code[1] =
896
         (code[0] & (3 << 24)) >> (24 - 16) |
897
         (code[0] & (1 <<  8)) << (18 -  8);
898
      code[0] &= ~0x03000100;
899
      code[0] |= 1;
900
      emitFlagsRd(i);
901
   }
902
}
903
 
904
void
905
CodeEmitterNV50::emitMINMAX(const Instruction *i)
906
{
907
   if (i->dType == TYPE_F64) {
908
      code[0] = 0xe0000000;
909
      code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000;
910
   } else {
911
      code[0] = 0x30000000;
912
      code[1] = 0x80000000;
913
      if (i->op == OP_MIN)
914
         code[1] |= 0x20000000;
915
 
916
      switch (i->dType) {
917
      case TYPE_F32: code[0] |= 0x80000000; break;
918
      case TYPE_S32: code[1] |= 0x8c000000; break;
919
      case TYPE_U32: code[1] |= 0x84000000; break;
920
      case TYPE_S16: code[1] |= 0x80000000; break;
921
      case TYPE_U16: break;
922
      default:
923
         assert(0);
924
         break;
925
      }
926
      code[1] |= i->src(0).mod.abs() << 20;
927
      code[1] |= i->src(0).mod.neg() << 26;
928
      code[1] |= i->src(1).mod.abs() << 19;
929
      code[1] |= i->src(1).mod.neg() << 27;
930
   }
931
   emitForm_MAD(i);
932
}
933
 
934
void
935
CodeEmitterNV50::emitFMAD(const Instruction *i)
936
{
937
   const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
938
   const int neg_add = i->src(2).mod.neg();
939
 
940
   code[0] = 0xe0000000;
941
 
942
   if (i->src(1).getFile() == FILE_IMMEDIATE) {
943
      code[1] = 0;
944
      emitForm_IMM(i);
945
      code[0] |= neg_mul << 15;
946
      code[0] |= neg_add << 22;
947
      if (i->saturate)
948
         code[0] |= 1 << 8;
949
   } else
950
   if (i->encSize == 4) {
951
      emitForm_MUL(i);
952
      code[0] |= neg_mul << 15;
953
      code[0] |= neg_add << 22;
954
      if (i->saturate)
955
         code[0] |= 1 << 8;
956
   } else {
957
      code[1]  = neg_mul << 26;
958
      code[1] |= neg_add << 27;
959
      if (i->saturate)
960
         code[1] |= 1 << 29;
961
      emitForm_MAD(i);
962
   }
963
}
964
 
965
void
966
CodeEmitterNV50::emitFADD(const Instruction *i)
967
{
968
   const int neg0 = i->src(0).mod.neg();
969
   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
970
 
971
   code[0] = 0xb0000000;
972
 
973
   assert(!(i->src(0).mod | i->src(1).mod).abs());
974
 
975
   if (i->src(1).getFile() == FILE_IMMEDIATE) {
976
      code[1] = 0;
977
      emitForm_IMM(i);
978
      code[0] |= neg0 << 15;
979
      code[0] |= neg1 << 22;
980
      if (i->saturate)
981
         code[0] |= 1 << 8;
982
   } else
983
   if (i->encSize == 8) {
984
      code[1] = 0;
985
      emitForm_ADD(i);
986
      code[1] |= neg0 << 26;
987
      code[1] |= neg1 << 27;
988
      if (i->saturate)
989
         code[1] |= 1 << 29;
990
   } else {
991
      emitForm_MUL(i);
992
      code[0] |= neg0 << 15;
993
      code[0] |= neg1 << 22;
994
      if (i->saturate)
995
         code[0] |= 1 << 8;
996
   }
997
}
998
 
999
void
1000
CodeEmitterNV50::emitUADD(const Instruction *i)
1001
{
1002
   const int neg0 = i->src(0).mod.neg();
1003
   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
1004
 
1005
   code[0] = 0x20008000;
1006
 
1007
   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1008
      code[1] = 0;
1009
      emitForm_IMM(i);
1010
   } else
1011
   if (i->encSize == 8) {
1012
      code[0] = 0x20000000;
1013
      code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000;
1014
      emitForm_ADD(i);
1015
   } else {
1016
      emitForm_MUL(i);
1017
   }
1018
   assert(!(neg0 && neg1));
1019
   code[0] |= neg0 << 28;
1020
   code[0] |= neg1 << 22;
1021
 
1022
   if (i->flagsSrc >= 0) {
1023
      // addc == sub | subr
1024
      assert(!(code[0] & 0x10400000) && !i->getPredicate());
1025
      code[0] |= 0x10400000;
1026
      srcId(i->src(i->flagsSrc), 32 + 12);
1027
   }
1028
}
1029
 
1030
void
1031
CodeEmitterNV50::emitAADD(const Instruction *i)
1032
{
1033
   const int s = (i->op == OP_MOV) ? 0 : 1;
1034
 
1035
   code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9);
1036
   code[1] = 0x20000000;
1037
 
1038
   code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1039
 
1040
   emitFlagsRd(i);
1041
 
1042
   if (s && i->srcExists(0))
1043
      setARegBits(SDATA(i->src(0)).id + 1);
1044
}
1045
 
1046
void
1047
CodeEmitterNV50::emitIMUL(const Instruction *i)
1048
{
1049
   code[0] = 0x40000000;
1050
 
1051
   if (i->encSize == 8) {
1052
      code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
1053
      emitForm_MAD(i);
1054
   } else {
1055
      if (i->sType == TYPE_S16)
1056
         code[0] |= 0x8100;
1057
      emitForm_MUL(i);
1058
   }
1059
}
1060
 
1061
void
1062
CodeEmitterNV50::emitFMUL(const Instruction *i)
1063
{
1064
   const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
1065
 
1066
   code[0] = 0xc0000000;
1067
 
1068
   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1069
      code[1] = 0;
1070
      emitForm_IMM(i);
1071
      if (neg)
1072
         code[0] |= 0x8000;
1073
      if (i->saturate)
1074
         code[0] |= 1 << 8;
1075
   } else
1076
   if (i->encSize == 8) {
1077
      code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0;
1078
      if (neg)
1079
         code[1] |= 0x08000000;
1080
      if (i->saturate)
1081
         code[1] |= 1 << 20;
1082
      emitForm_MAD(i);
1083
   } else {
1084
      emitForm_MUL(i);
1085
      if (neg)
1086
         code[0] |= 0x8000;
1087
      if (i->saturate)
1088
         code[0] |= 1 << 8;
1089
   }
1090
}
1091
 
1092
void
1093
CodeEmitterNV50::emitIMAD(const Instruction *i)
1094
{
1095
   code[0] = 0x60000000;
1096
   if (isSignedType(i->sType))
1097
      code[1] = i->saturate ? 0x40000000 : 0x20000000;
1098
   else
1099
      code[1] = 0x00000000;
1100
 
1101
   int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
1102
   int neg2 = i->src(2).mod.neg();
1103
 
1104
   assert(!(neg1 & neg2));
1105
   code[1] |= neg1 << 27;
1106
   code[1] |= neg2 << 26;
1107
 
1108
   emitForm_MAD(i);
1109
 
1110
   if (i->flagsSrc >= 0) {
1111
      // add with carry from $cX
1112
      assert(!(code[1] & 0x0c000000) && !i->getPredicate());
1113
      code[1] |= 0xc << 24;
1114
      srcId(i->src(i->flagsSrc), 32 + 12);
1115
   }
1116
}
1117
 
1118
void
1119
CodeEmitterNV50::emitISAD(const Instruction *i)
1120
{
1121
   if (i->encSize == 8) {
1122
      code[0] = 0x50000000;
1123
      switch (i->sType) {
1124
      case TYPE_U32: code[1] = 0x04000000; break;
1125
      case TYPE_S32: code[1] = 0x0c000000; break;
1126
      case TYPE_U16: code[1] = 0x00000000; break;
1127
      case TYPE_S16: code[1] = 0x08000000; break;
1128
      default:
1129
         assert(0);
1130
         break;
1131
      }
1132
      emitForm_MAD(i);
1133
   } else {
1134
      switch (i->sType) {
1135
      case TYPE_U32: code[0] = 0x50008000; break;
1136
      case TYPE_S32: code[0] = 0x50008100; break;
1137
      case TYPE_U16: code[0] = 0x50000000; break;
1138
      case TYPE_S16: code[0] = 0x50000100; break;
1139
      default:
1140
         assert(0);
1141
         break;
1142
      }
1143
      emitForm_MUL(i);
1144
   }
1145
}
1146
 
1147
void
1148
CodeEmitterNV50::emitSET(const Instruction *i)
1149
{
1150
   code[0] = 0x30000000;
1151
   code[1] = 0x60000000;
1152
 
1153
   emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
1154
 
1155
   switch (i->sType) {
1156
   case TYPE_F32: code[0] |= 0x80000000; break;
1157
   case TYPE_S32: code[1] |= 0x0c000000; break;
1158
   case TYPE_U32: code[1] |= 0x04000000; break;
1159
   case TYPE_S16: code[1] |= 0x08000000; break;
1160
   case TYPE_U16: break;
1161
   default:
1162
      assert(0);
1163
      break;
1164
   }
1165
   if (i->src(0).mod.neg()) code[1] |= 0x04000000;
1166
   if (i->src(1).mod.neg()) code[1] |= 0x08000000;
1167
   if (i->src(0).mod.abs()) code[1] |= 0x00100000;
1168
   if (i->src(1).mod.abs()) code[1] |= 0x00080000;
1169
 
1170
   emitForm_MAD(i);
1171
}
1172
 
1173
void
1174
CodeEmitterNV50::roundMode_CVT(RoundMode rnd)
1175
{
1176
   switch (rnd) {
1177
   case ROUND_NI: code[1] |= 0x08000000; break;
1178
   case ROUND_M:  code[1] |= 0x00020000; break;
1179
   case ROUND_MI: code[1] |= 0x08020000; break;
1180
   case ROUND_P:  code[1] |= 0x00040000; break;
1181
   case ROUND_PI: code[1] |= 0x08040000; break;
1182
   case ROUND_Z:  code[1] |= 0x00060000; break;
1183
   case ROUND_ZI: code[1] |= 0x08060000; break;
1184
   default:
1185
      assert(rnd == ROUND_N);
1186
      break;
1187
   }
1188
}
1189
 
1190
void
1191
CodeEmitterNV50::emitCVT(const Instruction *i)
1192
{
1193
   const bool f2f = isFloatType(i->dType) && isFloatType(i->sType);
1194
   RoundMode rnd;
1195
   DataType dType;
1196
 
1197
   switch (i->op) {
1198
   case OP_CEIL:  rnd = f2f ? ROUND_PI : ROUND_P; break;
1199
   case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break;
1200
   case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break;
1201
   default:
1202
      rnd = i->rnd;
1203
      break;
1204
   }
1205
 
1206
   if (i->op == OP_NEG && i->dType == TYPE_U32)
1207
      dType = TYPE_S32;
1208
   else
1209
      dType = i->dType;
1210
 
1211
   code[0] = 0xa0000000;
1212
 
1213
   switch (dType) {
1214
   case TYPE_F64:
1215
      switch (i->sType) {
1216
      case TYPE_F64: code[1] = 0xc4404000; break;
1217
      case TYPE_S64: code[1] = 0x44414000; break;
1218
      case TYPE_U64: code[1] = 0x44404000; break;
1219
      case TYPE_F32: code[1] = 0xc4400000; break;
1220
      case TYPE_S32: code[1] = 0x44410000; break;
1221
      case TYPE_U32: code[1] = 0x44400000; break;
1222
      default:
1223
         assert(0);
1224
         break;
1225
      }
1226
      break;
1227
   case TYPE_S64:
1228
      switch (i->sType) {
1229
      case TYPE_F64: code[1] = 0x8c404000; break;
1230
      case TYPE_F32: code[1] = 0x8c400000; break;
1231
      default:
1232
         assert(0);
1233
         break;
1234
      }
1235
      break;
1236
   case TYPE_U64:
1237
      switch (i->sType) {
1238
      case TYPE_F64: code[1] = 0x84404000; break;
1239
      case TYPE_F32: code[1] = 0x84400000; break;
1240
      default:
1241
         assert(0);
1242
         break;
1243
      }
1244
      break;
1245
   case TYPE_F32:
1246
      switch (i->sType) {
1247
      case TYPE_F64: code[1] = 0xc0404000; break;
1248
      case TYPE_S64: code[1] = 0x40414000; break;
1249
      case TYPE_U64: code[1] = 0x40404000; break;
1250
      case TYPE_F32: code[1] = 0xc4004000; break;
1251
      case TYPE_S32: code[1] = 0x44014000; break;
1252
      case TYPE_U32: code[1] = 0x44004000; break;
1253
      case TYPE_F16: code[1] = 0xc4000000; break;
1254
      case TYPE_U16: code[1] = 0x44000000; break;
1255
      default:
1256
         assert(0);
1257
         break;
1258
      }
1259
      break;
1260
   case TYPE_S32:
1261
      switch (i->sType) {
1262
      case TYPE_F64: code[1] = 0x88404000; break;
1263
      case TYPE_F32: code[1] = 0x8c004000; break;
1264
      case TYPE_S32: code[1] = 0x0c014000; break;
1265
      case TYPE_U32: code[1] = 0x0c004000; break;
1266
      case TYPE_F16: code[1] = 0x8c000000; break;
1267
      case TYPE_S16: code[1] = 0x0c010000; break;
1268
      case TYPE_U16: code[1] = 0x0c000000; break;
1269
      case TYPE_S8:  code[1] = 0x0c018000; break;
1270
      case TYPE_U8:  code[1] = 0x0c008000; break;
1271
      default:
1272
         assert(0);
1273
         break;
1274
      }
1275
      break;
1276
   case TYPE_U32:
1277
      switch (i->sType) {
1278
      case TYPE_F64: code[1] = 0x80404000; break;
1279
      case TYPE_F32: code[1] = 0x84004000; break;
1280
      case TYPE_S32: code[1] = 0x04014000; break;
1281
      case TYPE_U32: code[1] = 0x04004000; break;
1282
      case TYPE_F16: code[1] = 0x84000000; break;
1283
      case TYPE_S16: code[1] = 0x04010000; break;
1284
      case TYPE_U16: code[1] = 0x04000000; break;
1285
      case TYPE_S8:  code[1] = 0x04018000; break;
1286
      case TYPE_U8:  code[1] = 0x04008000; break;
1287
      default:
1288
         assert(0);
1289
         break;
1290
      }
1291
      break;
1292
   case TYPE_S16:
1293
   case TYPE_U16:
1294
   case TYPE_S8:
1295
   case TYPE_U8:
1296
   default:
1297
      assert(0);
1298
      break;
1299
   }
1300
   if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4)
1301
      code[1] |= 0x00004000;
1302
 
1303
   roundMode_CVT(rnd);
1304
 
1305
   switch (i->op) {
1306
   case OP_ABS: code[1] |= 1 << 20; break;
1307
   case OP_SAT: code[1] |= 1 << 19; break;
1308
   case OP_NEG: code[1] |= 1 << 29; break;
1309
   default:
1310
      break;
1311
   }
1312
   code[1] ^= i->src(0).mod.neg() << 29;
1313
   code[1] |= i->src(0).mod.abs() << 20;
1314
   if (i->saturate)
1315
      code[1] |= 1 << 19;
1316
 
1317
   assert(i->op != OP_ABS || !i->src(0).mod.neg());
1318
 
1319
   emitForm_MAD(i);
1320
}
1321
 
1322
void
1323
CodeEmitterNV50::emitPreOp(const Instruction *i)
1324
{
1325
   code[0] = 0xb0000000;
1326
   code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000;
1327
 
1328
   code[1] |= i->src(0).mod.abs() << 20;
1329
   code[1] |= i->src(0).mod.neg() << 26;
1330
 
1331
   emitForm_MAD(i);
1332
}
1333
 
1334
void
1335
CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp)
1336
{
1337
   code[0] = 0x90000000;
1338
 
1339
   if (i->encSize == 4) {
1340
      assert(i->op == OP_RCP);
1341
      code[0] |= i->src(0).mod.abs() << 15;
1342
      code[0] |= i->src(0).mod.neg() << 22;
1343
      emitForm_MUL(i);
1344
   } else {
1345
      code[1] = subOp << 29;
1346
      code[1] |= i->src(0).mod.abs() << 20;
1347
      code[1] |= i->src(0).mod.neg() << 26;
1348
      emitForm_MAD(i);
1349
   }
1350
}
1351
 
1352
void
1353
CodeEmitterNV50::emitNOT(const Instruction *i)
1354
{
1355
   code[0] = 0xd0000000;
1356
   code[1] = 0x0002c000;
1357
 
1358
   switch (i->sType) {
1359
   case TYPE_U32:
1360
   case TYPE_S32:
1361
      code[1] |= 0x04000000;
1362
      break;
1363
   default:
1364
      break;
1365
   }
1366
   emitForm_MAD(i);
1367
   setSrc(i, 0, 1);
1368
}
1369
 
1370
void
1371
CodeEmitterNV50::emitLogicOp(const Instruction *i)
1372
{
1373
   code[0] = 0xd0000000;
1374
   code[1] = 0;
1375
 
1376
   if (i->src(1).getFile() == FILE_IMMEDIATE) {
1377
      switch (i->op) {
1378
      case OP_OR:  code[0] |= 0x0100; break;
1379
      case OP_XOR: code[0] |= 0x8000; break;
1380
      default:
1381
         assert(i->op == OP_AND);
1382
         break;
1383
      }
1384
      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1385
         code[0] |= 1 << 22;
1386
 
1387
      emitForm_IMM(i);
1388
   } else {
1389
      switch (i->op) {
1390
      case OP_AND: code[1] = 0x04000000; break;
1391
      case OP_OR:  code[1] = 0x04004000; break;
1392
      case OP_XOR: code[1] = 0x04008000; break;
1393
      default:
1394
         assert(0);
1395
         break;
1396
      }
1397
      if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
1398
         code[1] |= 1 << 16;
1399
      if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
1400
         code[1] |= 1 << 17;
1401
 
1402
      emitForm_MAD(i);
1403
   }
1404
}
1405
 
1406
void
1407
CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl)
1408
{
1409
   code[0] = 0x00000001 | (shl << 16);
1410
   code[1] = 0xc0000000;
1411
 
1412
   code[0] |= (DDATA(i->def(0)).id + 1) << 2;
1413
 
1414
   setSrcFileBits(i, NV50_OP_ENC_IMM);
1415
   setSrc(i, 0, 0);
1416
   emitFlagsRd(i);
1417
}
1418
 
1419
void
1420
CodeEmitterNV50::emitShift(const Instruction *i)
1421
{
1422
   if (i->def(0).getFile() == FILE_ADDRESS) {
1423
      assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE);
1424
      emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f);
1425
   } else {
1426
      code[0] = 0x30000001;
1427
      code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000;
1428
      if (i->op == OP_SHR && isSignedType(i->sType))
1429
          code[1] |= 1 << 27;
1430
 
1431
      if (i->src(1).getFile() == FILE_IMMEDIATE) {
1432
         code[1] |= 1 << 20;
1433
         code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16;
1434
         defId(i->def(0), 2);
1435
         srcId(i->src(0), 9);
1436
         emitFlagsRd(i);
1437
      } else {
1438
         emitForm_MAD(i);
1439
      }
1440
   }
1441
}
1442
 
1443
void
1444
CodeEmitterNV50::emitOUT(const Instruction *i)
1445
{
1446
   code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401;
1447
   code[1] = 0xc0000000;
1448
 
1449
   emitFlagsRd(i);
1450
}
1451
 
1452
void
1453
CodeEmitterNV50::emitTEX(const TexInstruction *i)
1454
{
1455
   code[0] = 0xf0000001;
1456
   code[1] = 0x00000000;
1457
 
1458
   switch (i->op) {
1459
   case OP_TXB:
1460
      code[1] = 0x20000000;
1461
      break;
1462
   case OP_TXL:
1463
      code[1] = 0x40000000;
1464
      break;
1465
   case OP_TXF:
1466
      code[0] |= 0x01000000;
1467
      break;
1468
   case OP_TXG:
1469
      code[0] |= 0x01000000;
1470
      code[1] = 0x80000000;
1471
      break;
1472
   case OP_TXLQ:
1473
      code[1] = 0x60020000;
1474
      break;
1475
   default:
1476
      assert(i->op == OP_TEX);
1477
      break;
1478
   }
1479
 
1480
   code[0] |= i->tex.r << 9;
1481
   code[0] |= i->tex.s << 17;
1482
 
1483
   int argc = i->tex.target.getArgCount();
1484
 
1485
   if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF)
1486
      argc += 1;
1487
   if (i->tex.target.isShadow())
1488
      argc += 1;
1489
   assert(argc <= 4);
1490
 
1491
   code[0] |= (argc - 1) << 22;
1492
 
1493
   if (i->tex.target.isCube()) {
1494
      code[0] |= 0x08000000;
1495
   } else
1496
   if (i->tex.useOffsets) {
1497
      code[1] |= (i->tex.offset[0] & 0xf) << 24;
1498
      code[1] |= (i->tex.offset[1] & 0xf) << 20;
1499
      code[1] |= (i->tex.offset[2] & 0xf) << 16;
1500
   }
1501
 
1502
   code[0] |= (i->tex.mask & 0x3) << 25;
1503
   code[1] |= (i->tex.mask & 0xc) << 12;
1504
 
1505
   if (i->tex.liveOnly)
1506
      code[1] |= 4;
1507
 
1508
   defId(i->def(0), 2);
1509
 
1510
   emitFlagsRd(i);
1511
}
1512
 
1513
void
1514
CodeEmitterNV50::emitTXQ(const TexInstruction *i)
1515
{
1516
   assert(i->tex.query == TXQ_DIMS);
1517
 
1518
   code[0] = 0xf0000001;
1519
   code[1] = 0x60000000;
1520
 
1521
   code[0] |= i->tex.r << 9;
1522
   code[0] |= i->tex.s << 17;
1523
 
1524
   code[0] |= (i->tex.mask & 0x3) << 25;
1525
   code[1] |= (i->tex.mask & 0xc) << 12;
1526
 
1527
   defId(i->def(0), 2);
1528
 
1529
   emitFlagsRd(i);
1530
}
1531
 
1532
void
1533
CodeEmitterNV50::emitTEXPREP(const TexInstruction *i)
1534
{
1535
   code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9);
1536
   code[1] = 0x60010000;
1537
 
1538
   code[0] |= (i->tex.mask & 0x3) << 25;
1539
   code[1] |= (i->tex.mask & 0xc) << 12;
1540
   defId(i->def(0), 2);
1541
 
1542
   emitFlagsRd(i);
1543
}
1544
 
1545
void
1546
CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i)
1547
{
1548
   uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */
1549
 
1550
   code[0] = 0x10000003; // bra
1551
   code[1] = 0x00000780; // always
1552
 
1553
   switch (i->subOp) {
1554
   case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call
1555
      break;
1556
   case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call
1557
      pos += 8;
1558
      break;
1559
   default:
1560
      assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2));
1561
      code[0] = 0x20000003; // call
1562
      code[1] = 0x00000000; // no predicate
1563
      break;
1564
   }
1565
   addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9);
1566
   addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4);
1567
}
1568
 
1569
void
1570
CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp)
1571
{
1572
   const FlowInstruction *f = i->asFlow();
1573
   bool hasPred = false;
1574
   bool hasTarg = false;
1575
 
1576
   code[0] = 0x00000003 | (flowOp << 28);
1577
   code[1] = 0x00000000;
1578
 
1579
   switch (i->op) {
1580
   case OP_BRA:
1581
      hasPred = true;
1582
      hasTarg = true;
1583
      break;
1584
   case OP_BREAK:
1585
   case OP_BRKPT:
1586
   case OP_DISCARD:
1587
   case OP_RET:
1588
      hasPred = true;
1589
      break;
1590
   case OP_CALL:
1591
   case OP_PREBREAK:
1592
   case OP_JOINAT:
1593
      hasTarg = true;
1594
      break;
1595
   case OP_PRERET:
1596
      hasTarg = true;
1597
      if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) {
1598
         emitPRERETEmu(f);
1599
         return;
1600
      }
1601
      break;
1602
   default:
1603
      break;
1604
   }
1605
 
1606
   if (hasPred)
1607
      emitFlagsRd(i);
1608
 
1609
   if (hasTarg && f) {
1610
      uint32_t pos;
1611
 
1612
      if (f->op == OP_CALL) {
1613
         if (f->builtin) {
1614
            pos = targNV50->getBuiltinOffset(f->target.builtin);
1615
         } else {
1616
            pos = f->target.fn->binPos;
1617
         }
1618
      } else {
1619
         pos = f->target.bb->binPos;
1620
      }
1621
 
1622
      code[0] |= ((pos >>  2) & 0xffff) << 11;
1623
      code[1] |= ((pos >> 18) & 0x003f) << 14;
1624
 
1625
      RelocEntry::Type relocTy;
1626
 
1627
      relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE;
1628
 
1629
      addReloc(relocTy, 0, pos, 0x07fff800, 9);
1630
      addReloc(relocTy, 1, pos, 0x000fc000, -4);
1631
   }
1632
}
1633
 
1634
void
1635
CodeEmitterNV50::emitBAR(const Instruction *i)
1636
{
1637
   ImmediateValue *barId = i->getSrc(0)->asImm();
1638
   assert(barId);
1639
 
1640
   code[0] = 0x82000003 | (barId->reg.data.u32 << 21);
1641
   code[1] = 0x00004000;
1642
 
1643
   if (i->subOp == NV50_IR_SUBOP_BAR_SYNC)
1644
      code[0] |= 1 << 26;
1645
}
1646
 
1647
void
1648
CodeEmitterNV50::emitATOM(const Instruction *i)
1649
{
1650
   uint8_t subOp;
1651
   switch (i->subOp) {
1652
   case NV50_IR_SUBOP_ATOM_ADD:  subOp = 0x0; break;
1653
   case NV50_IR_SUBOP_ATOM_MIN:  subOp = 0x7; break;
1654
   case NV50_IR_SUBOP_ATOM_MAX:  subOp = 0x6; break;
1655
   case NV50_IR_SUBOP_ATOM_INC:  subOp = 0x4; break;
1656
   case NV50_IR_SUBOP_ATOM_DEC:  subOp = 0x5; break;
1657
   case NV50_IR_SUBOP_ATOM_AND:  subOp = 0xa; break;
1658
   case NV50_IR_SUBOP_ATOM_OR:   subOp = 0xb; break;
1659
   case NV50_IR_SUBOP_ATOM_XOR:  subOp = 0xc; break;
1660
   case NV50_IR_SUBOP_ATOM_CAS:  subOp = 0x2; break;
1661
   case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break;
1662
   default:
1663
      assert(!"invalid subop");
1664
      return;
1665
   }
1666
   code[0] = 0xd0000001;
1667
   code[1] = 0xe0c00000 | (subOp << 2);
1668
   if (isSignedType(i->dType))
1669
      code[1] |= 1 << 21;
1670
 
1671
   // args
1672
   emitFlagsRd(i);
1673
   setDst(i, 0);
1674
   setSrc(i, 1, 1);
1675
   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
1676
      setSrc(i, 2, 2);
1677
 
1678
   // g[] pointer
1679
   code[0] |= i->getSrc(0)->reg.fileIndex << 23;
1680
   srcId(i->getIndirect(0, 0), 9);
1681
}
1682
 
1683
bool
1684
CodeEmitterNV50::emitInstruction(Instruction *insn)
1685
{
1686
   if (!insn->encSize) {
1687
      ERROR("skipping unencodable instruction: "); insn->print();
1688
      return false;
1689
   } else
1690
   if (codeSize + insn->encSize > codeSizeLimit) {
1691
      ERROR("code emitter output buffer too small\n");
1692
      return false;
1693
   }
1694
 
1695
   if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) {
1696
      INFO("EMIT: "); insn->print();
1697
   }
1698
 
1699
   switch (insn->op) {
1700
   case OP_MOV:
1701
      emitMOV(insn);
1702
      break;
1703
   case OP_EXIT:
1704
   case OP_NOP:
1705
   case OP_JOIN:
1706
      emitNOP();
1707
      break;
1708
   case OP_VFETCH:
1709
   case OP_LOAD:
1710
      emitLOAD(insn);
1711
      break;
1712
   case OP_EXPORT:
1713
   case OP_STORE:
1714
      emitSTORE(insn);
1715
      break;
1716
   case OP_PFETCH:
1717
      emitPFETCH(insn);
1718
      break;
1719
   case OP_RDSV:
1720
      emitRDSV(insn);
1721
      break;
1722
   case OP_LINTERP:
1723
   case OP_PINTERP:
1724
      emitINTERP(insn);
1725
      break;
1726
   case OP_ADD:
1727
   case OP_SUB:
1728
      if (isFloatType(insn->dType))
1729
         emitFADD(insn);
1730
      else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
1731
         emitAADD(insn);
1732
      else
1733
         emitUADD(insn);
1734
      break;
1735
   case OP_MUL:
1736
      if (isFloatType(insn->dType))
1737
         emitFMUL(insn);
1738
      else
1739
         emitIMUL(insn);
1740
      break;
1741
   case OP_MAD:
1742
   case OP_FMA:
1743
      if (isFloatType(insn->dType))
1744
         emitFMAD(insn);
1745
      else
1746
         emitIMAD(insn);
1747
      break;
1748
   case OP_SAD:
1749
      emitISAD(insn);
1750
      break;
1751
   case OP_NOT:
1752
      emitNOT(insn);
1753
      break;
1754
   case OP_AND:
1755
   case OP_OR:
1756
   case OP_XOR:
1757
      emitLogicOp(insn);
1758
      break;
1759
   case OP_SHL:
1760
   case OP_SHR:
1761
      emitShift(insn);
1762
      break;
1763
   case OP_SET:
1764
      emitSET(insn);
1765
      break;
1766
   case OP_MIN:
1767
   case OP_MAX:
1768
      emitMINMAX(insn);
1769
      break;
1770
   case OP_CEIL:
1771
   case OP_FLOOR:
1772
   case OP_TRUNC:
1773
   case OP_ABS:
1774
   case OP_NEG:
1775
   case OP_SAT:
1776
      emitCVT(insn);
1777
      break;
1778
   case OP_CVT:
1779
      if (insn->def(0).getFile() == FILE_ADDRESS)
1780
         emitARL(insn, 0);
1781
      else
1782
      if (insn->def(0).getFile() == FILE_FLAGS ||
1783
          insn->src(0).getFile() == FILE_FLAGS ||
1784
          insn->src(0).getFile() == FILE_ADDRESS)
1785
         emitMOV(insn);
1786
      else
1787
         emitCVT(insn);
1788
      break;
1789
   case OP_RCP:
1790
      emitSFnOp(insn, 0);
1791
      break;
1792
   case OP_RSQ:
1793
      emitSFnOp(insn, 2);
1794
      break;
1795
   case OP_LG2:
1796
      emitSFnOp(insn, 3);
1797
      break;
1798
   case OP_SIN:
1799
      emitSFnOp(insn, 4);
1800
      break;
1801
   case OP_COS:
1802
      emitSFnOp(insn, 5);
1803
      break;
1804
   case OP_EX2:
1805
      emitSFnOp(insn, 6);
1806
      break;
1807
   case OP_PRESIN:
1808
   case OP_PREEX2:
1809
      emitPreOp(insn);
1810
      break;
1811
   case OP_TEX:
1812
   case OP_TXB:
1813
   case OP_TXL:
1814
   case OP_TXF:
1815
   case OP_TXG:
1816
   case OP_TXLQ:
1817
      emitTEX(insn->asTex());
1818
      break;
1819
   case OP_TXQ:
1820
      emitTXQ(insn->asTex());
1821
      break;
1822
   case OP_TEXPREP:
1823
      emitTEXPREP(insn->asTex());
1824
      break;
1825
   case OP_EMIT:
1826
   case OP_RESTART:
1827
      emitOUT(insn);
1828
      break;
1829
   case OP_DISCARD:
1830
      emitFlow(insn, 0x0);
1831
      break;
1832
   case OP_BRA:
1833
      emitFlow(insn, 0x1);
1834
      break;
1835
   case OP_CALL:
1836
      emitFlow(insn, 0x2);
1837
      break;
1838
   case OP_RET:
1839
      emitFlow(insn, 0x3);
1840
      break;
1841
   case OP_PREBREAK:
1842
      emitFlow(insn, 0x4);
1843
      break;
1844
   case OP_BREAK:
1845
      emitFlow(insn, 0x5);
1846
      break;
1847
   case OP_QUADON:
1848
      emitFlow(insn, 0x6);
1849
      break;
1850
   case OP_QUADPOP:
1851
      emitFlow(insn, 0x7);
1852
      break;
1853
   case OP_JOINAT:
1854
      emitFlow(insn, 0xa);
1855
      break;
1856
   case OP_PRERET:
1857
      emitFlow(insn, 0xd);
1858
      break;
1859
   case OP_QUADOP:
1860
      emitQUADOP(insn, insn->lanes, insn->subOp);
1861
      break;
1862
   case OP_DFDX:
1863
      emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99);
1864
      break;
1865
   case OP_DFDY:
1866
      emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5);
1867
      break;
1868
   case OP_ATOM:
1869
      emitATOM(insn);
1870
      break;
1871
   case OP_BAR:
1872
      emitBAR(insn);
1873
      break;
1874
   case OP_PHI:
1875
   case OP_UNION:
1876
   case OP_CONSTRAINT:
1877
      ERROR("operation should have been eliminated\n");
1878
      return false;
1879
   case OP_EXP:
1880
   case OP_LOG:
1881
   case OP_SQRT:
1882
   case OP_POW:
1883
   case OP_SELP:
1884
   case OP_SLCT:
1885
   case OP_TXD:
1886
   case OP_PRECONT:
1887
   case OP_CONT:
1888
   case OP_POPCNT:
1889
   case OP_INSBF:
1890
   case OP_EXTBF:
1891
      ERROR("operation should have been lowered\n");
1892
      return false;
1893
   default:
1894
      ERROR("unknown op: %u\n", insn->op);
1895
      return false;
1896
   }
1897
   if (insn->join || insn->op == OP_JOIN)
1898
      code[1] |= 0x2;
1899
   else
1900
   if (insn->exit || insn->op == OP_EXIT)
1901
      code[1] |= 0x1;
1902
 
1903
   assert((insn->encSize == 8) == (code[0] & 1));
1904
 
1905
   code += insn->encSize / 4;
1906
   codeSize += insn->encSize;
1907
   return true;
1908
}
1909
 
1910
uint32_t
1911
CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
1912
{
1913
   const Target::OpInfo &info = targ->getOpInfo(i);
1914
 
1915
   if (info.minEncSize > 4)
1916
      return 8;
1917
 
1918
   // check constraints on dst and src operands
1919
   for (int d = 0; i->defExists(d); ++d) {
1920
      if (i->def(d).rep()->reg.data.id > 63 ||
1921
          i->def(d).rep()->reg.file != FILE_GPR)
1922
         return 8;
1923
   }
1924
 
1925
   for (int s = 0; i->srcExists(s); ++s) {
1926
      DataFile sf = i->src(s).getFile();
1927
      if (sf != FILE_GPR)
1928
         if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT)
1929
            return 8;
1930
      if (i->src(s).rep()->reg.data.id > 63)
1931
         return 8;
1932
   }
1933
 
1934
   // check modifiers & rounding
1935
   if (i->join || i->lanes != 0xf || i->exit)
1936
      return 8;
1937
   if (i->op == OP_MUL && i->rnd != ROUND_N)
1938
      return 8;
1939
 
1940
   if (i->asTex())
1941
      return 8; // TODO: short tex encoding
1942
 
1943
   // check constraints on short MAD
1944
   if (info.srcNr >= 2 && i->srcExists(2)) {
1945
      if (!i->defExists(0) || !isFloatType(i->dType) ||
1946
          i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
1947
         return 8;
1948
   }
1949
 
1950
   return info.minEncSize;
1951
}
1952
 
1953
// Change the encoding size of an instruction after BBs have been scheduled.
1954
static void
1955
makeInstructionLong(Instruction *insn)
1956
{
1957
   if (insn->encSize == 8)
1958
      return;
1959
   Function *fn = insn->bb->getFunction();
1960
   int n = 0;
1961
   int adj = 4;
1962
 
1963
   for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next);
1964
 
1965
   if (n & 1) {
1966
      adj = 8;
1967
      insn->next->encSize = 8;
1968
   } else
1969
   if (insn->prev && insn->prev->encSize == 4) {
1970
      adj = 8;
1971
      insn->prev->encSize = 8;
1972
   }
1973
   insn->encSize = 8;
1974
 
1975
   for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
1976
      fn->bbArray[i]->binPos += 4;
1977
   }
1978
   fn->binSize += adj;
1979
   insn->bb->binSize += adj;
1980
}
1981
 
1982
static bool
1983
trySetExitModifier(Instruction *insn)
1984
{
1985
   if (insn->op == OP_DISCARD ||
1986
       insn->op == OP_QUADON ||
1987
       insn->op == OP_QUADPOP)
1988
      return false;
1989
   for (int s = 0; insn->srcExists(s); ++s)
1990
      if (insn->src(s).getFile() == FILE_IMMEDIATE)
1991
         return false;
1992
   if (insn->asFlow()) {
1993
      if (insn->op == OP_CALL) // side effects !
1994
         return false;
1995
      if (insn->getPredicate()) // cannot do conditional exit (or can we ?)
1996
         return false;
1997
      insn->op = OP_EXIT;
1998
   }
1999
   insn->exit = 1;
2000
   makeInstructionLong(insn);
2001
   return true;
2002
}
2003
 
2004
static void
2005
replaceExitWithModifier(Function *func)
2006
{
2007
   BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2008
 
2009
   if (!epilogue->getExit() ||
2010
       epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT
2011
      return;
2012
 
2013
   if (epilogue->getEntry()->op != OP_EXIT) {
2014
      Instruction *insn = epilogue->getExit()->prev;
2015
      if (!insn || !trySetExitModifier(insn))
2016
         return;
2017
      insn->exit = 1;
2018
   } else {
2019
      for (Graph::EdgeIterator ei = func->cfgExit->incident();
2020
           !ei.end(); ei.next()) {
2021
         BasicBlock *bb = BasicBlock::get(ei.getNode());
2022
         Instruction *i = bb->getExit();
2023
 
2024
         if (!i || !trySetExitModifier(i))
2025
            return;
2026
      }
2027
   }
2028
   epilogue->binSize -= 8;
2029
   func->binSize -= 8;
2030
   delete_Instruction(func->getProgram(), epilogue->getExit());
2031
}
2032
 
2033
void
2034
CodeEmitterNV50::prepareEmission(Function *func)
2035
{
2036
   CodeEmitter::prepareEmission(func);
2037
 
2038
   replaceExitWithModifier(func);
2039
}
2040
 
2041
// Construct an emitter for the given NV50 target with no output buffer and
// no relocation info attached yet. The program type is not set here; see
// setProgramType().
CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
   CodeEmitter(target), targNV50(target)
{
   targ = target; // specialized

   // no output buffer yet
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;

   relocInfo = NULL;
}
2049
 
2050
// Create a new NV50 code emitter for this target, configured for the given
// program type. The emitter is allocated with new and returned as a raw
// pointer.
CodeEmitter *
TargetNV50::getCodeEmitter(Program::Type type)
{
   CodeEmitterNV50 *emitter = new CodeEmitterNV50(this);

   emitter->setProgramType(type);

   return emitter;
}
2057
 
2058
} // namespace nv50_ir