Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright 2011 Christoph Bumiller
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
 * OTHER DEALINGS IN THE SOFTWARE.
21
 */
22
 
23
extern "C" {
24
#include "tgsi/tgsi_dump.h"
25
#include "tgsi/tgsi_scan.h"
26
#include "tgsi/tgsi_util.h"
27
}
28
 
29
#include <set>
#include <vector>
30
 
31
#include "nv50_ir.h"
32
#include "nv50_ir_util.h"
33
#include "nv50_ir_build_util.h"
34
 
35
namespace tgsi {
36
 
37
class Source;
38
 
39
static nv50_ir::operation translateOpcode(uint opcode);
40
static nv50_ir::DataFile translateFile(uint file);
41
static nv50_ir::TexTarget translateTexture(uint texTarg);
42
static nv50_ir::SVSemantic translateSysVal(uint sysval);
43
 
44
class Instruction
45
{
46
public:
47
   Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
48
 
49
   class SrcRegister
50
   {
51
   public:
52
      SrcRegister(const struct tgsi_full_src_register *src)
53
         : reg(src->Register),
54
           fsr(src)
55
      { }
56
 
57
      SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
58
 
59
      SrcRegister(const struct tgsi_ind_register& ind)
60
         : reg(tgsi_util_get_src_from_ind(&ind)),
61
           fsr(NULL)
62
      { }
63
 
64
      struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
65
      {
66
         struct tgsi_src_register reg;
67
         memset(®, 0, sizeof(reg));
68
         reg.Index = off.Index;
69
         reg.File = off.File;
70
         reg.SwizzleX = off.SwizzleX;
71
         reg.SwizzleY = off.SwizzleY;
72
         reg.SwizzleZ = off.SwizzleZ;
73
         return reg;
74
      }
75
 
76
      SrcRegister(const struct tgsi_texture_offset& off) :
77
         reg(offsetToSrc(off)),
78
         fsr(NULL)
79
      { }
80
 
81
      uint getFile() const { return reg.File; }
82
 
83
      bool is2D() const { return reg.Dimension; }
84
 
85
      bool isIndirect(int dim) const
86
      {
87
         return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
88
      }
89
 
90
      int getIndex(int dim) const
91
      {
92
         return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
93
      }
94
 
95
      int getSwizzle(int chan) const
96
      {
97
         return tgsi_util_get_src_register_swizzle(®, chan);
98
      }
99
 
100
      nv50_ir::Modifier getMod(int chan) const;
101
 
102
      SrcRegister getIndirect(int dim) const
103
      {
104
         assert(fsr && isIndirect(dim));
105
         if (dim)
106
            return SrcRegister(fsr->DimIndirect);
107
         return SrcRegister(fsr->Indirect);
108
      }
109
 
110
      uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
111
      {
112
         assert(reg.File == TGSI_FILE_IMMEDIATE);
113
         assert(!reg.Absolute);
114
         assert(!reg.Negate);
115
         return info->immd.data[reg.Index * 4 + getSwizzle(c)];
116
      }
117
 
118
   private:
119
      const struct tgsi_src_register reg;
120
      const struct tgsi_full_src_register *fsr;
121
   };
122
 
123
   class DstRegister
124
   {
125
   public:
126
      DstRegister(const struct tgsi_full_dst_register *dst)
127
         : reg(dst->Register),
128
           fdr(dst)
129
      { }
130
 
131
      DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
132
 
133
      uint getFile() const { return reg.File; }
134
 
135
      bool is2D() const { return reg.Dimension; }
136
 
137
      bool isIndirect(int dim) const
138
      {
139
         return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
140
      }
141
 
142
      int getIndex(int dim) const
143
      {
144
         return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
145
      }
146
 
147
      unsigned int getMask() const { return reg.WriteMask; }
148
 
149
      bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
150
 
151
      SrcRegister getIndirect(int dim) const
152
      {
153
         assert(fdr && isIndirect(dim));
154
         if (dim)
155
            return SrcRegister(fdr->DimIndirect);
156
         return SrcRegister(fdr->Indirect);
157
      }
158
 
159
   private:
160
      const struct tgsi_dst_register reg;
161
      const struct tgsi_full_dst_register *fdr;
162
   };
163
 
164
   inline uint getOpcode() const { return insn->Instruction.Opcode; }
165
 
166
   unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
167
   unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
168
 
169
   // mask of used components of source s
170
   unsigned int srcMask(unsigned int s) const;
171
 
172
   SrcRegister getSrc(unsigned int s) const
173
   {
174
      assert(s < srcCount());
175
      return SrcRegister(&insn->Src[s]);
176
   }
177
 
178
   DstRegister getDst(unsigned int d) const
179
   {
180
      assert(d < dstCount());
181
      return DstRegister(&insn->Dst[d]);
182
   }
183
 
184
   SrcRegister getTexOffset(unsigned int i) const
185
   {
186
      assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
187
      return SrcRegister(insn->TexOffsets[i]);
188
   }
189
 
190
   unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
191
 
192
   bool checkDstSrcAliasing() const;
193
 
194
   inline nv50_ir::operation getOP() const {
195
      return translateOpcode(getOpcode()); }
196
 
197
   nv50_ir::DataType inferSrcType() const;
198
   nv50_ir::DataType inferDstType() const;
199
 
200
   nv50_ir::CondCode getSetCond() const;
201
 
202
   nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
203
 
204
   inline uint getLabel() { return insn->Label.Label; }
205
 
206
   unsigned getSaturate() const { return insn->Instruction.Saturate; }
207
 
208
   void print() const
209
   {
210
      tgsi_dump_instruction(insn, 1);
211
   }
212
 
213
private:
214
   const struct tgsi_full_instruction *insn;
215
};
216
 
217
unsigned int Instruction::srcMask(unsigned int s) const
218
{
219
   unsigned int mask = insn->Dst[0].Register.WriteMask;
220
 
221
   switch (insn->Instruction.Opcode) {
222
   case TGSI_OPCODE_COS:
223
   case TGSI_OPCODE_SIN:
224
      return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
225
   case TGSI_OPCODE_DP2:
226
      return 0x3;
227
   case TGSI_OPCODE_DP3:
228
      return 0x7;
229
   case TGSI_OPCODE_DP4:
230
   case TGSI_OPCODE_DPH:
231
   case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
232
      return 0xf;
233
   case TGSI_OPCODE_DST:
234
      return mask & (s ? 0xa : 0x6);
235
   case TGSI_OPCODE_EX2:
236
   case TGSI_OPCODE_EXP:
237
   case TGSI_OPCODE_LG2:
238
   case TGSI_OPCODE_LOG:
239
   case TGSI_OPCODE_POW:
240
   case TGSI_OPCODE_RCP:
241
   case TGSI_OPCODE_RSQ:
242
   case TGSI_OPCODE_SCS:
243
      return 0x1;
244
   case TGSI_OPCODE_IF:
245
   case TGSI_OPCODE_UIF:
246
      return 0x1;
247
   case TGSI_OPCODE_LIT:
248
      return 0xb;
249
   case TGSI_OPCODE_TEX2:
250
   case TGSI_OPCODE_TXB2:
251
   case TGSI_OPCODE_TXL2:
252
      return (s == 0) ? 0xf : 0x3;
253
   case TGSI_OPCODE_TEX:
254
   case TGSI_OPCODE_TXB:
255
   case TGSI_OPCODE_TXD:
256
   case TGSI_OPCODE_TXL:
257
   case TGSI_OPCODE_TXP:
258
   {
259
      const struct tgsi_instruction_texture *tex = &insn->Texture;
260
 
261
      assert(insn->Instruction.Texture);
262
 
263
      mask = 0x7;
264
      if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
265
          insn->Instruction.Opcode != TGSI_OPCODE_TXD)
266
         mask |= 0x8; /* bias, lod or proj */
267
 
268
      switch (tex->Texture) {
269
      case TGSI_TEXTURE_1D:
270
         mask &= 0x9;
271
         break;
272
      case TGSI_TEXTURE_SHADOW1D:
273
         mask &= 0xd;
274
         break;
275
      case TGSI_TEXTURE_1D_ARRAY:
276
      case TGSI_TEXTURE_2D:
277
      case TGSI_TEXTURE_RECT:
278
         mask &= 0xb;
279
         break;
280
      case TGSI_TEXTURE_CUBE_ARRAY:
281
      case TGSI_TEXTURE_SHADOW2D_ARRAY:
282
      case TGSI_TEXTURE_SHADOWCUBE:
283
      case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
284
         mask |= 0x8;
285
         break;
286
      default:
287
         break;
288
      }
289
   }
290
      return mask;
291
   case TGSI_OPCODE_XPD:
292
   {
293
      unsigned int x = 0;
294
      if (mask & 1) x |= 0x6;
295
      if (mask & 2) x |= 0x5;
296
      if (mask & 4) x |= 0x3;
297
      return x;
298
   }
299
   default:
300
      break;
301
   }
302
 
303
   return mask;
304
}
305
 
306
/**
 * Translate the Absolute / Negate flags of the source register into an
 * nv50 IR modifier. The chan argument is not used by this implementation.
 */
nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
{
   nv50_ir::Modifier mod(0);

   if (reg.Absolute) {
      const nv50_ir::Modifier absMod(NV50_IR_MOD_ABS);
      mod = mod | absMod;
   }
   if (reg.Negate) {
      const nv50_ir::Modifier negMod(NV50_IR_MOD_NEG);
      mod = mod | negMod;
   }

   return mod;
}
316
 
317
/**
 * Map a TGSI register file onto the nv50 IR data file.
 * Files without a counterpart (including SAMPLER and NULL) give FILE_NULL.
 */
static nv50_ir::DataFile translateFile(uint file)
{
   nv50_ir::DataFile result = nv50_ir::FILE_NULL;

   switch (file) {
   case TGSI_FILE_CONSTANT:     result = nv50_ir::FILE_MEMORY_CONST;  break;
   case TGSI_FILE_INPUT:        result = nv50_ir::FILE_SHADER_INPUT;  break;
   case TGSI_FILE_OUTPUT:       result = nv50_ir::FILE_SHADER_OUTPUT; break;
   case TGSI_FILE_TEMPORARY:    result = nv50_ir::FILE_GPR;           break;
   case TGSI_FILE_ADDRESS:      result = nv50_ir::FILE_ADDRESS;       break;
   case TGSI_FILE_PREDICATE:    result = nv50_ir::FILE_PREDICATE;     break;
   case TGSI_FILE_IMMEDIATE:    result = nv50_ir::FILE_IMMEDIATE;     break;
   case TGSI_FILE_SYSTEM_VALUE: result = nv50_ir::FILE_SYSTEM_VALUE;  break;
   case TGSI_FILE_RESOURCE:     result = nv50_ir::FILE_MEMORY_GLOBAL; break;
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_NULL:
   default:
      break;
   }

   return result;
}
335
 
336
/**
 * Map a TGSI system value semantic onto the nv50 IR system value.
 * Unknown semantics trip an assertion; without assertions SV_CLOCK is
 * returned.
 */
static nv50_ir::SVSemantic translateSysVal(uint sysval)
{
   switch (sysval) {
   // graphics
   case TGSI_SEMANTIC_FACE:
      return nv50_ir::SV_FACE;
   case TGSI_SEMANTIC_PSIZE:
      return nv50_ir::SV_POINT_SIZE;
   case TGSI_SEMANTIC_PRIMID:
      return nv50_ir::SV_PRIMITIVE_ID;
   case TGSI_SEMANTIC_INSTANCEID:
      return nv50_ir::SV_INSTANCE_ID;
   case TGSI_SEMANTIC_VERTEXID:
      return nv50_ir::SV_VERTEX_ID;
   // compute grid
   case TGSI_SEMANTIC_GRID_SIZE:
      return nv50_ir::SV_NCTAID;
   case TGSI_SEMANTIC_BLOCK_ID:
      return nv50_ir::SV_CTAID;
   case TGSI_SEMANTIC_BLOCK_SIZE:
      return nv50_ir::SV_NTID;
   case TGSI_SEMANTIC_THREAD_ID:
      return nv50_ir::SV_TID;
   default:
      break;
   }

   assert(0);
   return nv50_ir::SV_CLOCK;
}
353
 
354
#define NV50_IR_TEX_TARG_CASE(a, b) \
355
   case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
356
 
357
static nv50_ir::TexTarget translateTexture(uint tex)
358
{
359
   switch (tex) {
360
   NV50_IR_TEX_TARG_CASE(1D, 1D);
361
   NV50_IR_TEX_TARG_CASE(2D, 2D);
362
   NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);
363
   NV50_IR_TEX_TARG_CASE(3D, 3D);
364
   NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
365
   NV50_IR_TEX_TARG_CASE(RECT, RECT);
366
   NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
367
   NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
368
   NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);
369
   NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
370
   NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
371
   NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
372
   NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
373
   NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
374
   NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
375
   NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
376
   NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
377
   NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
378
 
379
   case TGSI_TEXTURE_UNKNOWN:
380
   default:
381
      assert(!"invalid texture target");
382
      return nv50_ir::TEX_TARGET_2D;
383
   }
384
}
385
 
386
/**
 * Data type in which the instruction's sources are interpreted, derived from
 * the opcode alone: U32 or S32 for the opcodes listed, F32 for all others.
 */
nv50_ir::DataType Instruction::inferSrcType() const
{
   nv50_ir::DataType ty = nv50_ir::TYPE_F32;

   switch (getOpcode()) {
   // bitwise and unsigned integer operations
   case TGSI_OPCODE_UIF:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
   case TGSI_OPCODE_ATOMAND:
   case TGSI_OPCODE_ATOMOR:
   case TGSI_OPCODE_ATOMXOR:
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMUMAX:
      ty = nv50_ir::TYPE_U32;
      break;

   // signed integer operations
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_IABS:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISSG:
   case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
   case TGSI_OPCODE_MOD:
   case TGSI_OPCODE_UARL:
   case TGSI_OPCODE_ATOMIMIN:
   case TGSI_OPCODE_ATOMIMAX:
      ty = nv50_ir::TYPE_S32;
      break;

   default:
      break;
   }

   return ty;
}
437
 
438
/**
 * Data type of the instruction's result. Only the float <-> integer
 * conversions differ from the source type; every other opcode defers to
 * inferSrcType().
 */
nv50_ir::DataType Instruction::inferDstType() const
{
   const uint opcode = getOpcode();

   if (opcode == TGSI_OPCODE_F2U)
      return nv50_ir::TYPE_U32;
   if (opcode == TGSI_OPCODE_F2I)
      return nv50_ir::TYPE_S32;
   if (opcode == TGSI_OPCODE_I2F || opcode == TGSI_OPCODE_U2F)
      return nv50_ir::TYPE_F32;

   return inferSrcType();
}
450
 
451
/**
 * Condition code for the TGSI comparison opcodes.
 * Opcodes not listed (and STR) yield CC_ALWAYS.
 */
nv50_ir::CondCode Instruction::getSetCond() const
{
   nv50_ir::CondCode cc = nv50_ir::CC_ALWAYS;

   switch (getOpcode()) {
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_USLT:
      cc = nv50_ir::CC_LT;
      break;
   case TGSI_OPCODE_SLE:
      cc = nv50_ir::CC_LE;
      break;
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USGE:
      cc = nv50_ir::CC_GE;
      break;
   case TGSI_OPCODE_SGT:
      cc = nv50_ir::CC_GT;
      break;
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_USEQ:
      cc = nv50_ir::CC_EQ;
      break;
   case TGSI_OPCODE_SNE:
      // the float variant uses CC_NEU, the integer one below uses CC_NE
      cc = nv50_ir::CC_NEU;
      break;
   case TGSI_OPCODE_USNE:
      cc = nv50_ir::CC_NE;
      break;
   case TGSI_OPCODE_SFL:
      cc = nv50_ir::CC_NEVER;
      break;
   case TGSI_OPCODE_STR:
   default:
      break;
   }

   return cc;
}
482
 
483
#define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
484
 
485
static nv50_ir::operation translateOpcode(uint opcode)
486
{
487
   switch (opcode) {
488
   NV50_IR_OPCODE_CASE(ARL, SHL);
489
   NV50_IR_OPCODE_CASE(MOV, MOV);
490
 
491
   NV50_IR_OPCODE_CASE(RCP, RCP);
492
   NV50_IR_OPCODE_CASE(RSQ, RSQ);
493
 
494
   NV50_IR_OPCODE_CASE(MUL, MUL);
495
   NV50_IR_OPCODE_CASE(ADD, ADD);
496
 
497
   NV50_IR_OPCODE_CASE(MIN, MIN);
498
   NV50_IR_OPCODE_CASE(MAX, MAX);
499
   NV50_IR_OPCODE_CASE(SLT, SET);
500
   NV50_IR_OPCODE_CASE(SGE, SET);
501
   NV50_IR_OPCODE_CASE(MAD, MAD);
502
   NV50_IR_OPCODE_CASE(SUB, SUB);
503
 
504
   NV50_IR_OPCODE_CASE(FLR, FLOOR);
505
   NV50_IR_OPCODE_CASE(ROUND, CVT);
506
   NV50_IR_OPCODE_CASE(EX2, EX2);
507
   NV50_IR_OPCODE_CASE(LG2, LG2);
508
   NV50_IR_OPCODE_CASE(POW, POW);
509
 
510
   NV50_IR_OPCODE_CASE(ABS, ABS);
511
 
512
   NV50_IR_OPCODE_CASE(COS, COS);
513
   NV50_IR_OPCODE_CASE(DDX, DFDX);
514
   NV50_IR_OPCODE_CASE(DDY, DFDY);
515
   NV50_IR_OPCODE_CASE(KILL, DISCARD);
516
 
517
   NV50_IR_OPCODE_CASE(SEQ, SET);
518
   NV50_IR_OPCODE_CASE(SFL, SET);
519
   NV50_IR_OPCODE_CASE(SGT, SET);
520
   NV50_IR_OPCODE_CASE(SIN, SIN);
521
   NV50_IR_OPCODE_CASE(SLE, SET);
522
   NV50_IR_OPCODE_CASE(SNE, SET);
523
   NV50_IR_OPCODE_CASE(STR, SET);
524
   NV50_IR_OPCODE_CASE(TEX, TEX);
525
   NV50_IR_OPCODE_CASE(TXD, TXD);
526
   NV50_IR_OPCODE_CASE(TXP, TEX);
527
 
528
   NV50_IR_OPCODE_CASE(BRA, BRA);
529
   NV50_IR_OPCODE_CASE(CAL, CALL);
530
   NV50_IR_OPCODE_CASE(RET, RET);
531
   NV50_IR_OPCODE_CASE(CMP, SLCT);
532
 
533
   NV50_IR_OPCODE_CASE(TXB, TXB);
534
 
535
   NV50_IR_OPCODE_CASE(DIV, DIV);
536
 
537
   NV50_IR_OPCODE_CASE(TXL, TXL);
538
 
539
   NV50_IR_OPCODE_CASE(CEIL, CEIL);
540
   NV50_IR_OPCODE_CASE(I2F, CVT);
541
   NV50_IR_OPCODE_CASE(NOT, NOT);
542
   NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
543
   NV50_IR_OPCODE_CASE(SHL, SHL);
544
 
545
   NV50_IR_OPCODE_CASE(AND, AND);
546
   NV50_IR_OPCODE_CASE(OR, OR);
547
   NV50_IR_OPCODE_CASE(MOD, MOD);
548
   NV50_IR_OPCODE_CASE(XOR, XOR);
549
   NV50_IR_OPCODE_CASE(SAD, SAD);
550
   NV50_IR_OPCODE_CASE(TXF, TXF);
551
   NV50_IR_OPCODE_CASE(TXQ, TXQ);
552
 
553
   NV50_IR_OPCODE_CASE(EMIT, EMIT);
554
   NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
555
 
556
   NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);
557
 
558
   NV50_IR_OPCODE_CASE(F2I, CVT);
559
   NV50_IR_OPCODE_CASE(IDIV, DIV);
560
   NV50_IR_OPCODE_CASE(IMAX, MAX);
561
   NV50_IR_OPCODE_CASE(IMIN, MIN);
562
   NV50_IR_OPCODE_CASE(IABS, ABS);
563
   NV50_IR_OPCODE_CASE(INEG, NEG);
564
   NV50_IR_OPCODE_CASE(ISGE, SET);
565
   NV50_IR_OPCODE_CASE(ISHR, SHR);
566
   NV50_IR_OPCODE_CASE(ISLT, SET);
567
   NV50_IR_OPCODE_CASE(F2U, CVT);
568
   NV50_IR_OPCODE_CASE(U2F, CVT);
569
   NV50_IR_OPCODE_CASE(UADD, ADD);
570
   NV50_IR_OPCODE_CASE(UDIV, DIV);
571
   NV50_IR_OPCODE_CASE(UMAD, MAD);
572
   NV50_IR_OPCODE_CASE(UMAX, MAX);
573
   NV50_IR_OPCODE_CASE(UMIN, MIN);
574
   NV50_IR_OPCODE_CASE(UMOD, MOD);
575
   NV50_IR_OPCODE_CASE(UMUL, MUL);
576
   NV50_IR_OPCODE_CASE(USEQ, SET);
577
   NV50_IR_OPCODE_CASE(USGE, SET);
578
   NV50_IR_OPCODE_CASE(USHR, SHR);
579
   NV50_IR_OPCODE_CASE(USLT, SET);
580
   NV50_IR_OPCODE_CASE(USNE, SET);
581
 
582
   NV50_IR_OPCODE_CASE(SAMPLE, TEX);
583
   NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
584
   NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
585
   NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
586
   NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
587
   NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
588
   NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
589
   NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
590
   NV50_IR_OPCODE_CASE(GATHER4, TXG);
591
   NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
592
 
593
   NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
594
   NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
595
   NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
596
   NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
597
   NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
598
   NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
599
   NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
600
   NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
601
   NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
602
   NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);
603
 
604
   NV50_IR_OPCODE_CASE(TEX2, TEX);
605
   NV50_IR_OPCODE_CASE(TXB2, TXB);
606
   NV50_IR_OPCODE_CASE(TXL2, TXL);
607
 
608
   NV50_IR_OPCODE_CASE(END, EXIT);
609
 
610
   default:
611
      return nv50_ir::OP_NOP;
612
   }
613
}
614
 
615
/**
 * Sub-operation for the TGSI fence and atomic opcodes; 0 for everything
 * else. The unsigned and signed min/max atomics share a sub-operation.
 */
static uint16_t opcodeToSubOp(uint opcode)
{
   switch (opcode) {
   // memory barriers
   case TGSI_OPCODE_LFENCE:
      return NV50_IR_SUBOP_MEMBAR(L, GL);
   case TGSI_OPCODE_SFENCE:
      return NV50_IR_SUBOP_MEMBAR(S, GL);
   case TGSI_OPCODE_MFENCE:
      return NV50_IR_SUBOP_MEMBAR(M, GL);

   // atomics
   case TGSI_OPCODE_ATOMUADD:
      return NV50_IR_SUBOP_ATOM_ADD;
   case TGSI_OPCODE_ATOMXCHG:
      return NV50_IR_SUBOP_ATOM_EXCH;
   case TGSI_OPCODE_ATOMCAS:
      return NV50_IR_SUBOP_ATOM_CAS;
   case TGSI_OPCODE_ATOMAND:
      return NV50_IR_SUBOP_ATOM_AND;
   case TGSI_OPCODE_ATOMOR:
      return NV50_IR_SUBOP_ATOM_OR;
   case TGSI_OPCODE_ATOMXOR:
      return NV50_IR_SUBOP_ATOM_XOR;
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMIMIN:
      return NV50_IR_SUBOP_ATOM_MIN;
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_ATOMIMAX:
      return NV50_IR_SUBOP_ATOM_MAX;

   default:
      break;
   }
   return 0;
}
635
 
636
bool Instruction::checkDstSrcAliasing() const
637
{
638
   if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
639
      return false;
640
 
641
   for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
642
      if (insn->Src[s].Register.File == TGSI_FILE_NULL)
643
         break;
644
      if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
645
          insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
646
         return true;
647
   }
648
   return false;
649
}
650
 
651
class Source
652
{
653
public:
654
   Source(struct nv50_ir_prog_info *);
655
   ~Source();
656
 
657
public:
658
   bool scanSource();
659
   unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }
660
 
661
public:
662
   struct tgsi_shader_info scan;
663
   struct tgsi_full_instruction *insns;
664
   const struct tgsi_token *tokens;
665
   struct nv50_ir_prog_info *info;
666
 
667
   nv50_ir::DynArray tempArrays;
668
   nv50_ir::DynArray immdArrays;
669
 
670
   typedef nv50_ir::BuildUtil::Location Location;
671
   // these registers are per-subroutine, cannot be used for parameter passing
672
   std::set locals;
673
 
674
   bool mainTempsInLMem;
675
 
676
   int clipVertexOutput;
677
 
678
   struct TextureView {
679
      uint8_t target; // TGSI_TEXTURE_*
680
   };
681
   std::vector textureViews;
682
 
683
   struct Resource {
684
      uint8_t target; // TGSI_TEXTURE_*
685
      bool raw;
686
      uint8_t slot; // $surface index
687
   };
688
   std::vector resources;
689
 
690
private:
691
   int inferSysValDirection(unsigned sn) const;
692
   bool scanDeclaration(const struct tgsi_full_declaration *);
693
   bool scanInstruction(const struct tgsi_full_instruction *);
694
   void scanProperty(const struct tgsi_full_property *);
695
   void scanImmediate(const struct tgsi_full_immediate *);
696
 
697
   inline bool isEdgeFlagPassthrough(const Instruction&) const;
698
};
699
 
700
/**
 * Bind the scanner to a program description and, if basic debugging is
 * enabled, dump the TGSI token stream.
 *
 * insns starts out NULL so that the destructor is safe even when
 * scanSource() is never called; clipVertexOutput starts at -1, the same
 * value scanSource() assigns before parsing.
 */
Source::Source(struct nv50_ir_prog_info *prog)
   : insns(NULL),
     tokens((const struct tgsi_token *)prog->bin.source),
     info(prog),
     mainTempsInLMem(false),
     clipVertexOutput(-1)
{
   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
      tgsi_dump(tokens, 0);
}
709
 
710
/**
 * Release the instruction copy and the immediate arrays allocated by
 * scanSource().
 *
 * The immediate arrays live in the caller-owned info structure, so their
 * pointers are cleared after freeing: a stale pointer left there would be
 * freed a second time by the next Source built on the same info.
 */
Source::~Source()
{
   if (insns) {
      FREE(insns);
      insns = NULL;
   }

   if (info->immd.data) {
      FREE(info->immd.data);
      info->immd.data = NULL;
   }
   if (info->immd.type) {
      FREE(info->immd.type);
      info->immd.type = NULL;
   }
}
720
 
721
bool Source::scanSource()
722
{
723
   unsigned insnCount = 0;
724
   struct tgsi_parse_context parse;
725
 
726
   tgsi_scan_shader(tokens, &scan);
727
 
728
   insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
729
                                                  sizeof(insns[0]));
730
   if (!insns)
731
      return false;
732
 
733
   clipVertexOutput = -1;
734
 
735
   textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
736
   resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
737
 
738
   info->immd.bufSize = 0;
739
 
740
   info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
741
   info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
742
   info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
743
 
744
   if (info->type == PIPE_SHADER_FRAGMENT) {
745
      info->prop.fp.writesDepth = scan.writes_z;
746
      info->prop.fp.usesDiscard = scan.uses_kill;
747
   } else
748
   if (info->type == PIPE_SHADER_GEOMETRY) {
749
      info->prop.gp.instanceCount = 1; // default value
750
   }
751
 
752
   info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
753
   info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
754
 
755
   tgsi_parse_init(&parse, tokens);
756
   while (!tgsi_parse_end_of_tokens(&parse)) {
757
      tgsi_parse_token(&parse);
758
 
759
      switch (parse.FullToken.Token.Type) {
760
      case TGSI_TOKEN_TYPE_IMMEDIATE:
761
         scanImmediate(&parse.FullToken.FullImmediate);
762
         break;
763
      case TGSI_TOKEN_TYPE_DECLARATION:
764
         scanDeclaration(&parse.FullToken.FullDeclaration);
765
         break;
766
      case TGSI_TOKEN_TYPE_INSTRUCTION:
767
         insns[insnCount++] = parse.FullToken.FullInstruction;
768
         scanInstruction(&parse.FullToken.FullInstruction);
769
         break;
770
      case TGSI_TOKEN_TYPE_PROPERTY:
771
         scanProperty(&parse.FullToken.FullProperty);
772
         break;
773
      default:
774
         INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
775
         break;
776
      }
777
   }
778
   tgsi_parse_free(&parse);
779
 
780
   if (mainTempsInLMem)
781
      info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16;
782
 
783
   if (info->io.genUserClip > 0) {
784
      info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1;
785
 
786
      const unsigned int nOut = (info->io.genUserClip + 3) / 4;
787
 
788
      for (unsigned int n = 0; n < nOut; ++n) {
789
         unsigned int i = info->numOutputs++;
790
         info->out[i].id = i;
791
         info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
792
         info->out[i].si = n;
793
         info->out[i].mask = info->io.clipDistanceMask >> (n * 4);
794
      }
795
   }
796
 
797
   return info->assignSlots(info) == 0;
798
}
799
 
800
void Source::scanProperty(const struct tgsi_full_property *prop)
801
{
802
   switch (prop->Property.PropertyName) {
803
   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
804
      info->prop.gp.outputPrim = prop->u[0].Data;
805
      break;
806
   case TGSI_PROPERTY_GS_INPUT_PRIM:
807
      info->prop.gp.inputPrim = prop->u[0].Data;
808
      break;
809
   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
810
      info->prop.gp.maxVertices = prop->u[0].Data;
811
      break;
812
#if 0
813
   case TGSI_PROPERTY_GS_INSTANCE_COUNT:
814
      info->prop.gp.instanceCount = prop->u[0].Data;
815
      break;
816
#endif
817
   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
818
      info->prop.fp.separateFragData = TRUE;
819
      break;
820
   case TGSI_PROPERTY_FS_COORD_ORIGIN:
821
   case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
822
      // we don't care
823
      break;
824
   case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
825
      info->io.genUserClip = -1;
826
      break;
827
   default:
828
      INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
829
      break;
830
   }
831
}
832
 
833
void Source::scanImmediate(const struct tgsi_full_immediate *imm)
834
{
835
   const unsigned n = info->immd.count++;
836
 
837
   assert(n < scan.immediate_count);
838
 
839
   for (int c = 0; c < 4; ++c)
840
      info->immd.data[n * 4 + c] = imm->u[c].Uint;
841
 
842
   info->immd.type[n] = imm->Immediate.DataType;
843
}
844
 
845
int Source::inferSysValDirection(unsigned sn) const
846
{
847
   switch (sn) {
848
   case TGSI_SEMANTIC_INSTANCEID:
849
   case TGSI_SEMANTIC_VERTEXID:
850
      return 1;
851
#if 0
852
   case TGSI_SEMANTIC_LAYER:
853
   case TGSI_SEMANTIC_VIEWPORTINDEX:
854
      return 0;
855
#endif
856
   case TGSI_SEMANTIC_PRIMID:
857
      return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
858
   default:
859
      return 0;
860
   }
861
}
862
 
863
bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
864
{
865
   unsigned i, c;
866
   unsigned sn = TGSI_SEMANTIC_GENERIC;
867
   unsigned si = 0;
868
   const unsigned first = decl->Range.First, last = decl->Range.Last;
869
 
870
   if (decl->Declaration.Semantic) {
871
      sn = decl->Semantic.Name;
872
      si = decl->Semantic.Index;
873
   }
874
 
875
   if (decl->Declaration.Local) {
876
      for (i = first; i <= last; ++i) {
877
         for (c = 0; c < 4; ++c) {
878
            locals.insert(
879
               Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
880
         }
881
      }
882
   }
883
 
884
   switch (decl->Declaration.File) {
885
   case TGSI_FILE_INPUT:
886
      if (info->type == PIPE_SHADER_VERTEX) {
887
         // all vertex attributes are equal
888
         for (i = first; i <= last; ++i) {
889
            info->in[i].sn = TGSI_SEMANTIC_GENERIC;
890
            info->in[i].si = i;
891
         }
892
      } else {
893
         for (i = first; i <= last; ++i, ++si) {
894
            info->in[i].id = i;
895
            info->in[i].sn = sn;
896
            info->in[i].si = si;
897
            if (info->type == PIPE_SHADER_FRAGMENT) {
898
               // translate interpolation mode
899
               switch (decl->Interp.Interpolate) {
900
               case TGSI_INTERPOLATE_CONSTANT:
901
                  info->in[i].flat = 1;
902
                  break;
903
               case TGSI_INTERPOLATE_COLOR:
904
                  info->in[i].sc = 1;
905
                  break;
906
               case TGSI_INTERPOLATE_LINEAR:
907
                  info->in[i].linear = 1;
908
                  break;
909
               default:
910
                  break;
911
               }
912
               if (decl->Interp.Centroid)
913
                  info->in[i].centroid = 1;
914
            }
915
         }
916
      }
917
      break;
918
   case TGSI_FILE_OUTPUT:
919
      for (i = first; i <= last; ++i, ++si) {
920
         switch (sn) {
921
         case TGSI_SEMANTIC_POSITION:
922
            if (info->type == PIPE_SHADER_FRAGMENT)
923
               info->io.fragDepth = i;
924
            else
925
            if (clipVertexOutput < 0)
926
               clipVertexOutput = i;
927
            break;
928
         case TGSI_SEMANTIC_COLOR:
929
            if (info->type == PIPE_SHADER_FRAGMENT)
930
               info->prop.fp.numColourResults++;
931
            break;
932
         case TGSI_SEMANTIC_EDGEFLAG:
933
            info->io.edgeFlagOut = i;
934
            break;
935
         case TGSI_SEMANTIC_CLIPVERTEX:
936
            clipVertexOutput = i;
937
            break;
938
         case TGSI_SEMANTIC_CLIPDIST:
939
            info->io.clipDistanceMask |=
940
               decl->Declaration.UsageMask << (si * 4);
941
            info->io.genUserClip = -1;
942
            break;
943
         default:
944
            break;
945
         }
946
         info->out[i].id = i;
947
         info->out[i].sn = sn;
948
         info->out[i].si = si;
949
      }
950
      break;
951
   case TGSI_FILE_SYSTEM_VALUE:
952
      switch (sn) {
953
      case TGSI_SEMANTIC_INSTANCEID:
954
         info->io.instanceId = first;
955
         break;
956
      case TGSI_SEMANTIC_VERTEXID:
957
         info->io.vertexId = first;
958
         break;
959
      default:
960
         break;
961
      }
962
      for (i = first; i <= last; ++i, ++si) {
963
         info->sv[i].sn = sn;
964
         info->sv[i].si = si;
965
         info->sv[i].input = inferSysValDirection(sn);
966
      }
967
      break;
968
   case TGSI_FILE_RESOURCE:
969
      for (i = first; i <= last; ++i) {
970
         resources[i].target = decl->Resource.Resource;
971
         resources[i].raw = decl->Resource.Raw;
972
         resources[i].slot = i;
973
      }
974
      break;
975
   case TGSI_FILE_SAMPLER_VIEW:
976
      for (i = first; i <= last; ++i)
977
         textureViews[i].target = decl->SamplerView.Resource;
978
      break;
979
   case TGSI_FILE_NULL:
980
   case TGSI_FILE_TEMPORARY:
981
   case TGSI_FILE_ADDRESS:
982
   case TGSI_FILE_CONSTANT:
983
   case TGSI_FILE_IMMEDIATE:
984
   case TGSI_FILE_PREDICATE:
985
   case TGSI_FILE_SAMPLER:
986
      break;
987
   default:
988
      ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
989
      return false;
990
   }
991
   return true;
992
}
993
 
994
inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
995
{
996
   return insn.getOpcode() == TGSI_OPCODE_MOV &&
997
      insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
998
      insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
999
}
1000
 
1001
bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
1002
{
1003
   Instruction insn(inst);
1004
 
1005
   if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
1006
      info->numBarriers = 1;
1007
 
1008
   if (insn.dstCount()) {
1009
      if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
1010
         Instruction::DstRegister dst = insn.getDst(0);
1011
 
1012
         if (dst.isIndirect(0))
1013
            for (unsigned i = 0; i < info->numOutputs; ++i)
1014
               info->out[i].mask = 0xf;
1015
         else
1016
            info->out[dst.getIndex(0)].mask |= dst.getMask();
1017
 
1018
         if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
1019
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
1020
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
1021
            info->out[dst.getIndex(0)].mask &= 1;
1022
 
1023
         if (isEdgeFlagPassthrough(insn))
1024
            info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
1025
      } else
1026
      if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
1027
         if (insn.getDst(0).isIndirect(0))
1028
            mainTempsInLMem = TRUE;
1029
      }
1030
   }
1031
 
1032
   for (unsigned s = 0; s < insn.srcCount(); ++s) {
1033
      Instruction::SrcRegister src = insn.getSrc(s);
1034
      if (src.getFile() == TGSI_FILE_TEMPORARY) {
1035
         if (src.isIndirect(0))
1036
            mainTempsInLMem = TRUE;
1037
      } else
1038
      if (src.getFile() == TGSI_FILE_RESOURCE) {
1039
         if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
1040
            info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
1041
               0x1 : 0x2;
1042
      }
1043
      if (src.getFile() != TGSI_FILE_INPUT)
1044
         continue;
1045
      unsigned mask = insn.srcMask(s);
1046
 
1047
      if (src.isIndirect(0)) {
1048
         for (unsigned i = 0; i < info->numInputs; ++i)
1049
            info->in[i].mask = 0xf;
1050
      } else {
1051
         const int i = src.getIndex(0);
1052
         for (unsigned c = 0; c < 4; ++c) {
1053
            if (!(mask & (1 << c)))
1054
               continue;
1055
            int k = src.getSwizzle(c);
1056
            if (k <= TGSI_SWIZZLE_W)
1057
               info->in[i].mask |= 1 << k;
1058
         }
1059
         switch (info->in[i].sn) {
1060
         case TGSI_SEMANTIC_PSIZE:
1061
         case TGSI_SEMANTIC_PRIMID:
1062
         case TGSI_SEMANTIC_FOG:
1063
            info->in[i].mask &= 0x1;
1064
            break;
1065
         case TGSI_SEMANTIC_PCOORD:
1066
            info->in[i].mask &= 0x3;
1067
            break;
1068
         default:
1069
            break;
1070
         }
1071
      }
1072
   }
1073
   return true;
1074
}
1075
 
1076
// Determines the texture target referenced by source operand s.
// For RESOURCE and SAMPLER_VIEW operands the target comes from the matching
// declaration stored in code; otherwise the target encoded in the
// instruction itself is used.
nv50_ir::TexInstruction::Target
Instruction::getTexture(const tgsi::Source *code, int s) const
{
   // XXX: indirect access
   SrcRegister src = getSrc(s);
   const unsigned file = src.getFile();

   if (file == TGSI_FILE_RESOURCE) {
      const unsigned int r = src.getIndex(0);
      return translateTexture(code->resources.at(r).target);
   }
   if (file == TGSI_FILE_SAMPLER_VIEW) {
      const unsigned int r = src.getIndex(0);
      return translateTexture(code->textureViews.at(r).target);
   }
   return translateTexture(insn->Texture.Texture);
}
1093
 
1094
} // namespace tgsi
1095
 
1096
namespace {
1097
 
1098
using namespace nv50_ir;
1099
 
1100
class Converter : public BuildUtil
1101
{
1102
public:
1103
   Converter(Program *, const tgsi::Source *);
1104
   ~Converter();
1105
 
1106
   bool run();
1107
 
1108
private:
1109
   struct Subroutine
1110
   {
1111
      Subroutine(Function *f) : f(f) { }
1112
      Function *f;
1113
      ValueMap values;
1114
   };
1115
 
1116
   Value *getVertexBase(int s);
1117
   DataArray *getArrayForFile(unsigned file, int idx);
1118
   Value *fetchSrc(int s, int c);
1119
   Value *acquireDst(int d, int c);
1120
   void storeDst(int d, int c, Value *);
1121
 
1122
   Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
1123
   void storeDst(const tgsi::Instruction::DstRegister dst, int c,
1124
                 Value *val, Value *ptr);
1125
 
1126
   Value *applySrcMod(Value *, int s, int c);
1127
 
1128
   Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
1129
   Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
1130
   Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);
1131
 
1132
   bool handleInstruction(const struct tgsi_full_instruction *);
1133
   void exportOutputs();
1134
   inline Subroutine *getSubroutine(unsigned ip);
1135
   inline Subroutine *getSubroutine(Function *);
1136
   inline bool isEndOfSubroutine(uint ip);
1137
 
1138
   void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);
1139
 
1140
   // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
1141
   void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
1142
   void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
1143
   void handleTXF(Value *dst0[4], int R, int L_M);
1144
   void handleTXQ(Value *dst0[4], enum TexQuery);
1145
   void handleLIT(Value *dst0[4]);
1146
   void handleUserClipPlanes();
1147
 
1148
   Symbol *getResourceBase(int r);
1149
   void getResourceCoords(std::vector&, int r, int s);
1150
 
1151
   void handleLOAD(Value *dst0[4]);
1152
   void handleSTORE();
1153
   void handleATOM(Value *dst0[4], DataType, uint16_t subOp);
1154
 
1155
   Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
1156
 
1157
   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
1158
 
1159
   Value *buildDot(int dim);
1160
 
1161
   class BindArgumentsPass : public Pass {
1162
   public:
1163
      BindArgumentsPass(Converter &conv) : conv(conv) { }
1164
 
1165
   private:
1166
      Converter &conv;
1167
      Subroutine *sub;
1168
 
1169
      inline const Location *getValueLocation(Subroutine *, Value *);
1170
 
1171
      template inline void
1172
      updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
1173
                     T (Function::*proto));
1174
 
1175
      template inline void
1176
      updatePrototype(BitSet *set, void (Function::*updateSet)(),
1177
                      T (Function::*proto));
1178
 
1179
   protected:
1180
      bool visit(Function *);
1181
      bool visit(BasicBlock *bb) { return false; }
1182
   };
1183
 
1184
private:
1185
   const struct tgsi::Source *code;
1186
   const struct nv50_ir_prog_info *info;
1187
 
1188
   struct {
1189
      std::map map;
1190
      Subroutine *cur;
1191
   } sub;
1192
 
1193
   uint ip; // instruction pointer
1194
 
1195
   tgsi::Instruction tgsi;
1196
 
1197
   DataType dstTy;
1198
   DataType srcTy;
1199
 
1200
   DataArray tData; // TGSI_FILE_TEMPORARY
1201
   DataArray aData; // TGSI_FILE_ADDRESS
1202
   DataArray pData; // TGSI_FILE_PREDICATE
1203
   DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)
1204
 
1205
   Value *zero;
1206
   Value *fragCoord[4];
1207
   Value *clipVtx[4];
1208
 
1209
   Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
1210
   uint8_t vtxBaseValid;
1211
 
1212
   Stack condBBs;  // fork BB, then else clause BB
1213
   Stack joinBBs;  // fork BB, for inserting join ops on ENDIF
1214
   Stack loopBBs;  // loop headers
1215
   Stack breakBBs; // end of / after loop
1216
};
1217
 
1218
// Creates the symbol for component c of a source register.
// The swizzled component selects the channel; an indirectly addressed
// register is passed to makeSym with index -1.
Symbol *
Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
{
   const int swz = src.getSwizzle(c);
   const int fileIdx = src.is2D() ? src.getIndex(1) : 0;
   const int regIdx = src.isIndirect(0) ? -1 : src.getIndex(0);
   const uint32_t address = src.getIndex(0) * 16 + swz * 4;

   return makeSym(src.getFile(), fileIdx, regIdx, swz, address);
}
1228
 
1229
// Creates the symbol for component c of a destination register.
// An indirectly addressed register is passed to makeSym with index -1.
Symbol *
Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
{
   const int fileIdx = dst.is2D() ? dst.getIndex(1) : 0;
   const int regIdx = dst.isIndirect(0) ? -1 : dst.getIndex(0);
   const uint32_t address = dst.getIndex(0) * 16 + c * 4;

   return makeSym(dst.getFile(), fileIdx, regIdx, c, address);
}
1237
 
1238
// Allocates a symbol in the IR file corresponding to tgsiFile.
//   fileIdx : stored as the symbol's file index
//   idx     : register index, or negative if not statically known
//   c       : component
//   address : offset used when no slot / system value lookup applies
// With a known index, shader inputs and outputs take their offset from the
// slot table in info, and system values are set up from their semantic.
Symbol *
Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
{
   Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));

   sym->reg.fileIndex = fileIdx;

   if (idx < 0) {
      sym->setOffset(address);
      return sym;
   }

   switch (sym->reg.file) {
   case FILE_SHADER_INPUT:
      sym->setOffset(info->in[idx].slot[c] * 4);
      break;
   case FILE_SHADER_OUTPUT:
      sym->setOffset(info->out[idx].slot[c] * 4);
      break;
   case FILE_SYSTEM_VALUE:
      sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
      break;
   default:
      sym->setOffset(address);
      break;
   }
   return sym;
}
1261
 
1262
static inline uint8_t
1263
translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1264
{
1265
   uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1266
 
1267
   if (var->flat)
1268
      mode = NV50_IR_INTERP_FLAT;
1269
   else
1270
   if (var->linear)
1271
      mode = NV50_IR_INTERP_LINEAR;
1272
   else
1273
   if (var->sc)
1274
      mode = NV50_IR_INTERP_SC;
1275
 
1276
   op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1277
      ? OP_PINTERP : OP_LINTERP;
1278
 
1279
   if (var->centroid)
1280
      mode |= NV50_IR_INTERP_CENTROID;
1281
 
1282
   return mode;
1283
}
1284
 
1285
// Emits an interpolation instruction for component c of an input register
// and returns the value it defines.
//   ptr : indirect address value, or NULL for a direct access
Value *
Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
   operation op;

   // XXX: no way to know interpolation mode if we don't know what's accessed
   const int varIdx = ptr ? 0 : src.getIndex(0);
   const uint8_t mode = translateInterpMode(&info->in[varIdx], op);

   Instruction *interp = new_Instruction(func, op, TYPE_F32);

   interp->setDef(0, getScratch());
   interp->setSrc(0, srcToSym(src, c));

   // OP_PINTERP takes fragCoord[3] as its second source
   if (op == OP_PINTERP)
      interp->setSrc(1, fragCoord[3]);

   if (ptr)
      interp->setIndirect(0, 0, ptr);

   interp->setInterpolate(mode);

   bb->insertTail(interp);
   return interp->getDef(0);
}
1308
 
1309
// Applies the modifiers of source s, component c, to val.
// ABS is applied before NEG; each one emits an instruction writing a fresh
// scratch value. Returns the resulting value (val itself if no modifier).
Value *
Converter::applySrcMod(Value *val, int s, int c)
{
   Modifier m = tgsi.getSrc(s).getMod(c);
   DataType ty = tgsi.inferSrcType();
   Value *res = val;

   if (m & Modifier(NV50_IR_MOD_ABS)) {
      res = mkOp1v(OP_ABS, ty, getScratch(), res);
   }
   if (m & Modifier(NV50_IR_MOD_NEG)) {
      res = mkOp1v(OP_NEG, ty, getScratch(), res);
   }
   return res;
}
1323
 
1324
// Returns the OP_PFETCH result for the second-dimension index of source s.
// The value is built on first request and cached in vtxBase[s];
// vtxBaseValid holds one "cached" bit per source.
Value *
Converter::getVertexBase(int s)
{
   assert(s < 5);

   if (vtxBaseValid & (1 << s))
      return vtxBase[s];

   const int index = tgsi.getSrc(s).getIndex(1);

   // relative part of the second dimension, if any
   Value *rel = NULL;
   if (tgsi.getSrc(s).isIndirect(1))
      rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);

   vtxBaseValid |= 1 << s;
   vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel);

   return vtxBase[s];
}
1338
 
1339
// Fetches component c of source s of the current instruction, resolving
// indirect addressing in both dimensions and applying source modifiers.
Value *
Converter::fetchSrc(int s, int c)
{
   tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);

   // first dimension: relative address
   Value *ptr = NULL;
   if (src.isIndirect(0))
      ptr = fetchSrc(src.getIndirect(0), 0, NULL);

   // second dimension: vertex base for inputs, relative index for constants
   Value *dimRel = NULL;
   if (src.is2D()) {
      const unsigned file = src.getFile();

      if (file == TGSI_FILE_INPUT) {
         dimRel = getVertexBase(s);
      } else if (file == TGSI_FILE_CONSTANT) {
         // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
         if (src.isIndirect(1))
            dimRel = fetchSrc(src.getIndirect(1), 0, NULL);
      }
   }

   Value *res = fetchSrc(src, c, ptr);

   if (dimRel)
      res->getInsn()->setIndirect(0, 1, dimRel);

   return applySrcMod(res, s, c);
}
1372
 
1373
// Maps a TGSI register file to the DataArray backing it.
// OUTPUT is only expected for fragment programs (asserted); any other file
// trips an assertion and yields NULL. idx is not used here.
Converter::DataArray *
Converter::getArrayForFile(unsigned file, int idx)
{
   if (file == TGSI_FILE_TEMPORARY)
      return &tData;
   if (file == TGSI_FILE_PREDICATE)
      return &pData;
   if (file == TGSI_FILE_ADDRESS)
      return &aData;
   if (file == TGSI_FILE_OUTPUT) {
      assert(prog->getType() == Program::TYPE_FRAGMENT);
      return &oData;
   }

   assert(!"invalid/unhandled TGSI source file");
   return NULL;
}
1391
 
1392
// Loads component c of the given source register.
//   ptr : indirect address value, or NULL for a direct access
// Immediates become immediate loads, constants and non-fragment inputs are
// loaded from memory, fragment inputs are interpolated (or replaced by a
// default / the face system value), system values are read with OP_RDSV,
// and everything else comes from the DataArray of the register file.
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
   const int idx2d = src.is2D() ? src.getIndex(1) : 0;
   const int idx = src.getIndex(0);
   const int swz = src.getSwizzle(c);

   switch (src.getFile()) {
   case TGSI_FILE_IMMEDIATE:
      assert(!ptr);
      return loadImm(NULL, info->immd.data[idx * 4 + swz]);

   case TGSI_FILE_CONSTANT:
      return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);

   case TGSI_FILE_INPUT:
      if (prog->getType() != Program::TYPE_FRAGMENT)
         return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);

      if (!ptr) {
         // don't load masked inputs, won't be assigned a slot
         if (!(info->in[idx].mask & (1 << swz)))
            return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
         if (info->in[idx].sn == TGSI_SEMANTIC_FACE)
            return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));
      }
      return interpolate(src, c, ptr);

   case TGSI_FILE_OUTPUT:
      assert(!"load from output file");
      return NULL;

   case TGSI_FILE_SYSTEM_VALUE:
      assert(!ptr);
      return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));

   default:
      break;
   }

   DataArray *array = getArrayForFile(src.getFile(), idx2d);
   return array->load(sub.cur->values, idx, swz, ptr);
}
1426
 
1427
// Provides the value an instruction should write for component c of
// destination d.
// Returns NULL for masked components and for RESOURCE destinations, a
// scratch value when the result has to be stored explicitly afterwards
// (indirect, system value, non-fragment output), and otherwise the value
// acquired from the register file's DataArray.
Value *
Converter::acquireDst(int d, int c)
{
   const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
   const unsigned f = dst.getFile();
   const int idx = dst.getIndex(0);
   const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;

   if (dst.isMasked(c))
      return NULL;
   if (f == TGSI_FILE_RESOURCE)
      return NULL;

   if (dst.isIndirect(0))
      return getScratch();
   if (f == TGSI_FILE_SYSTEM_VALUE)
      return getScratch();
   if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT)
      return getScratch();

   DataArray *array = getArrayForFile(f, idx2d);
   return array->acquire(sub.cur->values, idx, c);
}
1445
 
1446
void
1447
Converter::storeDst(int d, int c, Value *val)
1448
{
1449
   const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1450
 
1451
   switch (tgsi.getSaturate()) {
1452
   case TGSI_SAT_NONE:
1453
      break;
1454
   case TGSI_SAT_ZERO_ONE:
1455
      mkOp1(OP_SAT, dstTy, val, val);
1456
      break;
1457
   case TGSI_SAT_MINUS_PLUS_ONE:
1458
      mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
1459
      mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
1460
      break;
1461
   default:
1462
      assert(!"invalid saturation mode");
1463
      break;
1464
   }
1465
 
1466
   Value *ptr = dst.isIndirect(0) ?
1467
      fetchSrc(dst.getIndirect(0), 0, NULL) : NULL;
1468
 
1469
   if (info->io.genUserClip > 0 &&
1470
       dst.getFile() == TGSI_FILE_OUTPUT &&
1471
       !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
1472
      mkMov(clipVtx[c], val);
1473
      val = clipVtx[c];
1474
   }
1475
 
1476
   storeDst(dst, c, val, ptr);
1477
}
1478
 
1479
void
1480
Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
1481
                    Value *val, Value *ptr)
1482
{
1483
   const unsigned f = dst.getFile();
1484
   const int idx = dst.getIndex(0);
1485
   const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
1486
 
1487
   if (f == TGSI_FILE_SYSTEM_VALUE) {
1488
      assert(!ptr);
1489
      mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
1490
   } else
1491
   if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
1492
      if (ptr || (info->out[idx].mask & (1 << c)))
1493
         mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
1494
   } else
1495
   if (f == TGSI_FILE_TEMPORARY ||
1496
       f == TGSI_FILE_PREDICATE ||
1497
       f == TGSI_FILE_ADDRESS ||
1498
       f == TGSI_FILE_OUTPUT) {
1499
      getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
1500
   } else {
1501
      assert(!"invalid dst file");
1502
   }
1503
}
1504
 
1505
// Loop header: runs the following statement once for each channel 0..3 of
// destination d of inst that is not masked.
//   d    : destination index passed to getDst()
//   chan : an existing integer lvalue used as the loop counter
//   inst : the instruction object (parenthesized so that any expression
//          can be passed)
// The expansion ends in an unbraced `if`; do not follow it directly with an
// if/else statement, use a braced block instead.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \
   for (chan = 0; chan < 4; ++chan)                 \
      if (!(inst).getDst(d).isMasked(chan))
1508
 
1509
// Emits the dot product of the first dim components of sources 0 and 1:
// one MUL for component 0 followed by one MAD per further component, all
// accumulating into a single scratch value, which is returned.
Value *
Converter::buildDot(int dim)
{
   assert(dim > 0);

   Value *lhs = fetchSrc(0, 0);
   Value *rhs = fetchSrc(1, 0);
   Value *acc = getScratch();

   mkOp2(OP_MUL, TYPE_F32, acc, lhs, rhs);

   int c = 1;
   while (c < dim) {
      lhs = fetchSrc(0, c);
      rhs = fetchSrc(1, c);
      mkOp3(OP_MAD, TYPE_F32, acc, lhs, rhs, acc);
      ++c;
   }
   return acc;
}
1526
 
1527
void
1528
Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
1529
{
1530
   FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
1531
   join->fixed = 1;
1532
   conv->insertHead(join);
1533
 
1534
   fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
1535
   fork->insertBefore(fork->getExit(), fork->joinAt);
1536
}
1537
 
1538
void
1539
Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
1540
{
1541
   unsigned rIdx = 0, sIdx = 0;
1542
 
1543
   if (R >= 0)
1544
      rIdx = tgsi.getSrc(R).getIndex(0);
1545
   if (S >= 0)
1546
      sIdx = tgsi.getSrc(S).getIndex(0);
1547
 
1548
   tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
1549
 
1550
   if (tgsi.getSrc(R).isIndirect(0)) {
1551
      tex->tex.rIndirectSrc = s;
1552
      tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
1553
   }
1554
   if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
1555
      tex->tex.sIndirectSrc = s;
1556
      tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
1557
   }
1558
}
1559
 
1560
void
1561
Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
1562
{
1563
   TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
1564
   tex->tex.query = query;
1565
   unsigned int c, d;
1566
 
1567
   for (d = 0, c = 0; c < 4; ++c) {
1568
      if (!dst0[c])
1569
         continue;
1570
      tex->tex.mask |= 1 << c;
1571
      tex->setDef(d++, dst0[c]);
1572
   }
1573
   tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
1574
 
1575
   setTexRS(tex, c, 1, -1);
1576
 
1577
   bb->insertTail(tex);
1578
}
1579
 
1580
void
1581
Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
1582
{
1583
   Value *proj = fetchSrc(0, 3);
1584
   Instruction *insn = proj->getUniqueInsn();
1585
   int c;
1586
 
1587
   if (insn->op == OP_PINTERP) {
1588
      bb->insertTail(insn = cloneForward(func, insn));
1589
      insn->op = OP_LINTERP;
1590
      insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
1591
      insn->setSrc(1, NULL);
1592
      proj = insn->getDef(0);
1593
   }
1594
   proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
1595
 
1596
   for (c = 0; c < 4; ++c) {
1597
      if (!(mask & (1 << c)))
1598
         continue;
1599
      if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
1600
         continue;
1601
      mask &= ~(1 << c);
1602
 
1603
      bb->insertTail(insn = cloneForward(func, insn));
1604
      insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
1605
      insn->setSrc(1, proj);
1606
      dst[c] = insn->getDef(0);
1607
   }
1608
   if (!mask)
1609
      return;
1610
 
1611
   proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
1612
 
1613
   for (c = 0; c < 4; ++c)
1614
      if (mask & (1 << c))
1615
         dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
1616
}
1617
 
1618
// order of nv50 ir sources: x y z layer lod/bias shadow
1619
// order of TGSI TEX sources: x y z layer shadow lod/bias
1620
//  lowering will finally set the hw specific order (like array first on nvc0)
1621
void
1622
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
1623
{
1624
   Value *val;
1625
   Value *arg[4], *src[8];
1626
   Value *lod = NULL, *shd = NULL;
1627
   unsigned int s, c, d;
1628
   TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
1629
 
1630
   TexInstruction::Target tgt = tgsi.getTexture(code, R);
1631
 
1632
   for (s = 0; s < tgt.getArgCount(); ++s)
1633
      arg[s] = src[s] = fetchSrc(0, s);
1634
 
1635
   if (texi->op == OP_TXL || texi->op == OP_TXB)
1636
      lod = fetchSrc(L >> 4, L & 3);
1637
 
1638
   if (C == 0x0f)
1639
      C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
1640
 
1641
   if (tgt.isShadow())
1642
      shd = fetchSrc(C >> 4, C & 3);
1643
 
1644
   if (texi->op == OP_TXD) {
1645
      for (c = 0; c < tgt.getDim(); ++c) {
1646
         texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
1647
         texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
1648
      }
1649
   }
1650
 
1651
   // cube textures don't care about projection value, it's divided out
1652
   if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
1653
      unsigned int n = tgt.getDim();
1654
      if (shd) {
1655
         arg[n] = shd;
1656
         ++n;
1657
         assert(tgt.getDim() == tgt.getArgCount());
1658
      }
1659
      loadProjTexCoords(src, arg, (1 << n) - 1);
1660
      if (shd)
1661
         shd = src[n - 1];
1662
   }
1663
 
1664
   if (tgt.isCube()) {
1665
      for (c = 0; c < 3; ++c)
1666
         src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
1667
      val = getScratch();
1668
      mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
1669
      mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
1670
      mkOp1(OP_RCP, TYPE_F32, val, val);
1671
      for (c = 0; c < 3; ++c)
1672
         src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
1673
   }
1674
 
1675
   for (c = 0, d = 0; c < 4; ++c) {
1676
      if (dst[c]) {
1677
         texi->setDef(d++, dst[c]);
1678
         texi->tex.mask |= 1 << c;
1679
      } else {
1680
         // NOTE: maybe hook up def too, for CSE
1681
      }
1682
   }
1683
   for (s = 0; s < tgt.getArgCount(); ++s)
1684
      texi->setSrc(s, src[s]);
1685
   if (lod)
1686
      texi->setSrc(s++, lod);
1687
   if (shd)
1688
      texi->setSrc(s++, shd);
1689
 
1690
   setTexRS(texi, s, R, S);
1691
 
1692
   if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
1693
      texi->tex.levelZero = true;
1694
 
1695
   bb->insertTail(texi);
1696
}
1697
 
1698
// 1st source: xyz = coordinates, w = lod/sample
1699
// 2nd source: offset
1700
void
1701
Converter::handleTXF(Value *dst[4], int R, int L_M)
1702
{
1703
   TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
1704
   int ms;
1705
   unsigned int c, d, s;
1706
 
1707
   texi->tex.target = tgsi.getTexture(code, R);
1708
 
1709
   ms = texi->tex.target.isMS() ? 1 : 0;
1710
   texi->tex.levelZero = ms; /* MS textures don't have mip-maps */
1711
 
1712
   for (c = 0, d = 0; c < 4; ++c) {
1713
      if (dst[c]) {
1714
         texi->setDef(d++, dst[c]);
1715
         texi->tex.mask |= 1 << c;
1716
      }
1717
   }
1718
   for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)
1719
      texi->setSrc(c, fetchSrc(0, c));
1720
   texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
1721
 
1722
   setTexRS(texi, c, R, -1);
1723
 
1724
   for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
1725
      for (c = 0; c < 3; ++c) {
1726
         texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info);
1727
         if (texi->tex.offset[s][c])
1728
            texi->tex.useOffsets = s + 1;
1729
      }
1730
   }
1731
 
1732
   bb->insertTail(texi);
1733
}
1734
 
1735
void
1736
Converter::handleLIT(Value *dst0[4])
1737
{
1738
   Value *val0 = NULL;
1739
   unsigned int mask = tgsi.getDst(0).getMask();
1740
 
1741
   if (mask & (1 << 0))
1742
      loadImm(dst0[0], 1.0f);
1743
 
1744
   if (mask & (1 << 3))
1745
      loadImm(dst0[3], 1.0f);
1746
 
1747
   if (mask & (3 << 1)) {
1748
      val0 = getScratch();
1749
      mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
1750
      if (mask & (1 << 1))
1751
         mkMov(dst0[1], val0);
1752
   }
1753
 
1754
   if (mask & (1 << 2)) {
1755
      Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
1756
      Value *val1 = getScratch(), *val3 = getScratch();
1757
 
1758
      Value *pos128 = loadImm(NULL, +127.999999f);
1759
      Value *neg128 = loadImm(NULL, -127.999999f);
1760
 
1761
      mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
1762
      mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
1763
      mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
1764
      mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
1765
 
1766
      mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0);
1767
   }
1768
}
1769
 
1770
static inline bool
1771
isResourceSpecial(const int r)
1772
{
1773
   return (r == TGSI_RESOURCE_GLOBAL ||
1774
           r == TGSI_RESOURCE_LOCAL ||
1775
           r == TGSI_RESOURCE_PRIVATE ||
1776
           r == TGSI_RESOURCE_INPUT);
1777
}
1778
 
1779
static inline bool
1780
isResourceRaw(const struct tgsi::Source *code, const int r)
1781
{
1782
   return isResourceSpecial(r) || code->resources[r].raw;
1783
}
1784
 
1785
// Returns the texture target of resource r: a buffer for the predefined
// resources, otherwise the translated target of the declared resource.
static inline nv50_ir::TexTarget
getResourceTarget(const struct tgsi::Source *code, int r)
{
   return isResourceSpecial(r)
      ? nv50_ir::TEX_TARGET_BUFFER
      : tgsi::translateTexture(code->resources.at(r).target);
}
1792
 
1793
// Creates the base symbol for accesses to resource r.
// The predefined resources map to the global, shared, local and shader
// input files respectively (LOCAL and INPUT are asserted to be used from
// compute programs only); any other index is a declared resource, which
// maps to global memory with the declared slot as file index.
Symbol *
Converter::getResourceBase(const int r)
{
   switch (r) {
   case TGSI_RESOURCE_GLOBAL:
      return new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);

   case TGSI_RESOURCE_LOCAL:
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
                      info->prop.cp.sharedOffset);

   case TGSI_RESOURCE_PRIVATE:
      return mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
                      info->bin.tlsSpace);

   case TGSI_RESOURCE_INPUT:
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
                      info->prop.cp.inputOffset);

   default:
      return new_Symbol(prog,
                        nv50_ir::FILE_MEMORY_GLOBAL,
                        code->resources.at(r).slot);
   }
}
1823
 
1824
void
1825
Converter::getResourceCoords(std::vector &coords, int r, int s)
1826
{
1827
   const int arg =
1828
      TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
1829
 
1830
   for (int c = 0; c < arg; ++c)
1831
      coords.push_back(fetchSrc(s, c));
1832
 
1833
   // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
1834
   if (r == TGSI_RESOURCE_LOCAL ||
1835
       r == TGSI_RESOURCE_PRIVATE ||
1836
       r == TGSI_RESOURCE_INPUT)
1837
      coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
1838
                         coords[0]);
1839
}
1840
 
1841
// Splits a 4-bit component mask into at most two runs of consecutive
// enabled components, one load/store operation per run.
//   comp : out, first component of each run; must be zero on entry
//   size : out, number of components in each run; must be zero on entry
//   mask : bit c set = component c is accessed
// A single run of 3 components is split in two (2 + 1 when it starts at
// component 0, 1 + 2 when it starts at component 1).
// Returns the number of runs described by comp[] / size[]; this is 1 even
// for an empty mask, in which case size[0] is 0.
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else if (size[n]) {
         // End of the first run: the second one starts after this gap at
         // the earliest. The offset of the first run (comp[0]) has to be
         // included, otherwise a mask such as 0b1010 yields component 2
         // instead of 3.
         n = 1;
         comp[1] = comp[0] + size[0] + 1;
      } else {
         // still in front of the current run: skip the component
         comp[n]++;
      }
      mask >>= 1;
   }
   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
1865
 
1866
// For raw loads, granularity is 4 byte.
1867
// Usage of the texture read mask on OP_SULDP is not allowed.
1868
void
1869
Converter::handleLOAD(Value *dst0[4])
1870
{
1871
   const int r = tgsi.getSrc(0).getIndex(0);
1872
   int c;
1873
   std::vector off, src, ldv, def;
1874
 
1875
   getResourceCoords(off, r, 1);
1876
 
1877
   if (isResourceRaw(code, r)) {
1878
      uint8_t mask = 0;
1879
      uint8_t comp[2] = { 0, 0 };
1880
      uint8_t size[2] = { 0, 0 };
1881
 
1882
      Symbol *base = getResourceBase(r);
1883
 
1884
      // determine the base and size of the at most 2 load ops
1885
      for (c = 0; c < 4; ++c)
1886
         if (!tgsi.getDst(0).isMasked(c))
1887
            mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
1888
 
1889
      int n = partitionLoadStore(comp, size, mask);
1890
 
1891
      src = off;
1892
 
1893
      def.resize(4); // index by component, the ones we need will be non-NULL
1894
      for (c = 0; c < 4; ++c) {
1895
         if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
1896
            def[c] = dst0[c];
1897
         else
1898
         if (mask & (1 << c))
1899
            def[c] = getScratch();
1900
      }
1901
 
1902
      const bool useLd = isResourceSpecial(r) ||
1903
         (info->io.nv50styleSurfaces &&
1904
          code->resources[r].target == TGSI_TEXTURE_BUFFER);
1905
 
1906
      for (int i = 0; i < n; ++i) {
1907
         ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
1908
 
1909
         if (comp[i]) // adjust x component of source address if necessary
1910
            src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
1911
                            off[0], mkImm(comp[i] * 4));
1912
         else
1913
            src[0] = off[0];
1914
 
1915
         if (useLd) {
1916
            Instruction *ld =
1917
               mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
1918
            for (size_t c = 1; c < ldv.size(); ++c)
1919
               ld->setDef(c, ldv[c]);
1920
         } else {
1921
            mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
1922
                  0, ldv, src)->dType = typeOfSize(size[i] * 4);
1923
         }
1924
      }
1925
   } else {
1926
      def.resize(4);
1927
      for (c = 0; c < 4; ++c) {
1928
         if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
1929
            def[c] = getScratch();
1930
         else
1931
            def[c] = dst0[c];
1932
      }
1933
 
1934
      mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
1935
            def, off);
1936
   }
1937
   FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1938
      if (dst0[c] != def[c])
1939
         mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
1940
}
1941
 
1942
// For formatted stores, the write mask on OP_SUSTP can be used.
1943
// Raw stores have to be split.
1944
void
1945
Converter::handleSTORE()
1946
{
1947
   const int r = tgsi.getDst(0).getIndex(0);
1948
   int c;
1949
   std::vector off, src, dummy;
1950
 
1951
   getResourceCoords(off, r, 0);
1952
   src = off;
1953
   const int s = src.size();
1954
 
1955
   if (isResourceRaw(code, r)) {
1956
      uint8_t comp[2] = { 0, 0 };
1957
      uint8_t size[2] = { 0, 0 };
1958
 
1959
      int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
1960
 
1961
      Symbol *base = getResourceBase(r);
1962
 
1963
      const bool useSt = isResourceSpecial(r) ||
1964
         (info->io.nv50styleSurfaces &&
1965
          code->resources[r].target == TGSI_TEXTURE_BUFFER);
1966
 
1967
      for (int i = 0; i < n; ++i) {
1968
         if (comp[i]) // adjust x component of source address if necessary
1969
            src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
1970
                            off[0], mkImm(comp[i] * 4));
1971
         else
1972
            src[0] = off[0];
1973
 
1974
         const DataType stTy = typeOfSize(size[i] * 4);
1975
 
1976
         if (useSt) {
1977
            Instruction *st =
1978
               mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
1979
            for (c = 1; c < size[i]; ++c)
1980
               st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
1981
            st->setIndirect(0, 0, src[0]);
1982
         } else {
1983
            // attach values to be stored
1984
            src.resize(s + size[i]);
1985
            for (c = 0; c < size[i]; ++c)
1986
               src[s + c] = fetchSrc(1, comp[i] + c);
1987
            mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
1988
                  0, dummy, src)->setType(stTy);
1989
         }
1990
      }
1991
   } else {
1992
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
1993
         src.push_back(fetchSrc(1, c));
1994
 
1995
      mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
1996
            dummy, src)->tex.mask = tgsi.getDst(0).getMask();
1997
   }
1998
}
1999
 
2000
// XXX: These only work on resources with the single-component u32/s32 formats.
2001
// Therefore the result is replicated. This might not be intended by TGSI, but
2002
// operating on more than 1 component would produce undefined results because
2003
// they do not exist.
2004
void
2005
Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
2006
{
2007
   const int r = tgsi.getSrc(0).getIndex(0);
2008
   std::vector srcv;
2009
   std::vector defv;
2010
   LValue *dst = getScratch();
2011
 
2012
   getResourceCoords(srcv, r, 1);
2013
 
2014
   if (isResourceSpecial(r)) {
2015
      assert(r != TGSI_RESOURCE_INPUT);
2016
      Instruction *insn;
2017
      insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0));
2018
      insn->subOp = subOp;
2019
      if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2020
         insn->setSrc(2, fetchSrc(3, 0));
2021
      insn->setIndirect(0, 0, srcv.at(0));
2022
   } else {
2023
      operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;
2024
      TexTarget targ = getResourceTarget(code, r);
2025
      int idx = code->resources[r].slot;
2026
      defv.push_back(dst);
2027
      srcv.push_back(fetchSrc(2, 0));
2028
      if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2029
         srcv.push_back(fetchSrc(3, 0));
2030
      TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv);
2031
      tex->subOp = subOp;
2032
      tex->tex.mask = 1;
2033
      tex->setType(ty);
2034
   }
2035
 
2036
   for (int c = 0; c < 4; ++c)
2037
      if (dst0[c])
2038
         dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2039
}
2040
 
2041
// Look up the Subroutine record keyed by TGSI instruction index @ip.
// If none exists yet, a new Function labelled "SUB" is created for it and
// registered in sub.map. Returns a pointer to the map-owned record.
Converter::Subroutine *
Converter::getSubroutine(unsigned ip)
{
   // NOTE(review): the map's template arguments are inferred from the
   // (unsigned, Subroutine) pairs inserted below; confirm against the
   // declaration of sub.map.
   std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);

   if (it == sub.map.end())
      it = sub.map.insert(std::make_pair(
              ip, Subroutine(new Function(prog, "SUB", ip)))).first;

   return &it->second;
}
2052
 
2053
// Look up the Subroutine record belonging to the existing Function @f, using
// the function's label as the key. If no record exists yet, one wrapping @f
// is inserted into sub.map. Returns a pointer to the map-owned record.
Converter::Subroutine *
Converter::getSubroutine(Function *f)
{
   unsigned ip = f->getLabel();
   // NOTE(review): the map's template arguments are inferred from the
   // (unsigned, Subroutine) pairs inserted below; confirm against the
   // declaration of sub.map.
   std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);

   if (it == sub.map.end())
      it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;

   return &it->second;
}
2064
 
2065
bool
2066
Converter::isEndOfSubroutine(uint ip)
2067
{
2068
   assert(ip < code->scan.num_instructions);
2069
   tgsi::Instruction insn(&code->insns[ip]);
2070
   return (insn.getOpcode() == TGSI_OPCODE_END ||
2071
           insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
2072
           // does END occur at end of main or the very end ?
2073
           insn.getOpcode() == TGSI_OPCODE_BGNSUB);
2074
}
2075
 
2076
bool
2077
Converter::handleInstruction(const struct tgsi_full_instruction *insn)
2078
{
2079
   Instruction *geni;
2080
 
2081
   Value *dst0[4], *rDst0[4];
2082
   Value *src0, *src1, *src2;
2083
   Value *val0, *val1;
2084
   int c;
2085
 
2086
   tgsi = tgsi::Instruction(insn);
2087
 
2088
   bool useScratchDst = tgsi.checkDstSrcAliasing();
2089
 
2090
   operation op = tgsi.getOP();
2091
   dstTy = tgsi.inferDstType();
2092
   srcTy = tgsi.inferSrcType();
2093
 
2094
   unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
2095
 
2096
   if (tgsi.dstCount()) {
2097
      for (c = 0; c < 4; ++c) {
2098
         rDst0[c] = acquireDst(0, c);
2099
         dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
2100
      }
2101
   }
2102
 
2103
   switch (tgsi.getOpcode()) {
2104
   case TGSI_OPCODE_ADD:
2105
   case TGSI_OPCODE_UADD:
2106
   case TGSI_OPCODE_AND:
2107
   case TGSI_OPCODE_DIV:
2108
   case TGSI_OPCODE_IDIV:
2109
   case TGSI_OPCODE_UDIV:
2110
   case TGSI_OPCODE_MAX:
2111
   case TGSI_OPCODE_MIN:
2112
   case TGSI_OPCODE_IMAX:
2113
   case TGSI_OPCODE_IMIN:
2114
   case TGSI_OPCODE_UMAX:
2115
   case TGSI_OPCODE_UMIN:
2116
   case TGSI_OPCODE_MOD:
2117
   case TGSI_OPCODE_UMOD:
2118
   case TGSI_OPCODE_MUL:
2119
   case TGSI_OPCODE_UMUL:
2120
   case TGSI_OPCODE_OR:
2121
   case TGSI_OPCODE_POW:
2122
   case TGSI_OPCODE_SHL:
2123
   case TGSI_OPCODE_ISHR:
2124
   case TGSI_OPCODE_USHR:
2125
   case TGSI_OPCODE_SUB:
2126
   case TGSI_OPCODE_XOR:
2127
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2128
         src0 = fetchSrc(0, c);
2129
         src1 = fetchSrc(1, c);
2130
         mkOp2(op, dstTy, dst0[c], src0, src1);
2131
      }
2132
      break;
2133
   case TGSI_OPCODE_MAD:
2134
   case TGSI_OPCODE_UMAD:
2135
   case TGSI_OPCODE_SAD:
2136
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2137
         src0 = fetchSrc(0, c);
2138
         src1 = fetchSrc(1, c);
2139
         src2 = fetchSrc(2, c);
2140
         mkOp3(op, dstTy, dst0[c], src0, src1, src2);
2141
      }
2142
      break;
2143
   case TGSI_OPCODE_MOV:
2144
   case TGSI_OPCODE_ABS:
2145
   case TGSI_OPCODE_CEIL:
2146
   case TGSI_OPCODE_FLR:
2147
   case TGSI_OPCODE_TRUNC:
2148
   case TGSI_OPCODE_RCP:
2149
   case TGSI_OPCODE_IABS:
2150
   case TGSI_OPCODE_INEG:
2151
   case TGSI_OPCODE_NOT:
2152
   case TGSI_OPCODE_DDX:
2153
   case TGSI_OPCODE_DDY:
2154
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2155
         mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
2156
      break;
2157
   case TGSI_OPCODE_RSQ:
2158
      src0 = fetchSrc(0, 0);
2159
      val0 = getScratch();
2160
      mkOp1(OP_ABS, TYPE_F32, val0, src0);
2161
      mkOp1(OP_RSQ, TYPE_F32, val0, val0);
2162
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2163
         mkMov(dst0[c], val0);
2164
      break;
2165
   case TGSI_OPCODE_ARL:
2166
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2167
         src0 = fetchSrc(0, c);
2168
         mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M;
2169
         mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4));
2170
      }
2171
      break;
2172
   case TGSI_OPCODE_UARL:
2173
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2174
         mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4));
2175
      break;
2176
   case TGSI_OPCODE_EX2:
2177
   case TGSI_OPCODE_LG2:
2178
      val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
2179
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2180
         mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
2181
      break;
2182
   case TGSI_OPCODE_COS:
2183
   case TGSI_OPCODE_SIN:
2184
      val0 = getScratch();
2185
      if (mask & 7) {
2186
         mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
2187
         mkOp1(op, TYPE_F32, val0, val0);
2188
         for (c = 0; c < 3; ++c)
2189
            if (dst0[c])
2190
               mkMov(dst0[c], val0);
2191
      }
2192
      if (dst0[3]) {
2193
         mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
2194
         mkOp1(op, TYPE_F32, dst0[3], val0);
2195
      }
2196
      break;
2197
   case TGSI_OPCODE_SCS:
2198
      if (mask & 3) {
2199
         val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0));
2200
         if (dst0[0])
2201
            mkOp1(OP_COS, TYPE_F32, dst0[0], val0);
2202
         if (dst0[1])
2203
            mkOp1(OP_SIN, TYPE_F32, dst0[1], val0);
2204
      }
2205
      if (dst0[2])
2206
         loadImm(dst0[2], 0.0f);
2207
      if (dst0[3])
2208
         loadImm(dst0[3], 1.0f);
2209
      break;
2210
   case TGSI_OPCODE_EXP:
2211
      src0 = fetchSrc(0, 0);
2212
      val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
2213
      if (dst0[1])
2214
         mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
2215
      if (dst0[0])
2216
         mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
2217
      if (dst0[2])
2218
         mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
2219
      if (dst0[3])
2220
         loadImm(dst0[3], 1.0f);
2221
      break;
2222
   case TGSI_OPCODE_LOG:
2223
      src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
2224
      val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
2225
      if (dst0[0] || dst0[1])
2226
         val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
2227
      if (dst0[1]) {
2228
         mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
2229
         mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
2230
         mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
2231
      }
2232
      if (dst0[3])
2233
         loadImm(dst0[3], 1.0f);
2234
      break;
2235
   case TGSI_OPCODE_DP2:
2236
      val0 = buildDot(2);
2237
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2238
         mkMov(dst0[c], val0);
2239
      break;
2240
   case TGSI_OPCODE_DP3:
2241
      val0 = buildDot(3);
2242
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2243
         mkMov(dst0[c], val0);
2244
      break;
2245
   case TGSI_OPCODE_DP4:
2246
      val0 = buildDot(4);
2247
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2248
         mkMov(dst0[c], val0);
2249
      break;
2250
   case TGSI_OPCODE_DPH:
2251
      val0 = buildDot(3);
2252
      src1 = fetchSrc(1, 3);
2253
      mkOp2(OP_ADD, TYPE_F32, val0, val0, src1);
2254
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2255
         mkMov(dst0[c], val0);
2256
      break;
2257
   case TGSI_OPCODE_DST:
2258
      if (dst0[0])
2259
         loadImm(dst0[0], 1.0f);
2260
      if (dst0[1]) {
2261
         src0 = fetchSrc(0, 1);
2262
         src1 = fetchSrc(1, 1);
2263
         mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
2264
      }
2265
      if (dst0[2])
2266
         mkMov(dst0[2], fetchSrc(0, 2));
2267
      if (dst0[3])
2268
         mkMov(dst0[3], fetchSrc(1, 3));
2269
      break;
2270
   case TGSI_OPCODE_LRP:
2271
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2272
         src0 = fetchSrc(0, c);
2273
         src1 = fetchSrc(1, c);
2274
         src2 = fetchSrc(2, c);
2275
         mkOp3(OP_MAD, TYPE_F32, dst0[c],
2276
               mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
2277
      }
2278
      break;
2279
   case TGSI_OPCODE_LIT:
2280
      handleLIT(dst0);
2281
      break;
2282
   case TGSI_OPCODE_XPD:
2283
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2284
         if (c < 3) {
2285
            val0 = getSSA();
2286
            src0 = fetchSrc(1, (c + 1) % 3);
2287
            src1 = fetchSrc(0, (c + 2) % 3);
2288
            mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
2289
            mkOp1(OP_NEG, TYPE_F32, val0, val0);
2290
 
2291
            src0 = fetchSrc(0, (c + 1) % 3);
2292
            src1 = fetchSrc(1, (c + 2) % 3);
2293
            mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
2294
         } else {
2295
            loadImm(dst0[c], 1.0f);
2296
         }
2297
      }
2298
      break;
2299
   case TGSI_OPCODE_ISSG:
2300
   case TGSI_OPCODE_SSG:
2301
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2302
         src0 = fetchSrc(0, c);
2303
         val0 = getScratch();
2304
         val1 = getScratch();
2305
         mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero);
2306
         mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero);
2307
         if (srcTy == TYPE_F32)
2308
            mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
2309
         else
2310
            mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
2311
      }
2312
      break;
2313
   case TGSI_OPCODE_UCMP:
2314
   case TGSI_OPCODE_CMP:
2315
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2316
         src0 = fetchSrc(0, c);
2317
         src1 = fetchSrc(1, c);
2318
         src2 = fetchSrc(2, c);
2319
         if (src1 == src2)
2320
            mkMov(dst0[c], src1);
2321
         else
2322
            mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
2323
                  srcTy, dst0[c], src1, src2, src0);
2324
      }
2325
      break;
2326
   case TGSI_OPCODE_FRC:
2327
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2328
         src0 = fetchSrc(0, c);
2329
         val0 = getScratch();
2330
         mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
2331
         mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
2332
      }
2333
      break;
2334
   case TGSI_OPCODE_ROUND:
2335
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2336
         mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
2337
         ->rnd = ROUND_NI;
2338
      break;
2339
   case TGSI_OPCODE_CLAMP:
2340
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2341
         src0 = fetchSrc(0, c);
2342
         src1 = fetchSrc(1, c);
2343
         src2 = fetchSrc(2, c);
2344
         val0 = getScratch();
2345
         mkOp2(OP_MIN, TYPE_F32, val0, src0, src1);
2346
         mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2);
2347
      }
2348
      break;
2349
   case TGSI_OPCODE_SLT:
2350
   case TGSI_OPCODE_SGE:
2351
   case TGSI_OPCODE_SEQ:
2352
   case TGSI_OPCODE_SFL:
2353
   case TGSI_OPCODE_SGT:
2354
   case TGSI_OPCODE_SLE:
2355
   case TGSI_OPCODE_SNE:
2356
   case TGSI_OPCODE_STR:
2357
   case TGSI_OPCODE_ISGE:
2358
   case TGSI_OPCODE_ISLT:
2359
   case TGSI_OPCODE_USEQ:
2360
   case TGSI_OPCODE_USGE:
2361
   case TGSI_OPCODE_USLT:
2362
   case TGSI_OPCODE_USNE:
2363
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
2364
         src0 = fetchSrc(0, c);
2365
         src1 = fetchSrc(1, c);
2366
         mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1);
2367
      }
2368
      break;
2369
   case TGSI_OPCODE_KILL_IF:
2370
      val0 = new_LValue(func, FILE_PREDICATE);
2371
      for (c = 0; c < 4; ++c) {
2372
         mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero);
2373
         mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
2374
      }
2375
      break;
2376
   case TGSI_OPCODE_KILL:
2377
      mkOp(OP_DISCARD, TYPE_NONE, NULL);
2378
      break;
2379
   case TGSI_OPCODE_TEX:
2380
   case TGSI_OPCODE_TXB:
2381
   case TGSI_OPCODE_TXL:
2382
   case TGSI_OPCODE_TXP:
2383
      //              R  S     L     C    Dx    Dy
2384
      handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
2385
      break;
2386
   case TGSI_OPCODE_TXD:
2387
      handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
2388
      break;
2389
   case TGSI_OPCODE_TEX2:
2390
      handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
2391
      break;
2392
   case TGSI_OPCODE_TXB2:
2393
   case TGSI_OPCODE_TXL2:
2394
      handleTEX(dst0, 2, 2, 0x10, 0x11, 0x00, 0x00);
2395
      break;
2396
   case TGSI_OPCODE_SAMPLE:
2397
   case TGSI_OPCODE_SAMPLE_B:
2398
   case TGSI_OPCODE_SAMPLE_D:
2399
   case TGSI_OPCODE_SAMPLE_L:
2400
   case TGSI_OPCODE_SAMPLE_C:
2401
   case TGSI_OPCODE_SAMPLE_C_LZ:
2402
      handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
2403
      break;
2404
   case TGSI_OPCODE_TXF:
2405
      handleTXF(dst0, 1, 0x03);
2406
      break;
2407
   case TGSI_OPCODE_SAMPLE_I:
2408
      handleTXF(dst0, 1, 0x03);
2409
      break;
2410
   case TGSI_OPCODE_SAMPLE_I_MS:
2411
      handleTXF(dst0, 1, 0x20);
2412
      break;
2413
   case TGSI_OPCODE_TXQ:
2414
   case TGSI_OPCODE_SVIEWINFO:
2415
      handleTXQ(dst0, TXQ_DIMS);
2416
      break;
2417
   case TGSI_OPCODE_F2I:
2418
   case TGSI_OPCODE_F2U:
2419
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2420
         mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
2421
      break;
2422
   case TGSI_OPCODE_I2F:
2423
   case TGSI_OPCODE_U2F:
2424
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2425
         mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
2426
      break;
2427
   case TGSI_OPCODE_EMIT:
2428
   case TGSI_OPCODE_ENDPRIM:
2429
      // get vertex stream if specified (must be immediate)
2430
      src0 = tgsi.srcCount() ?
2431
         mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero;
2432
      mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
2433
      break;
2434
   case TGSI_OPCODE_IF:
2435
   case TGSI_OPCODE_UIF:
2436
   {
2437
      BasicBlock *ifBB = new BasicBlock(func);
2438
 
2439
      bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
2440
      condBBs.push(bb);
2441
      joinBBs.push(bb);
2442
 
2443
      mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
2444
 
2445
      setPosition(ifBB, true);
2446
   }
2447
      break;
2448
   case TGSI_OPCODE_ELSE:
2449
   {
2450
      BasicBlock *elseBB = new BasicBlock(func);
2451
      BasicBlock *forkBB = reinterpret_cast(condBBs.pop().u.p);
2452
 
2453
      forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
2454
      condBBs.push(bb);
2455
 
2456
      forkBB->getExit()->asFlow()->target.bb = elseBB;
2457
      if (!bb->isTerminated())
2458
         mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
2459
 
2460
      setPosition(elseBB, true);
2461
   }
2462
      break;
2463
   case TGSI_OPCODE_ENDIF:
2464
   {
2465
      BasicBlock *convBB = new BasicBlock(func);
2466
      BasicBlock *prevBB = reinterpret_cast(condBBs.pop().u.p);
2467
      BasicBlock *forkBB = reinterpret_cast(joinBBs.pop().u.p);
2468
 
2469
      if (!bb->isTerminated()) {
2470
         // we only want join if none of the clauses ended with CONT/BREAK/RET
2471
         if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
2472
            insertConvergenceOps(convBB, forkBB);
2473
         mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
2474
         bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
2475
      }
2476
 
2477
      if (prevBB->getExit()->op == OP_BRA) {
2478
         prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
2479
         prevBB->getExit()->asFlow()->target.bb = convBB;
2480
      }
2481
      setPosition(convBB, true);
2482
   }
2483
      break;
2484
   case TGSI_OPCODE_BGNLOOP:
2485
   {
2486
      BasicBlock *lbgnBB = new BasicBlock(func);
2487
      BasicBlock *lbrkBB = new BasicBlock(func);
2488
 
2489
      loopBBs.push(lbgnBB);
2490
      breakBBs.push(lbrkBB);
2491
      if (loopBBs.getSize() > func->loopNestingBound)
2492
         func->loopNestingBound++;
2493
 
2494
      mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
2495
 
2496
      bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
2497
      setPosition(lbgnBB, true);
2498
      mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
2499
   }
2500
      break;
2501
   case TGSI_OPCODE_ENDLOOP:
2502
   {
2503
      BasicBlock *loopBB = reinterpret_cast(loopBBs.pop().u.p);
2504
 
2505
      if (!bb->isTerminated()) {
2506
         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
2507
         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
2508
      }
2509
      setPosition(reinterpret_cast(breakBBs.pop().u.p), true);
2510
   }
2511
      break;
2512
   case TGSI_OPCODE_BRK:
2513
   {
2514
      if (bb->isTerminated())
2515
         break;
2516
      BasicBlock *brkBB = reinterpret_cast(breakBBs.peek().u.p);
2517
      mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
2518
      bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
2519
   }
2520
      break;
2521
   case TGSI_OPCODE_CONT:
2522
   {
2523
      if (bb->isTerminated())
2524
         break;
2525
      BasicBlock *contBB = reinterpret_cast(loopBBs.peek().u.p);
2526
      mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
2527
      contBB->explicitCont = true;
2528
      bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
2529
   }
2530
      break;
2531
   case TGSI_OPCODE_BGNSUB:
2532
   {
2533
      Subroutine *s = getSubroutine(ip);
2534
      BasicBlock *entry = new BasicBlock(s->f);
2535
      BasicBlock *leave = new BasicBlock(s->f);
2536
 
2537
      // multiple entrypoints possible, keep the graph connected
2538
      if (prog->getType() == Program::TYPE_COMPUTE)
2539
         prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
2540
 
2541
      sub.cur = s;
2542
      s->f->setEntry(entry);
2543
      s->f->setExit(leave);
2544
      setPosition(entry, true);
2545
      return true;
2546
   }
2547
   case TGSI_OPCODE_ENDSUB:
2548
   {
2549
      sub.cur = getSubroutine(prog->main);
2550
      setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
2551
      return true;
2552
   }
2553
   case TGSI_OPCODE_CAL:
2554
   {
2555
      Subroutine *s = getSubroutine(tgsi.getLabel());
2556
      mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
2557
      func->call.attach(&s->f->call, Graph::Edge::TREE);
2558
      return true;
2559
   }
2560
   case TGSI_OPCODE_RET:
2561
   {
2562
      if (bb->isTerminated())
2563
         return true;
2564
      BasicBlock *leave = BasicBlock::get(func->cfgExit);
2565
 
2566
      if (!isEndOfSubroutine(ip + 1)) {
2567
         // insert a PRERET at the entry if this is an early return
2568
         // (only needed for sharing code in the epilogue)
2569
         BasicBlock *pos = getBB();
2570
         setPosition(BasicBlock::get(func->cfg.getRoot()), false);
2571
         mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
2572
         setPosition(pos, true);
2573
      }
2574
      mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
2575
      bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
2576
   }
2577
      break;
2578
   case TGSI_OPCODE_END:
2579
   {
2580
      // attach and generate epilogue code
2581
      BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
2582
      bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
2583
      setPosition(epilogue, true);
2584
      if (prog->getType() == Program::TYPE_FRAGMENT)
2585
         exportOutputs();
2586
      if (info->io.genUserClip > 0)
2587
         handleUserClipPlanes();
2588
      mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
2589
   }
2590
      break;
2591
   case TGSI_OPCODE_SWITCH:
2592
   case TGSI_OPCODE_CASE:
2593
      ERROR("switch/case opcode encountered, should have been lowered\n");
2594
      abort();
2595
      break;
2596
   case TGSI_OPCODE_LOAD:
2597
      handleLOAD(dst0);
2598
      break;
2599
   case TGSI_OPCODE_STORE:
2600
      handleSTORE();
2601
      break;
2602
   case TGSI_OPCODE_BARRIER:
2603
      geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
2604
      geni->fixed = 1;
2605
      geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
2606
      break;
2607
   case TGSI_OPCODE_MFENCE:
2608
   case TGSI_OPCODE_LFENCE:
2609
   case TGSI_OPCODE_SFENCE:
2610
      geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
2611
      geni->fixed = 1;
2612
      geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
2613
      break;
2614
   case TGSI_OPCODE_ATOMUADD:
2615
   case TGSI_OPCODE_ATOMXCHG:
2616
   case TGSI_OPCODE_ATOMCAS:
2617
   case TGSI_OPCODE_ATOMAND:
2618
   case TGSI_OPCODE_ATOMOR:
2619
   case TGSI_OPCODE_ATOMXOR:
2620
   case TGSI_OPCODE_ATOMUMIN:
2621
   case TGSI_OPCODE_ATOMIMIN:
2622
   case TGSI_OPCODE_ATOMUMAX:
2623
   case TGSI_OPCODE_ATOMIMAX:
2624
      handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
2625
      break;
2626
   default:
2627
      ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
2628
      assert(0);
2629
      break;
2630
   }
2631
 
2632
   if (tgsi.dstCount()) {
2633
      for (c = 0; c < 4; ++c) {
2634
         if (!dst0[c])
2635
            continue;
2636
         if (dst0[c] != rDst0[c])
2637
            mkMov(rDst0[c], dst0[c]);
2638
         storeDst(0, c, rDst0[c]);
2639
      }
2640
   }
2641
   vtxBaseValid = 0;
2642
 
2643
   return true;
2644
}
2645
 
2646
void
2647
Converter::handleUserClipPlanes()
2648
{
2649
   Value *res[8];
2650
   int n, i, c;
2651
 
2652
   for (c = 0; c < 4; ++c) {
2653
      for (i = 0; i < info->io.genUserClip; ++i) {
2654
         Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
2655
                                TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
2656
         Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
2657
         if (c == 0)
2658
            res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
2659
         else
2660
            mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
2661
      }
2662
   }
2663
 
2664
   const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
2665
 
2666
   for (i = 0; i < info->io.genUserClip; ++i) {
2667
      n = i / 4 + first;
2668
      c = i % 4;
2669
      Symbol *sym =
2670
         mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
2671
      mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
2672
   }
2673
}
2674
 
2675
void
2676
Converter::exportOutputs()
2677
{
2678
   for (unsigned int i = 0; i < info->numOutputs; ++i) {
2679
      for (unsigned int c = 0; c < 4; ++c) {
2680
         if (!oData.exists(sub.cur->values, i, c))
2681
            continue;
2682
         Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
2683
                                info->out[i].slot[c] * 4);
2684
         Value *val = oData.load(sub.cur->values, i, c, NULL);
2685
         if (val)
2686
            mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
2687
      }
2688
   }
2689
}
2690
 
2691
// Set up a converter that builds into @ir from the scanned TGSI source
// @code: takes the program info from the source, configures the data
// arrays for the TEMPORARY, PREDICATE, ADDRESS and OUTPUT register files,
// and creates the shared zero immediate.
Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
     code(code),
     tgsi(NULL),
     tData(this), aData(this), pData(this), oData(this)
{
   info = code->info;

   // main temporaries are kept in local memory when the source asks for it,
   // in GPRs otherwise
   DataFile tempFile = FILE_GPR;
   if (code->mainTempsInLMem)
      tempFile = FILE_MEMORY_LOCAL;
   const DataFile tFile = tempFile;

   const unsigned numTemps = code->fileSize(TGSI_FILE_TEMPORARY);
   const unsigned numPreds = code->fileSize(TGSI_FILE_PREDICATE);
   const unsigned numAddrs = code->fileSize(TGSI_FILE_ADDRESS);
   const unsigned numOuts = code->fileSize(TGSI_FILE_OUTPUT);

   tData.setup(TGSI_FILE_TEMPORARY, 0, 0, numTemps, 4, 4, tFile, 0);
   pData.setup(TGSI_FILE_PREDICATE, 0, 0, numPreds, 4, 4, FILE_PREDICATE, 0);
   aData.setup(TGSI_FILE_ADDRESS, 0, 0, numAddrs, 4, 4, FILE_ADDRESS, 0);
   oData.setup(TGSI_FILE_OUTPUT, 0, 0, numOuts, 4, 4, FILE_GPR, 0);

   zero = mkImm((uint32_t)0);

   vtxBaseValid = 0;
}
2714
 
2715
// Empty destructor body; no explicit cleanup is performed here.
Converter::~Converter() { }
2718
 
2719
// Find the location recorded for value @v in subroutine @s's value map.
// Returns NULL when the value has no entry.
inline const Converter::Location *
Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)
{
   ValueMap::l_iterator pos = s->values.l.find(v);

   if (pos != s->values.l.end())
      return &pos->second;
   return NULL;
}
2725
 
2726
template inline void
2727
Converter::BindArgumentsPass::updateCallArgs(
2728
   Instruction *i, void (Instruction::*setArg)(int, Value *),
2729
   T (Function::*proto))
2730
{
2731
   Function *g = i->asFlow()->target.fn;
2732
   Subroutine *subg = conv.getSubroutine(g);
2733
 
2734
   for (unsigned a = 0; a < (g->*proto).size(); ++a) {
2735
      Value *v = (g->*proto)[a].get();
2736
      const Converter::Location &l = *getValueLocation(subg, v);
2737
      Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
2738
 
2739
      (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
2740
   }
2741
}
2742
 
2743
template inline void
2744
Converter::BindArgumentsPass::updatePrototype(
2745
   BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
2746
{
2747
   (func->*updateSet)();
2748
 
2749
   for (unsigned i = 0; i < set->getSize(); ++i) {
2750
      Value *v = func->getLValue(i);
2751
      const Converter::Location *l = getValueLocation(sub, v);
2752
 
2753
      // only include values with a matching TGSI register
2754
      if (set->test(i) && l && !conv.code->locals.count(*l))
2755
         (func->*proto).push_back(v);
2756
   }
2757
}
2758
 
2759
bool
2760
Converter::BindArgumentsPass::visit(Function *f)
2761
{
2762
   sub = conv.getSubroutine(f);
2763
 
2764
   for (ArrayList::Iterator bi = f->allBBlocks.iterator();
2765
        !bi.end(); bi.next()) {
2766
      for (Instruction *i = BasicBlock::get(bi)->getFirst();
2767
           i; i = i->next) {
2768
         if (i->op == OP_CALL && !i->asFlow()->builtin) {
2769
            updateCallArgs(i, &Instruction::setSrc, &Function::ins);
2770
            updateCallArgs(i, &Instruction::setDef, &Function::outs);
2771
         }
2772
      }
2773
   }
2774
 
2775
   if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
2776
      return true;
2777
   updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
2778
                   &Function::buildLiveSets, &Function::ins);
2779
   updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
2780
                   &Function::buildDefSets, &Function::outs);
2781
 
2782
   return true;
2783
}
2784
 
2785
bool
2786
Converter::run()
2787
{
2788
   BasicBlock *entry = new BasicBlock(prog->main);
2789
   BasicBlock *leave = new BasicBlock(prog->main);
2790
 
2791
   prog->main->setEntry(entry);
2792
   prog->main->setExit(leave);
2793
 
2794
   setPosition(entry, true);
2795
   sub.cur = getSubroutine(prog->main);
2796
 
2797
   if (info->io.genUserClip > 0) {
2798
      for (int c = 0; c < 4; ++c)
2799
         clipVtx[c] = getScratch();
2800
   }
2801
 
2802
   if (prog->getType() == Program::TYPE_FRAGMENT) {
2803
      Symbol *sv = mkSysVal(SV_POSITION, 3);
2804
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
2805
      mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
2806
   }
2807
 
2808
   for (ip = 0; ip < code->scan.num_instructions; ++ip) {
2809
      if (!handleInstruction(&code->insns[ip]))
2810
         return false;
2811
   }
2812
 
2813
   if (!BindArgumentsPass(*this).run(prog))
2814
      return false;
2815
 
2816
   return true;
2817
}
2818
 
2819
} // unnamed namespace
2820
 
2821
namespace nv50_ir {
2822
 
2823
bool
2824
Program::makeFromTGSI(struct nv50_ir_prog_info *info)
2825
{
2826
   tgsi::Source src(info);
2827
   if (!src.scanSource())
2828
      return false;
2829
   tlsSize = info->bin.tlsSpace;
2830
 
2831
   Converter builder(this, &src);
2832
   return builder.run();
2833
}
2834
 
2835
} // namespace nv50_ir