#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_linkage.h"
#include "util/u_debug.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_ureg.h"

#include "draw/draw_context.h"

#include "nv30-40_3d.xml.h"
#include "nv30_context.h"
#include "nv30_resource.h"

/* TODO (at least...):
 *  1. Indexed consts  + ARL
 *  3. NV_vp11, NV_vp2, NV_vp3 features
 *       - extra arith opcodes
 *       - branching
 *       - texture sampling
 *       - indexed attribs
 *       - indexed results
 *  4. bugs
 */

#include "nv30_vertprog.h"
#include "nv40_vertprog.h"

struct nvfx_loop_entry {
   unsigned brk_target;
   unsigned cont_target;
};

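/* Per-translation compiler state: the source TGSI program, the hardware
 * program being built, register maps for temporaries/addresses/constants,
 * and bookkeeping for labels and loops used during branch fixup.
 */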
struct nvfx_vpc {
   struct nv30_context* nv30;
   struct pipe_shader_state pipe;
   struct nv30_vertprog *vp;
   struct tgsi_shader_info* info;

   struct nv30_vertprog_exec *vpi;

   unsigned r_temps;
   unsigned r_temps_discard;
   struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS];
   struct nvfx_reg *r_address;
   struct nvfx_reg *r_temp;
   struct nvfx_reg *r_const;
   struct nvfx_reg r_0_1;

   struct nvfx_reg *imm;
   unsigned nr_imm;

   int hpos_idx;
   int cvtx_idx;

   struct util_dynarray label_relocs;
   struct util_dynarray loop_stack;
};

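/* Allocate the lowest free hardware temporary from the r_temps bitmask.
 * Registers grabbed here are also marked in r_temps_discard so that
 * release_temps() frees them again after the current TGSI instruction.
 */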
static struct nvfx_reg
temp(struct nvfx_vpc *vpc)
{
   int idx = ffs(~vpc->r_temps) - 1;

   if (idx < 0) {
      NOUVEAU_ERR("out of temps!!\n");
      assert(0);
      return nvfx_reg(NVFXSR_TEMP, 0);
   }

   vpc->r_temps |= (1 << idx);
   vpc->r_temps_discard |= (1 << idx);
   return nvfx_reg(NVFXSR_TEMP, idx);
}

static inline void
release_temps(struct nvfx_vpc *vpc)
{
   vpc->r_temps &= ~vpc->r_temps_discard;
   vpc->r_temps_discard = 0;
}

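/* Look up or create an entry in vp->consts: a non-negative 'pipe' index
 * refers to an element of the constant buffer (reused if already present),
 * while pipe == -1 creates an anonymous immediate holding (x, y, z, w).
 */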
static struct nvfx_reg
constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w)
{
   struct nv30_vertprog *vp = vpc->vp;
   struct nv30_vertprog_data *vpd;
   int idx;

   if (pipe >= 0) {
      for (idx = 0; idx < vp->nr_consts; idx++) {
         if (vp->consts[idx].index == pipe)
            return nvfx_reg(NVFXSR_CONST, idx);
      }
   }

   idx = vp->nr_consts++;
   vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts);
   vpd = &vp->consts[idx];

   vpd->index = pipe;
   vpd->value[0] = x;
   vpd->value[1] = y;
   vpd->value[2] = z;
   vpd->value[3] = w;
   return nvfx_reg(NVFXSR_CONST, idx);
}

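/* Build an nvfx_insn for the given slot (VEC or SCA) and opcode; the slot
 * is encoded in bits above the 7-bit opcode and unpacked again in
 * nvfx_vp_emit().
 */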
#define arith(s,t,o,d,m,s0,s1,s2) \
   nvfx_insn((s), (NVFX_VP_INST_SLOT_##t << 7) | NVFX_VP_INST_##t##_OP_##o, -1, (d), (m), (s0), (s1), (s2))

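/* Encode source operand 'pos' (0-2) of the current instruction into the
 * four hardware words: register type/index, negate, absolute value,
 * swizzle and optional address-register indirection. Constant sources in
 * the relocatable index range are recorded in vp->const_relocs instead of
 * being encoded directly.
 */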
static void
emit_src(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw,
         int pos, struct nvfx_src src)
{
   struct nv30_vertprog *vp = vpc->vp;
   uint32_t sr = 0;
   struct nvfx_relocation reloc;

   switch (src.reg.type) {
   case NVFXSR_TEMP:
      sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT));
      sr |= (src.reg.index << NVFX_VP(SRC_TEMP_SRC_SHIFT));
      break;
   case NVFXSR_INPUT:
      sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
             NVFX_VP(SRC_REG_TYPE_SHIFT));
      vp->ir |= (1 << src.reg.index);
      hw[1] |= (src.reg.index << NVFX_VP(INST_INPUT_SRC_SHIFT));
      break;
   case NVFXSR_CONST:
      sr |= (NVFX_VP(SRC_REG_TYPE_CONST) <<
             NVFX_VP(SRC_REG_TYPE_SHIFT));
      if (src.reg.index < 256 && src.reg.index >= -256) {
         reloc.location = vp->nr_insns - 1;
         reloc.target = src.reg.index;
         util_dynarray_append(&vp->const_relocs, struct nvfx_relocation, reloc);
      } else {
         hw[1] |= (src.reg.index << NVFX_VP(INST_CONST_SRC_SHIFT)) &
               NVFX_VP(INST_CONST_SRC_MASK);
      }
      break;
   case NVFXSR_NONE:
      sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) <<
             NVFX_VP(SRC_REG_TYPE_SHIFT));
      break;
   default:
      assert(0);
   }

   if (src.negate)
      sr |= NVFX_VP(SRC_NEGATE);

   if (src.abs)
      hw[0] |= (1 << (21 + pos));

   sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) |
          (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) |
          (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) |
          (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT)));

   if(src.indirect) {
      if(src.reg.type == NVFXSR_CONST)
         hw[3] |= NVFX_VP(INST_INDEX_CONST);
      else if(src.reg.type == NVFXSR_INPUT)
         hw[0] |= NVFX_VP(INST_INDEX_INPUT);
      else
         assert(0);

      if(src.indirect_reg)
         hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1);
      hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT);
   }

   switch (pos) {
   case 0:
      hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >>
           NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT);
      hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) <<
           NVFX_VP(INST_SRC0L_SHIFT);
      break;
   case 1:
      hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT);
      break;
   case 2:
      hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >>
           NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT);
      hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) <<
           NVFX_VP(INST_SRC2L_SHIFT);
      break;
   default:
      assert(0);
   }
}

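/* Encode the destination of the current instruction. Temporaries go into
 * the per-slot TEMP fields; outputs set the result bits and, on nv4x,
 * update vp->or and remap the nv30-style clip-plane outputs onto the
 * FOGC/PSZ result registers.
 */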
static void
emit_dst(struct nv30_context *nv30, struct nvfx_vpc *vpc, uint32_t *hw,
         int slot, struct nvfx_reg dst)
{
   struct nv30_vertprog *vp = vpc->vp;

   switch (dst.type) {
   case NVFXSR_NONE:
      if(!nv30->is_nv4x)
         hw[0] |= NV30_VP_INST_DEST_TEMP_ID_MASK;
      else {
         hw[3] |= NV40_VP_INST_DEST_MASK;
         if (slot == 0)
            hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
         else
            hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
      }
      break;
   case NVFXSR_TEMP:
      if(!nv30->is_nv4x)
         hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT);
      else {
         hw[3] |= NV40_VP_INST_DEST_MASK;
         if (slot == 0)
            hw[0] |= (dst.index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
         else
            hw[3] |= (dst.index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
      }
      break;
   case NVFXSR_OUTPUT:
      /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */
      if(nv30->is_nv4x) {
         switch (dst.index) {
         case NV30_VP_INST_DEST_CLP(0):
            dst.index = NVFX_VP(INST_DEST_FOGC);
            vp->or   |= (1 << 6);
            break;
         case NV30_VP_INST_DEST_CLP(1):
            dst.index = NVFX_VP(INST_DEST_FOGC);
            vp->or   |= (1 << 7);
            break;
         case NV30_VP_INST_DEST_CLP(2):
            dst.index = NVFX_VP(INST_DEST_FOGC);
            vp->or   |= (1 << 8);
            break;
         case NV30_VP_INST_DEST_CLP(3):
            dst.index = NVFX_VP(INST_DEST_PSZ);
            vp->or   |= (1 << 9);
            break;
         case NV30_VP_INST_DEST_CLP(4):
            dst.index = NVFX_VP(INST_DEST_PSZ);
            vp->or   |= (1 << 10);
            break;
         case NV30_VP_INST_DEST_CLP(5):
            dst.index = NVFX_VP(INST_DEST_PSZ);
            vp->or   |= (1 << 11);
            break;
         case NV40_VP_INST_DEST_COL0: vp->or |= (1 << 0); break;
         case NV40_VP_INST_DEST_COL1: vp->or |= (1 << 1); break;
         case NV40_VP_INST_DEST_BFC0: vp->or |= (1 << 2); break;
         case NV40_VP_INST_DEST_BFC1: vp->or |= (1 << 3); break;
         case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break;
         case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break;
         }
      }

      if(!nv30->is_nv4x) {
         hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT);
         hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK;

         /*XXX: no way this is entirely correct, someone needs to
          *     figure out what exactly it is.
          */
         hw[3] |= 0x800;
      } else {
         hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
         if (slot == 0) {
            hw[0] |= NV40_VP_INST_VEC_RESULT;
            hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
         } else {
            hw[3] |= NV40_VP_INST_SCA_RESULT;
            hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
         }
      }
      break;
   default:
      assert(0);
   }
}

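/* Append one hardware instruction to vp->insns: set up condition-code
 * test/update, saturation (nv4x only), opcode and writemask for the
 * selected slot, then encode the destination and the three sources.
 */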
static void
nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn)
{
   struct nv30_context *nv30 = vpc->nv30;
   struct nv30_vertprog *vp = vpc->vp;
   unsigned slot = insn.op >> 7;
   unsigned op = insn.op & 0x7f;
   uint32_t *hw;

   vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi));
   vpc->vpi = &vp->insns[vp->nr_insns - 1];
   memset(vpc->vpi, 0, sizeof(*vpc->vpi));

   hw = vpc->vpi->data;

   if (insn.cc_test != NVFX_COND_TR)
      hw[0] |= NVFX_VP(INST_COND_TEST_ENABLE);
   hw[0] |= (insn.cc_test << NVFX_VP(INST_COND_SHIFT));
   hw[0] |= ((insn.cc_swz[0] << NVFX_VP(INST_COND_SWZ_X_SHIFT)) |
             (insn.cc_swz[1] << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) |
             (insn.cc_swz[2] << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) |
             (insn.cc_swz[3] << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
   if(insn.cc_update)
      hw[0] |= NVFX_VP(INST_COND_UPDATE_ENABLE);

   if(insn.sat) {
      assert(nv30->use_nv4x);
      if(nv30->use_nv4x)
         hw[0] |= NV40_VP_INST_SATURATE;
   }

   if(!nv30->is_nv4x) {
      if(slot == 0)
         hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
      else {
         hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT);
         hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT);
      }
//      hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
//      hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));

      if (insn.dst.type == NVFXSR_OUTPUT) {
         if (slot)
            hw[3] |= (insn.mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT);
         else
            hw[3] |= (insn.mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT);
      } else {
         if (slot)
            hw[3] |= (insn.mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT);
         else
            hw[3] |= (insn.mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT);
      }
   } else {
      if (slot == 0) {
         hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
         hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
         hw[3] |= (insn.mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
      } else {
         hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
         hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
         hw[3] |= (insn.mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
      }
   }

   emit_dst(nv30, vpc, hw, slot, insn.dst);
   emit_src(nv30, vpc, hw, 0, insn.src[0]);
   emit_src(nv30, vpc, hw, 1, insn.src[1]);
   emit_src(nv30, vpc, hw, 2, insn.src[2]);

//   if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL)
//      hw[3] |= NV40_VP_INST_SCA_RESULT;
}

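/* Translate a TGSI source register into an nvfx_src, resolving the file
 * to the corresponding hardware register map and copying negate/abs,
 * swizzle and (for constants and inputs) address-register indirection.
 */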
static inline struct nvfx_src
tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
   struct nvfx_src src;

   switch (fsrc->Register.File) {
   case TGSI_FILE_INPUT:
      src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index);
      break;
   case TGSI_FILE_CONSTANT:
      if(fsrc->Register.Indirect) {
         src.reg = vpc->r_const[0];
         src.reg.index = fsrc->Register.Index;
      } else {
         src.reg = vpc->r_const[fsrc->Register.Index];
      }
      break;
   case TGSI_FILE_IMMEDIATE:
      src.reg = vpc->imm[fsrc->Register.Index];
      break;
   case TGSI_FILE_TEMPORARY:
      src.reg = vpc->r_temp[fsrc->Register.Index];
      break;
   default:
      NOUVEAU_ERR("bad src file\n");
      src.reg.index = 0;
      src.reg.type = -1;
      break;
   }

   src.abs = fsrc->Register.Absolute;
   src.negate = fsrc->Register.Negate;
   src.swz[0] = fsrc->Register.SwizzleX;
   src.swz[1] = fsrc->Register.SwizzleY;
   src.swz[2] = fsrc->Register.SwizzleZ;
   src.swz[3] = fsrc->Register.SwizzleW;
   src.indirect = 0;
   src.indirect_reg = 0;
   src.indirect_swz = 0;

   if(fsrc->Register.Indirect) {
      if(fsrc->Indirect.File == TGSI_FILE_ADDRESS &&
         (fsrc->Register.File == TGSI_FILE_CONSTANT ||
          fsrc->Register.File == TGSI_FILE_INPUT)) {
         src.indirect = 1;
         src.indirect_reg = fsrc->Indirect.Index;
         src.indirect_swz = fsrc->Indirect.Swizzle;
      } else {
         src.reg.index = 0;
         src.reg.type = -1;
      }
   }

   return src;
}

static INLINE struct nvfx_reg
tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
   struct nvfx_reg dst;

   switch (fdst->Register.File) {
   case TGSI_FILE_NULL:
      dst = nvfx_reg(NVFXSR_NONE, 0);
      break;
   case TGSI_FILE_OUTPUT:
      dst = vpc->r_result[fdst->Register.Index];
      break;
   case TGSI_FILE_TEMPORARY:
      dst = vpc->r_temp[fdst->Register.Index];
      break;
   case TGSI_FILE_ADDRESS:
      dst = vpc->r_address[fdst->Register.Index];
      break;
   default:
      NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File);
      dst.index = 0;
      dst.type = 0;
      break;
   }

   return dst;
}

static inline int
tgsi_mask(uint tgsi)
{
   int mask = 0;

   if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X;
   if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y;
   if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z;
   if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W;
   return mask;
}

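/* Translate a single TGSI instruction into one or more hardware
 * instructions. Sources that cannot share an input/constant/immediate
 * fetch are first copied into temporaries; opcodes without a direct
 * hardware equivalent (CEIL, CMP, DP2, LRP, POW, TRUNC, XPD, ...) are
 * lowered to short sequences, and flow control is emitted as BRA/CAL/RET
 * with label relocations resolved later. On nv3x, saturation is emulated
 * with a MAX/MIN against the constant pair {0, 1}.
 */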
static boolean
nvfx_vertprog_parse_instruction(struct nv30_context *nv30, struct nvfx_vpc *vpc,
            unsigned idx, const struct tgsi_full_instruction *finst)
{
   struct nvfx_src src[3], tmp;
   struct nvfx_reg dst;
   struct nvfx_reg final_dst;
   struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
   struct nvfx_insn insn;
   struct nvfx_relocation reloc;
   struct nvfx_loop_entry loop;
   boolean sat = FALSE;
   int mask;
   int ai = -1, ci = -1, ii = -1;
   int i;
   unsigned sub_depth = 0;

   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
      const struct tgsi_full_src_register *fsrc;

      fsrc = &finst->Src[i];
      if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
         src[i] = tgsi_src(vpc, fsrc);
      }
   }

   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
      const struct tgsi_full_src_register *fsrc;

      fsrc = &finst->Src[i];

      switch (fsrc->Register.File) {
      case TGSI_FILE_INPUT:
         if (ai == -1 || ai == fsrc->Register.Index) {
            ai = fsrc->Register.Index;
            src[i] = tgsi_src(vpc, fsrc);
         } else {
            src[i] = nvfx_src(temp(vpc));
            nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL,
                         tgsi_src(vpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_CONSTANT:
         if ((ci == -1 && ii == -1) ||
             ci == fsrc->Register.Index) {
            ci = fsrc->Register.Index;
            src[i] = tgsi_src(vpc, fsrc);
         } else {
            src[i] = nvfx_src(temp(vpc));
            nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL,
                         tgsi_src(vpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_IMMEDIATE:
         if ((ci == -1 && ii == -1) ||
             ii == fsrc->Register.Index) {
            ii = fsrc->Register.Index;
            src[i] = tgsi_src(vpc, fsrc);
         } else {
            src[i] = nvfx_src(temp(vpc));
            nvfx_vp_emit(vpc, arith(0, VEC, MOV, src[i].reg, NVFX_VP_MASK_ALL,
                         tgsi_src(vpc, fsrc), none, none));
         }
         break;
      case TGSI_FILE_TEMPORARY:
         /* handled above */
         break;
      default:
         NOUVEAU_ERR("bad src file\n");
         return FALSE;
      }
   }

   for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
      if(src[i].reg.type < 0)
         return FALSE;
   }

   if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS &&
      finst->Instruction.Opcode != TGSI_OPCODE_ARL)
      return FALSE;

   final_dst = dst  = tgsi_dst(vpc, &finst->Dst[0]);
   mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
   if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) {
      assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL);
      if (nv30->use_nv4x)
         sat = TRUE;
      else
      if(dst.type != NVFXSR_TEMP)
         dst = temp(vpc);
   }

   switch (finst->Instruction.Opcode) {
   case TGSI_OPCODE_ABS:
      nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, abs(src[0]), none, none));
      break;
   case TGSI_OPCODE_ADD:
      nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, src[1]));
      break;
   case TGSI_OPCODE_ARL:
      nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_CEIL:
      tmp = nvfx_src(temp(vpc));
      nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, neg(src[0]), none, none));
      nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none));
      break;
   case TGSI_OPCODE_CMP:
      insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none);
      insn.cc_update = 1;
      nvfx_vp_emit(vpc, insn);

      insn = arith(sat, VEC, MOV, dst, mask, src[2], none, none);
      insn.cc_test = NVFX_COND_GE;
      nvfx_vp_emit(vpc, insn);

      insn = arith(sat, VEC, MOV, dst, mask, src[1], none, none);
      insn.cc_test = NVFX_COND_LT;
      nvfx_vp_emit(vpc, insn);
      break;
   case TGSI_OPCODE_COS:
      nvfx_vp_emit(vpc, arith(sat, SCA, COS, dst, mask, none, none, src[0]));
      break;
   case TGSI_OPCODE_DP2:
      tmp = nvfx_src(temp(vpc));
      nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X | NVFX_VP_MASK_Y, src[0], src[1], none));
      nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, swz(tmp, X, X, X, X), none, swz(tmp, Y, Y, Y, Y)));
      break;
   case TGSI_OPCODE_DP3:
      nvfx_vp_emit(vpc, arith(sat, VEC, DP3, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_DP4:
      nvfx_vp_emit(vpc, arith(sat, VEC, DP4, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_DPH:
      nvfx_vp_emit(vpc, arith(sat, VEC, DPH, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_DST:
      nvfx_vp_emit(vpc, arith(sat, VEC, DST, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_EX2:
      nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, src[0]));
      break;
   case TGSI_OPCODE_EXP:
      nvfx_vp_emit(vpc, arith(sat, SCA, EXP, dst, mask, none, none, src[0]));
      break;
   case TGSI_OPCODE_FLR:
      nvfx_vp_emit(vpc, arith(sat, VEC, FLR, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_FRC:
      nvfx_vp_emit(vpc, arith(sat, VEC, FRC, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_LG2:
      nvfx_vp_emit(vpc, arith(sat, SCA, LG2, dst, mask, none, none, src[0]));
      break;
   case TGSI_OPCODE_LIT:
      nvfx_vp_emit(vpc, arith(sat, SCA, LIT, dst, mask, none, none, src[0]));
      break;
   case TGSI_OPCODE_LOG:
      nvfx_vp_emit(vpc, arith(sat, SCA, LOG, dst, mask, none, none, src[0]));
      break;
   case TGSI_OPCODE_LRP:
      tmp = nvfx_src(temp(vpc));
      nvfx_vp_emit(vpc, arith(0, VEC, MAD, tmp.reg, mask, neg(src[0]), src[2], src[2]));
      nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], tmp));
      break;
   case TGSI_OPCODE_MAD:
      nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, mask, src[0], src[1], src[2]));
      break;
   case TGSI_OPCODE_MAX:
      nvfx_vp_emit(vpc, arith(sat, VEC, MAX, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_MIN:
      nvfx_vp_emit(vpc, arith(sat, VEC, MIN, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_MOV:
      nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_MUL:
      nvfx_vp_emit(vpc, arith(sat, VEC, MUL, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_NOP:
      break;
   case TGSI_OPCODE_POW:
      tmp = nvfx_src(temp(vpc));
      nvfx_vp_emit(vpc, arith(0, SCA, LG2, tmp.reg, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)));
      nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none));
      nvfx_vp_emit(vpc, arith(sat, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)));
      break;
   case TGSI_OPCODE_RCP:
      nvfx_vp_emit(vpc, arith(sat, SCA, RCP, dst, mask, none, none, src[0]));
      break;
   case TGSI_OPCODE_RSQ:
      nvfx_vp_emit(vpc, arith(sat, SCA, RSQ, dst, mask, none, none, abs(src[0])));
      break;
   case TGSI_OPCODE_SEQ:
      nvfx_vp_emit(vpc, arith(sat, VEC, SEQ, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SFL:
      nvfx_vp_emit(vpc, arith(sat, VEC, SFL, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SGE:
      nvfx_vp_emit(vpc, arith(sat, VEC, SGE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SGT:
      nvfx_vp_emit(vpc, arith(sat, VEC, SGT, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SIN:
      nvfx_vp_emit(vpc, arith(sat, SCA, SIN, dst, mask, none, none, src[0]));
      break;
   case TGSI_OPCODE_SLE:
      nvfx_vp_emit(vpc, arith(sat, VEC, SLE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SLT:
      nvfx_vp_emit(vpc, arith(sat, VEC, SLT, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SNE:
      nvfx_vp_emit(vpc, arith(sat, VEC, SNE, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SSG:
      nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], none, none));
      break;
   case TGSI_OPCODE_STR:
      nvfx_vp_emit(vpc, arith(sat, VEC, STR, dst, mask, src[0], src[1], none));
      break;
   case TGSI_OPCODE_SUB:
      nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1])));
      break;
   case TGSI_OPCODE_TRUNC:
      tmp = nvfx_src(temp(vpc));
      insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none);
      insn.cc_update = 1;
      nvfx_vp_emit(vpc, insn);

      nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, abs(src[0]), none, none));
      nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, tmp, none, none));

      insn = arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none);
      insn.cc_test = NVFX_COND_LT;
      nvfx_vp_emit(vpc, insn);
      break;
   case TGSI_OPCODE_XPD:
      tmp = nvfx_src(temp(vpc));
      nvfx_vp_emit(vpc, arith(0, VEC, MUL, tmp.reg, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none));
      nvfx_vp_emit(vpc, arith(sat, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)));
      break;
   case TGSI_OPCODE_IF:
      insn = arith(0, VEC, MOV, none.reg, NVFX_VP_MASK_X, src[0], none, none);
      insn.cc_update = 1;
      nvfx_vp_emit(vpc, insn);

      reloc.location = vpc->vp->nr_insns;
      reloc.target = finst->Label.Label + 1;
      util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

      insn = arith(0, SCA, BRA, none.reg, 0, none, none, none);
      insn.cc_test = NVFX_COND_EQ;
      insn.cc_swz[0] = insn.cc_swz[1] = insn.cc_swz[2] = insn.cc_swz[3] = 0;
      nvfx_vp_emit(vpc, insn);
      break;
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_BRA:
   case TGSI_OPCODE_CAL:
      reloc.location = vpc->vp->nr_insns;
      reloc.target = finst->Label.Label;
      util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

      if(finst->Instruction.Opcode == TGSI_OPCODE_CAL)
         insn = arith(0, SCA, CAL, none.reg, 0, none, none, none);
      else
         insn = arith(0, SCA, BRA, none.reg, 0, none, none, none);
      nvfx_vp_emit(vpc, insn);
      break;
   case TGSI_OPCODE_RET:
      if(sub_depth || !vpc->vp->enabled_ucps) {
         tmp = none;
         tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0;
         nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp));
      } else {
         reloc.location = vpc->vp->nr_insns;
         reloc.target = vpc->info->num_instructions;
         util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
         nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
      }
      break;
   case TGSI_OPCODE_BGNSUB:
      ++sub_depth;
      break;
   case TGSI_OPCODE_ENDSUB:
      --sub_depth;
      break;
   case TGSI_OPCODE_ENDIF:
      /* nothing to do here */
      break;
   case TGSI_OPCODE_BGNLOOP:
      loop.cont_target = idx;
      loop.brk_target = finst->Label.Label + 1;
      util_dynarray_append(&vpc->loop_stack, struct nvfx_loop_entry, loop);
      break;
   case TGSI_OPCODE_ENDLOOP:
      loop = util_dynarray_pop(&vpc->loop_stack, struct nvfx_loop_entry);

      reloc.location = vpc->vp->nr_insns;
      reloc.target = loop.cont_target;
      util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

      nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
      break;
   case TGSI_OPCODE_CONT:
      loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);

      reloc.location = vpc->vp->nr_insns;
      reloc.target = loop.cont_target;
      util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

      nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
      break;
   case TGSI_OPCODE_BRK:
      loop = util_dynarray_top(&vpc->loop_stack, struct nvfx_loop_entry);

      reloc.location = vpc->vp->nr_insns;
      reloc.target = loop.brk_target;
      util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);

      nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
      break;
   case TGSI_OPCODE_END:
      assert(!sub_depth);
      if(vpc->vp->enabled_ucps) {
         if(idx != (vpc->info->num_instructions - 1)) {
            reloc.location = vpc->vp->nr_insns;
            reloc.target = vpc->info->num_instructions;
            util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
            nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
         }
      } else {
         if(vpc->vp->nr_insns)
            vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
         nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none));
         vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
      }
      break;
   default:
      NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
      return FALSE;
   }

   if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !nv30->use_nv4x) {
      if (!vpc->r_0_1.type)
         vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0);
      nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none));
      nvfx_vp_emit(vpc, arith(0, VEC, MIN, final_dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), Y, Y, Y, Y), none));
   }

   release_temps(vpc);
   return TRUE;
}

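/* Map a TGSI output declaration to a hardware result register and store
 * it in vpc->r_result[]. Position and clip-vertex indices are remembered
 * for the clip-plane code added later; generic/texcoord outputs are
 * matched against the linkage table in vp->texcoord.
 */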
static boolean
nvfx_vertprog_parse_decl_output(struct nv30_context *nv30, struct nvfx_vpc *vpc,
                                const struct tgsi_full_declaration *fdec)
{
   unsigned num_texcoords = nv30->is_nv4x ? 10 : 8;
   unsigned idx = fdec->Range.First;
   unsigned semantic_index = fdec->Semantic.Index;
   int hw = 0, i;

   switch (fdec->Semantic.Name) {
   case TGSI_SEMANTIC_POSITION:
      hw = NVFX_VP(INST_DEST_POS);
      vpc->hpos_idx = idx;
      break;
   case TGSI_SEMANTIC_CLIPVERTEX:
      vpc->r_result[idx] = temp(vpc);
      vpc->r_temps_discard = 0;
      vpc->cvtx_idx = idx;
      return TRUE;
   case TGSI_SEMANTIC_COLOR:
      if (fdec->Semantic.Index == 0) {
         hw = NVFX_VP(INST_DEST_COL0);
      } else
      if (fdec->Semantic.Index == 1) {
         hw = NVFX_VP(INST_DEST_COL1);
      } else {
         NOUVEAU_ERR("bad colour semantic index\n");
         return FALSE;
      }
      break;
   case TGSI_SEMANTIC_BCOLOR:
      if (fdec->Semantic.Index == 0) {
         hw = NVFX_VP(INST_DEST_BFC0);
      } else
      if (fdec->Semantic.Index == 1) {
         hw = NVFX_VP(INST_DEST_BFC1);
      } else {
         NOUVEAU_ERR("bad bcolour semantic index\n");
         return FALSE;
      }
      break;
   case TGSI_SEMANTIC_FOG:
      hw = NVFX_VP(INST_DEST_FOGC);
      break;
   case TGSI_SEMANTIC_PSIZE:
      hw = NVFX_VP(INST_DEST_PSZ);
      break;
   case TGSI_SEMANTIC_GENERIC:
      /* this is really an identifier for VP/FP linkage */
      semantic_index += 8;
      /* fall through */
   case TGSI_SEMANTIC_TEXCOORD:
      for (i = 0; i < num_texcoords; i++) {
         if (vpc->vp->texcoord[i] == semantic_index) {
            hw = NVFX_VP(INST_DEST_TC(i));
            break;
         }
      }

      if (i == num_texcoords) {
         vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
         return TRUE;
      }
      break;
   case TGSI_SEMANTIC_EDGEFLAG:
      /* not really an error just a fallback */
      NOUVEAU_ERR("cannot handle edgeflag output\n");
      return FALSE;
   default:
      NOUVEAU_ERR("bad output semantic\n");
      return FALSE;
   }

   vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
   return TRUE;
}

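/* First pass over the TGSI tokens: count immediates, find the highest
 * used temporary/address/constant indices, and handle output
 * declarations. The register maps consumed by tgsi_src()/tgsi_dst() are
 * then allocated and filled.
 */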
static boolean
nvfx_vertprog_prepare(struct nv30_context *nv30, struct nvfx_vpc *vpc)
{
   struct tgsi_parse_context p;
   int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i;

   tgsi_parse_init(&p, vpc->pipe.tokens);
   while (!tgsi_parse_end_of_tokens(&p)) {
      const union tgsi_full_token *tok = &p.FullToken;

      tgsi_parse_token(&p);
      switch(tok->Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
         nr_imm++;
         break;
      case TGSI_TOKEN_TYPE_DECLARATION:
      {
         const struct tgsi_full_declaration *fdec;

         fdec = &p.FullToken.FullDeclaration;
         switch (fdec->Declaration.File) {
         case TGSI_FILE_TEMPORARY:
            if (fdec->Range.Last > high_temp) {
               high_temp =
                  fdec->Range.Last;
            }
            break;
         case TGSI_FILE_ADDRESS:
            if (fdec->Range.Last > high_addr) {
               high_addr =
                  fdec->Range.Last;
            }
            break;
         case TGSI_FILE_CONSTANT:
            if (fdec->Range.Last > high_const) {
               high_const =
                     fdec->Range.Last;
            }
            break;
         case TGSI_FILE_OUTPUT:
            if (!nvfx_vertprog_parse_decl_output(nv30, vpc, fdec))
               return FALSE;
            break;
         default:
            break;
         }
      }
         break;
      default:
         break;
      }
   }
   tgsi_parse_free(&p);

   if (nr_imm) {
      vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_reg));
      assert(vpc->imm);
   }

   if (++high_temp) {
      vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_reg));
      for (i = 0; i < high_temp; i++)
         vpc->r_temp[i] = temp(vpc);
   }

   if (++high_addr) {
      vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg));
      for (i = 0; i < high_addr; i++)
         vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i);
   }

   if(++high_const) {
      vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg));
      for (i = 0; i < high_const; i++)
         vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0);
   }

   vpc->r_temps_discard = 0;
   return TRUE;
}

DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)

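/* Top-level entry point: translate vp->pipe.tokens into nv30/nv40 vertex
 * program instructions. After the per-instruction pass, label relocations
 * are turned into hardware branch relocations, HPOS is written back if it
 * was redirected to a temporary, and DP4s against the user clip-plane
 * constants (at slots 512 + i) are appended for each enabled clip plane.
 */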
boolean
_nvfx_vertprog_translate(struct nv30_context *nv30, struct nv30_vertprog *vp)
{
   struct tgsi_parse_context parse;
   struct nvfx_vpc *vpc = NULL;
   struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
   struct util_dynarray insns;
   int i, ucps;

   vp->translated = FALSE;
   vp->nr_insns = 0;
   vp->nr_consts = 0;

   vpc = CALLOC_STRUCT(nvfx_vpc);
   if (!vpc)
      return FALSE;
   vpc->nv30 = nv30;
   vpc->vp   = vp;
   vpc->pipe = vp->pipe;
   vpc->info = &vp->info;
   vpc->cvtx_idx = -1;

   if (!nvfx_vertprog_prepare(nv30, vpc)) {
      FREE(vpc);
      return FALSE;
   }

   /* Redirect post-transform vertex position to a temp if user clip
    * planes are enabled.  We need to append code to the vtxprog
    * to handle clip planes later.
    */
   if (vp->enabled_ucps && vpc->cvtx_idx < 0)  {
      vpc->r_result[vpc->hpos_idx] = temp(vpc);
      vpc->r_temps_discard = 0;
      vpc->cvtx_idx = vpc->hpos_idx;
   }

   util_dynarray_init(&insns);

   tgsi_parse_init(&parse, vp->pipe.tokens);
   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
      {
         const struct tgsi_full_immediate *imm;

         imm = &parse.FullToken.FullImmediate;
         assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
         assert(imm->Immediate.NrTokens == 4 + 1);
         vpc->imm[vpc->nr_imm++] =
            constant(vpc, -1,
                imm->u[0].Float,
                imm->u[1].Float,
                imm->u[2].Float,
                imm->u[3].Float);
      }
         break;
      case TGSI_TOKEN_TYPE_INSTRUCTION:
      {
         const struct tgsi_full_instruction *finst;
         unsigned idx = insns.size >> 2;
         util_dynarray_append(&insns, unsigned, vp->nr_insns);
         finst = &parse.FullToken.FullInstruction;
         if (!nvfx_vertprog_parse_instruction(nv30, vpc, idx, finst))
            goto out;
      }
         break;
      default:
         break;
      }
   }

   util_dynarray_append(&insns, unsigned, vp->nr_insns);

   for(unsigned i = 0; i < vpc->label_relocs.size; i += sizeof(struct nvfx_relocation))
   {
      struct nvfx_relocation* label_reloc = (struct nvfx_relocation*)((char*)vpc->label_relocs.data + i);
      struct nvfx_relocation hw_reloc;

      hw_reloc.location = label_reloc->location;
      hw_reloc.target = ((unsigned*)insns.data)[label_reloc->target];

      //debug_printf("hw %u -> tgsi %u = hw %u\n", hw_reloc.location, label_reloc->target, hw_reloc.target);

      util_dynarray_append(&vp->branch_relocs, struct nvfx_relocation, hw_reloc);
   }
   util_dynarray_fini(&insns);
   util_dynarray_trim(&vp->branch_relocs);

   /* XXX: what if we add a RET before?!  make sure we jump here...*/

   /* Write out HPOS if it was redirected to a temp earlier */
   if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) {
      struct nvfx_reg hpos = nvfx_reg(NVFXSR_OUTPUT,
                  NVFX_VP(INST_DEST_POS));
      struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]);

      nvfx_vp_emit(vpc, arith(0, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none));
   }

   /* Insert code to handle user clip planes */
   ucps = vp->enabled_ucps;
   while (ucps) {
      int i = ffs(ucps) - 1; ucps &= ~(1 << i);
      struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i));
      struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, 512 + i));
      struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->cvtx_idx]);
      unsigned mask;

      if(nv30->is_nv4x)
      {
         switch (i) {
         case 0: case 3: mask = NVFX_VP_MASK_Y; break;
         case 1: case 4: mask = NVFX_VP_MASK_Z; break;
         case 2: case 5: mask = NVFX_VP_MASK_W; break;
         default:
            NOUVEAU_ERR("invalid clip dist #%d\n", i);
            goto out;
         }
      }
      else
         mask = NVFX_VP_MASK_X;

      nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none));
   }

   if (vpc->vp->nr_insns)
      vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;

   if(debug_get_option_nvfx_dump_vp())
   {
      debug_printf("\n");
      tgsi_dump(vpc->pipe.tokens, 0);

      debug_printf("\n%s vertex program:\n", nv30->is_nv4x ? "nv4x" : "nv3x");
      for (i = 0; i < vp->nr_insns; i++)
         debug_printf("%3u: %08x %08x %08x %08x\n", i, vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]);
      debug_printf("\n");
   }

   vp->translated = TRUE;

out:
   tgsi_parse_free(&parse);
   if(vpc) {
      util_dynarray_fini(&vpc->label_relocs);
      util_dynarray_fini(&vpc->loop_stack);
      FREE(vpc->r_temp);
      FREE(vpc->r_address);
      FREE(vpc->r_const);
      FREE(vpc->imm);
      FREE(vpc);
   }

   return vp->translated;
}