Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1901 serge 1
/*
2
 * Mesa 3-D graphics library
3
 * Version:  7.3
4
 *
5
 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the "Software"),
9
 * to deal in the Software without restriction, including without limitation
10
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11
 * and/or sell copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included
15
 * in all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24
 
25
/**
26
 * \file prog_execute.c
27
 * Software interpreter for vertex/fragment programs.
28
 * \author Brian Paul
29
 */
30
 
31
/*
32
 * NOTE: we do everything in single-precision floating point; we don't
33
 * currently observe the single/half/fixed-precision qualifiers.
34
 *
35
 */
36
 
37
 
38
#include "main/glheader.h"
39
#include "main/colormac.h"
40
#include "main/macros.h"
41
#include "prog_execute.h"
42
#include "prog_instruction.h"
43
#include "prog_parameter.h"
44
#include "prog_print.h"
45
#include "prog_noise.h"
46
 
47
 
48
/* Debug switch: set to 1 to print each instruction (and selected results) as it executes. */
49
#define DEBUG_PROG 0
50
 
51
 
52
/**
 * Set x to positive or negative infinity.
 *
 * Every variant expands to a single do { ... } while (0) statement so the
 * macros behave identically on all platforms (for example inside an
 * unbraced if/else), and the argument is parenthesized in each expansion.
 *
 * On the IEEE/Win32 path the value is built from its raw bit pattern:
 * 0x7F800000 is +infinity and 0xFF800000 is -infinity for a 32-bit float.
 * The other paths fall back to the largest value the platform headers offer.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)                  \
   do {                                      \
         fi_type fi;                         \
         fi.i = 0x7F800000;                  \
         (x) = fi.f;                         \
   } while (0)
#define SET_NEG_INFINITY(x)                  \
   do {                                      \
         fi_type fi;                         \
         fi.i = 0xFF800000;                  \
         (x) = fi.f;                         \
   } while (0)
#elif defined(VMS)
#define SET_POS_INFINITY(x)  do { (x) = __MAXFLOAT; } while (0)
#define SET_NEG_INFINITY(x)  do { (x) = -__MAXFLOAT; } while (0)
#else
#define SET_POS_INFINITY(x)  do { (x) = (GLfloat) HUGE_VAL; } while (0)
#define SET_NEG_INFINITY(x)  do { (x) = (GLfloat) -HUGE_VAL; } while (0)
#endif

/** Overwrite the storage of the float lvalue x with the raw pattern 'bits'. */
#define SET_FLOAT_BITS(x, bits) (((fi_type *) (void *) &(x))->i = (bits))
77
 
78
 
79
/* All-zero vector handed back by get_src_register_pointer() for out-of-range reads. */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
80
 
81
 
82
 
83
/**
84
 * Return TRUE for +0 and other positive values, FALSE otherwise.
85
 * Used for RCC opcode.
86
 */
87
static INLINE GLboolean
88
positive(float x)
89
{
90
   fi_type fi;
91
   fi.f = x;
92
   if (fi.i & 0x80000000)
93
      return GL_FALSE;
94
   return GL_TRUE;
95
}
96
 
97
 
98
 
99
/**
100
 * Return a pointer to the 4-element float vector specified by the given
101
 * source register.
102
 */
103
static INLINE const GLfloat *
104
get_src_register_pointer(const struct prog_src_register *source,
105
                         const struct gl_program_machine *machine)
106
{
107
   const struct gl_program *prog = machine->CurProgram;
108
   GLint reg = source->Index;
109
 
110
   if (source->RelAddr) {
111
      /* add address register value to src index/offset */
112
      reg += machine->AddressReg[0][0];
113
      if (reg < 0) {
114
         return ZeroVec;
115
      }
116
   }
117
 
118
   switch (source->File) {
119
   case PROGRAM_TEMPORARY:
120
      if (reg >= MAX_PROGRAM_TEMPS)
121
         return ZeroVec;
122
      return machine->Temporaries[reg];
123
 
124
   case PROGRAM_INPUT:
125
      if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
126
         if (reg >= VERT_ATTRIB_MAX)
127
            return ZeroVec;
128
         return machine->VertAttribs[reg];
129
      }
130
      else {
131
         if (reg >= FRAG_ATTRIB_MAX)
132
            return ZeroVec;
133
         return machine->Attribs[reg][machine->CurElement];
134
      }
135
 
136
   case PROGRAM_OUTPUT:
137
      if (reg >= MAX_PROGRAM_OUTPUTS)
138
         return ZeroVec;
139
      return machine->Outputs[reg];
140
 
141
   case PROGRAM_LOCAL_PARAM:
142
      if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
143
         return ZeroVec;
144
      return machine->CurProgram->LocalParams[reg];
145
 
146
   case PROGRAM_ENV_PARAM:
147
      if (reg >= MAX_PROGRAM_ENV_PARAMS)
148
         return ZeroVec;
149
      return machine->EnvParams[reg];
150
 
151
   case PROGRAM_STATE_VAR:
152
      /* Fallthrough */
153
   case PROGRAM_CONSTANT:
154
      /* Fallthrough */
155
   case PROGRAM_UNIFORM:
156
      /* Fallthrough */
157
   case PROGRAM_NAMED_PARAM:
158
      if (reg >= (GLint) prog->Parameters->NumParameters)
159
         return ZeroVec;
160
      return prog->Parameters->ParameterValues[reg];
161
 
162
   default:
163
      _mesa_problem(NULL,
164
         "Invalid src register file %d in get_src_register_pointer()",
165
         source->File);
166
      return NULL;
167
   }
168
}
169
 
170
 
171
/**
172
 * Return a pointer to the 4-element float vector specified by the given
173
 * destination register.
174
 */
175
static INLINE GLfloat *
176
get_dst_register_pointer(const struct prog_dst_register *dest,
177
                         struct gl_program_machine *machine)
178
{
179
   static GLfloat dummyReg[4];
180
   GLint reg = dest->Index;
181
 
182
   if (dest->RelAddr) {
183
      /* add address register value to src index/offset */
184
      reg += machine->AddressReg[0][0];
185
      if (reg < 0) {
186
         return dummyReg;
187
      }
188
   }
189
 
190
   switch (dest->File) {
191
   case PROGRAM_TEMPORARY:
192
      if (reg >= MAX_PROGRAM_TEMPS)
193
         return dummyReg;
194
      return machine->Temporaries[reg];
195
 
196
   case PROGRAM_OUTPUT:
197
      if (reg >= MAX_PROGRAM_OUTPUTS)
198
         return dummyReg;
199
      return machine->Outputs[reg];
200
 
201
   case PROGRAM_WRITE_ONLY:
202
      return dummyReg;
203
 
204
   default:
205
      _mesa_problem(NULL,
206
         "Invalid dest register file %d in get_dst_register_pointer()",
207
         dest->File);
208
      return NULL;
209
   }
210
}
211
 
212
 
213
 
214
/**
215
 * Fetch a 4-element float vector from the given source register.
216
 * Apply swizzling and negating as needed.
217
 */
218
static void
219
fetch_vector4(const struct prog_src_register *source,
220
              const struct gl_program_machine *machine, GLfloat result[4])
221
{
222
   const GLfloat *src = get_src_register_pointer(source, machine);
223
   ASSERT(src);
224
 
225
   if (source->Swizzle == SWIZZLE_NOOP) {
226
      /* no swizzling */
227
      COPY_4V(result, src);
228
   }
229
   else {
230
      ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
231
      ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
232
      ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
233
      ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
234
      result[0] = src[GET_SWZ(source->Swizzle, 0)];
235
      result[1] = src[GET_SWZ(source->Swizzle, 1)];
236
      result[2] = src[GET_SWZ(source->Swizzle, 2)];
237
      result[3] = src[GET_SWZ(source->Swizzle, 3)];
238
   }
239
 
240
   if (source->Abs) {
241
      result[0] = FABSF(result[0]);
242
      result[1] = FABSF(result[1]);
243
      result[2] = FABSF(result[2]);
244
      result[3] = FABSF(result[3]);
245
   }
246
   if (source->Negate) {
247
      ASSERT(source->Negate == NEGATE_XYZW);
248
      result[0] = -result[0];
249
      result[1] = -result[1];
250
      result[2] = -result[2];
251
      result[3] = -result[3];
252
   }
253
 
254
#ifdef NAN_CHECK
255
   assert(!IS_INF_OR_NAN(result[0]));
256
   assert(!IS_INF_OR_NAN(result[0]));
257
   assert(!IS_INF_OR_NAN(result[0]));
258
   assert(!IS_INF_OR_NAN(result[0]));
259
#endif
260
}
261
 
262
 
263
/**
264
 * Fetch a 4-element uint vector from the given source register.
265
 * Apply swizzling but not negation/abs.
266
 */
267
static void
268
fetch_vector4ui(const struct prog_src_register *source,
269
                const struct gl_program_machine *machine, GLuint result[4])
270
{
271
   const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
272
   ASSERT(src);
273
 
274
   if (source->Swizzle == SWIZZLE_NOOP) {
275
      /* no swizzling */
276
      COPY_4V(result, src);
277
   }
278
   else {
279
      ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
280
      ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
281
      ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
282
      ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
283
      result[0] = src[GET_SWZ(source->Swizzle, 0)];
284
      result[1] = src[GET_SWZ(source->Swizzle, 1)];
285
      result[2] = src[GET_SWZ(source->Swizzle, 2)];
286
      result[3] = src[GET_SWZ(source->Swizzle, 3)];
287
   }
288
 
289
   /* Note: no Negate or Abs here */
290
}
291
 
292
 
293
 
294
/**
295
 * Fetch the derivative with respect to X or Y for the given register.
296
 * XXX this currently only works for fragment program input attribs.
297
 */
298
static void
299
fetch_vector4_deriv(struct gl_context * ctx,
300
                    const struct prog_src_register *source,
301
                    const struct gl_program_machine *machine,
302
                    char xOrY, GLfloat result[4])
303
{
304
   if (source->File == PROGRAM_INPUT &&
305
       source->Index < (GLint) machine->NumDeriv) {
306
      const GLint col = machine->CurElement;
307
      const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
308
      const GLfloat invQ = 1.0f / w;
309
      GLfloat deriv[4];
310
 
311
      if (xOrY == 'X') {
312
         deriv[0] = machine->DerivX[source->Index][0] * invQ;
313
         deriv[1] = machine->DerivX[source->Index][1] * invQ;
314
         deriv[2] = machine->DerivX[source->Index][2] * invQ;
315
         deriv[3] = machine->DerivX[source->Index][3] * invQ;
316
      }
317
      else {
318
         deriv[0] = machine->DerivY[source->Index][0] * invQ;
319
         deriv[1] = machine->DerivY[source->Index][1] * invQ;
320
         deriv[2] = machine->DerivY[source->Index][2] * invQ;
321
         deriv[3] = machine->DerivY[source->Index][3] * invQ;
322
      }
323
 
324
      result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
325
      result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
326
      result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
327
      result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
328
 
329
      if (source->Abs) {
330
         result[0] = FABSF(result[0]);
331
         result[1] = FABSF(result[1]);
332
         result[2] = FABSF(result[2]);
333
         result[3] = FABSF(result[3]);
334
      }
335
      if (source->Negate) {
336
         ASSERT(source->Negate == NEGATE_XYZW);
337
         result[0] = -result[0];
338
         result[1] = -result[1];
339
         result[2] = -result[2];
340
         result[3] = -result[3];
341
      }
342
   }
343
   else {
344
      ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
345
   }
346
}
347
 
348
 
349
/**
350
 * As above, but only return result[0] element.
351
 */
352
static void
353
fetch_vector1(const struct prog_src_register *source,
354
              const struct gl_program_machine *machine, GLfloat result[4])
355
{
356
   const GLfloat *src = get_src_register_pointer(source, machine);
357
   ASSERT(src);
358
 
359
   result[0] = src[GET_SWZ(source->Swizzle, 0)];
360
 
361
   if (source->Abs) {
362
      result[0] = FABSF(result[0]);
363
   }
364
   if (source->Negate) {
365
      result[0] = -result[0];
366
   }
367
}
368
 
369
 
370
static GLuint
371
fetch_vector1ui(const struct prog_src_register *source,
372
                const struct gl_program_machine *machine)
373
{
374
   const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
375
   return src[GET_SWZ(source->Swizzle, 0)];
376
}
377
 
378
 
379
/**
380
 * Fetch texel from texture.  Use partial derivatives when possible.
381
 */
382
static INLINE void
383
fetch_texel(struct gl_context *ctx,
384
            const struct gl_program_machine *machine,
385
            const struct prog_instruction *inst,
386
            const GLfloat texcoord[4], GLfloat lodBias,
387
            GLfloat color[4])
388
{
389
   const GLuint unit = machine->Samplers[inst->TexSrcUnit];
390
 
391
   /* Note: we only have the right derivatives for fragment input attribs.
392
    */
393
   if (machine->NumDeriv > 0 &&
394
       inst->SrcReg[0].File == PROGRAM_INPUT &&
395
       inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
396
      /* simple texture fetch for which we should have derivatives */
397
      GLuint attr = inst->SrcReg[0].Index;
398
      machine->FetchTexelDeriv(ctx, texcoord,
399
                               machine->DerivX[attr],
400
                               machine->DerivY[attr],
401
                               lodBias, unit, color);
402
   }
403
   else {
404
      machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
405
   }
406
}
407
 
408
 
409
/**
410
 * Test value against zero and return GT, LT, EQ or UN if NaN.
411
 */
412
static INLINE GLuint
413
generate_cc(float value)
414
{
415
   if (value != value)
416
      return COND_UN;           /* NaN */
417
   if (value > 0.0F)
418
      return COND_GT;
419
   if (value < 0.0F)
420
      return COND_LT;
421
   return COND_EQ;
422
}
423
 
424
 
425
/**
426
 * Test if the ccMaskRule is satisfied by the given condition code.
427
 * Used to mask destination writes according to the current condition code.
428
 */
429
static INLINE GLboolean
430
test_cc(GLuint condCode, GLuint ccMaskRule)
431
{
432
   switch (ccMaskRule) {
433
   case COND_EQ: return (condCode == COND_EQ);
434
   case COND_NE: return (condCode != COND_EQ);
435
   case COND_LT: return (condCode == COND_LT);
436
   case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
437
   case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
438
   case COND_GT: return (condCode == COND_GT);
439
   case COND_TR: return GL_TRUE;
440
   case COND_FL: return GL_FALSE;
441
   default:      return GL_TRUE;
442
   }
443
}
444
 
445
 
446
/**
447
 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
448
 * or GL_FALSE to indicate result.
449
 */
450
static INLINE GLboolean
451
eval_condition(const struct gl_program_machine *machine,
452
               const struct prog_instruction *inst)
453
{
454
   const GLuint swizzle = inst->DstReg.CondSwizzle;
455
   const GLuint condMask = inst->DstReg.CondMask;
456
   if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
457
       test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
458
       test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
459
       test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
460
      return GL_TRUE;
461
   }
462
   else {
463
      return GL_FALSE;
464
   }
465
}
466
 
467
 
468
 
469
/**
470
 * Store 4 floats into a register.  Observe the instructions saturate and
471
 * set-condition-code flags.
472
 */
473
static void
474
store_vector4(const struct prog_instruction *inst,
475
              struct gl_program_machine *machine, const GLfloat value[4])
476
{
477
   const struct prog_dst_register *dstReg = &(inst->DstReg);
478
   const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
479
   GLuint writeMask = dstReg->WriteMask;
480
   GLfloat clampedValue[4];
481
   GLfloat *dst = get_dst_register_pointer(dstReg, machine);
482
 
483
#if 0
484
   if (value[0] > 1.0e10 ||
485
       IS_INF_OR_NAN(value[0]) ||
486
       IS_INF_OR_NAN(value[1]) ||
487
       IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
488
      printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
489
#endif
490
 
491
   if (clamp) {
492
      clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
493
      clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
494
      clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
495
      clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
496
      value = clampedValue;
497
   }
498
 
499
   if (dstReg->CondMask != COND_TR) {
500
      /* condition codes may turn off some writes */
501
      if (writeMask & WRITEMASK_X) {
502
         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
503
                      dstReg->CondMask))
504
            writeMask &= ~WRITEMASK_X;
505
      }
506
      if (writeMask & WRITEMASK_Y) {
507
         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
508
                      dstReg->CondMask))
509
            writeMask &= ~WRITEMASK_Y;
510
      }
511
      if (writeMask & WRITEMASK_Z) {
512
         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
513
                      dstReg->CondMask))
514
            writeMask &= ~WRITEMASK_Z;
515
      }
516
      if (writeMask & WRITEMASK_W) {
517
         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
518
                      dstReg->CondMask))
519
            writeMask &= ~WRITEMASK_W;
520
      }
521
   }
522
 
523
#ifdef NAN_CHECK
524
   assert(!IS_INF_OR_NAN(value[0]));
525
   assert(!IS_INF_OR_NAN(value[0]));
526
   assert(!IS_INF_OR_NAN(value[0]));
527
   assert(!IS_INF_OR_NAN(value[0]));
528
#endif
529
 
530
   if (writeMask & WRITEMASK_X)
531
      dst[0] = value[0];
532
   if (writeMask & WRITEMASK_Y)
533
      dst[1] = value[1];
534
   if (writeMask & WRITEMASK_Z)
535
      dst[2] = value[2];
536
   if (writeMask & WRITEMASK_W)
537
      dst[3] = value[3];
538
 
539
   if (inst->CondUpdate) {
540
      if (writeMask & WRITEMASK_X)
541
         machine->CondCodes[0] = generate_cc(value[0]);
542
      if (writeMask & WRITEMASK_Y)
543
         machine->CondCodes[1] = generate_cc(value[1]);
544
      if (writeMask & WRITEMASK_Z)
545
         machine->CondCodes[2] = generate_cc(value[2]);
546
      if (writeMask & WRITEMASK_W)
547
         machine->CondCodes[3] = generate_cc(value[3]);
548
#if DEBUG_PROG
549
      printf("CondCodes=(%s,%s,%s,%s) for:\n",
550
             _mesa_condcode_string(machine->CondCodes[0]),
551
             _mesa_condcode_string(machine->CondCodes[1]),
552
             _mesa_condcode_string(machine->CondCodes[2]),
553
             _mesa_condcode_string(machine->CondCodes[3]));
554
#endif
555
   }
556
}
557
 
558
 
559
/**
560
 * Store 4 uints into a register.  Observe the set-condition-code flags.
561
 */
562
static void
563
store_vector4ui(const struct prog_instruction *inst,
564
                struct gl_program_machine *machine, const GLuint value[4])
565
{
566
   const struct prog_dst_register *dstReg = &(inst->DstReg);
567
   GLuint writeMask = dstReg->WriteMask;
568
   GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
569
 
570
   if (dstReg->CondMask != COND_TR) {
571
      /* condition codes may turn off some writes */
572
      if (writeMask & WRITEMASK_X) {
573
         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
574
                      dstReg->CondMask))
575
            writeMask &= ~WRITEMASK_X;
576
      }
577
      if (writeMask & WRITEMASK_Y) {
578
         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
579
                      dstReg->CondMask))
580
            writeMask &= ~WRITEMASK_Y;
581
      }
582
      if (writeMask & WRITEMASK_Z) {
583
         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
584
                      dstReg->CondMask))
585
            writeMask &= ~WRITEMASK_Z;
586
      }
587
      if (writeMask & WRITEMASK_W) {
588
         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
589
                      dstReg->CondMask))
590
            writeMask &= ~WRITEMASK_W;
591
      }
592
   }
593
 
594
   if (writeMask & WRITEMASK_X)
595
      dst[0] = value[0];
596
   if (writeMask & WRITEMASK_Y)
597
      dst[1] = value[1];
598
   if (writeMask & WRITEMASK_Z)
599
      dst[2] = value[2];
600
   if (writeMask & WRITEMASK_W)
601
      dst[3] = value[3];
602
 
603
   if (inst->CondUpdate) {
604
      if (writeMask & WRITEMASK_X)
605
         machine->CondCodes[0] = generate_cc((float)value[0]);
606
      if (writeMask & WRITEMASK_Y)
607
         machine->CondCodes[1] = generate_cc((float)value[1]);
608
      if (writeMask & WRITEMASK_Z)
609
         machine->CondCodes[2] = generate_cc((float)value[2]);
610
      if (writeMask & WRITEMASK_W)
611
         machine->CondCodes[3] = generate_cc((float)value[3]);
612
#if DEBUG_PROG
613
      printf("CondCodes=(%s,%s,%s,%s) for:\n",
614
             _mesa_condcode_string(machine->CondCodes[0]),
615
             _mesa_condcode_string(machine->CondCodes[1]),
616
             _mesa_condcode_string(machine->CondCodes[2]),
617
             _mesa_condcode_string(machine->CondCodes[3]));
618
#endif
619
   }
620
}
621
 
622
 
623
 
624
/**
625
 * Execute the given vertex/fragment program.
626
 *
627
 * \param ctx  rendering context
628
 * \param program  the program to execute
629
 * \param machine  machine state (must be initialized)
630
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
631
 */
632
GLboolean
633
_mesa_execute_program(struct gl_context * ctx,
634
                      const struct gl_program *program,
635
                      struct gl_program_machine *machine)
636
{
637
   const GLuint numInst = program->NumInstructions;
638
   const GLuint maxExec = 10000;
639
   GLuint pc, numExec = 0;
640
 
641
   machine->CurProgram = program;
642
 
643
   if (DEBUG_PROG) {
644
      printf("execute program %u --------------------\n", program->Id);
645
   }
646
 
647
   if (program->Target == GL_VERTEX_PROGRAM_ARB) {
648
      machine->EnvParams = ctx->VertexProgram.Parameters;
649
   }
650
   else {
651
      machine->EnvParams = ctx->FragmentProgram.Parameters;
652
   }
653
 
654
   for (pc = 0; pc < numInst; pc++) {
655
      const struct prog_instruction *inst = program->Instructions + pc;
656
 
657
      if (DEBUG_PROG) {
658
         _mesa_print_instruction(inst);
659
      }
660
 
661
      switch (inst->Opcode) {
662
      case OPCODE_ABS:
663
         {
664
            GLfloat a[4], result[4];
665
            fetch_vector4(&inst->SrcReg[0], machine, a);
666
            result[0] = FABSF(a[0]);
667
            result[1] = FABSF(a[1]);
668
            result[2] = FABSF(a[2]);
669
            result[3] = FABSF(a[3]);
670
            store_vector4(inst, machine, result);
671
         }
672
         break;
673
      case OPCODE_ADD:
674
         {
675
            GLfloat a[4], b[4], result[4];
676
            fetch_vector4(&inst->SrcReg[0], machine, a);
677
            fetch_vector4(&inst->SrcReg[1], machine, b);
678
            result[0] = a[0] + b[0];
679
            result[1] = a[1] + b[1];
680
            result[2] = a[2] + b[2];
681
            result[3] = a[3] + b[3];
682
            store_vector4(inst, machine, result);
683
            if (DEBUG_PROG) {
684
               printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
685
                      result[0], result[1], result[2], result[3],
686
                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
687
            }
688
         }
689
         break;
690
      case OPCODE_AND:     /* bitwise AND */
691
         {
692
            GLuint a[4], b[4], result[4];
693
            fetch_vector4ui(&inst->SrcReg[0], machine, a);
694
            fetch_vector4ui(&inst->SrcReg[1], machine, b);
695
            result[0] = a[0] & b[0];
696
            result[1] = a[1] & b[1];
697
            result[2] = a[2] & b[2];
698
            result[3] = a[3] & b[3];
699
            store_vector4ui(inst, machine, result);
700
         }
701
         break;
702
      case OPCODE_ARL:
703
         {
704
            GLfloat t[4];
705
            fetch_vector4(&inst->SrcReg[0], machine, t);
706
            machine->AddressReg[0][0] = IFLOOR(t[0]);
707
            if (DEBUG_PROG) {
708
               printf("ARL %d\n", machine->AddressReg[0][0]);
709
            }
710
         }
711
         break;
712
      case OPCODE_BGNLOOP:
713
         /* no-op */
714
         ASSERT(program->Instructions[inst->BranchTarget].Opcode
715
                == OPCODE_ENDLOOP);
716
         break;
717
      case OPCODE_ENDLOOP:
718
         /* subtract 1 here since pc is incremented by for(pc) loop */
719
         ASSERT(program->Instructions[inst->BranchTarget].Opcode
720
                == OPCODE_BGNLOOP);
721
         pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
722
         break;
723
      case OPCODE_BGNSUB:      /* begin subroutine */
724
         break;
725
      case OPCODE_ENDSUB:      /* end subroutine */
726
         break;
727
      case OPCODE_BRA:         /* branch (conditional) */
728
         if (eval_condition(machine, inst)) {
729
            /* take branch */
730
            /* Subtract 1 here since we'll do pc++ below */
731
            pc = inst->BranchTarget - 1;
732
         }
733
         break;
734
      case OPCODE_BRK:         /* break out of loop (conditional) */
735
         ASSERT(program->Instructions[inst->BranchTarget].Opcode
736
                == OPCODE_ENDLOOP);
737
         if (eval_condition(machine, inst)) {
738
            /* break out of loop */
739
            /* pc++ at end of for-loop will put us after the ENDLOOP inst */
740
            pc = inst->BranchTarget;
741
         }
742
         break;
743
      case OPCODE_CONT:        /* continue loop (conditional) */
744
         ASSERT(program->Instructions[inst->BranchTarget].Opcode
745
                == OPCODE_ENDLOOP);
746
         if (eval_condition(machine, inst)) {
747
            /* continue at ENDLOOP */
748
            /* Subtract 1 here since we'll do pc++ at end of for-loop */
749
            pc = inst->BranchTarget - 1;
750
         }
751
         break;
752
      case OPCODE_CAL:         /* Call subroutine (conditional) */
753
         if (eval_condition(machine, inst)) {
754
            /* call the subroutine */
755
            if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
756
               return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
757
            }
758
            machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
759
            /* Subtract 1 here since we'll do pc++ at end of for-loop */
760
            pc = inst->BranchTarget - 1;
761
         }
762
         break;
763
      case OPCODE_CMP:
764
         {
765
            GLfloat a[4], b[4], c[4], result[4];
766
            fetch_vector4(&inst->SrcReg[0], machine, a);
767
            fetch_vector4(&inst->SrcReg[1], machine, b);
768
            fetch_vector4(&inst->SrcReg[2], machine, c);
769
            result[0] = a[0] < 0.0F ? b[0] : c[0];
770
            result[1] = a[1] < 0.0F ? b[1] : c[1];
771
            result[2] = a[2] < 0.0F ? b[2] : c[2];
772
            result[3] = a[3] < 0.0F ? b[3] : c[3];
773
            store_vector4(inst, machine, result);
774
            if (DEBUG_PROG) {
775
               printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
776
                      result[0], result[1], result[2], result[3],
777
                      a[0], a[1], a[2], a[3],
778
                      b[0], b[1], b[2], b[3],
779
                      c[0], c[1], c[2], c[3]);
780
            }
781
         }
782
         break;
783
      case OPCODE_COS:
784
         {
785
            GLfloat a[4], result[4];
786
            fetch_vector1(&inst->SrcReg[0], machine, a);
787
            result[0] = result[1] = result[2] = result[3]
788
               = (GLfloat) cos(a[0]);
789
            store_vector4(inst, machine, result);
790
         }
791
         break;
792
      case OPCODE_DDX:         /* Partial derivative with respect to X */
793
         {
794
            GLfloat result[4];
795
            fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
796
                                'X', result);
797
            store_vector4(inst, machine, result);
798
         }
799
         break;
800
      case OPCODE_DDY:         /* Partial derivative with respect to Y */
801
         {
802
            GLfloat result[4];
803
            fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
804
                                'Y', result);
805
            store_vector4(inst, machine, result);
806
         }
807
         break;
808
      case OPCODE_DP2:
809
         {
810
            GLfloat a[4], b[4], result[4];
811
            fetch_vector4(&inst->SrcReg[0], machine, a);
812
            fetch_vector4(&inst->SrcReg[1], machine, b);
813
            result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
814
            store_vector4(inst, machine, result);
815
            if (DEBUG_PROG) {
816
               printf("DP2 %g = (%g %g) . (%g %g)\n",
817
                      result[0], a[0], a[1], b[0], b[1]);
818
            }
819
         }
820
         break;
821
      case OPCODE_DP2A:
822
         {
823
            GLfloat a[4], b[4], c, result[4];
824
            fetch_vector4(&inst->SrcReg[0], machine, a);
825
            fetch_vector4(&inst->SrcReg[1], machine, b);
826
            fetch_vector1(&inst->SrcReg[1], machine, &c);
827
            result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
828
            store_vector4(inst, machine, result);
829
            if (DEBUG_PROG) {
830
               printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
831
                      result[0], a[0], a[1], b[0], b[1], c);
832
            }
833
         }
834
         break;
835
      case OPCODE_DP3:
836
         {
837
            GLfloat a[4], b[4], result[4];
838
            fetch_vector4(&inst->SrcReg[0], machine, a);
839
            fetch_vector4(&inst->SrcReg[1], machine, b);
840
            result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
841
            store_vector4(inst, machine, result);
842
            if (DEBUG_PROG) {
843
               printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
844
                      result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
845
            }
846
         }
847
         break;
848
      case OPCODE_DP4:
849
         {
850
            GLfloat a[4], b[4], result[4];
851
            fetch_vector4(&inst->SrcReg[0], machine, a);
852
            fetch_vector4(&inst->SrcReg[1], machine, b);
853
            result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
854
            store_vector4(inst, machine, result);
855
            if (DEBUG_PROG) {
856
               printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
857
                      result[0], a[0], a[1], a[2], a[3],
858
                      b[0], b[1], b[2], b[3]);
859
            }
860
         }
861
         break;
862
      case OPCODE_DPH:
863
         {
864
            GLfloat a[4], b[4], result[4];
865
            fetch_vector4(&inst->SrcReg[0], machine, a);
866
            fetch_vector4(&inst->SrcReg[1], machine, b);
867
            result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
868
            store_vector4(inst, machine, result);
869
         }
870
         break;
871
      case OPCODE_DST:         /* Distance vector */
872
         {
873
            GLfloat a[4], b[4], result[4];
874
            fetch_vector4(&inst->SrcReg[0], machine, a);
875
            fetch_vector4(&inst->SrcReg[1], machine, b);
876
            result[0] = 1.0F;
877
            result[1] = a[1] * b[1];
878
            result[2] = a[2];
879
            result[3] = b[3];
880
            store_vector4(inst, machine, result);
881
         }
882
         break;
883
      case OPCODE_EXP:
884
         {
885
            GLfloat t[4], q[4], floor_t0;
886
            fetch_vector1(&inst->SrcReg[0], machine, t);
887
            floor_t0 = FLOORF(t[0]);
888
            if (floor_t0 > FLT_MAX_EXP) {
889
               SET_POS_INFINITY(q[0]);
890
               SET_POS_INFINITY(q[2]);
891
            }
892
            else if (floor_t0 < FLT_MIN_EXP) {
893
               q[0] = 0.0F;
894
               q[2] = 0.0F;
895
            }
896
            else {
897
               q[0] = LDEXPF(1.0, (int) floor_t0);
898
               /* Note: GL_NV_vertex_program expects
899
                * result.z = result.x * APPX(result.y)
900
                * We do what the ARB extension says.
901
                */
902
               q[2] = (GLfloat) pow(2.0, t[0]);
903
            }
904
            q[1] = t[0] - floor_t0;
905
            q[3] = 1.0F;
906
            store_vector4( inst, machine, q );
907
         }
908
         break;
909
      case OPCODE_EX2:         /* Exponential base 2 */
910
         {
911
            GLfloat a[4], result[4], val;
912
            fetch_vector1(&inst->SrcReg[0], machine, a);
913
            val = (GLfloat) pow(2.0, a[0]);
914
            /*
915
            if (IS_INF_OR_NAN(val))
916
               val = 1.0e10;
917
            */
918
            result[0] = result[1] = result[2] = result[3] = val;
919
            store_vector4(inst, machine, result);
920
         }
921
         break;
922
      case OPCODE_FLR:
923
         {
924
            GLfloat a[4], result[4];
925
            fetch_vector4(&inst->SrcReg[0], machine, a);
926
            result[0] = FLOORF(a[0]);
927
            result[1] = FLOORF(a[1]);
928
            result[2] = FLOORF(a[2]);
929
            result[3] = FLOORF(a[3]);
930
            store_vector4(inst, machine, result);
931
         }
932
         break;
933
      case OPCODE_FRC:
934
         {
935
            GLfloat a[4], result[4];
936
            fetch_vector4(&inst->SrcReg[0], machine, a);
937
            result[0] = a[0] - FLOORF(a[0]);
938
            result[1] = a[1] - FLOORF(a[1]);
939
            result[2] = a[2] - FLOORF(a[2]);
940
            result[3] = a[3] - FLOORF(a[3]);
941
            store_vector4(inst, machine, result);
942
         }
943
         break;
944
      case OPCODE_IF:
945
         {
946
            GLboolean cond;
947
            ASSERT(program->Instructions[inst->BranchTarget].Opcode
948
                   == OPCODE_ELSE ||
949
                   program->Instructions[inst->BranchTarget].Opcode
950
                   == OPCODE_ENDIF);
951
            /* eval condition */
952
            if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
953
               GLfloat a[4];
954
               fetch_vector1(&inst->SrcReg[0], machine, a);
955
               cond = (a[0] != 0.0);
956
            }
957
            else {
958
               cond = eval_condition(machine, inst);
959
            }
960
            if (DEBUG_PROG) {
961
               printf("IF: %d\n", cond);
962
            }
963
            /* do if/else */
964
            if (cond) {
965
               /* do if-clause (just continue execution) */
966
            }
967
            else {
968
               /* go to the instruction after ELSE or ENDIF */
969
               assert(inst->BranchTarget >= 0);
970
               pc = inst->BranchTarget;
971
            }
972
         }
973
         break;
974
      case OPCODE_ELSE:
975
         /* goto ENDIF */
976
         ASSERT(program->Instructions[inst->BranchTarget].Opcode
977
                == OPCODE_ENDIF);
978
         assert(inst->BranchTarget >= 0);
979
         pc = inst->BranchTarget;
980
         break;
981
      case OPCODE_ENDIF:
982
         /* nothing */
983
         break;
984
      case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
985
         if (eval_condition(machine, inst)) {
986
            return GL_FALSE;
987
         }
988
         break;
989
      case OPCODE_KIL:         /* ARB_f_p only */
990
         {
991
            GLfloat a[4];
992
            fetch_vector4(&inst->SrcReg[0], machine, a);
993
            if (DEBUG_PROG) {
994
               printf("KIL if (%g %g %g %g) <= 0.0\n",
995
                      a[0], a[1], a[2], a[3]);
996
            }
997
 
998
            if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
999
               return GL_FALSE;
1000
            }
1001
         }
1002
         break;
1003
      case OPCODE_LG2:         /* log base 2 */
1004
         {
1005
            GLfloat a[4], result[4], val;
1006
            fetch_vector1(&inst->SrcReg[0], machine, a);
1007
	    /* The fast LOG2 macro doesn't meet the precision requirements.
1008
	     */
1009
            if (a[0] == 0.0F) {
1010
               val = -FLT_MAX;
1011
            }
1012
            else {
1013
               val = (float)(log(a[0]) * 1.442695F);
1014
            }
1015
            result[0] = result[1] = result[2] = result[3] = val;
1016
            store_vector4(inst, machine, result);
1017
         }
1018
         break;
1019
      case OPCODE_LIT:
1020
         {
1021
            const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
1022
            GLfloat a[4], result[4];
1023
            fetch_vector4(&inst->SrcReg[0], machine, a);
1024
            a[0] = MAX2(a[0], 0.0F);
1025
            a[1] = MAX2(a[1], 0.0F);
1026
            /* XXX ARB version clamps a[3], NV version doesn't */
1027
            a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
1028
            result[0] = 1.0F;
1029
            result[1] = a[0];
1030
            /* XXX we could probably just use pow() here */
1031
            if (a[0] > 0.0F) {
1032
               if (a[1] == 0.0 && a[3] == 0.0)
1033
                  result[2] = 1.0F;
1034
               else
1035
                  result[2] = (GLfloat) pow(a[1], a[3]);
1036
            }
1037
            else {
1038
               result[2] = 0.0F;
1039
            }
1040
            result[3] = 1.0F;
1041
            store_vector4(inst, machine, result);
1042
            if (DEBUG_PROG) {
1043
               printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1044
                      result[0], result[1], result[2], result[3],
1045
                      a[0], a[1], a[2], a[3]);
1046
            }
1047
         }
1048
         break;
1049
      case OPCODE_LOG:
1050
         {
1051
            GLfloat t[4], q[4], abs_t0;
1052
            fetch_vector1(&inst->SrcReg[0], machine, t);
1053
            abs_t0 = FABSF(t[0]);
1054
            if (abs_t0 != 0.0F) {
1055
               /* Since we really can't handle infinite values on VMS
1056
                * like other OSes we'll use __MAXFLOAT to represent
1057
                * infinity.  This may need some tweaking.
1058
                */
1059
#ifdef VMS
1060
               if (abs_t0 == __MAXFLOAT)
1061
#else
1062
               if (IS_INF_OR_NAN(abs_t0))
1063
#endif
1064
               {
1065
                  SET_POS_INFINITY(q[0]);
1066
                  q[1] = 1.0F;
1067
                  SET_POS_INFINITY(q[2]);
1068
               }
1069
               else {
1070
                  int exponent;
1071
                  GLfloat mantissa = FREXPF(t[0], &exponent);
1072
                  q[0] = (GLfloat) (exponent - 1);
1073
                  q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1074
 
1075
		  /* The fast LOG2 macro doesn't meet the precision
1076
		   * requirements.
1077
		   */
1078
                  q[2] = (float)(log(t[0]) * 1.442695F);
1079
               }
1080
            }
1081
            else {
1082
               SET_NEG_INFINITY(q[0]);
1083
               q[1] = 1.0F;
1084
               SET_NEG_INFINITY(q[2]);
1085
            }
1086
            q[3] = 1.0;
1087
            store_vector4(inst, machine, q);
1088
         }
1089
         break;
1090
      case OPCODE_LRP:
1091
         {
1092
            GLfloat a[4], b[4], c[4], result[4];
1093
            fetch_vector4(&inst->SrcReg[0], machine, a);
1094
            fetch_vector4(&inst->SrcReg[1], machine, b);
1095
            fetch_vector4(&inst->SrcReg[2], machine, c);
1096
            result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1097
            result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1098
            result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1099
            result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1100
            store_vector4(inst, machine, result);
1101
            if (DEBUG_PROG) {
1102
               printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1103
                      "(%g %g %g %g), (%g %g %g %g)\n",
1104
                      result[0], result[1], result[2], result[3],
1105
                      a[0], a[1], a[2], a[3],
1106
                      b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1107
            }
1108
         }
1109
         break;
1110
      case OPCODE_MAD:
1111
         {
1112
            GLfloat a[4], b[4], c[4], result[4];
1113
            fetch_vector4(&inst->SrcReg[0], machine, a);
1114
            fetch_vector4(&inst->SrcReg[1], machine, b);
1115
            fetch_vector4(&inst->SrcReg[2], machine, c);
1116
            result[0] = a[0] * b[0] + c[0];
1117
            result[1] = a[1] * b[1] + c[1];
1118
            result[2] = a[2] * b[2] + c[2];
1119
            result[3] = a[3] * b[3] + c[3];
1120
            store_vector4(inst, machine, result);
1121
            if (DEBUG_PROG) {
1122
               printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1123
                      "(%g %g %g %g) + (%g %g %g %g)\n",
1124
                      result[0], result[1], result[2], result[3],
1125
                      a[0], a[1], a[2], a[3],
1126
                      b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1127
            }
1128
         }
1129
         break;
1130
      case OPCODE_MAX:
1131
         {
1132
            GLfloat a[4], b[4], result[4];
1133
            fetch_vector4(&inst->SrcReg[0], machine, a);
1134
            fetch_vector4(&inst->SrcReg[1], machine, b);
1135
            result[0] = MAX2(a[0], b[0]);
1136
            result[1] = MAX2(a[1], b[1]);
1137
            result[2] = MAX2(a[2], b[2]);
1138
            result[3] = MAX2(a[3], b[3]);
1139
            store_vector4(inst, machine, result);
1140
            if (DEBUG_PROG) {
1141
               printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1142
                      result[0], result[1], result[2], result[3],
1143
                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1144
            }
1145
         }
1146
         break;
1147
      case OPCODE_MIN:
1148
         {
1149
            GLfloat a[4], b[4], result[4];
1150
            fetch_vector4(&inst->SrcReg[0], machine, a);
1151
            fetch_vector4(&inst->SrcReg[1], machine, b);
1152
            result[0] = MIN2(a[0], b[0]);
1153
            result[1] = MIN2(a[1], b[1]);
1154
            result[2] = MIN2(a[2], b[2]);
1155
            result[3] = MIN2(a[3], b[3]);
1156
            store_vector4(inst, machine, result);
1157
         }
1158
         break;
1159
      case OPCODE_MOV:
1160
         {
1161
            GLfloat result[4];
1162
            fetch_vector4(&inst->SrcReg[0], machine, result);
1163
            store_vector4(inst, machine, result);
1164
            if (DEBUG_PROG) {
1165
               printf("MOV (%g %g %g %g)\n",
1166
                      result[0], result[1], result[2], result[3]);
1167
            }
1168
         }
1169
         break;
1170
      case OPCODE_MUL:
1171
         {
1172
            GLfloat a[4], b[4], result[4];
1173
            fetch_vector4(&inst->SrcReg[0], machine, a);
1174
            fetch_vector4(&inst->SrcReg[1], machine, b);
1175
            result[0] = a[0] * b[0];
1176
            result[1] = a[1] * b[1];
1177
            result[2] = a[2] * b[2];
1178
            result[3] = a[3] * b[3];
1179
            store_vector4(inst, machine, result);
1180
            if (DEBUG_PROG) {
1181
               printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1182
                      result[0], result[1], result[2], result[3],
1183
                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1184
            }
1185
         }
1186
         break;
1187
      case OPCODE_NOISE1:
1188
         {
1189
            GLfloat a[4], result[4];
1190
            fetch_vector1(&inst->SrcReg[0], machine, a);
1191
            result[0] =
1192
               result[1] =
1193
               result[2] =
1194
               result[3] = _mesa_noise1(a[0]);
1195
            store_vector4(inst, machine, result);
1196
         }
1197
         break;
1198
      case OPCODE_NOISE2:
1199
         {
1200
            GLfloat a[4], result[4];
1201
            fetch_vector4(&inst->SrcReg[0], machine, a);
1202
            result[0] =
1203
               result[1] =
1204
               result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1205
            store_vector4(inst, machine, result);
1206
         }
1207
         break;
1208
      case OPCODE_NOISE3:
1209
         {
1210
            GLfloat a[4], result[4];
1211
            fetch_vector4(&inst->SrcReg[0], machine, a);
1212
            result[0] =
1213
               result[1] =
1214
               result[2] =
1215
               result[3] = _mesa_noise3(a[0], a[1], a[2]);
1216
            store_vector4(inst, machine, result);
1217
         }
1218
         break;
1219
      case OPCODE_NOISE4:
1220
         {
1221
            GLfloat a[4], result[4];
1222
            fetch_vector4(&inst->SrcReg[0], machine, a);
1223
            result[0] =
1224
               result[1] =
1225
               result[2] =
1226
               result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1227
            store_vector4(inst, machine, result);
1228
         }
1229
         break;
1230
      case OPCODE_NOP:
1231
         break;
1232
      case OPCODE_NOT:         /* bitwise NOT */
1233
         {
1234
            GLuint a[4], result[4];
1235
            fetch_vector4ui(&inst->SrcReg[0], machine, a);
1236
            result[0] = ~a[0];
1237
            result[1] = ~a[1];
1238
            result[2] = ~a[2];
1239
            result[3] = ~a[3];
1240
            store_vector4ui(inst, machine, result);
1241
         }
1242
         break;
1243
      case OPCODE_NRM3:        /* 3-component normalization */
1244
         {
1245
            GLfloat a[4], result[4];
1246
            GLfloat tmp;
1247
            fetch_vector4(&inst->SrcReg[0], machine, a);
1248
            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1249
            if (tmp != 0.0F)
1250
               tmp = INV_SQRTF(tmp);
1251
            result[0] = tmp * a[0];
1252
            result[1] = tmp * a[1];
1253
            result[2] = tmp * a[2];
1254
            result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
1255
            store_vector4(inst, machine, result);
1256
         }
1257
         break;
1258
      case OPCODE_NRM4:        /* 4-component normalization */
1259
         {
1260
            GLfloat a[4], result[4];
1261
            GLfloat tmp;
1262
            fetch_vector4(&inst->SrcReg[0], machine, a);
1263
            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1264
            if (tmp != 0.0F)
1265
               tmp = INV_SQRTF(tmp);
1266
            result[0] = tmp * a[0];
1267
            result[1] = tmp * a[1];
1268
            result[2] = tmp * a[2];
1269
            result[3] = tmp * a[3];
1270
            store_vector4(inst, machine, result);
1271
         }
1272
         break;
1273
      case OPCODE_OR:          /* bitwise OR */
1274
         {
1275
            GLuint a[4], b[4], result[4];
1276
            fetch_vector4ui(&inst->SrcReg[0], machine, a);
1277
            fetch_vector4ui(&inst->SrcReg[1], machine, b);
1278
            result[0] = a[0] | b[0];
1279
            result[1] = a[1] | b[1];
1280
            result[2] = a[2] | b[2];
1281
            result[3] = a[3] | b[3];
1282
            store_vector4ui(inst, machine, result);
1283
         }
1284
         break;
1285
      case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
1286
         {
1287
            GLfloat a[4];
1288
            GLuint result[4];
1289
            GLhalfNV hx, hy;
1290
            fetch_vector4(&inst->SrcReg[0], machine, a);
1291
            hx = _mesa_float_to_half(a[0]);
1292
            hy = _mesa_float_to_half(a[1]);
1293
            result[0] =
1294
            result[1] =
1295
            result[2] =
1296
            result[3] = hx | (hy << 16);
1297
            store_vector4ui(inst, machine, result);
1298
         }
1299
         break;
1300
      case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
1301
         {
1302
            GLfloat a[4];
1303
            GLuint result[4], usx, usy;
1304
            fetch_vector4(&inst->SrcReg[0], machine, a);
1305
            a[0] = CLAMP(a[0], 0.0F, 1.0F);
1306
            a[1] = CLAMP(a[1], 0.0F, 1.0F);
1307
            usx = IROUND(a[0] * 65535.0F);
1308
            usy = IROUND(a[1] * 65535.0F);
1309
            result[0] =
1310
            result[1] =
1311
            result[2] =
1312
            result[3] = usx | (usy << 16);
1313
            store_vector4ui(inst, machine, result);
1314
         }
1315
         break;
1316
      case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
1317
         {
1318
            GLfloat a[4];
1319
            GLuint result[4], ubx, uby, ubz, ubw;
1320
            fetch_vector4(&inst->SrcReg[0], machine, a);
1321
            a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1322
            a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1323
            a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1324
            a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1325
            ubx = IROUND(127.0F * a[0] + 128.0F);
1326
            uby = IROUND(127.0F * a[1] + 128.0F);
1327
            ubz = IROUND(127.0F * a[2] + 128.0F);
1328
            ubw = IROUND(127.0F * a[3] + 128.0F);
1329
            result[0] =
1330
            result[1] =
1331
            result[2] =
1332
            result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1333
            store_vector4ui(inst, machine, result);
1334
         }
1335
         break;
1336
      case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
1337
         {
1338
            GLfloat a[4];
1339
            GLuint result[4], ubx, uby, ubz, ubw;
1340
            fetch_vector4(&inst->SrcReg[0], machine, a);
1341
            a[0] = CLAMP(a[0], 0.0F, 1.0F);
1342
            a[1] = CLAMP(a[1], 0.0F, 1.0F);
1343
            a[2] = CLAMP(a[2], 0.0F, 1.0F);
1344
            a[3] = CLAMP(a[3], 0.0F, 1.0F);
1345
            ubx = IROUND(255.0F * a[0]);
1346
            uby = IROUND(255.0F * a[1]);
1347
            ubz = IROUND(255.0F * a[2]);
1348
            ubw = IROUND(255.0F * a[3]);
1349
            result[0] =
1350
            result[1] =
1351
            result[2] =
1352
            result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1353
            store_vector4ui(inst, machine, result);
1354
         }
1355
         break;
1356
      case OPCODE_POW:
1357
         {
1358
            GLfloat a[4], b[4], result[4];
1359
            fetch_vector1(&inst->SrcReg[0], machine, a);
1360
            fetch_vector1(&inst->SrcReg[1], machine, b);
1361
            result[0] = result[1] = result[2] = result[3]
1362
               = (GLfloat) pow(a[0], b[0]);
1363
            store_vector4(inst, machine, result);
1364
         }
1365
         break;
1366
      case OPCODE_RCC:  /* clamped reciprocal */
1367
         {
1368
            const float largest = 1.884467e+19, smallest = 5.42101e-20;
1369
            GLfloat a[4], r, result[4];
1370
            fetch_vector1(&inst->SrcReg[0], machine, a);
1371
            if (DEBUG_PROG) {
1372
               if (a[0] == 0)
1373
                  printf("RCC(0)\n");
1374
               else if (IS_INF_OR_NAN(a[0]))
1375
                  printf("RCC(inf)\n");
1376
            }
1377
            if (a[0] == 1.0F) {
1378
               r = 1.0F;
1379
            }
1380
            else {
1381
               r = 1.0F / a[0];
1382
            }
1383
            if (positive(r)) {
1384
               if (r > largest) {
1385
                  r = largest;
1386
               }
1387
               else if (r < smallest) {
1388
                  r = smallest;
1389
               }
1390
            }
1391
            else {
1392
               if (r < -largest) {
1393
                  r = -largest;
1394
               }
1395
               else if (r > -smallest) {
1396
                  r = -smallest;
1397
               }
1398
            }
1399
            result[0] = result[1] = result[2] = result[3] = r;
1400
            store_vector4(inst, machine, result);
1401
         }
1402
         break;
1403
 
1404
      case OPCODE_RCP:
1405
         {
1406
            GLfloat a[4], result[4];
1407
            fetch_vector1(&inst->SrcReg[0], machine, a);
1408
            if (DEBUG_PROG) {
1409
               if (a[0] == 0)
1410
                  printf("RCP(0)\n");
1411
               else if (IS_INF_OR_NAN(a[0]))
1412
                  printf("RCP(inf)\n");
1413
            }
1414
            result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1415
            store_vector4(inst, machine, result);
1416
         }
1417
         break;
1418
      case OPCODE_RET:         /* return from subroutine (conditional) */
1419
         if (eval_condition(machine, inst)) {
1420
            if (machine->StackDepth == 0) {
1421
               return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
1422
            }
1423
            /* subtract one because of pc++ in the for loop */
1424
            pc = machine->CallStack[--machine->StackDepth] - 1;
1425
         }
1426
         break;
1427
      case OPCODE_RFL:         /* reflection vector */
1428
         {
1429
            GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1430
            fetch_vector4(&inst->SrcReg[0], machine, axis);
1431
            fetch_vector4(&inst->SrcReg[1], machine, dir);
1432
            tmpW = DOT3(axis, axis);
1433
            tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1434
            result[0] = tmpX * axis[0] - dir[0];
1435
            result[1] = tmpX * axis[1] - dir[1];
1436
            result[2] = tmpX * axis[2] - dir[2];
1437
            /* result[3] is never written! XXX enforce in parser! */
1438
            store_vector4(inst, machine, result);
1439
         }
1440
         break;
1441
      case OPCODE_RSQ:         /* 1 / sqrt() */
1442
         {
1443
            GLfloat a[4], result[4];
1444
            fetch_vector1(&inst->SrcReg[0], machine, a);
1445
            a[0] = FABSF(a[0]);
1446
            result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1447
            store_vector4(inst, machine, result);
1448
            if (DEBUG_PROG) {
1449
               printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1450
            }
1451
         }
1452
         break;
1453
      case OPCODE_SCS:         /* sine and cos */
1454
         {
1455
            GLfloat a[4], result[4];
1456
            fetch_vector1(&inst->SrcReg[0], machine, a);
1457
            result[0] = (GLfloat) cos(a[0]);
1458
            result[1] = (GLfloat) sin(a[0]);
1459
            result[2] = 0.0;    /* undefined! */
1460
            result[3] = 0.0;    /* undefined! */
1461
            store_vector4(inst, machine, result);
1462
         }
1463
         break;
1464
      case OPCODE_SEQ:         /* set on equal */
1465
         {
1466
            GLfloat a[4], b[4], result[4];
1467
            fetch_vector4(&inst->SrcReg[0], machine, a);
1468
            fetch_vector4(&inst->SrcReg[1], machine, b);
1469
            result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1470
            result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1471
            result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1472
            result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1473
            store_vector4(inst, machine, result);
1474
            if (DEBUG_PROG) {
1475
               printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1476
                      result[0], result[1], result[2], result[3],
1477
                      a[0], a[1], a[2], a[3],
1478
                      b[0], b[1], b[2], b[3]);
1479
            }
1480
         }
1481
         break;
1482
      case OPCODE_SFL:         /* set false, operands ignored */
1483
         {
1484
            static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1485
            store_vector4(inst, machine, result);
1486
         }
1487
         break;
1488
      case OPCODE_SGE:         /* set on greater or equal */
1489
         {
1490
            GLfloat a[4], b[4], result[4];
1491
            fetch_vector4(&inst->SrcReg[0], machine, a);
1492
            fetch_vector4(&inst->SrcReg[1], machine, b);
1493
            result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1494
            result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1495
            result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1496
            result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1497
            store_vector4(inst, machine, result);
1498
            if (DEBUG_PROG) {
1499
               printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1500
                      result[0], result[1], result[2], result[3],
1501
                      a[0], a[1], a[2], a[3],
1502
                      b[0], b[1], b[2], b[3]);
1503
            }
1504
         }
1505
         break;
1506
      case OPCODE_SGT:         /* set on greater */
1507
         {
1508
            GLfloat a[4], b[4], result[4];
1509
            fetch_vector4(&inst->SrcReg[0], machine, a);
1510
            fetch_vector4(&inst->SrcReg[1], machine, b);
1511
            result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1512
            result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1513
            result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1514
            result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1515
            store_vector4(inst, machine, result);
1516
            if (DEBUG_PROG) {
1517
               printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1518
                      result[0], result[1], result[2], result[3],
1519
                      a[0], a[1], a[2], a[3],
1520
                      b[0], b[1], b[2], b[3]);
1521
            }
1522
         }
1523
         break;
1524
      case OPCODE_SIN:
1525
         {
1526
            GLfloat a[4], result[4];
1527
            fetch_vector1(&inst->SrcReg[0], machine, a);
1528
            result[0] = result[1] = result[2] = result[3]
1529
               = (GLfloat) sin(a[0]);
1530
            store_vector4(inst, machine, result);
1531
         }
1532
         break;
1533
      case OPCODE_SLE:         /* set on less or equal */
1534
         {
1535
            GLfloat a[4], b[4], result[4];
1536
            fetch_vector4(&inst->SrcReg[0], machine, a);
1537
            fetch_vector4(&inst->SrcReg[1], machine, b);
1538
            result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1539
            result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1540
            result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1541
            result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1542
            store_vector4(inst, machine, result);
1543
            if (DEBUG_PROG) {
1544
               printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1545
                      result[0], result[1], result[2], result[3],
1546
                      a[0], a[1], a[2], a[3],
1547
                      b[0], b[1], b[2], b[3]);
1548
            }
1549
         }
1550
         break;
1551
      case OPCODE_SLT:         /* set on less */
1552
         {
1553
            GLfloat a[4], b[4], result[4];
1554
            fetch_vector4(&inst->SrcReg[0], machine, a);
1555
            fetch_vector4(&inst->SrcReg[1], machine, b);
1556
            result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1557
            result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1558
            result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1559
            result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1560
            store_vector4(inst, machine, result);
1561
            if (DEBUG_PROG) {
1562
               printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1563
                      result[0], result[1], result[2], result[3],
1564
                      a[0], a[1], a[2], a[3],
1565
                      b[0], b[1], b[2], b[3]);
1566
            }
1567
         }
1568
         break;
1569
      case OPCODE_SNE:         /* set on not equal */
1570
         {
1571
            GLfloat a[4], b[4], result[4];
1572
            fetch_vector4(&inst->SrcReg[0], machine, a);
1573
            fetch_vector4(&inst->SrcReg[1], machine, b);
1574
            result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1575
            result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1576
            result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1577
            result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1578
            store_vector4(inst, machine, result);
1579
            if (DEBUG_PROG) {
1580
               printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1581
                      result[0], result[1], result[2], result[3],
1582
                      a[0], a[1], a[2], a[3],
1583
                      b[0], b[1], b[2], b[3]);
1584
            }
1585
         }
1586
         break;
1587
      case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
1588
         {
1589
            GLfloat a[4], result[4];
1590
            fetch_vector4(&inst->SrcReg[0], machine, a);
1591
            result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1592
            result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1593
            result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1594
            result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1595
            store_vector4(inst, machine, result);
1596
         }
1597
         break;
1598
      case OPCODE_STR:         /* set true, operands ignored */
1599
         {
1600
            static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1601
            store_vector4(inst, machine, result);
1602
         }
1603
         break;
1604
      case OPCODE_SUB:
1605
         {
1606
            GLfloat a[4], b[4], result[4];
1607
            fetch_vector4(&inst->SrcReg[0], machine, a);
1608
            fetch_vector4(&inst->SrcReg[1], machine, b);
1609
            result[0] = a[0] - b[0];
1610
            result[1] = a[1] - b[1];
1611
            result[2] = a[2] - b[2];
1612
            result[3] = a[3] - b[3];
1613
            store_vector4(inst, machine, result);
1614
            if (DEBUG_PROG) {
1615
               printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1616
                      result[0], result[1], result[2], result[3],
1617
                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1618
            }
1619
         }
1620
         break;
1621
      case OPCODE_SWZ:         /* extended swizzle */
1622
         {
1623
            const struct prog_src_register *source = &inst->SrcReg[0];
1624
            const GLfloat *src = get_src_register_pointer(source, machine);
1625
            GLfloat result[4];
1626
            GLuint i;
1627
            for (i = 0; i < 4; i++) {
1628
               const GLuint swz = GET_SWZ(source->Swizzle, i);
1629
               if (swz == SWIZZLE_ZERO)
1630
                  result[i] = 0.0;
1631
               else if (swz == SWIZZLE_ONE)
1632
                  result[i] = 1.0;
1633
               else {
1634
                  ASSERT(swz >= 0);
1635
                  ASSERT(swz <= 3);
1636
                  result[i] = src[swz];
1637
               }
1638
               if (source->Negate & (1 << i))
1639
                  result[i] = -result[i];
1640
            }
1641
            store_vector4(inst, machine, result);
1642
         }
1643
         break;
1644
      case OPCODE_TEX:         /* Both ARB and NV frag prog */
1645
         /* Simple texel lookup */
1646
         {
1647
            GLfloat texcoord[4], color[4];
1648
            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1649
 
1650
            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1651
 
1652
            if (DEBUG_PROG) {
1653
               printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1654
                      color[0], color[1], color[2], color[3],
1655
                      inst->TexSrcUnit,
1656
                      texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1657
            }
1658
            store_vector4(inst, machine, color);
1659
         }
1660
         break;
1661
      case OPCODE_TXB:         /* GL_ARB_fragment_program only */
1662
         /* Texel lookup with LOD bias */
1663
         {
1664
            GLfloat texcoord[4], color[4], lodBias;
1665
 
1666
            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1667
 
1668
            /* texcoord[3] is the bias to add to lambda */
1669
            lodBias = texcoord[3];
1670
 
1671
            fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1672
 
1673
            store_vector4(inst, machine, color);
1674
         }
1675
         break;
1676
      case OPCODE_TXD:         /* GL_NV_fragment_program only */
1677
         /* Texture lookup w/ partial derivatives for LOD */
1678
         {
1679
            GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1680
            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1681
            fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1682
            fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1683
            machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1684
                                     0.0, /* lodBias */
1685
                                     inst->TexSrcUnit, color);
1686
            store_vector4(inst, machine, color);
1687
         }
1688
         break;
1689
      case OPCODE_TXL:
1690
         /* Texel lookup with explicit LOD */
1691
         {
1692
            GLfloat texcoord[4], color[4], lod;
1693
 
1694
            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1695
 
1696
            /* texcoord[3] is the LOD */
1697
            lod = texcoord[3];
1698
 
1699
	    machine->FetchTexelLod(ctx, texcoord, lod,
1700
				   machine->Samplers[inst->TexSrcUnit], color);
1701
 
1702
            store_vector4(inst, machine, color);
1703
         }
1704
         break;
1705
      case OPCODE_TXP:         /* GL_ARB_fragment_program only */
1706
         /* Texture lookup w/ projective divide */
1707
         {
1708
            GLfloat texcoord[4], color[4];
1709
 
1710
            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1711
            /* Not so sure about this test - if texcoord[3] is
1712
             * zero, we'd probably be fine except for an ASSERT in
1713
             * IROUND_POS() which gets triggered by the inf values created.
1714
             */
1715
            if (texcoord[3] != 0.0) {
1716
               texcoord[0] /= texcoord[3];
1717
               texcoord[1] /= texcoord[3];
1718
               texcoord[2] /= texcoord[3];
1719
            }
1720
 
1721
            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1722
 
1723
            store_vector4(inst, machine, color);
1724
         }
1725
         break;
1726
      case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
1727
         /* Texture lookup w/ projective divide, as above, but do not
1728
          * do the divide by w if sampling from a cube map.
1729
          */
1730
         {
1731
            GLfloat texcoord[4], color[4];
1732
 
1733
            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1734
            if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1735
                texcoord[3] != 0.0) {
1736
               texcoord[0] /= texcoord[3];
1737
               texcoord[1] /= texcoord[3];
1738
               texcoord[2] /= texcoord[3];
1739
            }
1740
 
1741
            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1742
 
1743
            store_vector4(inst, machine, color);
1744
         }
1745
         break;
1746
      case OPCODE_TRUNC:       /* truncate toward zero */
1747
         {
1748
            GLfloat a[4], result[4];
1749
            fetch_vector4(&inst->SrcReg[0], machine, a);
1750
            result[0] = (GLfloat) (GLint) a[0];
1751
            result[1] = (GLfloat) (GLint) a[1];
1752
            result[2] = (GLfloat) (GLint) a[2];
1753
            result[3] = (GLfloat) (GLint) a[3];
1754
            store_vector4(inst, machine, result);
1755
         }
1756
         break;
1757
      case OPCODE_UP2H:        /* unpack two 16-bit floats */
1758
         {
1759
            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1760
            GLfloat result[4];
1761
            GLushort hx, hy;
1762
            hx = raw & 0xffff;
1763
            hy = raw >> 16;
1764
            result[0] = result[2] = _mesa_half_to_float(hx);
1765
            result[1] = result[3] = _mesa_half_to_float(hy);
1766
            store_vector4(inst, machine, result);
1767
         }
1768
         break;
1769
      case OPCODE_UP2US:       /* unpack two GLushorts */
1770
         {
1771
            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1772
            GLfloat result[4];
1773
            GLushort usx, usy;
1774
            usx = raw & 0xffff;
1775
            usy = raw >> 16;
1776
            result[0] = result[2] = usx * (1.0f / 65535.0f);
1777
            result[1] = result[3] = usy * (1.0f / 65535.0f);
1778
            store_vector4(inst, machine, result);
1779
         }
1780
         break;
1781
      case OPCODE_UP4B:        /* unpack four GLbytes */
1782
         {
1783
            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1784
            GLfloat result[4];
1785
            result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1786
            result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1787
            result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1788
            result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1789
            store_vector4(inst, machine, result);
1790
         }
1791
         break;
1792
      case OPCODE_UP4UB:       /* unpack four GLubytes */
1793
         {
1794
            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1795
            GLfloat result[4];
1796
            result[0] = ((raw >> 0) & 0xff) / 255.0F;
1797
            result[1] = ((raw >> 8) & 0xff) / 255.0F;
1798
            result[2] = ((raw >> 16) & 0xff) / 255.0F;
1799
            result[3] = ((raw >> 24) & 0xff) / 255.0F;
1800
            store_vector4(inst, machine, result);
1801
         }
1802
         break;
1803
      case OPCODE_XOR:         /* bitwise XOR */
1804
         {
1805
            GLuint a[4], b[4], result[4];
1806
            fetch_vector4ui(&inst->SrcReg[0], machine, a);
1807
            fetch_vector4ui(&inst->SrcReg[1], machine, b);
1808
            result[0] = a[0] ^ b[0];
1809
            result[1] = a[1] ^ b[1];
1810
            result[2] = a[2] ^ b[2];
1811
            result[3] = a[3] ^ b[3];
1812
            store_vector4ui(inst, machine, result);
1813
         }
1814
         break;
1815
      case OPCODE_XPD:         /* cross product */
1816
         {
1817
            GLfloat a[4], b[4], result[4];
1818
            fetch_vector4(&inst->SrcReg[0], machine, a);
1819
            fetch_vector4(&inst->SrcReg[1], machine, b);
1820
            result[0] = a[1] * b[2] - a[2] * b[1];
1821
            result[1] = a[2] * b[0] - a[0] * b[2];
1822
            result[2] = a[0] * b[1] - a[1] * b[0];
1823
            result[3] = 1.0;
1824
            store_vector4(inst, machine, result);
1825
            if (DEBUG_PROG) {
1826
               printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1827
                      result[0], result[1], result[2], result[3],
1828
                      a[0], a[1], a[2], b[0], b[1], b[2]);
1829
            }
1830
         }
1831
         break;
1832
      case OPCODE_X2D:         /* 2-D matrix transform */
1833
         {
1834
            GLfloat a[4], b[4], c[4], result[4];
1835
            fetch_vector4(&inst->SrcReg[0], machine, a);
1836
            fetch_vector4(&inst->SrcReg[1], machine, b);
1837
            fetch_vector4(&inst->SrcReg[2], machine, c);
1838
            result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1839
            result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1840
            result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1841
            result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1842
            store_vector4(inst, machine, result);
1843
         }
1844
         break;
1845
      case OPCODE_PRINT:
1846
         {
1847
            if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
1848
               GLfloat a[4];
1849
               fetch_vector4(&inst->SrcReg[0], machine, a);
1850
               printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1851
                            a[0], a[1], a[2], a[3]);
1852
            }
1853
            else {
1854
               printf("%s\n", (const char *) inst->Data);
1855
            }
1856
         }
1857
         break;
1858
      case OPCODE_END:
1859
         return GL_TRUE;
1860
      default:
1861
         _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1862
                       inst->Opcode);
1863
         return GL_TRUE;        /* return value doesn't matter */
1864
      }
1865
 
1866
      numExec++;
1867
      if (numExec > maxExec) {
1868
	 static GLboolean reported = GL_FALSE;
1869
	 if (!reported) {
1870
	    _mesa_problem(ctx, "Infinite loop detected in fragment program");
1871
	    reported = GL_TRUE;
1872
	 }
1873
         return GL_TRUE;
1874
      }
1875
 
1876
   } /* for pc */
1877
 
1878
   return GL_TRUE;
1879
}