Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1901 | serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * Version: 7.3 |
||
4 | * |
||
5 | * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. |
||
6 | * |
||
7 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
8 | * copy of this software and associated documentation files (the "Software"), |
||
9 | * to deal in the Software without restriction, including without limitation |
||
10 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
11 | * and/or sell copies of the Software, and to permit persons to whom the |
||
12 | * Software is furnished to do so, subject to the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice shall be included |
||
15 | * in all copies or substantial portions of the Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
18 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
||
21 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||
22 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
23 | */ |
||
24 | |||
25 | /** |
||
26 | * \file prog_execute.c |
||
27 | * Software interpreter for vertex/fragment programs. |
||
28 | * \author Brian Paul |
||
29 | */ |
||
30 | |||
31 | /* |
||
32 | * NOTE: we do everything in single-precision floating point; we don't |
||
33 | * currently observe the single/half/fixed-precision qualifiers. |
||
34 | * |
||
35 | */ |
||
36 | |||
37 | |||
38 | #include "main/glheader.h" |
||
39 | #include "main/colormac.h" |
||
40 | #include "main/macros.h" |
||
41 | #include "prog_execute.h" |
||
42 | #include "prog_instruction.h" |
||
43 | #include "prog_parameter.h" |
||
44 | #include "prog_print.h" |
||
45 | #include "prog_noise.h" |
||
46 | |||
47 | |||
48 | /* debug predicate */ |
||
49 | #define DEBUG_PROG 0 |
||
50 | |||
51 | |||
52 | /** |
||
53 | * Set x to positive or negative infinity. |
||
54 | */ |
||
55 | #if defined(USE_IEEE) || defined(_WIN32) |
||
56 | #define SET_POS_INFINITY(x) \ |
||
57 | do { \ |
||
58 | fi_type fi; \ |
||
59 | fi.i = 0x7F800000; \ |
||
60 | x = fi.f; \ |
||
61 | } while (0) |
||
62 | #define SET_NEG_INFINITY(x) \ |
||
63 | do { \ |
||
64 | fi_type fi; \ |
||
65 | fi.i = 0xFF800000; \ |
||
66 | x = fi.f; \ |
||
67 | } while (0) |
||
68 | #elif defined(VMS) |
||
69 | #define SET_POS_INFINITY(x) x = __MAXFLOAT |
||
70 | #define SET_NEG_INFINITY(x) x = -__MAXFLOAT |
||
71 | #else |
||
72 | #define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL |
||
73 | #define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL |
||
74 | #endif |
||
75 | |||
76 | #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits |
||
77 | |||
78 | |||
79 | static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; |
||
80 | |||
81 | |||
82 | |||
83 | /** |
||
84 | * Return TRUE for +0 and other positive values, FALSE otherwise. |
||
85 | * Used for RCC opcode. |
||
86 | */ |
||
87 | static INLINE GLboolean |
||
88 | positive(float x) |
||
89 | { |
||
90 | fi_type fi; |
||
91 | fi.f = x; |
||
92 | if (fi.i & 0x80000000) |
||
93 | return GL_FALSE; |
||
94 | return GL_TRUE; |
||
95 | } |
||
96 | |||
97 | |||
98 | |||
99 | /** |
||
100 | * Return a pointer to the 4-element float vector specified by the given |
||
101 | * source register. |
||
102 | */ |
||
103 | static INLINE const GLfloat * |
||
104 | get_src_register_pointer(const struct prog_src_register *source, |
||
105 | const struct gl_program_machine *machine) |
||
106 | { |
||
107 | const struct gl_program *prog = machine->CurProgram; |
||
108 | GLint reg = source->Index; |
||
109 | |||
110 | if (source->RelAddr) { |
||
111 | /* add address register value to src index/offset */ |
||
112 | reg += machine->AddressReg[0][0]; |
||
113 | if (reg < 0) { |
||
114 | return ZeroVec; |
||
115 | } |
||
116 | } |
||
117 | |||
118 | switch (source->File) { |
||
119 | case PROGRAM_TEMPORARY: |
||
120 | if (reg >= MAX_PROGRAM_TEMPS) |
||
121 | return ZeroVec; |
||
122 | return machine->Temporaries[reg]; |
||
123 | |||
124 | case PROGRAM_INPUT: |
||
125 | if (prog->Target == GL_VERTEX_PROGRAM_ARB) { |
||
126 | if (reg >= VERT_ATTRIB_MAX) |
||
127 | return ZeroVec; |
||
128 | return machine->VertAttribs[reg]; |
||
129 | } |
||
130 | else { |
||
131 | if (reg >= FRAG_ATTRIB_MAX) |
||
132 | return ZeroVec; |
||
133 | return machine->Attribs[reg][machine->CurElement]; |
||
134 | } |
||
135 | |||
136 | case PROGRAM_OUTPUT: |
||
137 | if (reg >= MAX_PROGRAM_OUTPUTS) |
||
138 | return ZeroVec; |
||
139 | return machine->Outputs[reg]; |
||
140 | |||
141 | case PROGRAM_LOCAL_PARAM: |
||
142 | if (reg >= MAX_PROGRAM_LOCAL_PARAMS) |
||
143 | return ZeroVec; |
||
144 | return machine->CurProgram->LocalParams[reg]; |
||
145 | |||
146 | case PROGRAM_ENV_PARAM: |
||
147 | if (reg >= MAX_PROGRAM_ENV_PARAMS) |
||
148 | return ZeroVec; |
||
149 | return machine->EnvParams[reg]; |
||
150 | |||
151 | case PROGRAM_STATE_VAR: |
||
152 | /* Fallthrough */ |
||
153 | case PROGRAM_CONSTANT: |
||
154 | /* Fallthrough */ |
||
155 | case PROGRAM_UNIFORM: |
||
156 | /* Fallthrough */ |
||
157 | case PROGRAM_NAMED_PARAM: |
||
158 | if (reg >= (GLint) prog->Parameters->NumParameters) |
||
159 | return ZeroVec; |
||
160 | return prog->Parameters->ParameterValues[reg]; |
||
161 | |||
162 | default: |
||
163 | _mesa_problem(NULL, |
||
164 | "Invalid src register file %d in get_src_register_pointer()", |
||
165 | source->File); |
||
166 | return NULL; |
||
167 | } |
||
168 | } |
||
169 | |||
170 | |||
171 | /** |
||
172 | * Return a pointer to the 4-element float vector specified by the given |
||
173 | * destination register. |
||
174 | */ |
||
175 | static INLINE GLfloat * |
||
176 | get_dst_register_pointer(const struct prog_dst_register *dest, |
||
177 | struct gl_program_machine *machine) |
||
178 | { |
||
179 | static GLfloat dummyReg[4]; |
||
180 | GLint reg = dest->Index; |
||
181 | |||
182 | if (dest->RelAddr) { |
||
183 | /* add address register value to src index/offset */ |
||
184 | reg += machine->AddressReg[0][0]; |
||
185 | if (reg < 0) { |
||
186 | return dummyReg; |
||
187 | } |
||
188 | } |
||
189 | |||
190 | switch (dest->File) { |
||
191 | case PROGRAM_TEMPORARY: |
||
192 | if (reg >= MAX_PROGRAM_TEMPS) |
||
193 | return dummyReg; |
||
194 | return machine->Temporaries[reg]; |
||
195 | |||
196 | case PROGRAM_OUTPUT: |
||
197 | if (reg >= MAX_PROGRAM_OUTPUTS) |
||
198 | return dummyReg; |
||
199 | return machine->Outputs[reg]; |
||
200 | |||
201 | case PROGRAM_WRITE_ONLY: |
||
202 | return dummyReg; |
||
203 | |||
204 | default: |
||
205 | _mesa_problem(NULL, |
||
206 | "Invalid dest register file %d in get_dst_register_pointer()", |
||
207 | dest->File); |
||
208 | return NULL; |
||
209 | } |
||
210 | } |
||
211 | |||
212 | |||
213 | |||
214 | /** |
||
215 | * Fetch a 4-element float vector from the given source register. |
||
216 | * Apply swizzling and negating as needed. |
||
217 | */ |
||
218 | static void |
||
219 | fetch_vector4(const struct prog_src_register *source, |
||
220 | const struct gl_program_machine *machine, GLfloat result[4]) |
||
221 | { |
||
222 | const GLfloat *src = get_src_register_pointer(source, machine); |
||
223 | ASSERT(src); |
||
224 | |||
225 | if (source->Swizzle == SWIZZLE_NOOP) { |
||
226 | /* no swizzling */ |
||
227 | COPY_4V(result, src); |
||
228 | } |
||
229 | else { |
||
230 | ASSERT(GET_SWZ(source->Swizzle, 0) <= 3); |
||
231 | ASSERT(GET_SWZ(source->Swizzle, 1) <= 3); |
||
232 | ASSERT(GET_SWZ(source->Swizzle, 2) <= 3); |
||
233 | ASSERT(GET_SWZ(source->Swizzle, 3) <= 3); |
||
234 | result[0] = src[GET_SWZ(source->Swizzle, 0)]; |
||
235 | result[1] = src[GET_SWZ(source->Swizzle, 1)]; |
||
236 | result[2] = src[GET_SWZ(source->Swizzle, 2)]; |
||
237 | result[3] = src[GET_SWZ(source->Swizzle, 3)]; |
||
238 | } |
||
239 | |||
240 | if (source->Abs) { |
||
241 | result[0] = FABSF(result[0]); |
||
242 | result[1] = FABSF(result[1]); |
||
243 | result[2] = FABSF(result[2]); |
||
244 | result[3] = FABSF(result[3]); |
||
245 | } |
||
246 | if (source->Negate) { |
||
247 | ASSERT(source->Negate == NEGATE_XYZW); |
||
248 | result[0] = -result[0]; |
||
249 | result[1] = -result[1]; |
||
250 | result[2] = -result[2]; |
||
251 | result[3] = -result[3]; |
||
252 | } |
||
253 | |||
254 | #ifdef NAN_CHECK |
||
255 | assert(!IS_INF_OR_NAN(result[0])); |
||
256 | assert(!IS_INF_OR_NAN(result[0])); |
||
257 | assert(!IS_INF_OR_NAN(result[0])); |
||
258 | assert(!IS_INF_OR_NAN(result[0])); |
||
259 | #endif |
||
260 | } |
||
261 | |||
262 | |||
263 | /** |
||
264 | * Fetch a 4-element uint vector from the given source register. |
||
265 | * Apply swizzling but not negation/abs. |
||
266 | */ |
||
267 | static void |
||
268 | fetch_vector4ui(const struct prog_src_register *source, |
||
269 | const struct gl_program_machine *machine, GLuint result[4]) |
||
270 | { |
||
271 | const GLuint *src = (GLuint *) get_src_register_pointer(source, machine); |
||
272 | ASSERT(src); |
||
273 | |||
274 | if (source->Swizzle == SWIZZLE_NOOP) { |
||
275 | /* no swizzling */ |
||
276 | COPY_4V(result, src); |
||
277 | } |
||
278 | else { |
||
279 | ASSERT(GET_SWZ(source->Swizzle, 0) <= 3); |
||
280 | ASSERT(GET_SWZ(source->Swizzle, 1) <= 3); |
||
281 | ASSERT(GET_SWZ(source->Swizzle, 2) <= 3); |
||
282 | ASSERT(GET_SWZ(source->Swizzle, 3) <= 3); |
||
283 | result[0] = src[GET_SWZ(source->Swizzle, 0)]; |
||
284 | result[1] = src[GET_SWZ(source->Swizzle, 1)]; |
||
285 | result[2] = src[GET_SWZ(source->Swizzle, 2)]; |
||
286 | result[3] = src[GET_SWZ(source->Swizzle, 3)]; |
||
287 | } |
||
288 | |||
289 | /* Note: no Negate or Abs here */ |
||
290 | } |
||
291 | |||
292 | |||
293 | |||
294 | /** |
||
295 | * Fetch the derivative with respect to X or Y for the given register. |
||
296 | * XXX this currently only works for fragment program input attribs. |
||
297 | */ |
||
298 | static void |
||
299 | fetch_vector4_deriv(struct gl_context * ctx, |
||
300 | const struct prog_src_register *source, |
||
301 | const struct gl_program_machine *machine, |
||
302 | char xOrY, GLfloat result[4]) |
||
303 | { |
||
304 | if (source->File == PROGRAM_INPUT && |
||
305 | source->Index < (GLint) machine->NumDeriv) { |
||
306 | const GLint col = machine->CurElement; |
||
307 | const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3]; |
||
308 | const GLfloat invQ = 1.0f / w; |
||
309 | GLfloat deriv[4]; |
||
310 | |||
311 | if (xOrY == 'X') { |
||
312 | deriv[0] = machine->DerivX[source->Index][0] * invQ; |
||
313 | deriv[1] = machine->DerivX[source->Index][1] * invQ; |
||
314 | deriv[2] = machine->DerivX[source->Index][2] * invQ; |
||
315 | deriv[3] = machine->DerivX[source->Index][3] * invQ; |
||
316 | } |
||
317 | else { |
||
318 | deriv[0] = machine->DerivY[source->Index][0] * invQ; |
||
319 | deriv[1] = machine->DerivY[source->Index][1] * invQ; |
||
320 | deriv[2] = machine->DerivY[source->Index][2] * invQ; |
||
321 | deriv[3] = machine->DerivY[source->Index][3] * invQ; |
||
322 | } |
||
323 | |||
324 | result[0] = deriv[GET_SWZ(source->Swizzle, 0)]; |
||
325 | result[1] = deriv[GET_SWZ(source->Swizzle, 1)]; |
||
326 | result[2] = deriv[GET_SWZ(source->Swizzle, 2)]; |
||
327 | result[3] = deriv[GET_SWZ(source->Swizzle, 3)]; |
||
328 | |||
329 | if (source->Abs) { |
||
330 | result[0] = FABSF(result[0]); |
||
331 | result[1] = FABSF(result[1]); |
||
332 | result[2] = FABSF(result[2]); |
||
333 | result[3] = FABSF(result[3]); |
||
334 | } |
||
335 | if (source->Negate) { |
||
336 | ASSERT(source->Negate == NEGATE_XYZW); |
||
337 | result[0] = -result[0]; |
||
338 | result[1] = -result[1]; |
||
339 | result[2] = -result[2]; |
||
340 | result[3] = -result[3]; |
||
341 | } |
||
342 | } |
||
343 | else { |
||
344 | ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0); |
||
345 | } |
||
346 | } |
||
347 | |||
348 | |||
349 | /** |
||
350 | * As above, but only return result[0] element. |
||
351 | */ |
||
352 | static void |
||
353 | fetch_vector1(const struct prog_src_register *source, |
||
354 | const struct gl_program_machine *machine, GLfloat result[4]) |
||
355 | { |
||
356 | const GLfloat *src = get_src_register_pointer(source, machine); |
||
357 | ASSERT(src); |
||
358 | |||
359 | result[0] = src[GET_SWZ(source->Swizzle, 0)]; |
||
360 | |||
361 | if (source->Abs) { |
||
362 | result[0] = FABSF(result[0]); |
||
363 | } |
||
364 | if (source->Negate) { |
||
365 | result[0] = -result[0]; |
||
366 | } |
||
367 | } |
||
368 | |||
369 | |||
370 | static GLuint |
||
371 | fetch_vector1ui(const struct prog_src_register *source, |
||
372 | const struct gl_program_machine *machine) |
||
373 | { |
||
374 | const GLuint *src = (GLuint *) get_src_register_pointer(source, machine); |
||
375 | return src[GET_SWZ(source->Swizzle, 0)]; |
||
376 | } |
||
377 | |||
378 | |||
379 | /** |
||
380 | * Fetch texel from texture. Use partial derivatives when possible. |
||
381 | */ |
||
382 | static INLINE void |
||
383 | fetch_texel(struct gl_context *ctx, |
||
384 | const struct gl_program_machine *machine, |
||
385 | const struct prog_instruction *inst, |
||
386 | const GLfloat texcoord[4], GLfloat lodBias, |
||
387 | GLfloat color[4]) |
||
388 | { |
||
389 | const GLuint unit = machine->Samplers[inst->TexSrcUnit]; |
||
390 | |||
391 | /* Note: we only have the right derivatives for fragment input attribs. |
||
392 | */ |
||
393 | if (machine->NumDeriv > 0 && |
||
394 | inst->SrcReg[0].File == PROGRAM_INPUT && |
||
395 | inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) { |
||
396 | /* simple texture fetch for which we should have derivatives */ |
||
397 | GLuint attr = inst->SrcReg[0].Index; |
||
398 | machine->FetchTexelDeriv(ctx, texcoord, |
||
399 | machine->DerivX[attr], |
||
400 | machine->DerivY[attr], |
||
401 | lodBias, unit, color); |
||
402 | } |
||
403 | else { |
||
404 | machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color); |
||
405 | } |
||
406 | } |
||
407 | |||
408 | |||
409 | /** |
||
410 | * Test value against zero and return GT, LT, EQ or UN if NaN. |
||
411 | */ |
||
412 | static INLINE GLuint |
||
413 | generate_cc(float value) |
||
414 | { |
||
415 | if (value != value) |
||
416 | return COND_UN; /* NaN */ |
||
417 | if (value > 0.0F) |
||
418 | return COND_GT; |
||
419 | if (value < 0.0F) |
||
420 | return COND_LT; |
||
421 | return COND_EQ; |
||
422 | } |
||
423 | |||
424 | |||
425 | /** |
||
426 | * Test if the ccMaskRule is satisfied by the given condition code. |
||
427 | * Used to mask destination writes according to the current condition code. |
||
428 | */ |
||
429 | static INLINE GLboolean |
||
430 | test_cc(GLuint condCode, GLuint ccMaskRule) |
||
431 | { |
||
432 | switch (ccMaskRule) { |
||
433 | case COND_EQ: return (condCode == COND_EQ); |
||
434 | case COND_NE: return (condCode != COND_EQ); |
||
435 | case COND_LT: return (condCode == COND_LT); |
||
436 | case COND_GE: return (condCode == COND_GT || condCode == COND_EQ); |
||
437 | case COND_LE: return (condCode == COND_LT || condCode == COND_EQ); |
||
438 | case COND_GT: return (condCode == COND_GT); |
||
439 | case COND_TR: return GL_TRUE; |
||
440 | case COND_FL: return GL_FALSE; |
||
441 | default: return GL_TRUE; |
||
442 | } |
||
443 | } |
||
444 | |||
445 | |||
446 | /** |
||
447 | * Evaluate the 4 condition codes against a predicate and return GL_TRUE |
||
448 | * or GL_FALSE to indicate result. |
||
449 | */ |
||
450 | static INLINE GLboolean |
||
451 | eval_condition(const struct gl_program_machine *machine, |
||
452 | const struct prog_instruction *inst) |
||
453 | { |
||
454 | const GLuint swizzle = inst->DstReg.CondSwizzle; |
||
455 | const GLuint condMask = inst->DstReg.CondMask; |
||
456 | if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) || |
||
457 | test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) || |
||
458 | test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) || |
||
459 | test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) { |
||
460 | return GL_TRUE; |
||
461 | } |
||
462 | else { |
||
463 | return GL_FALSE; |
||
464 | } |
||
465 | } |
||
466 | |||
467 | |||
468 | |||
469 | /** |
||
470 | * Store 4 floats into a register. Observe the instructions saturate and |
||
471 | * set-condition-code flags. |
||
472 | */ |
||
473 | static void |
||
474 | store_vector4(const struct prog_instruction *inst, |
||
475 | struct gl_program_machine *machine, const GLfloat value[4]) |
||
476 | { |
||
477 | const struct prog_dst_register *dstReg = &(inst->DstReg); |
||
478 | const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE; |
||
479 | GLuint writeMask = dstReg->WriteMask; |
||
480 | GLfloat clampedValue[4]; |
||
481 | GLfloat *dst = get_dst_register_pointer(dstReg, machine); |
||
482 | |||
483 | #if 0 |
||
484 | if (value[0] > 1.0e10 || |
||
485 | IS_INF_OR_NAN(value[0]) || |
||
486 | IS_INF_OR_NAN(value[1]) || |
||
487 | IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3])) |
||
488 | printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]); |
||
489 | #endif |
||
490 | |||
491 | if (clamp) { |
||
492 | clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F); |
||
493 | clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F); |
||
494 | clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F); |
||
495 | clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F); |
||
496 | value = clampedValue; |
||
497 | } |
||
498 | |||
499 | if (dstReg->CondMask != COND_TR) { |
||
500 | /* condition codes may turn off some writes */ |
||
501 | if (writeMask & WRITEMASK_X) { |
||
502 | if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)], |
||
503 | dstReg->CondMask)) |
||
504 | writeMask &= ~WRITEMASK_X; |
||
505 | } |
||
506 | if (writeMask & WRITEMASK_Y) { |
||
507 | if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)], |
||
508 | dstReg->CondMask)) |
||
509 | writeMask &= ~WRITEMASK_Y; |
||
510 | } |
||
511 | if (writeMask & WRITEMASK_Z) { |
||
512 | if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)], |
||
513 | dstReg->CondMask)) |
||
514 | writeMask &= ~WRITEMASK_Z; |
||
515 | } |
||
516 | if (writeMask & WRITEMASK_W) { |
||
517 | if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)], |
||
518 | dstReg->CondMask)) |
||
519 | writeMask &= ~WRITEMASK_W; |
||
520 | } |
||
521 | } |
||
522 | |||
523 | #ifdef NAN_CHECK |
||
524 | assert(!IS_INF_OR_NAN(value[0])); |
||
525 | assert(!IS_INF_OR_NAN(value[0])); |
||
526 | assert(!IS_INF_OR_NAN(value[0])); |
||
527 | assert(!IS_INF_OR_NAN(value[0])); |
||
528 | #endif |
||
529 | |||
530 | if (writeMask & WRITEMASK_X) |
||
531 | dst[0] = value[0]; |
||
532 | if (writeMask & WRITEMASK_Y) |
||
533 | dst[1] = value[1]; |
||
534 | if (writeMask & WRITEMASK_Z) |
||
535 | dst[2] = value[2]; |
||
536 | if (writeMask & WRITEMASK_W) |
||
537 | dst[3] = value[3]; |
||
538 | |||
539 | if (inst->CondUpdate) { |
||
540 | if (writeMask & WRITEMASK_X) |
||
541 | machine->CondCodes[0] = generate_cc(value[0]); |
||
542 | if (writeMask & WRITEMASK_Y) |
||
543 | machine->CondCodes[1] = generate_cc(value[1]); |
||
544 | if (writeMask & WRITEMASK_Z) |
||
545 | machine->CondCodes[2] = generate_cc(value[2]); |
||
546 | if (writeMask & WRITEMASK_W) |
||
547 | machine->CondCodes[3] = generate_cc(value[3]); |
||
548 | #if DEBUG_PROG |
||
549 | printf("CondCodes=(%s,%s,%s,%s) for:\n", |
||
550 | _mesa_condcode_string(machine->CondCodes[0]), |
||
551 | _mesa_condcode_string(machine->CondCodes[1]), |
||
552 | _mesa_condcode_string(machine->CondCodes[2]), |
||
553 | _mesa_condcode_string(machine->CondCodes[3])); |
||
554 | #endif |
||
555 | } |
||
556 | } |
||
557 | |||
558 | |||
559 | /** |
||
560 | * Store 4 uints into a register. Observe the set-condition-code flags. |
||
561 | */ |
||
562 | static void |
||
563 | store_vector4ui(const struct prog_instruction *inst, |
||
564 | struct gl_program_machine *machine, const GLuint value[4]) |
||
565 | { |
||
566 | const struct prog_dst_register *dstReg = &(inst->DstReg); |
||
567 | GLuint writeMask = dstReg->WriteMask; |
||
568 | GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine); |
||
569 | |||
570 | if (dstReg->CondMask != COND_TR) { |
||
571 | /* condition codes may turn off some writes */ |
||
572 | if (writeMask & WRITEMASK_X) { |
||
573 | if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)], |
||
574 | dstReg->CondMask)) |
||
575 | writeMask &= ~WRITEMASK_X; |
||
576 | } |
||
577 | if (writeMask & WRITEMASK_Y) { |
||
578 | if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)], |
||
579 | dstReg->CondMask)) |
||
580 | writeMask &= ~WRITEMASK_Y; |
||
581 | } |
||
582 | if (writeMask & WRITEMASK_Z) { |
||
583 | if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)], |
||
584 | dstReg->CondMask)) |
||
585 | writeMask &= ~WRITEMASK_Z; |
||
586 | } |
||
587 | if (writeMask & WRITEMASK_W) { |
||
588 | if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)], |
||
589 | dstReg->CondMask)) |
||
590 | writeMask &= ~WRITEMASK_W; |
||
591 | } |
||
592 | } |
||
593 | |||
594 | if (writeMask & WRITEMASK_X) |
||
595 | dst[0] = value[0]; |
||
596 | if (writeMask & WRITEMASK_Y) |
||
597 | dst[1] = value[1]; |
||
598 | if (writeMask & WRITEMASK_Z) |
||
599 | dst[2] = value[2]; |
||
600 | if (writeMask & WRITEMASK_W) |
||
601 | dst[3] = value[3]; |
||
602 | |||
603 | if (inst->CondUpdate) { |
||
604 | if (writeMask & WRITEMASK_X) |
||
605 | machine->CondCodes[0] = generate_cc((float)value[0]); |
||
606 | if (writeMask & WRITEMASK_Y) |
||
607 | machine->CondCodes[1] = generate_cc((float)value[1]); |
||
608 | if (writeMask & WRITEMASK_Z) |
||
609 | machine->CondCodes[2] = generate_cc((float)value[2]); |
||
610 | if (writeMask & WRITEMASK_W) |
||
611 | machine->CondCodes[3] = generate_cc((float)value[3]); |
||
612 | #if DEBUG_PROG |
||
613 | printf("CondCodes=(%s,%s,%s,%s) for:\n", |
||
614 | _mesa_condcode_string(machine->CondCodes[0]), |
||
615 | _mesa_condcode_string(machine->CondCodes[1]), |
||
616 | _mesa_condcode_string(machine->CondCodes[2]), |
||
617 | _mesa_condcode_string(machine->CondCodes[3])); |
||
618 | #endif |
||
619 | } |
||
620 | } |
||
621 | |||
622 | |||
623 | |||
624 | /** |
||
625 | * Execute the given vertex/fragment program. |
||
626 | * |
||
627 | * \param ctx rendering context |
||
628 | * \param program the program to execute |
||
629 | * \param machine machine state (must be initialized) |
||
630 | * \return GL_TRUE if program completed or GL_FALSE if program executed KIL. |
||
631 | */ |
||
632 | GLboolean |
||
633 | _mesa_execute_program(struct gl_context * ctx, |
||
634 | const struct gl_program *program, |
||
635 | struct gl_program_machine *machine) |
||
636 | { |
||
637 | const GLuint numInst = program->NumInstructions; |
||
638 | const GLuint maxExec = 10000; |
||
639 | GLuint pc, numExec = 0; |
||
640 | |||
641 | machine->CurProgram = program; |
||
642 | |||
643 | if (DEBUG_PROG) { |
||
644 | printf("execute program %u --------------------\n", program->Id); |
||
645 | } |
||
646 | |||
647 | if (program->Target == GL_VERTEX_PROGRAM_ARB) { |
||
648 | machine->EnvParams = ctx->VertexProgram.Parameters; |
||
649 | } |
||
650 | else { |
||
651 | machine->EnvParams = ctx->FragmentProgram.Parameters; |
||
652 | } |
||
653 | |||
654 | for (pc = 0; pc < numInst; pc++) { |
||
655 | const struct prog_instruction *inst = program->Instructions + pc; |
||
656 | |||
657 | if (DEBUG_PROG) { |
||
658 | _mesa_print_instruction(inst); |
||
659 | } |
||
660 | |||
661 | switch (inst->Opcode) { |
||
662 | case OPCODE_ABS: |
||
663 | { |
||
664 | GLfloat a[4], result[4]; |
||
665 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
666 | result[0] = FABSF(a[0]); |
||
667 | result[1] = FABSF(a[1]); |
||
668 | result[2] = FABSF(a[2]); |
||
669 | result[3] = FABSF(a[3]); |
||
670 | store_vector4(inst, machine, result); |
||
671 | } |
||
672 | break; |
||
673 | case OPCODE_ADD: |
||
674 | { |
||
675 | GLfloat a[4], b[4], result[4]; |
||
676 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
677 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
678 | result[0] = a[0] + b[0]; |
||
679 | result[1] = a[1] + b[1]; |
||
680 | result[2] = a[2] + b[2]; |
||
681 | result[3] = a[3] + b[3]; |
||
682 | store_vector4(inst, machine, result); |
||
683 | if (DEBUG_PROG) { |
||
684 | printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n", |
||
685 | result[0], result[1], result[2], result[3], |
||
686 | a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); |
||
687 | } |
||
688 | } |
||
689 | break; |
||
690 | case OPCODE_AND: /* bitwise AND */ |
||
691 | { |
||
692 | GLuint a[4], b[4], result[4]; |
||
693 | fetch_vector4ui(&inst->SrcReg[0], machine, a); |
||
694 | fetch_vector4ui(&inst->SrcReg[1], machine, b); |
||
695 | result[0] = a[0] & b[0]; |
||
696 | result[1] = a[1] & b[1]; |
||
697 | result[2] = a[2] & b[2]; |
||
698 | result[3] = a[3] & b[3]; |
||
699 | store_vector4ui(inst, machine, result); |
||
700 | } |
||
701 | break; |
||
702 | case OPCODE_ARL: |
||
703 | { |
||
704 | GLfloat t[4]; |
||
705 | fetch_vector4(&inst->SrcReg[0], machine, t); |
||
706 | machine->AddressReg[0][0] = IFLOOR(t[0]); |
||
707 | if (DEBUG_PROG) { |
||
708 | printf("ARL %d\n", machine->AddressReg[0][0]); |
||
709 | } |
||
710 | } |
||
711 | break; |
||
712 | case OPCODE_BGNLOOP: |
||
713 | /* no-op */ |
||
714 | ASSERT(program->Instructions[inst->BranchTarget].Opcode |
||
715 | == OPCODE_ENDLOOP); |
||
716 | break; |
||
717 | case OPCODE_ENDLOOP: |
||
718 | /* subtract 1 here since pc is incremented by for(pc) loop */ |
||
719 | ASSERT(program->Instructions[inst->BranchTarget].Opcode |
||
720 | == OPCODE_BGNLOOP); |
||
721 | pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */ |
||
722 | break; |
||
723 | case OPCODE_BGNSUB: /* begin subroutine */ |
||
724 | break; |
||
725 | case OPCODE_ENDSUB: /* end subroutine */ |
||
726 | break; |
||
727 | case OPCODE_BRA: /* branch (conditional) */ |
||
728 | if (eval_condition(machine, inst)) { |
||
729 | /* take branch */ |
||
730 | /* Subtract 1 here since we'll do pc++ below */ |
||
731 | pc = inst->BranchTarget - 1; |
||
732 | } |
||
733 | break; |
||
734 | case OPCODE_BRK: /* break out of loop (conditional) */ |
||
735 | ASSERT(program->Instructions[inst->BranchTarget].Opcode |
||
736 | == OPCODE_ENDLOOP); |
||
737 | if (eval_condition(machine, inst)) { |
||
738 | /* break out of loop */ |
||
739 | /* pc++ at end of for-loop will put us after the ENDLOOP inst */ |
||
740 | pc = inst->BranchTarget; |
||
741 | } |
||
742 | break; |
||
743 | case OPCODE_CONT: /* continue loop (conditional) */ |
||
744 | ASSERT(program->Instructions[inst->BranchTarget].Opcode |
||
745 | == OPCODE_ENDLOOP); |
||
746 | if (eval_condition(machine, inst)) { |
||
747 | /* continue at ENDLOOP */ |
||
748 | /* Subtract 1 here since we'll do pc++ at end of for-loop */ |
||
749 | pc = inst->BranchTarget - 1; |
||
750 | } |
||
751 | break; |
||
752 | case OPCODE_CAL: /* Call subroutine (conditional) */ |
||
753 | if (eval_condition(machine, inst)) { |
||
754 | /* call the subroutine */ |
||
755 | if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) { |
||
756 | return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ |
||
757 | } |
||
758 | machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */ |
||
759 | /* Subtract 1 here since we'll do pc++ at end of for-loop */ |
||
760 | pc = inst->BranchTarget - 1; |
||
761 | } |
||
762 | break; |
||
763 | case OPCODE_CMP: |
||
764 | { |
||
765 | GLfloat a[4], b[4], c[4], result[4]; |
||
766 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
767 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
768 | fetch_vector4(&inst->SrcReg[2], machine, c); |
||
769 | result[0] = a[0] < 0.0F ? b[0] : c[0]; |
||
770 | result[1] = a[1] < 0.0F ? b[1] : c[1]; |
||
771 | result[2] = a[2] < 0.0F ? b[2] : c[2]; |
||
772 | result[3] = a[3] < 0.0F ? b[3] : c[3]; |
||
773 | store_vector4(inst, machine, result); |
||
774 | if (DEBUG_PROG) { |
||
775 | printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n", |
||
776 | result[0], result[1], result[2], result[3], |
||
777 | a[0], a[1], a[2], a[3], |
||
778 | b[0], b[1], b[2], b[3], |
||
779 | c[0], c[1], c[2], c[3]); |
||
780 | } |
||
781 | } |
||
782 | break; |
||
783 | case OPCODE_COS: |
||
784 | { |
||
785 | GLfloat a[4], result[4]; |
||
786 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
787 | result[0] = result[1] = result[2] = result[3] |
||
788 | = (GLfloat) cos(a[0]); |
||
789 | store_vector4(inst, machine, result); |
||
790 | } |
||
791 | break; |
||
792 | case OPCODE_DDX: /* Partial derivative with respect to X */ |
||
793 | { |
||
794 | GLfloat result[4]; |
||
795 | fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine, |
||
796 | 'X', result); |
||
797 | store_vector4(inst, machine, result); |
||
798 | } |
||
799 | break; |
||
800 | case OPCODE_DDY: /* Partial derivative with respect to Y */ |
||
801 | { |
||
802 | GLfloat result[4]; |
||
803 | fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine, |
||
804 | 'Y', result); |
||
805 | store_vector4(inst, machine, result); |
||
806 | } |
||
807 | break; |
||
808 | case OPCODE_DP2: |
||
809 | { |
||
810 | GLfloat a[4], b[4], result[4]; |
||
811 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
812 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
813 | result[0] = result[1] = result[2] = result[3] = DOT2(a, b); |
||
814 | store_vector4(inst, machine, result); |
||
815 | if (DEBUG_PROG) { |
||
816 | printf("DP2 %g = (%g %g) . (%g %g)\n", |
||
817 | result[0], a[0], a[1], b[0], b[1]); |
||
818 | } |
||
819 | } |
||
820 | break; |
||
821 | case OPCODE_DP2A: |
||
822 | { |
||
823 | GLfloat a[4], b[4], c, result[4]; |
||
824 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
825 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
826 | fetch_vector1(&inst->SrcReg[1], machine, &c); |
||
827 | result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c; |
||
828 | store_vector4(inst, machine, result); |
||
829 | if (DEBUG_PROG) { |
||
830 | printf("DP2A %g = (%g %g) . (%g %g) + %g\n", |
||
831 | result[0], a[0], a[1], b[0], b[1], c); |
||
832 | } |
||
833 | } |
||
834 | break; |
||
835 | case OPCODE_DP3: |
||
836 | { |
||
837 | GLfloat a[4], b[4], result[4]; |
||
838 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
839 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
840 | result[0] = result[1] = result[2] = result[3] = DOT3(a, b); |
||
841 | store_vector4(inst, machine, result); |
||
842 | if (DEBUG_PROG) { |
||
843 | printf("DP3 %g = (%g %g %g) . (%g %g %g)\n", |
||
844 | result[0], a[0], a[1], a[2], b[0], b[1], b[2]); |
||
845 | } |
||
846 | } |
||
847 | break; |
||
848 | case OPCODE_DP4: |
||
849 | { |
||
850 | GLfloat a[4], b[4], result[4]; |
||
851 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
852 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
853 | result[0] = result[1] = result[2] = result[3] = DOT4(a, b); |
||
854 | store_vector4(inst, machine, result); |
||
855 | if (DEBUG_PROG) { |
||
856 | printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n", |
||
857 | result[0], a[0], a[1], a[2], a[3], |
||
858 | b[0], b[1], b[2], b[3]); |
||
859 | } |
||
860 | } |
||
861 | break; |
||
862 | case OPCODE_DPH: |
||
863 | { |
||
864 | GLfloat a[4], b[4], result[4]; |
||
865 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
866 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
867 | result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3]; |
||
868 | store_vector4(inst, machine, result); |
||
869 | } |
||
870 | break; |
||
871 | case OPCODE_DST: /* Distance vector */ |
||
872 | { |
||
873 | GLfloat a[4], b[4], result[4]; |
||
874 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
875 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
876 | result[0] = 1.0F; |
||
877 | result[1] = a[1] * b[1]; |
||
878 | result[2] = a[2]; |
||
879 | result[3] = b[3]; |
||
880 | store_vector4(inst, machine, result); |
||
881 | } |
||
882 | break; |
||
883 | case OPCODE_EXP: |
||
884 | { |
||
885 | GLfloat t[4], q[4], floor_t0; |
||
886 | fetch_vector1(&inst->SrcReg[0], machine, t); |
||
887 | floor_t0 = FLOORF(t[0]); |
||
888 | if (floor_t0 > FLT_MAX_EXP) { |
||
889 | SET_POS_INFINITY(q[0]); |
||
890 | SET_POS_INFINITY(q[2]); |
||
891 | } |
||
892 | else if (floor_t0 < FLT_MIN_EXP) { |
||
893 | q[0] = 0.0F; |
||
894 | q[2] = 0.0F; |
||
895 | } |
||
896 | else { |
||
897 | q[0] = LDEXPF(1.0, (int) floor_t0); |
||
898 | /* Note: GL_NV_vertex_program expects |
||
899 | * result.z = result.x * APPX(result.y) |
||
900 | * We do what the ARB extension says. |
||
901 | */ |
||
902 | q[2] = (GLfloat) pow(2.0, t[0]); |
||
903 | } |
||
904 | q[1] = t[0] - floor_t0; |
||
905 | q[3] = 1.0F; |
||
906 | store_vector4( inst, machine, q ); |
||
907 | } |
||
908 | break; |
||
909 | case OPCODE_EX2: /* Exponential base 2 */ |
||
910 | { |
||
911 | GLfloat a[4], result[4], val; |
||
912 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
913 | val = (GLfloat) pow(2.0, a[0]); |
||
914 | /* |
||
915 | if (IS_INF_OR_NAN(val)) |
||
916 | val = 1.0e10; |
||
917 | */ |
||
918 | result[0] = result[1] = result[2] = result[3] = val; |
||
919 | store_vector4(inst, machine, result); |
||
920 | } |
||
921 | break; |
||
922 | case OPCODE_FLR: |
||
923 | { |
||
924 | GLfloat a[4], result[4]; |
||
925 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
926 | result[0] = FLOORF(a[0]); |
||
927 | result[1] = FLOORF(a[1]); |
||
928 | result[2] = FLOORF(a[2]); |
||
929 | result[3] = FLOORF(a[3]); |
||
930 | store_vector4(inst, machine, result); |
||
931 | } |
||
932 | break; |
||
933 | case OPCODE_FRC: |
||
934 | { |
||
935 | GLfloat a[4], result[4]; |
||
936 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
937 | result[0] = a[0] - FLOORF(a[0]); |
||
938 | result[1] = a[1] - FLOORF(a[1]); |
||
939 | result[2] = a[2] - FLOORF(a[2]); |
||
940 | result[3] = a[3] - FLOORF(a[3]); |
||
941 | store_vector4(inst, machine, result); |
||
942 | } |
||
943 | break; |
||
944 | case OPCODE_IF: |
||
945 | { |
||
946 | GLboolean cond; |
||
947 | ASSERT(program->Instructions[inst->BranchTarget].Opcode |
||
948 | == OPCODE_ELSE || |
||
949 | program->Instructions[inst->BranchTarget].Opcode |
||
950 | == OPCODE_ENDIF); |
||
951 | /* eval condition */ |
||
952 | if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { |
||
953 | GLfloat a[4]; |
||
954 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
955 | cond = (a[0] != 0.0); |
||
956 | } |
||
957 | else { |
||
958 | cond = eval_condition(machine, inst); |
||
959 | } |
||
960 | if (DEBUG_PROG) { |
||
961 | printf("IF: %d\n", cond); |
||
962 | } |
||
963 | /* do if/else */ |
||
964 | if (cond) { |
||
965 | /* do if-clause (just continue execution) */ |
||
966 | } |
||
967 | else { |
||
968 | /* go to the instruction after ELSE or ENDIF */ |
||
969 | assert(inst->BranchTarget >= 0); |
||
970 | pc = inst->BranchTarget; |
||
971 | } |
||
972 | } |
||
973 | break; |
||
974 | case OPCODE_ELSE: |
||
975 | /* goto ENDIF */ |
||
976 | ASSERT(program->Instructions[inst->BranchTarget].Opcode |
||
977 | == OPCODE_ENDIF); |
||
978 | assert(inst->BranchTarget >= 0); |
||
979 | pc = inst->BranchTarget; |
||
980 | break; |
||
981 | case OPCODE_ENDIF: |
||
982 | /* nothing */ |
||
983 | break; |
||
984 | case OPCODE_KIL_NV: /* NV_f_p only (conditional) */ |
||
985 | if (eval_condition(machine, inst)) { |
||
986 | return GL_FALSE; |
||
987 | } |
||
988 | break; |
||
989 | case OPCODE_KIL: /* ARB_f_p only */ |
||
990 | { |
||
991 | GLfloat a[4]; |
||
992 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
993 | if (DEBUG_PROG) { |
||
994 | printf("KIL if (%g %g %g %g) <= 0.0\n", |
||
995 | a[0], a[1], a[2], a[3]); |
||
996 | } |
||
997 | |||
998 | if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) { |
||
999 | return GL_FALSE; |
||
1000 | } |
||
1001 | } |
||
1002 | break; |
||
1003 | case OPCODE_LG2: /* log base 2 */ |
||
1004 | { |
||
1005 | GLfloat a[4], result[4], val; |
||
1006 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
1007 | /* The fast LOG2 macro doesn't meet the precision requirements. |
||
1008 | */ |
||
1009 | if (a[0] == 0.0F) { |
||
1010 | val = -FLT_MAX; |
||
1011 | } |
||
1012 | else { |
||
1013 | val = (float)(log(a[0]) * 1.442695F); |
||
1014 | } |
||
1015 | result[0] = result[1] = result[2] = result[3] = val; |
||
1016 | store_vector4(inst, machine, result); |
||
1017 | } |
||
1018 | break; |
||
1019 | case OPCODE_LIT: |
||
1020 | { |
||
1021 | const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */ |
||
1022 | GLfloat a[4], result[4]; |
||
1023 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1024 | a[0] = MAX2(a[0], 0.0F); |
||
1025 | a[1] = MAX2(a[1], 0.0F); |
||
1026 | /* XXX ARB version clamps a[3], NV version doesn't */ |
||
1027 | a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon)); |
||
1028 | result[0] = 1.0F; |
||
1029 | result[1] = a[0]; |
||
1030 | /* XXX we could probably just use pow() here */ |
||
1031 | if (a[0] > 0.0F) { |
||
1032 | if (a[1] == 0.0 && a[3] == 0.0) |
||
1033 | result[2] = 1.0F; |
||
1034 | else |
||
1035 | result[2] = (GLfloat) pow(a[1], a[3]); |
||
1036 | } |
||
1037 | else { |
||
1038 | result[2] = 0.0F; |
||
1039 | } |
||
1040 | result[3] = 1.0F; |
||
1041 | store_vector4(inst, machine, result); |
||
1042 | if (DEBUG_PROG) { |
||
1043 | printf("LIT (%g %g %g %g) : (%g %g %g %g)\n", |
||
1044 | result[0], result[1], result[2], result[3], |
||
1045 | a[0], a[1], a[2], a[3]); |
||
1046 | } |
||
1047 | } |
||
1048 | break; |
||
1049 | case OPCODE_LOG: |
||
1050 | { |
||
1051 | GLfloat t[4], q[4], abs_t0; |
||
1052 | fetch_vector1(&inst->SrcReg[0], machine, t); |
||
1053 | abs_t0 = FABSF(t[0]); |
||
1054 | if (abs_t0 != 0.0F) { |
||
1055 | /* Since we really can't handle infinite values on VMS |
||
1056 | * like other OSes we'll use __MAXFLOAT to represent |
||
1057 | * infinity. This may need some tweaking. |
||
1058 | */ |
||
1059 | #ifdef VMS |
||
1060 | if (abs_t0 == __MAXFLOAT) |
||
1061 | #else |
||
1062 | if (IS_INF_OR_NAN(abs_t0)) |
||
1063 | #endif |
||
1064 | { |
||
1065 | SET_POS_INFINITY(q[0]); |
||
1066 | q[1] = 1.0F; |
||
1067 | SET_POS_INFINITY(q[2]); |
||
1068 | } |
||
1069 | else { |
||
1070 | int exponent; |
||
1071 | GLfloat mantissa = FREXPF(t[0], &exponent); |
||
1072 | q[0] = (GLfloat) (exponent - 1); |
||
1073 | q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */ |
||
1074 | |||
1075 | /* The fast LOG2 macro doesn't meet the precision |
||
1076 | * requirements. |
||
1077 | */ |
||
1078 | q[2] = (float)(log(t[0]) * 1.442695F); |
||
1079 | } |
||
1080 | } |
||
1081 | else { |
||
1082 | SET_NEG_INFINITY(q[0]); |
||
1083 | q[1] = 1.0F; |
||
1084 | SET_NEG_INFINITY(q[2]); |
||
1085 | } |
||
1086 | q[3] = 1.0; |
||
1087 | store_vector4(inst, machine, q); |
||
1088 | } |
||
1089 | break; |
||
1090 | case OPCODE_LRP: |
||
1091 | { |
||
1092 | GLfloat a[4], b[4], c[4], result[4]; |
||
1093 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1094 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1095 | fetch_vector4(&inst->SrcReg[2], machine, c); |
||
1096 | result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0]; |
||
1097 | result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1]; |
||
1098 | result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2]; |
||
1099 | result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3]; |
||
1100 | store_vector4(inst, machine, result); |
||
1101 | if (DEBUG_PROG) { |
||
1102 | printf("LRP (%g %g %g %g) = (%g %g %g %g), " |
||
1103 | "(%g %g %g %g), (%g %g %g %g)\n", |
||
1104 | result[0], result[1], result[2], result[3], |
||
1105 | a[0], a[1], a[2], a[3], |
||
1106 | b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]); |
||
1107 | } |
||
1108 | } |
||
1109 | break; |
||
1110 | case OPCODE_MAD: |
||
1111 | { |
||
1112 | GLfloat a[4], b[4], c[4], result[4]; |
||
1113 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1114 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1115 | fetch_vector4(&inst->SrcReg[2], machine, c); |
||
1116 | result[0] = a[0] * b[0] + c[0]; |
||
1117 | result[1] = a[1] * b[1] + c[1]; |
||
1118 | result[2] = a[2] * b[2] + c[2]; |
||
1119 | result[3] = a[3] * b[3] + c[3]; |
||
1120 | store_vector4(inst, machine, result); |
||
1121 | if (DEBUG_PROG) { |
||
1122 | printf("MAD (%g %g %g %g) = (%g %g %g %g) * " |
||
1123 | "(%g %g %g %g) + (%g %g %g %g)\n", |
||
1124 | result[0], result[1], result[2], result[3], |
||
1125 | a[0], a[1], a[2], a[3], |
||
1126 | b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]); |
||
1127 | } |
||
1128 | } |
||
1129 | break; |
||
1130 | case OPCODE_MAX: |
||
1131 | { |
||
1132 | GLfloat a[4], b[4], result[4]; |
||
1133 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1134 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1135 | result[0] = MAX2(a[0], b[0]); |
||
1136 | result[1] = MAX2(a[1], b[1]); |
||
1137 | result[2] = MAX2(a[2], b[2]); |
||
1138 | result[3] = MAX2(a[3], b[3]); |
||
1139 | store_vector4(inst, machine, result); |
||
1140 | if (DEBUG_PROG) { |
||
1141 | printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n", |
||
1142 | result[0], result[1], result[2], result[3], |
||
1143 | a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); |
||
1144 | } |
||
1145 | } |
||
1146 | break; |
||
1147 | case OPCODE_MIN: |
||
1148 | { |
||
1149 | GLfloat a[4], b[4], result[4]; |
||
1150 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1151 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1152 | result[0] = MIN2(a[0], b[0]); |
||
1153 | result[1] = MIN2(a[1], b[1]); |
||
1154 | result[2] = MIN2(a[2], b[2]); |
||
1155 | result[3] = MIN2(a[3], b[3]); |
||
1156 | store_vector4(inst, machine, result); |
||
1157 | } |
||
1158 | break; |
||
1159 | case OPCODE_MOV: |
||
1160 | { |
||
1161 | GLfloat result[4]; |
||
1162 | fetch_vector4(&inst->SrcReg[0], machine, result); |
||
1163 | store_vector4(inst, machine, result); |
||
1164 | if (DEBUG_PROG) { |
||
1165 | printf("MOV (%g %g %g %g)\n", |
||
1166 | result[0], result[1], result[2], result[3]); |
||
1167 | } |
||
1168 | } |
||
1169 | break; |
||
1170 | case OPCODE_MUL: |
||
1171 | { |
||
1172 | GLfloat a[4], b[4], result[4]; |
||
1173 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1174 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1175 | result[0] = a[0] * b[0]; |
||
1176 | result[1] = a[1] * b[1]; |
||
1177 | result[2] = a[2] * b[2]; |
||
1178 | result[3] = a[3] * b[3]; |
||
1179 | store_vector4(inst, machine, result); |
||
1180 | if (DEBUG_PROG) { |
||
1181 | printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n", |
||
1182 | result[0], result[1], result[2], result[3], |
||
1183 | a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); |
||
1184 | } |
||
1185 | } |
||
1186 | break; |
||
1187 | case OPCODE_NOISE1: |
||
1188 | { |
||
1189 | GLfloat a[4], result[4]; |
||
1190 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
1191 | result[0] = |
||
1192 | result[1] = |
||
1193 | result[2] = |
||
1194 | result[3] = _mesa_noise1(a[0]); |
||
1195 | store_vector4(inst, machine, result); |
||
1196 | } |
||
1197 | break; |
||
1198 | case OPCODE_NOISE2: |
||
1199 | { |
||
1200 | GLfloat a[4], result[4]; |
||
1201 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1202 | result[0] = |
||
1203 | result[1] = |
||
1204 | result[2] = result[3] = _mesa_noise2(a[0], a[1]); |
||
1205 | store_vector4(inst, machine, result); |
||
1206 | } |
||
1207 | break; |
||
1208 | case OPCODE_NOISE3: |
||
1209 | { |
||
1210 | GLfloat a[4], result[4]; |
||
1211 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1212 | result[0] = |
||
1213 | result[1] = |
||
1214 | result[2] = |
||
1215 | result[3] = _mesa_noise3(a[0], a[1], a[2]); |
||
1216 | store_vector4(inst, machine, result); |
||
1217 | } |
||
1218 | break; |
||
1219 | case OPCODE_NOISE4: |
||
1220 | { |
||
1221 | GLfloat a[4], result[4]; |
||
1222 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1223 | result[0] = |
||
1224 | result[1] = |
||
1225 | result[2] = |
||
1226 | result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]); |
||
1227 | store_vector4(inst, machine, result); |
||
1228 | } |
||
1229 | break; |
||
1230 | case OPCODE_NOP: |
||
1231 | break; |
||
1232 | case OPCODE_NOT: /* bitwise NOT */ |
||
1233 | { |
||
1234 | GLuint a[4], result[4]; |
||
1235 | fetch_vector4ui(&inst->SrcReg[0], machine, a); |
||
1236 | result[0] = ~a[0]; |
||
1237 | result[1] = ~a[1]; |
||
1238 | result[2] = ~a[2]; |
||
1239 | result[3] = ~a[3]; |
||
1240 | store_vector4ui(inst, machine, result); |
||
1241 | } |
||
1242 | break; |
||
1243 | case OPCODE_NRM3: /* 3-component normalization */ |
||
1244 | { |
||
1245 | GLfloat a[4], result[4]; |
||
1246 | GLfloat tmp; |
||
1247 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1248 | tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2]; |
||
1249 | if (tmp != 0.0F) |
||
1250 | tmp = INV_SQRTF(tmp); |
||
1251 | result[0] = tmp * a[0]; |
||
1252 | result[1] = tmp * a[1]; |
||
1253 | result[2] = tmp * a[2]; |
||
1254 | result[3] = 0.0; /* undefined, but prevent valgrind warnings */ |
||
1255 | store_vector4(inst, machine, result); |
||
1256 | } |
||
1257 | break; |
||
1258 | case OPCODE_NRM4: /* 4-component normalization */ |
||
1259 | { |
||
1260 | GLfloat a[4], result[4]; |
||
1261 | GLfloat tmp; |
||
1262 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1263 | tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3]; |
||
1264 | if (tmp != 0.0F) |
||
1265 | tmp = INV_SQRTF(tmp); |
||
1266 | result[0] = tmp * a[0]; |
||
1267 | result[1] = tmp * a[1]; |
||
1268 | result[2] = tmp * a[2]; |
||
1269 | result[3] = tmp * a[3]; |
||
1270 | store_vector4(inst, machine, result); |
||
1271 | } |
||
1272 | break; |
||
1273 | case OPCODE_OR: /* bitwise OR */ |
||
1274 | { |
||
1275 | GLuint a[4], b[4], result[4]; |
||
1276 | fetch_vector4ui(&inst->SrcReg[0], machine, a); |
||
1277 | fetch_vector4ui(&inst->SrcReg[1], machine, b); |
||
1278 | result[0] = a[0] | b[0]; |
||
1279 | result[1] = a[1] | b[1]; |
||
1280 | result[2] = a[2] | b[2]; |
||
1281 | result[3] = a[3] | b[3]; |
||
1282 | store_vector4ui(inst, machine, result); |
||
1283 | } |
||
1284 | break; |
||
1285 | case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */ |
||
1286 | { |
||
1287 | GLfloat a[4]; |
||
1288 | GLuint result[4]; |
||
1289 | GLhalfNV hx, hy; |
||
1290 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1291 | hx = _mesa_float_to_half(a[0]); |
||
1292 | hy = _mesa_float_to_half(a[1]); |
||
1293 | result[0] = |
||
1294 | result[1] = |
||
1295 | result[2] = |
||
1296 | result[3] = hx | (hy << 16); |
||
1297 | store_vector4ui(inst, machine, result); |
||
1298 | } |
||
1299 | break; |
||
1300 | case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */ |
||
1301 | { |
||
1302 | GLfloat a[4]; |
||
1303 | GLuint result[4], usx, usy; |
||
1304 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1305 | a[0] = CLAMP(a[0], 0.0F, 1.0F); |
||
1306 | a[1] = CLAMP(a[1], 0.0F, 1.0F); |
||
1307 | usx = IROUND(a[0] * 65535.0F); |
||
1308 | usy = IROUND(a[1] * 65535.0F); |
||
1309 | result[0] = |
||
1310 | result[1] = |
||
1311 | result[2] = |
||
1312 | result[3] = usx | (usy << 16); |
||
1313 | store_vector4ui(inst, machine, result); |
||
1314 | } |
||
1315 | break; |
||
1316 | case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */ |
||
1317 | { |
||
1318 | GLfloat a[4]; |
||
1319 | GLuint result[4], ubx, uby, ubz, ubw; |
||
1320 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1321 | a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F); |
||
1322 | a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F); |
||
1323 | a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F); |
||
1324 | a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F); |
||
1325 | ubx = IROUND(127.0F * a[0] + 128.0F); |
||
1326 | uby = IROUND(127.0F * a[1] + 128.0F); |
||
1327 | ubz = IROUND(127.0F * a[2] + 128.0F); |
||
1328 | ubw = IROUND(127.0F * a[3] + 128.0F); |
||
1329 | result[0] = |
||
1330 | result[1] = |
||
1331 | result[2] = |
||
1332 | result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24); |
||
1333 | store_vector4ui(inst, machine, result); |
||
1334 | } |
||
1335 | break; |
||
1336 | case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */ |
||
1337 | { |
||
1338 | GLfloat a[4]; |
||
1339 | GLuint result[4], ubx, uby, ubz, ubw; |
||
1340 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1341 | a[0] = CLAMP(a[0], 0.0F, 1.0F); |
||
1342 | a[1] = CLAMP(a[1], 0.0F, 1.0F); |
||
1343 | a[2] = CLAMP(a[2], 0.0F, 1.0F); |
||
1344 | a[3] = CLAMP(a[3], 0.0F, 1.0F); |
||
1345 | ubx = IROUND(255.0F * a[0]); |
||
1346 | uby = IROUND(255.0F * a[1]); |
||
1347 | ubz = IROUND(255.0F * a[2]); |
||
1348 | ubw = IROUND(255.0F * a[3]); |
||
1349 | result[0] = |
||
1350 | result[1] = |
||
1351 | result[2] = |
||
1352 | result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24); |
||
1353 | store_vector4ui(inst, machine, result); |
||
1354 | } |
||
1355 | break; |
||
1356 | case OPCODE_POW: |
||
1357 | { |
||
1358 | GLfloat a[4], b[4], result[4]; |
||
1359 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
1360 | fetch_vector1(&inst->SrcReg[1], machine, b); |
||
1361 | result[0] = result[1] = result[2] = result[3] |
||
1362 | = (GLfloat) pow(a[0], b[0]); |
||
1363 | store_vector4(inst, machine, result); |
||
1364 | } |
||
1365 | break; |
||
1366 | case OPCODE_RCC: /* clamped reciprocal */ |
||
1367 | { |
||
1368 | const float largest = 1.884467e+19, smallest = 5.42101e-20; |
||
1369 | GLfloat a[4], r, result[4]; |
||
1370 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
1371 | if (DEBUG_PROG) { |
||
1372 | if (a[0] == 0) |
||
1373 | printf("RCC(0)\n"); |
||
1374 | else if (IS_INF_OR_NAN(a[0])) |
||
1375 | printf("RCC(inf)\n"); |
||
1376 | } |
||
1377 | if (a[0] == 1.0F) { |
||
1378 | r = 1.0F; |
||
1379 | } |
||
1380 | else { |
||
1381 | r = 1.0F / a[0]; |
||
1382 | } |
||
1383 | if (positive(r)) { |
||
1384 | if (r > largest) { |
||
1385 | r = largest; |
||
1386 | } |
||
1387 | else if (r < smallest) { |
||
1388 | r = smallest; |
||
1389 | } |
||
1390 | } |
||
1391 | else { |
||
1392 | if (r < -largest) { |
||
1393 | r = -largest; |
||
1394 | } |
||
1395 | else if (r > -smallest) { |
||
1396 | r = -smallest; |
||
1397 | } |
||
1398 | } |
||
1399 | result[0] = result[1] = result[2] = result[3] = r; |
||
1400 | store_vector4(inst, machine, result); |
||
1401 | } |
||
1402 | break; |
||
1403 | |||
1404 | case OPCODE_RCP: |
||
1405 | { |
||
1406 | GLfloat a[4], result[4]; |
||
1407 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
1408 | if (DEBUG_PROG) { |
||
1409 | if (a[0] == 0) |
||
1410 | printf("RCP(0)\n"); |
||
1411 | else if (IS_INF_OR_NAN(a[0])) |
||
1412 | printf("RCP(inf)\n"); |
||
1413 | } |
||
1414 | result[0] = result[1] = result[2] = result[3] = 1.0F / a[0]; |
||
1415 | store_vector4(inst, machine, result); |
||
1416 | } |
||
1417 | break; |
||
1418 | case OPCODE_RET: /* return from subroutine (conditional) */ |
||
1419 | if (eval_condition(machine, inst)) { |
||
1420 | if (machine->StackDepth == 0) { |
||
1421 | return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ |
||
1422 | } |
||
1423 | /* subtract one because of pc++ in the for loop */ |
||
1424 | pc = machine->CallStack[--machine->StackDepth] - 1; |
||
1425 | } |
||
1426 | break; |
||
1427 | case OPCODE_RFL: /* reflection vector */ |
||
1428 | { |
||
1429 | GLfloat axis[4], dir[4], result[4], tmpX, tmpW; |
||
1430 | fetch_vector4(&inst->SrcReg[0], machine, axis); |
||
1431 | fetch_vector4(&inst->SrcReg[1], machine, dir); |
||
1432 | tmpW = DOT3(axis, axis); |
||
1433 | tmpX = (2.0F * DOT3(axis, dir)) / tmpW; |
||
1434 | result[0] = tmpX * axis[0] - dir[0]; |
||
1435 | result[1] = tmpX * axis[1] - dir[1]; |
||
1436 | result[2] = tmpX * axis[2] - dir[2]; |
||
1437 | /* result[3] is never written! XXX enforce in parser! */ |
||
1438 | store_vector4(inst, machine, result); |
||
1439 | } |
||
1440 | break; |
||
1441 | case OPCODE_RSQ: /* 1 / sqrt() */ |
||
1442 | { |
||
1443 | GLfloat a[4], result[4]; |
||
1444 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
1445 | a[0] = FABSF(a[0]); |
||
1446 | result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]); |
||
1447 | store_vector4(inst, machine, result); |
||
1448 | if (DEBUG_PROG) { |
||
1449 | printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]); |
||
1450 | } |
||
1451 | } |
||
1452 | break; |
||
1453 | case OPCODE_SCS: /* sine and cos */ |
||
1454 | { |
||
1455 | GLfloat a[4], result[4]; |
||
1456 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
1457 | result[0] = (GLfloat) cos(a[0]); |
||
1458 | result[1] = (GLfloat) sin(a[0]); |
||
1459 | result[2] = 0.0; /* undefined! */ |
||
1460 | result[3] = 0.0; /* undefined! */ |
||
1461 | store_vector4(inst, machine, result); |
||
1462 | } |
||
1463 | break; |
||
1464 | case OPCODE_SEQ: /* set on equal */ |
||
1465 | { |
||
1466 | GLfloat a[4], b[4], result[4]; |
||
1467 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1468 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1469 | result[0] = (a[0] == b[0]) ? 1.0F : 0.0F; |
||
1470 | result[1] = (a[1] == b[1]) ? 1.0F : 0.0F; |
||
1471 | result[2] = (a[2] == b[2]) ? 1.0F : 0.0F; |
||
1472 | result[3] = (a[3] == b[3]) ? 1.0F : 0.0F; |
||
1473 | store_vector4(inst, machine, result); |
||
1474 | if (DEBUG_PROG) { |
||
1475 | printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n", |
||
1476 | result[0], result[1], result[2], result[3], |
||
1477 | a[0], a[1], a[2], a[3], |
||
1478 | b[0], b[1], b[2], b[3]); |
||
1479 | } |
||
1480 | } |
||
1481 | break; |
||
1482 | case OPCODE_SFL: /* set false, operands ignored */ |
||
1483 | { |
||
1484 | static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; |
||
1485 | store_vector4(inst, machine, result); |
||
1486 | } |
||
1487 | break; |
||
1488 | case OPCODE_SGE: /* set on greater or equal */ |
||
1489 | { |
||
1490 | GLfloat a[4], b[4], result[4]; |
||
1491 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1492 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1493 | result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F; |
||
1494 | result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F; |
||
1495 | result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F; |
||
1496 | result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F; |
||
1497 | store_vector4(inst, machine, result); |
||
1498 | if (DEBUG_PROG) { |
||
1499 | printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n", |
||
1500 | result[0], result[1], result[2], result[3], |
||
1501 | a[0], a[1], a[2], a[3], |
||
1502 | b[0], b[1], b[2], b[3]); |
||
1503 | } |
||
1504 | } |
||
1505 | break; |
||
1506 | case OPCODE_SGT: /* set on greater */ |
||
1507 | { |
||
1508 | GLfloat a[4], b[4], result[4]; |
||
1509 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1510 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1511 | result[0] = (a[0] > b[0]) ? 1.0F : 0.0F; |
||
1512 | result[1] = (a[1] > b[1]) ? 1.0F : 0.0F; |
||
1513 | result[2] = (a[2] > b[2]) ? 1.0F : 0.0F; |
||
1514 | result[3] = (a[3] > b[3]) ? 1.0F : 0.0F; |
||
1515 | store_vector4(inst, machine, result); |
||
1516 | if (DEBUG_PROG) { |
||
1517 | printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n", |
||
1518 | result[0], result[1], result[2], result[3], |
||
1519 | a[0], a[1], a[2], a[3], |
||
1520 | b[0], b[1], b[2], b[3]); |
||
1521 | } |
||
1522 | } |
||
1523 | break; |
||
1524 | case OPCODE_SIN: |
||
1525 | { |
||
1526 | GLfloat a[4], result[4]; |
||
1527 | fetch_vector1(&inst->SrcReg[0], machine, a); |
||
1528 | result[0] = result[1] = result[2] = result[3] |
||
1529 | = (GLfloat) sin(a[0]); |
||
1530 | store_vector4(inst, machine, result); |
||
1531 | } |
||
1532 | break; |
||
1533 | case OPCODE_SLE: /* set on less or equal */ |
||
1534 | { |
||
1535 | GLfloat a[4], b[4], result[4]; |
||
1536 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1537 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1538 | result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F; |
||
1539 | result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F; |
||
1540 | result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F; |
||
1541 | result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F; |
||
1542 | store_vector4(inst, machine, result); |
||
1543 | if (DEBUG_PROG) { |
||
1544 | printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n", |
||
1545 | result[0], result[1], result[2], result[3], |
||
1546 | a[0], a[1], a[2], a[3], |
||
1547 | b[0], b[1], b[2], b[3]); |
||
1548 | } |
||
1549 | } |
||
1550 | break; |
||
1551 | case OPCODE_SLT: /* set on less */ |
||
1552 | { |
||
1553 | GLfloat a[4], b[4], result[4]; |
||
1554 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1555 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1556 | result[0] = (a[0] < b[0]) ? 1.0F : 0.0F; |
||
1557 | result[1] = (a[1] < b[1]) ? 1.0F : 0.0F; |
||
1558 | result[2] = (a[2] < b[2]) ? 1.0F : 0.0F; |
||
1559 | result[3] = (a[3] < b[3]) ? 1.0F : 0.0F; |
||
1560 | store_vector4(inst, machine, result); |
||
1561 | if (DEBUG_PROG) { |
||
1562 | printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n", |
||
1563 | result[0], result[1], result[2], result[3], |
||
1564 | a[0], a[1], a[2], a[3], |
||
1565 | b[0], b[1], b[2], b[3]); |
||
1566 | } |
||
1567 | } |
||
1568 | break; |
||
1569 | case OPCODE_SNE: /* set on not equal */ |
||
1570 | { |
||
1571 | GLfloat a[4], b[4], result[4]; |
||
1572 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1573 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1574 | result[0] = (a[0] != b[0]) ? 1.0F : 0.0F; |
||
1575 | result[1] = (a[1] != b[1]) ? 1.0F : 0.0F; |
||
1576 | result[2] = (a[2] != b[2]) ? 1.0F : 0.0F; |
||
1577 | result[3] = (a[3] != b[3]) ? 1.0F : 0.0F; |
||
1578 | store_vector4(inst, machine, result); |
||
1579 | if (DEBUG_PROG) { |
||
1580 | printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n", |
||
1581 | result[0], result[1], result[2], result[3], |
||
1582 | a[0], a[1], a[2], a[3], |
||
1583 | b[0], b[1], b[2], b[3]); |
||
1584 | } |
||
1585 | } |
||
1586 | break; |
||
1587 | case OPCODE_SSG: /* set sign (-1, 0 or +1) */ |
||
1588 | { |
||
1589 | GLfloat a[4], result[4]; |
||
1590 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1591 | result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F)); |
||
1592 | result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F)); |
||
1593 | result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F)); |
||
1594 | result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F)); |
||
1595 | store_vector4(inst, machine, result); |
||
1596 | } |
||
1597 | break; |
||
1598 | case OPCODE_STR: /* set true, operands ignored */ |
||
1599 | { |
||
1600 | static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F }; |
||
1601 | store_vector4(inst, machine, result); |
||
1602 | } |
||
1603 | break; |
||
1604 | case OPCODE_SUB: |
||
1605 | { |
||
1606 | GLfloat a[4], b[4], result[4]; |
||
1607 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1608 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1609 | result[0] = a[0] - b[0]; |
||
1610 | result[1] = a[1] - b[1]; |
||
1611 | result[2] = a[2] - b[2]; |
||
1612 | result[3] = a[3] - b[3]; |
||
1613 | store_vector4(inst, machine, result); |
||
1614 | if (DEBUG_PROG) { |
||
1615 | printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n", |
||
1616 | result[0], result[1], result[2], result[3], |
||
1617 | a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); |
||
1618 | } |
||
1619 | } |
||
1620 | break; |
||
1621 | case OPCODE_SWZ: /* extended swizzle */ |
||
1622 | { |
||
1623 | const struct prog_src_register *source = &inst->SrcReg[0]; |
||
1624 | const GLfloat *src = get_src_register_pointer(source, machine); |
||
1625 | GLfloat result[4]; |
||
1626 | GLuint i; |
||
1627 | for (i = 0; i < 4; i++) { |
||
1628 | const GLuint swz = GET_SWZ(source->Swizzle, i); |
||
1629 | if (swz == SWIZZLE_ZERO) |
||
1630 | result[i] = 0.0; |
||
1631 | else if (swz == SWIZZLE_ONE) |
||
1632 | result[i] = 1.0; |
||
1633 | else { |
||
1634 | ASSERT(swz >= 0); |
||
1635 | ASSERT(swz <= 3); |
||
1636 | result[i] = src[swz]; |
||
1637 | } |
||
1638 | if (source->Negate & (1 << i)) |
||
1639 | result[i] = -result[i]; |
||
1640 | } |
||
1641 | store_vector4(inst, machine, result); |
||
1642 | } |
||
1643 | break; |
||
1644 | case OPCODE_TEX: /* Both ARB and NV frag prog */ |
||
1645 | /* Simple texel lookup */ |
||
1646 | { |
||
1647 | GLfloat texcoord[4], color[4]; |
||
1648 | fetch_vector4(&inst->SrcReg[0], machine, texcoord); |
||
1649 | |||
1650 | fetch_texel(ctx, machine, inst, texcoord, 0.0, color); |
||
1651 | |||
1652 | if (DEBUG_PROG) { |
||
1653 | printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n", |
||
1654 | color[0], color[1], color[2], color[3], |
||
1655 | inst->TexSrcUnit, |
||
1656 | texcoord[0], texcoord[1], texcoord[2], texcoord[3]); |
||
1657 | } |
||
1658 | store_vector4(inst, machine, color); |
||
1659 | } |
||
1660 | break; |
||
1661 | case OPCODE_TXB: /* GL_ARB_fragment_program only */ |
||
1662 | /* Texel lookup with LOD bias */ |
||
1663 | { |
||
1664 | GLfloat texcoord[4], color[4], lodBias; |
||
1665 | |||
1666 | fetch_vector4(&inst->SrcReg[0], machine, texcoord); |
||
1667 | |||
1668 | /* texcoord[3] is the bias to add to lambda */ |
||
1669 | lodBias = texcoord[3]; |
||
1670 | |||
1671 | fetch_texel(ctx, machine, inst, texcoord, lodBias, color); |
||
1672 | |||
1673 | store_vector4(inst, machine, color); |
||
1674 | } |
||
1675 | break; |
||
1676 | case OPCODE_TXD: /* GL_NV_fragment_program only */ |
||
1677 | /* Texture lookup w/ partial derivatives for LOD */ |
||
1678 | { |
||
1679 | GLfloat texcoord[4], dtdx[4], dtdy[4], color[4]; |
||
1680 | fetch_vector4(&inst->SrcReg[0], machine, texcoord); |
||
1681 | fetch_vector4(&inst->SrcReg[1], machine, dtdx); |
||
1682 | fetch_vector4(&inst->SrcReg[2], machine, dtdy); |
||
1683 | machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy, |
||
1684 | 0.0, /* lodBias */ |
||
1685 | inst->TexSrcUnit, color); |
||
1686 | store_vector4(inst, machine, color); |
||
1687 | } |
||
1688 | break; |
||
1689 | case OPCODE_TXL: |
||
1690 | /* Texel lookup with explicit LOD */ |
||
1691 | { |
||
1692 | GLfloat texcoord[4], color[4], lod; |
||
1693 | |||
1694 | fetch_vector4(&inst->SrcReg[0], machine, texcoord); |
||
1695 | |||
1696 | /* texcoord[3] is the LOD */ |
||
1697 | lod = texcoord[3]; |
||
1698 | |||
1699 | machine->FetchTexelLod(ctx, texcoord, lod, |
||
1700 | machine->Samplers[inst->TexSrcUnit], color); |
||
1701 | |||
1702 | store_vector4(inst, machine, color); |
||
1703 | } |
||
1704 | break; |
||
1705 | case OPCODE_TXP: /* GL_ARB_fragment_program only */ |
||
1706 | /* Texture lookup w/ projective divide */ |
||
1707 | { |
||
1708 | GLfloat texcoord[4], color[4]; |
||
1709 | |||
1710 | fetch_vector4(&inst->SrcReg[0], machine, texcoord); |
||
1711 | /* Not so sure about this test - if texcoord[3] is |
||
1712 | * zero, we'd probably be fine except for an ASSERT in |
||
1713 | * IROUND_POS() which gets triggered by the inf values created. |
||
1714 | */ |
||
1715 | if (texcoord[3] != 0.0) { |
||
1716 | texcoord[0] /= texcoord[3]; |
||
1717 | texcoord[1] /= texcoord[3]; |
||
1718 | texcoord[2] /= texcoord[3]; |
||
1719 | } |
||
1720 | |||
1721 | fetch_texel(ctx, machine, inst, texcoord, 0.0, color); |
||
1722 | |||
1723 | store_vector4(inst, machine, color); |
||
1724 | } |
||
1725 | break; |
||
1726 | case OPCODE_TXP_NV: /* GL_NV_fragment_program only */ |
||
1727 | /* Texture lookup w/ projective divide, as above, but do not |
||
1728 | * do the divide by w if sampling from a cube map. |
||
1729 | */ |
||
1730 | { |
||
1731 | GLfloat texcoord[4], color[4]; |
||
1732 | |||
1733 | fetch_vector4(&inst->SrcReg[0], machine, texcoord); |
||
1734 | if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX && |
||
1735 | texcoord[3] != 0.0) { |
||
1736 | texcoord[0] /= texcoord[3]; |
||
1737 | texcoord[1] /= texcoord[3]; |
||
1738 | texcoord[2] /= texcoord[3]; |
||
1739 | } |
||
1740 | |||
1741 | fetch_texel(ctx, machine, inst, texcoord, 0.0, color); |
||
1742 | |||
1743 | store_vector4(inst, machine, color); |
||
1744 | } |
||
1745 | break; |
||
1746 | case OPCODE_TRUNC: /* truncate toward zero */ |
||
1747 | { |
||
1748 | GLfloat a[4], result[4]; |
||
1749 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1750 | result[0] = (GLfloat) (GLint) a[0]; |
||
1751 | result[1] = (GLfloat) (GLint) a[1]; |
||
1752 | result[2] = (GLfloat) (GLint) a[2]; |
||
1753 | result[3] = (GLfloat) (GLint) a[3]; |
||
1754 | store_vector4(inst, machine, result); |
||
1755 | } |
||
1756 | break; |
||
1757 | case OPCODE_UP2H: /* unpack two 16-bit floats */ |
||
1758 | { |
||
1759 | const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); |
||
1760 | GLfloat result[4]; |
||
1761 | GLushort hx, hy; |
||
1762 | hx = raw & 0xffff; |
||
1763 | hy = raw >> 16; |
||
1764 | result[0] = result[2] = _mesa_half_to_float(hx); |
||
1765 | result[1] = result[3] = _mesa_half_to_float(hy); |
||
1766 | store_vector4(inst, machine, result); |
||
1767 | } |
||
1768 | break; |
||
1769 | case OPCODE_UP2US: /* unpack two GLushorts */ |
||
1770 | { |
||
1771 | const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); |
||
1772 | GLfloat result[4]; |
||
1773 | GLushort usx, usy; |
||
1774 | usx = raw & 0xffff; |
||
1775 | usy = raw >> 16; |
||
1776 | result[0] = result[2] = usx * (1.0f / 65535.0f); |
||
1777 | result[1] = result[3] = usy * (1.0f / 65535.0f); |
||
1778 | store_vector4(inst, machine, result); |
||
1779 | } |
||
1780 | break; |
||
1781 | case OPCODE_UP4B: /* unpack four GLbytes */ |
||
1782 | { |
||
1783 | const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); |
||
1784 | GLfloat result[4]; |
||
1785 | result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F; |
||
1786 | result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F; |
||
1787 | result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F; |
||
1788 | result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F; |
||
1789 | store_vector4(inst, machine, result); |
||
1790 | } |
||
1791 | break; |
||
1792 | case OPCODE_UP4UB: /* unpack four GLubytes */ |
||
1793 | { |
||
1794 | const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); |
||
1795 | GLfloat result[4]; |
||
1796 | result[0] = ((raw >> 0) & 0xff) / 255.0F; |
||
1797 | result[1] = ((raw >> 8) & 0xff) / 255.0F; |
||
1798 | result[2] = ((raw >> 16) & 0xff) / 255.0F; |
||
1799 | result[3] = ((raw >> 24) & 0xff) / 255.0F; |
||
1800 | store_vector4(inst, machine, result); |
||
1801 | } |
||
1802 | break; |
||
1803 | case OPCODE_XOR: /* bitwise XOR */ |
||
1804 | { |
||
1805 | GLuint a[4], b[4], result[4]; |
||
1806 | fetch_vector4ui(&inst->SrcReg[0], machine, a); |
||
1807 | fetch_vector4ui(&inst->SrcReg[1], machine, b); |
||
1808 | result[0] = a[0] ^ b[0]; |
||
1809 | result[1] = a[1] ^ b[1]; |
||
1810 | result[2] = a[2] ^ b[2]; |
||
1811 | result[3] = a[3] ^ b[3]; |
||
1812 | store_vector4ui(inst, machine, result); |
||
1813 | } |
||
1814 | break; |
||
1815 | case OPCODE_XPD: /* cross product */ |
||
1816 | { |
||
1817 | GLfloat a[4], b[4], result[4]; |
||
1818 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1819 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1820 | result[0] = a[1] * b[2] - a[2] * b[1]; |
||
1821 | result[1] = a[2] * b[0] - a[0] * b[2]; |
||
1822 | result[2] = a[0] * b[1] - a[1] * b[0]; |
||
1823 | result[3] = 1.0; |
||
1824 | store_vector4(inst, machine, result); |
||
1825 | if (DEBUG_PROG) { |
||
1826 | printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n", |
||
1827 | result[0], result[1], result[2], result[3], |
||
1828 | a[0], a[1], a[2], b[0], b[1], b[2]); |
||
1829 | } |
||
1830 | } |
||
1831 | break; |
||
1832 | case OPCODE_X2D: /* 2-D matrix transform */ |
||
1833 | { |
||
1834 | GLfloat a[4], b[4], c[4], result[4]; |
||
1835 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1836 | fetch_vector4(&inst->SrcReg[1], machine, b); |
||
1837 | fetch_vector4(&inst->SrcReg[2], machine, c); |
||
1838 | result[0] = a[0] + b[0] * c[0] + b[1] * c[1]; |
||
1839 | result[1] = a[1] + b[0] * c[2] + b[1] * c[3]; |
||
1840 | result[2] = a[2] + b[0] * c[0] + b[1] * c[1]; |
||
1841 | result[3] = a[3] + b[0] * c[2] + b[1] * c[3]; |
||
1842 | store_vector4(inst, machine, result); |
||
1843 | } |
||
1844 | break; |
||
1845 | case OPCODE_PRINT: |
||
1846 | { |
||
1847 | if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { |
||
1848 | GLfloat a[4]; |
||
1849 | fetch_vector4(&inst->SrcReg[0], machine, a); |
||
1850 | printf("%s%g, %g, %g, %g\n", (const char *) inst->Data, |
||
1851 | a[0], a[1], a[2], a[3]); |
||
1852 | } |
||
1853 | else { |
||
1854 | printf("%s\n", (const char *) inst->Data); |
||
1855 | } |
||
1856 | } |
||
1857 | break; |
||
1858 | case OPCODE_END: |
||
1859 | return GL_TRUE; |
||
1860 | default: |
||
1861 | _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program", |
||
1862 | inst->Opcode); |
||
1863 | return GL_TRUE; /* return value doesn't matter */ |
||
1864 | } |
||
1865 | |||
1866 | numExec++; |
||
1867 | if (numExec > maxExec) { |
||
1868 | static GLboolean reported = GL_FALSE; |
||
1869 | if (!reported) { |
||
1870 | _mesa_problem(ctx, "Infinite loop detected in fragment program"); |
||
1871 | reported = GL_TRUE; |
||
1872 | } |
||
1873 | return GL_TRUE; |
||
1874 | } |
||
1875 | |||
1876 | } /* for pc */ |
||
1877 | |||
1878 | return GL_TRUE; |
||
1879 | } |