WebSVN – Kolibri OS – Blame – /programs/develop/libraries/Mesa/src/mesa/program/prog_execute.c

Rev	Author	Line No.	Line
1901	serge	1	/*
		2	* Mesa 3-D graphics library
		3	* Version: 7.3
		4	*
		5	* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
		6	*
		7	* Permission is hereby granted, free of charge, to any person obtaining a
		8	* copy of this software and associated documentation files (the "Software"),
		9	* to deal in the Software without restriction, including without limitation
		10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		11	* and/or sell copies of the Software, and to permit persons to whom the
		12	* Software is furnished to do so, subject to the following conditions:
		13	*
		14	* The above copyright notice and this permission notice shall be included
		15	* in all copies or substantial portions of the Software.
		16	*
		17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
		18	* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		20	* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
		21	* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
		22	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		23	*/
		24
		25	/**
		26	* \file prog_execute.c
		27	* Software interpreter for vertex/fragment programs.
		28	* \author Brian Paul
		29	*/
		30
		31	/*
		32	* NOTE: we do everything in single-precision floating point; we don't
		33	* currently observe the single/half/fixed-precision qualifiers.
		34	*
		35	*/
		36
		37
		38	#include "main/glheader.h"
		39	#include "main/colormac.h"
		40	#include "main/macros.h"
		41	#include "prog_execute.h"
		42	#include "prog_instruction.h"
		43	#include "prog_parameter.h"
		44	#include "prog_print.h"
		45	#include "prog_noise.h"
		46
		47
		48	/* debug predicate */
		49	#define DEBUG_PROG 0
		50
		51
		52	/**
		53	* Set x to positive or negative infinity.
		54	*/
		55	#if defined(USE_IEEE) \|\| defined(_WIN32)
		56	#define SET_POS_INFINITY(x) \
		57	do { \
		58	fi_type fi; \
		59	fi.i = 0x7F800000; \
		60	x = fi.f; \
		61	} while (0)
		62	#define SET_NEG_INFINITY(x) \
		63	do { \
		64	fi_type fi; \
		65	fi.i = 0xFF800000; \
		66	x = fi.f; \
		67	} while (0)
		68	#elif defined(VMS)
		69	#define SET_POS_INFINITY(x) x = __MAXFLOAT
		70	#define SET_NEG_INFINITY(x) x = -__MAXFLOAT
		71	#else
		72	#define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL
		73	#define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL
		74	#endif
		75
		76	#define SET_FLOAT_BITS(x, bits) ((fi_type ) (void ) &(x))->i = bits
		77
		78
		79	static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
		80
		81
		82
		83	/**
		84	* Return TRUE for +0 and other positive values, FALSE otherwise.
		85	* Used for RCC opcode.
		86	*/
		87	static INLINE GLboolean
		88	positive(float x)
		89	{
		90	fi_type fi;
		91	fi.f = x;
		92	if (fi.i & 0x80000000)
		93	return GL_FALSE;
		94	return GL_TRUE;
		95	}
		96
		97
		98
		99	/**
		100	* Return a pointer to the 4-element float vector specified by the given
		101	* source register.
		102	*/
		103	static INLINE const GLfloat *
		104	get_src_register_pointer(const struct prog_src_register *source,
		105	const struct gl_program_machine *machine)
		106	{
		107	const struct gl_program *prog = machine->CurProgram;
		108	GLint reg = source->Index;
		109
		110	if (source->RelAddr) {
		111	/* add address register value to src index/offset */
		112	reg += machine->AddressReg[0][0];
		113	if (reg < 0) {
		114	return ZeroVec;
		115	}
		116	}
		117
		118	switch (source->File) {
		119	case PROGRAM_TEMPORARY:
		120	if (reg >= MAX_PROGRAM_TEMPS)
		121	return ZeroVec;
		122	return machine->Temporaries[reg];
		123
		124	case PROGRAM_INPUT:
		125	if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
		126	if (reg >= VERT_ATTRIB_MAX)
		127	return ZeroVec;
		128	return machine->VertAttribs[reg];
		129	}
		130	else {
		131	if (reg >= FRAG_ATTRIB_MAX)
		132	return ZeroVec;
		133	return machine->Attribs[reg][machine->CurElement];
		134	}
		135
		136	case PROGRAM_OUTPUT:
		137	if (reg >= MAX_PROGRAM_OUTPUTS)
		138	return ZeroVec;
		139	return machine->Outputs[reg];
		140
		141	case PROGRAM_LOCAL_PARAM:
		142	if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
		143	return ZeroVec;
		144	return machine->CurProgram->LocalParams[reg];
		145
		146	case PROGRAM_ENV_PARAM:
		147	if (reg >= MAX_PROGRAM_ENV_PARAMS)
		148	return ZeroVec;
		149	return machine->EnvParams[reg];
		150
		151	case PROGRAM_STATE_VAR:
		152	/* Fallthrough */
		153	case PROGRAM_CONSTANT:
		154	/* Fallthrough */
		155	case PROGRAM_UNIFORM:
		156	/* Fallthrough */
		157	case PROGRAM_NAMED_PARAM:
		158	if (reg >= (GLint) prog->Parameters->NumParameters)
		159	return ZeroVec;
		160	return prog->Parameters->ParameterValues[reg];
		161
		162	default:
		163	_mesa_problem(NULL,
		164	"Invalid src register file %d in get_src_register_pointer()",
		165	source->File);
		166	return NULL;
		167	}
		168	}
		169
		170
		171	/**
		172	* Return a pointer to the 4-element float vector specified by the given
		173	* destination register.
		174	*/
		175	static INLINE GLfloat *
		176	get_dst_register_pointer(const struct prog_dst_register *dest,
		177	struct gl_program_machine *machine)
		178	{
		179	static GLfloat dummyReg[4];
		180	GLint reg = dest->Index;
		181
		182	if (dest->RelAddr) {
		183	/* add address register value to src index/offset */
		184	reg += machine->AddressReg[0][0];
		185	if (reg < 0) {
		186	return dummyReg;
		187	}
		188	}
		189
		190	switch (dest->File) {
		191	case PROGRAM_TEMPORARY:
		192	if (reg >= MAX_PROGRAM_TEMPS)
		193	return dummyReg;
		194	return machine->Temporaries[reg];
		195
		196	case PROGRAM_OUTPUT:
		197	if (reg >= MAX_PROGRAM_OUTPUTS)
		198	return dummyReg;
		199	return machine->Outputs[reg];
		200
		201	case PROGRAM_WRITE_ONLY:
		202	return dummyReg;
		203
		204	default:
		205	_mesa_problem(NULL,
		206	"Invalid dest register file %d in get_dst_register_pointer()",
		207	dest->File);
		208	return NULL;
		209	}
		210	}
		211
		212
		213
		214	/**
		215	* Fetch a 4-element float vector from the given source register.
		216	* Apply swizzling and negating as needed.
		217	*/
		218	static void
		219	fetch_vector4(const struct prog_src_register *source,
		220	const struct gl_program_machine *machine, GLfloat result[4])
		221	{
		222	const GLfloat *src = get_src_register_pointer(source, machine);
		223	ASSERT(src);
		224
		225	if (source->Swizzle == SWIZZLE_NOOP) {
		226	/* no swizzling */
		227	COPY_4V(result, src);
		228	}
		229	else {
		230	ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
		231	ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
		232	ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
		233	ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
		234	result[0] = src[GET_SWZ(source->Swizzle, 0)];
		235	result[1] = src[GET_SWZ(source->Swizzle, 1)];
		236	result[2] = src[GET_SWZ(source->Swizzle, 2)];
		237	result[3] = src[GET_SWZ(source->Swizzle, 3)];
		238	}
		239
		240	if (source->Abs) {
		241	result[0] = FABSF(result[0]);
		242	result[1] = FABSF(result[1]);
		243	result[2] = FABSF(result[2]);
		244	result[3] = FABSF(result[3]);
		245	}
		246	if (source->Negate) {
		247	ASSERT(source->Negate == NEGATE_XYZW);
		248	result[0] = -result[0];
		249	result[1] = -result[1];
		250	result[2] = -result[2];
		251	result[3] = -result[3];
		252	}
		253
		254	#ifdef NAN_CHECK
		255	assert(!IS_INF_OR_NAN(result[0]));
		256	assert(!IS_INF_OR_NAN(result[0]));
		257	assert(!IS_INF_OR_NAN(result[0]));
		258	assert(!IS_INF_OR_NAN(result[0]));
		259	#endif
		260	}
		261
		262
		263	/**
		264	* Fetch a 4-element uint vector from the given source register.
		265	* Apply swizzling but not negation/abs.
		266	*/
		267	static void
		268	fetch_vector4ui(const struct prog_src_register *source,
		269	const struct gl_program_machine *machine, GLuint result[4])
		270	{
		271	const GLuint src = (GLuint ) get_src_register_pointer(source, machine);
		272	ASSERT(src);
		273
		274	if (source->Swizzle == SWIZZLE_NOOP) {
		275	/* no swizzling */
		276	COPY_4V(result, src);
		277	}
		278	else {
		279	ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
		280	ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
		281	ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
		282	ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
		283	result[0] = src[GET_SWZ(source->Swizzle, 0)];
		284	result[1] = src[GET_SWZ(source->Swizzle, 1)];
		285	result[2] = src[GET_SWZ(source->Swizzle, 2)];
		286	result[3] = src[GET_SWZ(source->Swizzle, 3)];
		287	}
		288
		289	/* Note: no Negate or Abs here */
		290	}
		291
		292
		293
		294	/**
		295	* Fetch the derivative with respect to X or Y for the given register.
		296	* XXX this currently only works for fragment program input attribs.
		297	*/
		298	static void
		299	fetch_vector4_deriv(struct gl_context * ctx,
		300	const struct prog_src_register *source,
		301	const struct gl_program_machine *machine,
		302	char xOrY, GLfloat result[4])
		303	{
		304	if (source->File == PROGRAM_INPUT &&
		305	source->Index < (GLint) machine->NumDeriv) {
		306	const GLint col = machine->CurElement;
		307	const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
		308	const GLfloat invQ = 1.0f / w;
		309	GLfloat deriv[4];
		310
		311	if (xOrY == 'X') {
		312	deriv[0] = machine->DerivX[source->Index][0] * invQ;
		313	deriv[1] = machine->DerivX[source->Index][1] * invQ;
		314	deriv[2] = machine->DerivX[source->Index][2] * invQ;
		315	deriv[3] = machine->DerivX[source->Index][3] * invQ;
		316	}
		317	else {
		318	deriv[0] = machine->DerivY[source->Index][0] * invQ;
		319	deriv[1] = machine->DerivY[source->Index][1] * invQ;
		320	deriv[2] = machine->DerivY[source->Index][2] * invQ;
		321	deriv[3] = machine->DerivY[source->Index][3] * invQ;
		322	}
		323
		324	result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
		325	result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
		326	result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
		327	result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
		328
		329	if (source->Abs) {
		330	result[0] = FABSF(result[0]);
		331	result[1] = FABSF(result[1]);
		332	result[2] = FABSF(result[2]);
		333	result[3] = FABSF(result[3]);
		334	}
		335	if (source->Negate) {
		336	ASSERT(source->Negate == NEGATE_XYZW);
		337	result[0] = -result[0];
		338	result[1] = -result[1];
		339	result[2] = -result[2];
		340	result[3] = -result[3];
		341	}
		342	}
		343	else {
		344	ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
		345	}
		346	}
		347
		348
		349	/**
		350	* As above, but only return result[0] element.
		351	*/
		352	static void
		353	fetch_vector1(const struct prog_src_register *source,
		354	const struct gl_program_machine *machine, GLfloat result[4])
		355	{
		356	const GLfloat *src = get_src_register_pointer(source, machine);
		357	ASSERT(src);
		358
		359	result[0] = src[GET_SWZ(source->Swizzle, 0)];
		360
		361	if (source->Abs) {
		362	result[0] = FABSF(result[0]);
		363	}
		364	if (source->Negate) {
		365	result[0] = -result[0];
		366	}
		367	}
		368
		369
		370	static GLuint
		371	fetch_vector1ui(const struct prog_src_register *source,
		372	const struct gl_program_machine *machine)
		373	{
		374	const GLuint src = (GLuint ) get_src_register_pointer(source, machine);
		375	return src[GET_SWZ(source->Swizzle, 0)];
		376	}
		377
		378
		379	/**
		380	* Fetch texel from texture. Use partial derivatives when possible.
		381	*/
		382	static INLINE void
		383	fetch_texel(struct gl_context *ctx,
		384	const struct gl_program_machine *machine,
		385	const struct prog_instruction *inst,
		386	const GLfloat texcoord[4], GLfloat lodBias,
		387	GLfloat color[4])
		388	{
		389	const GLuint unit = machine->Samplers[inst->TexSrcUnit];
		390
		391	/* Note: we only have the right derivatives for fragment input attribs.
		392	*/
		393	if (machine->NumDeriv > 0 &&
		394	inst->SrcReg[0].File == PROGRAM_INPUT &&
		395	inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
		396	/* simple texture fetch for which we should have derivatives */
		397	GLuint attr = inst->SrcReg[0].Index;
		398	machine->FetchTexelDeriv(ctx, texcoord,
		399	machine->DerivX[attr],
		400	machine->DerivY[attr],
		401	lodBias, unit, color);
		402	}
		403	else {
		404	machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
		405	}
		406	}
		407
		408
		409	/**
		410	* Test value against zero and return GT, LT, EQ or UN if NaN.
		411	*/
		412	static INLINE GLuint
		413	generate_cc(float value)
		414	{
		415	if (value != value)
		416	return COND_UN; /* NaN */
		417	if (value > 0.0F)
		418	return COND_GT;
		419	if (value < 0.0F)
		420	return COND_LT;
		421	return COND_EQ;
		422	}
		423
		424
		425	/**
		426	* Test if the ccMaskRule is satisfied by the given condition code.
		427	* Used to mask destination writes according to the current condition code.
		428	*/
		429	static INLINE GLboolean
		430	test_cc(GLuint condCode, GLuint ccMaskRule)
		431	{
		432	switch (ccMaskRule) {
		433	case COND_EQ: return (condCode == COND_EQ);
		434	case COND_NE: return (condCode != COND_EQ);
		435	case COND_LT: return (condCode == COND_LT);
		436	case COND_GE: return (condCode == COND_GT \|\| condCode == COND_EQ);
		437	case COND_LE: return (condCode == COND_LT \|\| condCode == COND_EQ);
		438	case COND_GT: return (condCode == COND_GT);
		439	case COND_TR: return GL_TRUE;
		440	case COND_FL: return GL_FALSE;
		441	default: return GL_TRUE;
		442	}
		443	}
		444
		445
		446	/**
		447	* Evaluate the 4 condition codes against a predicate and return GL_TRUE
		448	* or GL_FALSE to indicate result.
		449	*/
		450	static INLINE GLboolean
		451	eval_condition(const struct gl_program_machine *machine,
		452	const struct prog_instruction *inst)
		453	{
		454	const GLuint swizzle = inst->DstReg.CondSwizzle;
		455	const GLuint condMask = inst->DstReg.CondMask;
		456	if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) \|\|
		457	test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) \|\|
		458	test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) \|\|
		459	test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
		460	return GL_TRUE;
		461	}
		462	else {
		463	return GL_FALSE;
		464	}
		465	}
		466
		467
		468
		469	/**
		470	* Store 4 floats into a register. Observe the instructions saturate and
		471	* set-condition-code flags.
		472	*/
		473	static void
		474	store_vector4(const struct prog_instruction *inst,
		475	struct gl_program_machine *machine, const GLfloat value[4])
		476	{
		477	const struct prog_dst_register *dstReg = &(inst->DstReg);
		478	const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
		479	GLuint writeMask = dstReg->WriteMask;
		480	GLfloat clampedValue[4];
		481	GLfloat *dst = get_dst_register_pointer(dstReg, machine);
		482
		483	#if 0
		484	if (value[0] > 1.0e10 \|\|
		485	IS_INF_OR_NAN(value[0]) \|\|
		486	IS_INF_OR_NAN(value[1]) \|\|
		487	IS_INF_OR_NAN(value[2]) \|\| IS_INF_OR_NAN(value[3]))
		488	printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
		489	#endif
		490
		491	if (clamp) {
		492	clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
		493	clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
		494	clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
		495	clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
		496	value = clampedValue;
		497	}
		498
		499	if (dstReg->CondMask != COND_TR) {
		500	/* condition codes may turn off some writes */
		501	if (writeMask & WRITEMASK_X) {
		502	if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
		503	dstReg->CondMask))
		504	writeMask &= ~WRITEMASK_X;
		505	}
		506	if (writeMask & WRITEMASK_Y) {
		507	if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
		508	dstReg->CondMask))
		509	writeMask &= ~WRITEMASK_Y;
		510	}
		511	if (writeMask & WRITEMASK_Z) {
		512	if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
		513	dstReg->CondMask))
		514	writeMask &= ~WRITEMASK_Z;
		515	}
		516	if (writeMask & WRITEMASK_W) {
		517	if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
		518	dstReg->CondMask))
		519	writeMask &= ~WRITEMASK_W;
		520	}
		521	}
		522
		523	#ifdef NAN_CHECK
		524	assert(!IS_INF_OR_NAN(value[0]));
		525	assert(!IS_INF_OR_NAN(value[0]));
		526	assert(!IS_INF_OR_NAN(value[0]));
		527	assert(!IS_INF_OR_NAN(value[0]));
		528	#endif
		529
		530	if (writeMask & WRITEMASK_X)
		531	dst[0] = value[0];
		532	if (writeMask & WRITEMASK_Y)
		533	dst[1] = value[1];
		534	if (writeMask & WRITEMASK_Z)
		535	dst[2] = value[2];
		536	if (writeMask & WRITEMASK_W)
		537	dst[3] = value[3];
		538
		539	if (inst->CondUpdate) {
		540	if (writeMask & WRITEMASK_X)
		541	machine->CondCodes[0] = generate_cc(value[0]);
		542	if (writeMask & WRITEMASK_Y)
		543	machine->CondCodes[1] = generate_cc(value[1]);
		544	if (writeMask & WRITEMASK_Z)
		545	machine->CondCodes[2] = generate_cc(value[2]);
		546	if (writeMask & WRITEMASK_W)
		547	machine->CondCodes[3] = generate_cc(value[3]);
		548	#if DEBUG_PROG
		549	printf("CondCodes=(%s,%s,%s,%s) for:\n",
		550	_mesa_condcode_string(machine->CondCodes[0]),
		551	_mesa_condcode_string(machine->CondCodes[1]),
		552	_mesa_condcode_string(machine->CondCodes[2]),
		553	_mesa_condcode_string(machine->CondCodes[3]));
		554	#endif
		555	}
		556	}
		557
		558
		559	/**
		560	* Store 4 uints into a register. Observe the set-condition-code flags.
		561	*/
		562	static void
		563	store_vector4ui(const struct prog_instruction *inst,
		564	struct gl_program_machine *machine, const GLuint value[4])
		565	{
		566	const struct prog_dst_register *dstReg = &(inst->DstReg);
		567	GLuint writeMask = dstReg->WriteMask;
		568	GLuint dst = (GLuint ) get_dst_register_pointer(dstReg, machine);
		569
		570	if (dstReg->CondMask != COND_TR) {
		571	/* condition codes may turn off some writes */
		572	if (writeMask & WRITEMASK_X) {
		573	if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
		574	dstReg->CondMask))
		575	writeMask &= ~WRITEMASK_X;
		576	}
		577	if (writeMask & WRITEMASK_Y) {
		578	if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
		579	dstReg->CondMask))
		580	writeMask &= ~WRITEMASK_Y;
		581	}
		582	if (writeMask & WRITEMASK_Z) {
		583	if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
		584	dstReg->CondMask))
		585	writeMask &= ~WRITEMASK_Z;
		586	}
		587	if (writeMask & WRITEMASK_W) {
		588	if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
		589	dstReg->CondMask))
		590	writeMask &= ~WRITEMASK_W;
		591	}
		592	}
		593
		594	if (writeMask & WRITEMASK_X)
		595	dst[0] = value[0];
		596	if (writeMask & WRITEMASK_Y)
		597	dst[1] = value[1];
		598	if (writeMask & WRITEMASK_Z)
		599	dst[2] = value[2];
		600	if (writeMask & WRITEMASK_W)
		601	dst[3] = value[3];
		602
		603	if (inst->CondUpdate) {
		604	if (writeMask & WRITEMASK_X)
		605	machine->CondCodes[0] = generate_cc((float)value[0]);
		606	if (writeMask & WRITEMASK_Y)
		607	machine->CondCodes[1] = generate_cc((float)value[1]);
		608	if (writeMask & WRITEMASK_Z)
		609	machine->CondCodes[2] = generate_cc((float)value[2]);
		610	if (writeMask & WRITEMASK_W)
		611	machine->CondCodes[3] = generate_cc((float)value[3]);
		612	#if DEBUG_PROG
		613	printf("CondCodes=(%s,%s,%s,%s) for:\n",
		614	_mesa_condcode_string(machine->CondCodes[0]),
		615	_mesa_condcode_string(machine->CondCodes[1]),
		616	_mesa_condcode_string(machine->CondCodes[2]),
		617	_mesa_condcode_string(machine->CondCodes[3]));
		618	#endif
		619	}
		620	}
		621
		622
		623
		624	/**
		625	* Execute the given vertex/fragment program.
		626	*
		627	* \param ctx rendering context
		628	* \param program the program to execute
		629	* \param machine machine state (must be initialized)
		630	* \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
		631	*/
		632	GLboolean
		633	_mesa_execute_program(struct gl_context * ctx,
		634	const struct gl_program *program,
		635	struct gl_program_machine *machine)
		636	{
		637	const GLuint numInst = program->NumInstructions;
		638	const GLuint maxExec = 10000;
		639	GLuint pc, numExec = 0;
		640
		641	machine->CurProgram = program;
		642
		643	if (DEBUG_PROG) {
		644	printf("execute program %u --------------------\n", program->Id);
		645	}
		646
		647	if (program->Target == GL_VERTEX_PROGRAM_ARB) {
		648	machine->EnvParams = ctx->VertexProgram.Parameters;
		649	}
		650	else {
		651	machine->EnvParams = ctx->FragmentProgram.Parameters;
		652	}
		653
		654	for (pc = 0; pc < numInst; pc++) {
		655	const struct prog_instruction *inst = program->Instructions + pc;
		656
		657	if (DEBUG_PROG) {
		658	_mesa_print_instruction(inst);
		659	}
		660
		661	switch (inst->Opcode) {
		662	case OPCODE_ABS:
		663	{
		664	GLfloat a[4], result[4];
		665	fetch_vector4(&inst->SrcReg[0], machine, a);
		666	result[0] = FABSF(a[0]);
		667	result[1] = FABSF(a[1]);
		668	result[2] = FABSF(a[2]);
		669	result[3] = FABSF(a[3]);
		670	store_vector4(inst, machine, result);
		671	}
		672	break;
		673	case OPCODE_ADD:
		674	{
		675	GLfloat a[4], b[4], result[4];
		676	fetch_vector4(&inst->SrcReg[0], machine, a);
		677	fetch_vector4(&inst->SrcReg[1], machine, b);
		678	result[0] = a[0] + b[0];
		679	result[1] = a[1] + b[1];
		680	result[2] = a[2] + b[2];
		681	result[3] = a[3] + b[3];
		682	store_vector4(inst, machine, result);
		683	if (DEBUG_PROG) {
		684	printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
		685	result[0], result[1], result[2], result[3],
		686	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
		687	}
		688	}
		689	break;
		690	case OPCODE_AND: /* bitwise AND */
		691	{
		692	GLuint a[4], b[4], result[4];
		693	fetch_vector4ui(&inst->SrcReg[0], machine, a);
		694	fetch_vector4ui(&inst->SrcReg[1], machine, b);
		695	result[0] = a[0] & b[0];
		696	result[1] = a[1] & b[1];
		697	result[2] = a[2] & b[2];
		698	result[3] = a[3] & b[3];
		699	store_vector4ui(inst, machine, result);
		700	}
		701	break;
		702	case OPCODE_ARL:
		703	{
		704	GLfloat t[4];
		705	fetch_vector4(&inst->SrcReg[0], machine, t);
		706	machine->AddressReg[0][0] = IFLOOR(t[0]);
		707	if (DEBUG_PROG) {
		708	printf("ARL %d\n", machine->AddressReg[0][0]);
		709	}
		710	}
		711	break;
		712	case OPCODE_BGNLOOP:
		713	/* no-op */
		714	ASSERT(program->Instructions[inst->BranchTarget].Opcode
		715	== OPCODE_ENDLOOP);
		716	break;
		717	case OPCODE_ENDLOOP:
		718	/* subtract 1 here since pc is incremented by for(pc) loop */
		719	ASSERT(program->Instructions[inst->BranchTarget].Opcode
		720	== OPCODE_BGNLOOP);
		721	pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
		722	break;
		723	case OPCODE_BGNSUB: /* begin subroutine */
		724	break;
		725	case OPCODE_ENDSUB: /* end subroutine */
		726	break;
		727	case OPCODE_BRA: /* branch (conditional) */
		728	if (eval_condition(machine, inst)) {
		729	/* take branch */
		730	/* Subtract 1 here since we'll do pc++ below */
		731	pc = inst->BranchTarget - 1;
		732	}
		733	break;
		734	case OPCODE_BRK: /* break out of loop (conditional) */
		735	ASSERT(program->Instructions[inst->BranchTarget].Opcode
		736	== OPCODE_ENDLOOP);
		737	if (eval_condition(machine, inst)) {
		738	/* break out of loop */
		739	/* pc++ at end of for-loop will put us after the ENDLOOP inst */
		740	pc = inst->BranchTarget;
		741	}
		742	break;
		743	case OPCODE_CONT: /* continue loop (conditional) */
		744	ASSERT(program->Instructions[inst->BranchTarget].Opcode
		745	== OPCODE_ENDLOOP);
		746	if (eval_condition(machine, inst)) {
		747	/* continue at ENDLOOP */
		748	/* Subtract 1 here since we'll do pc++ at end of for-loop */
		749	pc = inst->BranchTarget - 1;
		750	}
		751	break;
		752	case OPCODE_CAL: /* Call subroutine (conditional) */
		753	if (eval_condition(machine, inst)) {
		754	/* call the subroutine */
		755	if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
		756	return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
		757	}
		758	machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
		759	/* Subtract 1 here since we'll do pc++ at end of for-loop */
		760	pc = inst->BranchTarget - 1;
		761	}
		762	break;
		763	case OPCODE_CMP:
		764	{
		765	GLfloat a[4], b[4], c[4], result[4];
		766	fetch_vector4(&inst->SrcReg[0], machine, a);
		767	fetch_vector4(&inst->SrcReg[1], machine, b);
		768	fetch_vector4(&inst->SrcReg[2], machine, c);
		769	result[0] = a[0] < 0.0F ? b[0] : c[0];
		770	result[1] = a[1] < 0.0F ? b[1] : c[1];
		771	result[2] = a[2] < 0.0F ? b[2] : c[2];
		772	result[3] = a[3] < 0.0F ? b[3] : c[3];
		773	store_vector4(inst, machine, result);
		774	if (DEBUG_PROG) {
		775	printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
		776	result[0], result[1], result[2], result[3],
		777	a[0], a[1], a[2], a[3],
		778	b[0], b[1], b[2], b[3],
		779	c[0], c[1], c[2], c[3]);
		780	}
		781	}
		782	break;
		783	case OPCODE_COS:
		784	{
		785	GLfloat a[4], result[4];
		786	fetch_vector1(&inst->SrcReg[0], machine, a);
		787	result[0] = result[1] = result[2] = result[3]
		788	= (GLfloat) cos(a[0]);
		789	store_vector4(inst, machine, result);
		790	}
		791	break;
		792	case OPCODE_DDX: /* Partial derivative with respect to X */
		793	{
		794	GLfloat result[4];
		795	fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
		796	'X', result);
		797	store_vector4(inst, machine, result);
		798	}
		799	break;
		800	case OPCODE_DDY: /* Partial derivative with respect to Y */
		801	{
		802	GLfloat result[4];
		803	fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
		804	'Y', result);
		805	store_vector4(inst, machine, result);
		806	}
		807	break;
		808	case OPCODE_DP2:
		809	{
		810	GLfloat a[4], b[4], result[4];
		811	fetch_vector4(&inst->SrcReg[0], machine, a);
		812	fetch_vector4(&inst->SrcReg[1], machine, b);
		813	result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
		814	store_vector4(inst, machine, result);
		815	if (DEBUG_PROG) {
		816	printf("DP2 %g = (%g %g) . (%g %g)\n",
		817	result[0], a[0], a[1], b[0], b[1]);
		818	}
		819	}
		820	break;
		821	case OPCODE_DP2A:
		822	{
		823	GLfloat a[4], b[4], c, result[4];
		824	fetch_vector4(&inst->SrcReg[0], machine, a);
		825	fetch_vector4(&inst->SrcReg[1], machine, b);
		826	fetch_vector1(&inst->SrcReg[1], machine, &c);
		827	result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
		828	store_vector4(inst, machine, result);
		829	if (DEBUG_PROG) {
		830	printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
		831	result[0], a[0], a[1], b[0], b[1], c);
		832	}
		833	}
		834	break;
		835	case OPCODE_DP3:
		836	{
		837	GLfloat a[4], b[4], result[4];
		838	fetch_vector4(&inst->SrcReg[0], machine, a);
		839	fetch_vector4(&inst->SrcReg[1], machine, b);
		840	result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
		841	store_vector4(inst, machine, result);
		842	if (DEBUG_PROG) {
		843	printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
		844	result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
		845	}
		846	}
		847	break;
		848	case OPCODE_DP4:
		849	{
		850	GLfloat a[4], b[4], result[4];
		851	fetch_vector4(&inst->SrcReg[0], machine, a);
		852	fetch_vector4(&inst->SrcReg[1], machine, b);
		853	result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
		854	store_vector4(inst, machine, result);
		855	if (DEBUG_PROG) {
		856	printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
		857	result[0], a[0], a[1], a[2], a[3],
		858	b[0], b[1], b[2], b[3]);
		859	}
		860	}
		861	break;
		862	case OPCODE_DPH:
		863	{
		864	GLfloat a[4], b[4], result[4];
		865	fetch_vector4(&inst->SrcReg[0], machine, a);
		866	fetch_vector4(&inst->SrcReg[1], machine, b);
		867	result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
		868	store_vector4(inst, machine, result);
		869	}
		870	break;
		871	case OPCODE_DST: /* Distance vector */
		872	{
		873	GLfloat a[4], b[4], result[4];
		874	fetch_vector4(&inst->SrcReg[0], machine, a);
		875	fetch_vector4(&inst->SrcReg[1], machine, b);
		876	result[0] = 1.0F;
		877	result[1] = a[1] * b[1];
		878	result[2] = a[2];
		879	result[3] = b[3];
		880	store_vector4(inst, machine, result);
		881	}
		882	break;
		883	case OPCODE_EXP:
		884	{
		885	GLfloat t[4], q[4], floor_t0;
		886	fetch_vector1(&inst->SrcReg[0], machine, t);
		887	floor_t0 = FLOORF(t[0]);
		888	if (floor_t0 > FLT_MAX_EXP) {
		889	SET_POS_INFINITY(q[0]);
		890	SET_POS_INFINITY(q[2]);
		891	}
		892	else if (floor_t0 < FLT_MIN_EXP) {
		893	q[0] = 0.0F;
		894	q[2] = 0.0F;
		895	}
		896	else {
		897	q[0] = LDEXPF(1.0, (int) floor_t0);
		898	/* Note: GL_NV_vertex_program expects
		899	* result.z = result.x * APPX(result.y)
		900	* We do what the ARB extension says.
		901	*/
		902	q[2] = (GLfloat) pow(2.0, t[0]);
		903	}
		904	q[1] = t[0] - floor_t0;
		905	q[3] = 1.0F;
		906	store_vector4( inst, machine, q );
		907	}
		908	break;
		909	case OPCODE_EX2: /* Exponential base 2 */
		910	{
		911	GLfloat a[4], result[4], val;
		912	fetch_vector1(&inst->SrcReg[0], machine, a);
		913	val = (GLfloat) pow(2.0, a[0]);
		914	/*
		915	if (IS_INF_OR_NAN(val))
		916	val = 1.0e10;
		917	*/
		918	result[0] = result[1] = result[2] = result[3] = val;
		919	store_vector4(inst, machine, result);
		920	}
		921	break;
		922	case OPCODE_FLR:
		923	{
		924	GLfloat a[4], result[4];
		925	fetch_vector4(&inst->SrcReg[0], machine, a);
		926	result[0] = FLOORF(a[0]);
		927	result[1] = FLOORF(a[1]);
		928	result[2] = FLOORF(a[2]);
		929	result[3] = FLOORF(a[3]);
		930	store_vector4(inst, machine, result);
		931	}
		932	break;
		933	case OPCODE_FRC:
		934	{
		935	GLfloat a[4], result[4];
		936	fetch_vector4(&inst->SrcReg[0], machine, a);
		937	result[0] = a[0] - FLOORF(a[0]);
		938	result[1] = a[1] - FLOORF(a[1]);
		939	result[2] = a[2] - FLOORF(a[2]);
		940	result[3] = a[3] - FLOORF(a[3]);
		941	store_vector4(inst, machine, result);
		942	}
		943	break;
		944	case OPCODE_IF:
		945	{
		946	GLboolean cond;
		947	ASSERT(program->Instructions[inst->BranchTarget].Opcode
		948	== OPCODE_ELSE \|\|
		949	program->Instructions[inst->BranchTarget].Opcode
		950	== OPCODE_ENDIF);
		951	/* eval condition */
		952	if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
		953	GLfloat a[4];
		954	fetch_vector1(&inst->SrcReg[0], machine, a);
		955	cond = (a[0] != 0.0);
		956	}
		957	else {
		958	cond = eval_condition(machine, inst);
		959	}
		960	if (DEBUG_PROG) {
		961	printf("IF: %d\n", cond);
		962	}
		963	/* do if/else */
		964	if (cond) {
		965	/* do if-clause (just continue execution) */
		966	}
		967	else {
		968	/* go to the instruction after ELSE or ENDIF */
		969	assert(inst->BranchTarget >= 0);
		970	pc = inst->BranchTarget;
		971	}
		972	}
		973	break;
		974	case OPCODE_ELSE:
		975	/* goto ENDIF */
		976	ASSERT(program->Instructions[inst->BranchTarget].Opcode
		977	== OPCODE_ENDIF);
		978	assert(inst->BranchTarget >= 0);
		979	pc = inst->BranchTarget;
		980	break;
		981	case OPCODE_ENDIF:
		982	/* nothing */
		983	break;
		984	case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
		985	if (eval_condition(machine, inst)) {
		986	return GL_FALSE;
		987	}
		988	break;
		989	case OPCODE_KIL: /* ARB_f_p only */
		990	{
		991	GLfloat a[4];
		992	fetch_vector4(&inst->SrcReg[0], machine, a);
		993	if (DEBUG_PROG) {
		994	printf("KIL if (%g %g %g %g) <= 0.0\n",
		995	a[0], a[1], a[2], a[3]);
		996	}
		997
		998	if (a[0] < 0.0F \|\| a[1] < 0.0F \|\| a[2] < 0.0F \|\| a[3] < 0.0F) {
		999	return GL_FALSE;
		1000	}
		1001	}
		1002	break;
		1003	case OPCODE_LG2: /* log base 2 */
		1004	{
		1005	GLfloat a[4], result[4], val;
		1006	fetch_vector1(&inst->SrcReg[0], machine, a);
		1007	/* The fast LOG2 macro doesn't meet the precision requirements.
		1008	*/
		1009	if (a[0] == 0.0F) {
		1010	val = -FLT_MAX;
		1011	}
		1012	else {
		1013	val = (float)(log(a[0]) * 1.442695F);
		1014	}
		1015	result[0] = result[1] = result[2] = result[3] = val;
		1016	store_vector4(inst, machine, result);
		1017	}
		1018	break;
		1019	case OPCODE_LIT:
		1020	{
		1021	const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
		1022	GLfloat a[4], result[4];
		1023	fetch_vector4(&inst->SrcReg[0], machine, a);
		1024	a[0] = MAX2(a[0], 0.0F);
		1025	a[1] = MAX2(a[1], 0.0F);
		1026	/* XXX ARB version clamps a[3], NV version doesn't */
		1027	a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
		1028	result[0] = 1.0F;
		1029	result[1] = a[0];
		1030	/* XXX we could probably just use pow() here */
		1031	if (a[0] > 0.0F) {
		1032	if (a[1] == 0.0 && a[3] == 0.0)
		1033	result[2] = 1.0F;
		1034	else
		1035	result[2] = (GLfloat) pow(a[1], a[3]);
		1036	}
		1037	else {
		1038	result[2] = 0.0F;
		1039	}
		1040	result[3] = 1.0F;
		1041	store_vector4(inst, machine, result);
		1042	if (DEBUG_PROG) {
		1043	printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
		1044	result[0], result[1], result[2], result[3],
		1045	a[0], a[1], a[2], a[3]);
		1046	}
		1047	}
		1048	break;
		1049	case OPCODE_LOG:
		1050	{
		1051	GLfloat t[4], q[4], abs_t0;
		1052	fetch_vector1(&inst->SrcReg[0], machine, t);
		1053	abs_t0 = FABSF(t[0]);
		1054	if (abs_t0 != 0.0F) {
		1055	/* Since we really can't handle infinite values on VMS
		1056	* like other OSes we'll use __MAXFLOAT to represent
		1057	* infinity. This may need some tweaking.
		1058	*/
		1059	#ifdef VMS
		1060	if (abs_t0 == __MAXFLOAT)
		1061	#else
		1062	if (IS_INF_OR_NAN(abs_t0))
		1063	#endif
		1064	{
		1065	SET_POS_INFINITY(q[0]);
		1066	q[1] = 1.0F;
		1067	SET_POS_INFINITY(q[2]);
		1068	}
		1069	else {
		1070	int exponent;
		1071	GLfloat mantissa = FREXPF(t[0], &exponent);
		1072	q[0] = (GLfloat) (exponent - 1);
		1073	q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
		1074
		1075	/* The fast LOG2 macro doesn't meet the precision
		1076	* requirements.
		1077	*/
		1078	q[2] = (float)(log(t[0]) * 1.442695F);
		1079	}
		1080	}
		1081	else {
		1082	SET_NEG_INFINITY(q[0]);
		1083	q[1] = 1.0F;
		1084	SET_NEG_INFINITY(q[2]);
		1085	}
		1086	q[3] = 1.0;
		1087	store_vector4(inst, machine, q);
		1088	}
		1089	break;
		1090	case OPCODE_LRP:
		1091	{
		1092	GLfloat a[4], b[4], c[4], result[4];
		1093	fetch_vector4(&inst->SrcReg[0], machine, a);
		1094	fetch_vector4(&inst->SrcReg[1], machine, b);
		1095	fetch_vector4(&inst->SrcReg[2], machine, c);
		1096	result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
		1097	result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
		1098	result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
		1099	result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
		1100	store_vector4(inst, machine, result);
		1101	if (DEBUG_PROG) {
		1102	printf("LRP (%g %g %g %g) = (%g %g %g %g), "
		1103	"(%g %g %g %g), (%g %g %g %g)\n",
		1104	result[0], result[1], result[2], result[3],
		1105	a[0], a[1], a[2], a[3],
		1106	b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
		1107	}
		1108	}
		1109	break;
		1110	case OPCODE_MAD:
		1111	{
		1112	GLfloat a[4], b[4], c[4], result[4];
		1113	fetch_vector4(&inst->SrcReg[0], machine, a);
		1114	fetch_vector4(&inst->SrcReg[1], machine, b);
		1115	fetch_vector4(&inst->SrcReg[2], machine, c);
		1116	result[0] = a[0] * b[0] + c[0];
		1117	result[1] = a[1] * b[1] + c[1];
		1118	result[2] = a[2] * b[2] + c[2];
		1119	result[3] = a[3] * b[3] + c[3];
		1120	store_vector4(inst, machine, result);
		1121	if (DEBUG_PROG) {
		1122	printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
		1123	"(%g %g %g %g) + (%g %g %g %g)\n",
		1124	result[0], result[1], result[2], result[3],
		1125	a[0], a[1], a[2], a[3],
		1126	b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
		1127	}
		1128	}
		1129	break;
		1130	case OPCODE_MAX:
		1131	{
		1132	GLfloat a[4], b[4], result[4];
		1133	fetch_vector4(&inst->SrcReg[0], machine, a);
		1134	fetch_vector4(&inst->SrcReg[1], machine, b);
		1135	result[0] = MAX2(a[0], b[0]);
		1136	result[1] = MAX2(a[1], b[1]);
		1137	result[2] = MAX2(a[2], b[2]);
		1138	result[3] = MAX2(a[3], b[3]);
		1139	store_vector4(inst, machine, result);
		1140	if (DEBUG_PROG) {
		1141	printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
		1142	result[0], result[1], result[2], result[3],
		1143	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
		1144	}
		1145	}
		1146	break;
		1147	case OPCODE_MIN:
		1148	{
		1149	GLfloat a[4], b[4], result[4];
		1150	fetch_vector4(&inst->SrcReg[0], machine, a);
		1151	fetch_vector4(&inst->SrcReg[1], machine, b);
		1152	result[0] = MIN2(a[0], b[0]);
		1153	result[1] = MIN2(a[1], b[1]);
		1154	result[2] = MIN2(a[2], b[2]);
		1155	result[3] = MIN2(a[3], b[3]);
		1156	store_vector4(inst, machine, result);
		1157	}
		1158	break;
		1159	case OPCODE_MOV:
		1160	{
		1161	GLfloat result[4];
		1162	fetch_vector4(&inst->SrcReg[0], machine, result);
		1163	store_vector4(inst, machine, result);
		1164	if (DEBUG_PROG) {
		1165	printf("MOV (%g %g %g %g)\n",
		1166	result[0], result[1], result[2], result[3]);
		1167	}
		1168	}
		1169	break;
		1170	case OPCODE_MUL:
		1171	{
		1172	GLfloat a[4], b[4], result[4];
		1173	fetch_vector4(&inst->SrcReg[0], machine, a);
		1174	fetch_vector4(&inst->SrcReg[1], machine, b);
		1175	result[0] = a[0] * b[0];
		1176	result[1] = a[1] * b[1];
		1177	result[2] = a[2] * b[2];
		1178	result[3] = a[3] * b[3];
		1179	store_vector4(inst, machine, result);
		1180	if (DEBUG_PROG) {
		1181	printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
		1182	result[0], result[1], result[2], result[3],
		1183	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
		1184	}
		1185	}
		1186	break;
		1187	case OPCODE_NOISE1:
		1188	{
		1189	GLfloat a[4], result[4];
		1190	fetch_vector1(&inst->SrcReg[0], machine, a);
		1191	result[0] =
		1192	result[1] =
		1193	result[2] =
		1194	result[3] = _mesa_noise1(a[0]);
		1195	store_vector4(inst, machine, result);
		1196	}
		1197	break;
		1198	case OPCODE_NOISE2:
		1199	{
		1200	GLfloat a[4], result[4];
		1201	fetch_vector4(&inst->SrcReg[0], machine, a);
		1202	result[0] =
		1203	result[1] =
		1204	result[2] = result[3] = _mesa_noise2(a[0], a[1]);
		1205	store_vector4(inst, machine, result);
		1206	}
		1207	break;
		1208	case OPCODE_NOISE3:
		1209	{
		1210	GLfloat a[4], result[4];
		1211	fetch_vector4(&inst->SrcReg[0], machine, a);
		1212	result[0] =
		1213	result[1] =
		1214	result[2] =
		1215	result[3] = _mesa_noise3(a[0], a[1], a[2]);
		1216	store_vector4(inst, machine, result);
		1217	}
		1218	break;
		1219	case OPCODE_NOISE4:
		1220	{
		1221	GLfloat a[4], result[4];
		1222	fetch_vector4(&inst->SrcReg[0], machine, a);
		1223	result[0] =
		1224	result[1] =
		1225	result[2] =
		1226	result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
		1227	store_vector4(inst, machine, result);
		1228	}
		1229	break;
		1230	case OPCODE_NOP:
		1231	break;
		1232	case OPCODE_NOT: /* bitwise NOT */
		1233	{
		1234	GLuint a[4], result[4];
		1235	fetch_vector4ui(&inst->SrcReg[0], machine, a);
		1236	result[0] = ~a[0];
		1237	result[1] = ~a[1];
		1238	result[2] = ~a[2];
		1239	result[3] = ~a[3];
		1240	store_vector4ui(inst, machine, result);
		1241	}
		1242	break;
		1243	case OPCODE_NRM3: /* 3-component normalization */
		1244	{
		1245	GLfloat a[4], result[4];
		1246	GLfloat tmp;
		1247	fetch_vector4(&inst->SrcReg[0], machine, a);
		1248	tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
		1249	if (tmp != 0.0F)
		1250	tmp = INV_SQRTF(tmp);
		1251	result[0] = tmp * a[0];
		1252	result[1] = tmp * a[1];
		1253	result[2] = tmp * a[2];
		1254	result[3] = 0.0; /* undefined, but prevent valgrind warnings */
		1255	store_vector4(inst, machine, result);
		1256	}
		1257	break;
		1258	case OPCODE_NRM4: /* 4-component normalization */
		1259	{
		1260	GLfloat a[4], result[4];
		1261	GLfloat tmp;
		1262	fetch_vector4(&inst->SrcReg[0], machine, a);
		1263	tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
		1264	if (tmp != 0.0F)
		1265	tmp = INV_SQRTF(tmp);
		1266	result[0] = tmp * a[0];
		1267	result[1] = tmp * a[1];
		1268	result[2] = tmp * a[2];
		1269	result[3] = tmp * a[3];
		1270	store_vector4(inst, machine, result);
		1271	}
		1272	break;
		1273	case OPCODE_OR: /* bitwise OR */
		1274	{
		1275	GLuint a[4], b[4], result[4];
		1276	fetch_vector4ui(&inst->SrcReg[0], machine, a);
		1277	fetch_vector4ui(&inst->SrcReg[1], machine, b);
		1278	result[0] = a[0] \| b[0];
		1279	result[1] = a[1] \| b[1];
		1280	result[2] = a[2] \| b[2];
		1281	result[3] = a[3] \| b[3];
		1282	store_vector4ui(inst, machine, result);
		1283	}
		1284	break;
		1285	case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
		1286	{
		1287	GLfloat a[4];
		1288	GLuint result[4];
		1289	GLhalfNV hx, hy;
		1290	fetch_vector4(&inst->SrcReg[0], machine, a);
		1291	hx = _mesa_float_to_half(a[0]);
		1292	hy = _mesa_float_to_half(a[1]);
		1293	result[0] =
		1294	result[1] =
		1295	result[2] =
		1296	result[3] = hx \| (hy << 16);
		1297	store_vector4ui(inst, machine, result);
		1298	}
		1299	break;
		1300	case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
		1301	{
		1302	GLfloat a[4];
		1303	GLuint result[4], usx, usy;
		1304	fetch_vector4(&inst->SrcReg[0], machine, a);
		1305	a[0] = CLAMP(a[0], 0.0F, 1.0F);
		1306	a[1] = CLAMP(a[1], 0.0F, 1.0F);
		1307	usx = IROUND(a[0] * 65535.0F);
		1308	usy = IROUND(a[1] * 65535.0F);
		1309	result[0] =
		1310	result[1] =
		1311	result[2] =
		1312	result[3] = usx \| (usy << 16);
		1313	store_vector4ui(inst, machine, result);
		1314	}
		1315	break;
		1316	case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
		1317	{
		1318	GLfloat a[4];
		1319	GLuint result[4], ubx, uby, ubz, ubw;
		1320	fetch_vector4(&inst->SrcReg[0], machine, a);
		1321	a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
		1322	a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
		1323	a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
		1324	a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
		1325	ubx = IROUND(127.0F * a[0] + 128.0F);
		1326	uby = IROUND(127.0F * a[1] + 128.0F);
		1327	ubz = IROUND(127.0F * a[2] + 128.0F);
		1328	ubw = IROUND(127.0F * a[3] + 128.0F);
		1329	result[0] =
		1330	result[1] =
		1331	result[2] =
		1332	result[3] = ubx \| (uby << 8) \| (ubz << 16) \| (ubw << 24);
		1333	store_vector4ui(inst, machine, result);
		1334	}
		1335	break;
		1336	case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
		1337	{
		1338	GLfloat a[4];
		1339	GLuint result[4], ubx, uby, ubz, ubw;
		1340	fetch_vector4(&inst->SrcReg[0], machine, a);
		1341	a[0] = CLAMP(a[0], 0.0F, 1.0F);
		1342	a[1] = CLAMP(a[1], 0.0F, 1.0F);
		1343	a[2] = CLAMP(a[2], 0.0F, 1.0F);
		1344	a[3] = CLAMP(a[3], 0.0F, 1.0F);
		1345	ubx = IROUND(255.0F * a[0]);
		1346	uby = IROUND(255.0F * a[1]);
		1347	ubz = IROUND(255.0F * a[2]);
		1348	ubw = IROUND(255.0F * a[3]);
		1349	result[0] =
		1350	result[1] =
		1351	result[2] =
		1352	result[3] = ubx \| (uby << 8) \| (ubz << 16) \| (ubw << 24);
		1353	store_vector4ui(inst, machine, result);
		1354	}
		1355	break;
		1356	case OPCODE_POW:
		1357	{
		1358	GLfloat a[4], b[4], result[4];
		1359	fetch_vector1(&inst->SrcReg[0], machine, a);
		1360	fetch_vector1(&inst->SrcReg[1], machine, b);
		1361	result[0] = result[1] = result[2] = result[3]
		1362	= (GLfloat) pow(a[0], b[0]);
		1363	store_vector4(inst, machine, result);
		1364	}
		1365	break;
		1366	case OPCODE_RCC: /* clamped reciprocal */
		1367	{
		1368	const float largest = 1.884467e+19, smallest = 5.42101e-20;
		1369	GLfloat a[4], r, result[4];
		1370	fetch_vector1(&inst->SrcReg[0], machine, a);
		1371	if (DEBUG_PROG) {
		1372	if (a[0] == 0)
		1373	printf("RCC(0)\n");
		1374	else if (IS_INF_OR_NAN(a[0]))
		1375	printf("RCC(inf)\n");
		1376	}
		1377	if (a[0] == 1.0F) {
		1378	r = 1.0F;
		1379	}
		1380	else {
		1381	r = 1.0F / a[0];
		1382	}
		1383	if (positive(r)) {
		1384	if (r > largest) {
		1385	r = largest;
		1386	}
		1387	else if (r < smallest) {
		1388	r = smallest;
		1389	}
		1390	}
		1391	else {
		1392	if (r < -largest) {
		1393	r = -largest;
		1394	}
		1395	else if (r > -smallest) {
		1396	r = -smallest;
		1397	}
		1398	}
		1399	result[0] = result[1] = result[2] = result[3] = r;
		1400	store_vector4(inst, machine, result);
		1401	}
		1402	break;
		1403
		1404	case OPCODE_RCP:
		1405	{
		1406	GLfloat a[4], result[4];
		1407	fetch_vector1(&inst->SrcReg[0], machine, a);
		1408	if (DEBUG_PROG) {
		1409	if (a[0] == 0)
		1410	printf("RCP(0)\n");
		1411	else if (IS_INF_OR_NAN(a[0]))
		1412	printf("RCP(inf)\n");
		1413	}
		1414	result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
		1415	store_vector4(inst, machine, result);
		1416	}
		1417	break;
		1418	case OPCODE_RET: /* return from subroutine (conditional) */
		1419	if (eval_condition(machine, inst)) {
		1420	if (machine->StackDepth == 0) {
		1421	return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
		1422	}
		1423	/* subtract one because of pc++ in the for loop */
		1424	pc = machine->CallStack[--machine->StackDepth] - 1;
		1425	}
		1426	break;
		1427	case OPCODE_RFL: /* reflection vector */
		1428	{
		1429	GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
		1430	fetch_vector4(&inst->SrcReg[0], machine, axis);
		1431	fetch_vector4(&inst->SrcReg[1], machine, dir);
		1432	tmpW = DOT3(axis, axis);
		1433	tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
		1434	result[0] = tmpX * axis[0] - dir[0];
		1435	result[1] = tmpX * axis[1] - dir[1];
		1436	result[2] = tmpX * axis[2] - dir[2];
		1437	/* result[3] is never written! XXX enforce in parser! */
		1438	store_vector4(inst, machine, result);
		1439	}
		1440	break;
		1441	case OPCODE_RSQ: /* 1 / sqrt() */
		1442	{
		1443	GLfloat a[4], result[4];
		1444	fetch_vector1(&inst->SrcReg[0], machine, a);
		1445	a[0] = FABSF(a[0]);
		1446	result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
		1447	store_vector4(inst, machine, result);
		1448	if (DEBUG_PROG) {
		1449	printf("RSQ %g = 1/sqrt(\|%g\|)\n", result[0], a[0]);
		1450	}
		1451	}
		1452	break;
		1453	case OPCODE_SCS: /* sine and cos */
		1454	{
		1455	GLfloat a[4], result[4];
		1456	fetch_vector1(&inst->SrcReg[0], machine, a);
		1457	result[0] = (GLfloat) cos(a[0]);
		1458	result[1] = (GLfloat) sin(a[0]);
		1459	result[2] = 0.0; /* undefined! */
		1460	result[3] = 0.0; /* undefined! */
		1461	store_vector4(inst, machine, result);
		1462	}
		1463	break;
		1464	case OPCODE_SEQ: /* set on equal */
		1465	{
		1466	GLfloat a[4], b[4], result[4];
		1467	fetch_vector4(&inst->SrcReg[0], machine, a);
		1468	fetch_vector4(&inst->SrcReg[1], machine, b);
		1469	result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
		1470	result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
		1471	result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
		1472	result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
		1473	store_vector4(inst, machine, result);
		1474	if (DEBUG_PROG) {
		1475	printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
		1476	result[0], result[1], result[2], result[3],
		1477	a[0], a[1], a[2], a[3],
		1478	b[0], b[1], b[2], b[3]);
		1479	}
		1480	}
		1481	break;
		1482	case OPCODE_SFL: /* set false, operands ignored */
		1483	{
		1484	static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
		1485	store_vector4(inst, machine, result);
		1486	}
		1487	break;
		1488	case OPCODE_SGE: /* set on greater or equal */
		1489	{
		1490	GLfloat a[4], b[4], result[4];
		1491	fetch_vector4(&inst->SrcReg[0], machine, a);
		1492	fetch_vector4(&inst->SrcReg[1], machine, b);
		1493	result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
		1494	result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
		1495	result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
		1496	result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
		1497	store_vector4(inst, machine, result);
		1498	if (DEBUG_PROG) {
		1499	printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
		1500	result[0], result[1], result[2], result[3],
		1501	a[0], a[1], a[2], a[3],
		1502	b[0], b[1], b[2], b[3]);
		1503	}
		1504	}
		1505	break;
		1506	case OPCODE_SGT: /* set on greater */
		1507	{
		1508	GLfloat a[4], b[4], result[4];
		1509	fetch_vector4(&inst->SrcReg[0], machine, a);
		1510	fetch_vector4(&inst->SrcReg[1], machine, b);
		1511	result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
		1512	result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
		1513	result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
		1514	result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
		1515	store_vector4(inst, machine, result);
		1516	if (DEBUG_PROG) {
		1517	printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
		1518	result[0], result[1], result[2], result[3],
		1519	a[0], a[1], a[2], a[3],
		1520	b[0], b[1], b[2], b[3]);
		1521	}
		1522	}
		1523	break;
		1524	case OPCODE_SIN:
		1525	{
		1526	GLfloat a[4], result[4];
		1527	fetch_vector1(&inst->SrcReg[0], machine, a);
		1528	result[0] = result[1] = result[2] = result[3]
		1529	= (GLfloat) sin(a[0]);
		1530	store_vector4(inst, machine, result);
		1531	}
		1532	break;
		1533	case OPCODE_SLE: /* set on less or equal */
		1534	{
		1535	GLfloat a[4], b[4], result[4];
		1536	fetch_vector4(&inst->SrcReg[0], machine, a);
		1537	fetch_vector4(&inst->SrcReg[1], machine, b);
		1538	result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
		1539	result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
		1540	result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
		1541	result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
		1542	store_vector4(inst, machine, result);
		1543	if (DEBUG_PROG) {
		1544	printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
		1545	result[0], result[1], result[2], result[3],
		1546	a[0], a[1], a[2], a[3],
		1547	b[0], b[1], b[2], b[3]);
		1548	}
		1549	}
		1550	break;
		1551	case OPCODE_SLT: /* set on less */
		1552	{
		1553	GLfloat a[4], b[4], result[4];
		1554	fetch_vector4(&inst->SrcReg[0], machine, a);
		1555	fetch_vector4(&inst->SrcReg[1], machine, b);
		1556	result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
		1557	result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
		1558	result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
		1559	result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
		1560	store_vector4(inst, machine, result);
		1561	if (DEBUG_PROG) {
		1562	printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
		1563	result[0], result[1], result[2], result[3],
		1564	a[0], a[1], a[2], a[3],
		1565	b[0], b[1], b[2], b[3]);
		1566	}
		1567	}
		1568	break;
		1569	case OPCODE_SNE: /* set on not equal */
		1570	{
		1571	GLfloat a[4], b[4], result[4];
		1572	fetch_vector4(&inst->SrcReg[0], machine, a);
		1573	fetch_vector4(&inst->SrcReg[1], machine, b);
		1574	result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
		1575	result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
		1576	result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
		1577	result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
		1578	store_vector4(inst, machine, result);
		1579	if (DEBUG_PROG) {
		1580	printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
		1581	result[0], result[1], result[2], result[3],
		1582	a[0], a[1], a[2], a[3],
		1583	b[0], b[1], b[2], b[3]);
		1584	}
		1585	}
		1586	break;
		1587	case OPCODE_SSG: /* set sign (-1, 0 or +1) */
		1588	{
		1589	GLfloat a[4], result[4];
		1590	fetch_vector4(&inst->SrcReg[0], machine, a);
		1591	result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
		1592	result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
		1593	result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
		1594	result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
		1595	store_vector4(inst, machine, result);
		1596	}
		1597	break;
		1598	case OPCODE_STR: /* set true, operands ignored */
		1599	{
		1600	static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
		1601	store_vector4(inst, machine, result);
		1602	}
		1603	break;
		1604	case OPCODE_SUB:
		1605	{
		1606	GLfloat a[4], b[4], result[4];
		1607	fetch_vector4(&inst->SrcReg[0], machine, a);
		1608	fetch_vector4(&inst->SrcReg[1], machine, b);
		1609	result[0] = a[0] - b[0];
		1610	result[1] = a[1] - b[1];
		1611	result[2] = a[2] - b[2];
		1612	result[3] = a[3] - b[3];
		1613	store_vector4(inst, machine, result);
		1614	if (DEBUG_PROG) {
		1615	printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
		1616	result[0], result[1], result[2], result[3],
		1617	a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
		1618	}
		1619	}
		1620	break;
		1621	case OPCODE_SWZ: /* extended swizzle */
		1622	{
		1623	const struct prog_src_register *source = &inst->SrcReg[0];
		1624	const GLfloat *src = get_src_register_pointer(source, machine);
		1625	GLfloat result[4];
		1626	GLuint i;
		1627	for (i = 0; i < 4; i++) {
		1628	const GLuint swz = GET_SWZ(source->Swizzle, i);
		1629	if (swz == SWIZZLE_ZERO)
		1630	result[i] = 0.0;
		1631	else if (swz == SWIZZLE_ONE)
		1632	result[i] = 1.0;
		1633	else {
		1634	ASSERT(swz >= 0);
		1635	ASSERT(swz <= 3);
		1636	result[i] = src[swz];
		1637	}
		1638	if (source->Negate & (1 << i))
		1639	result[i] = -result[i];
		1640	}
		1641	store_vector4(inst, machine, result);
		1642	}
		1643	break;
		1644	case OPCODE_TEX: /* Both ARB and NV frag prog */
		1645	/* Simple texel lookup */
		1646	{
		1647	GLfloat texcoord[4], color[4];
		1648	fetch_vector4(&inst->SrcReg[0], machine, texcoord);
		1649
		1650	fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
		1651
		1652	if (DEBUG_PROG) {
		1653	printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
		1654	color[0], color[1], color[2], color[3],
		1655	inst->TexSrcUnit,
		1656	texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
		1657	}
		1658	store_vector4(inst, machine, color);
		1659	}
		1660	break;
		1661	case OPCODE_TXB: /* GL_ARB_fragment_program only */
		1662	/* Texel lookup with LOD bias */
		1663	{
		1664	GLfloat texcoord[4], color[4], lodBias;
		1665
		1666	fetch_vector4(&inst->SrcReg[0], machine, texcoord);
		1667
		1668	/* texcoord[3] is the bias to add to lambda */
		1669	lodBias = texcoord[3];
		1670
		1671	fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
		1672
		1673	store_vector4(inst, machine, color);
		1674	}
		1675	break;
		1676	case OPCODE_TXD: /* GL_NV_fragment_program only */
		1677	/* Texture lookup w/ partial derivatives for LOD */
		1678	{
		1679	GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
		1680	fetch_vector4(&inst->SrcReg[0], machine, texcoord);
		1681	fetch_vector4(&inst->SrcReg[1], machine, dtdx);
		1682	fetch_vector4(&inst->SrcReg[2], machine, dtdy);
		1683	machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
		1684	0.0, /* lodBias */
		1685	inst->TexSrcUnit, color);
		1686	store_vector4(inst, machine, color);
		1687	}
		1688	break;
		1689	case OPCODE_TXL:
		1690	/* Texel lookup with explicit LOD */
		1691	{
		1692	GLfloat texcoord[4], color[4], lod;
		1693
		1694	fetch_vector4(&inst->SrcReg[0], machine, texcoord);
		1695
		1696	/* texcoord[3] is the LOD */
		1697	lod = texcoord[3];
		1698
		1699	machine->FetchTexelLod(ctx, texcoord, lod,
		1700	machine->Samplers[inst->TexSrcUnit], color);
		1701
		1702	store_vector4(inst, machine, color);
		1703	}
		1704	break;
		1705	case OPCODE_TXP: /* GL_ARB_fragment_program only */
		1706	/* Texture lookup w/ projective divide */
		1707	{
		1708	GLfloat texcoord[4], color[4];
		1709
		1710	fetch_vector4(&inst->SrcReg[0], machine, texcoord);
		1711	/* Not so sure about this test - if texcoord[3] is
		1712	* zero, we'd probably be fine except for an ASSERT in
		1713	* IROUND_POS() which gets triggered by the inf values created.
		1714	*/
		1715	if (texcoord[3] != 0.0) {
		1716	texcoord[0] /= texcoord[3];
		1717	texcoord[1] /= texcoord[3];
		1718	texcoord[2] /= texcoord[3];
		1719	}
		1720
		1721	fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
		1722
		1723	store_vector4(inst, machine, color);
		1724	}
		1725	break;
		1726	case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
		1727	/* Texture lookup w/ projective divide, as above, but do not
		1728	* do the divide by w if sampling from a cube map.
		1729	*/
		1730	{
		1731	GLfloat texcoord[4], color[4];
		1732
		1733	fetch_vector4(&inst->SrcReg[0], machine, texcoord);
		1734	if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
		1735	texcoord[3] != 0.0) {
		1736	texcoord[0] /= texcoord[3];
		1737	texcoord[1] /= texcoord[3];
		1738	texcoord[2] /= texcoord[3];
		1739	}
		1740
		1741	fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
		1742
		1743	store_vector4(inst, machine, color);
		1744	}
		1745	break;
		1746	case OPCODE_TRUNC: /* truncate toward zero */
		1747	{
		1748	GLfloat a[4], result[4];
		1749	fetch_vector4(&inst->SrcReg[0], machine, a);
		1750	result[0] = (GLfloat) (GLint) a[0];
		1751	result[1] = (GLfloat) (GLint) a[1];
		1752	result[2] = (GLfloat) (GLint) a[2];
		1753	result[3] = (GLfloat) (GLint) a[3];
		1754	store_vector4(inst, machine, result);
		1755	}
		1756	break;
		1757	case OPCODE_UP2H: /* unpack two 16-bit floats */
		1758	{
		1759	const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
		1760	GLfloat result[4];
		1761	GLushort hx, hy;
		1762	hx = raw & 0xffff;
		1763	hy = raw >> 16;
		1764	result[0] = result[2] = _mesa_half_to_float(hx);
		1765	result[1] = result[3] = _mesa_half_to_float(hy);
		1766	store_vector4(inst, machine, result);
		1767	}
		1768	break;
		1769	case OPCODE_UP2US: /* unpack two GLushorts */
		1770	{
		1771	const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
		1772	GLfloat result[4];
		1773	GLushort usx, usy;
		1774	usx = raw & 0xffff;
		1775	usy = raw >> 16;
		1776	result[0] = result[2] = usx * (1.0f / 65535.0f);
		1777	result[1] = result[3] = usy * (1.0f / 65535.0f);
		1778	store_vector4(inst, machine, result);
		1779	}
		1780	break;
		1781	case OPCODE_UP4B: /* unpack four GLbytes */
		1782	{
		1783	const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
		1784	GLfloat result[4];
		1785	result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
		1786	result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
		1787	result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
		1788	result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
		1789	store_vector4(inst, machine, result);
		1790	}
		1791	break;
		1792	case OPCODE_UP4UB: /* unpack four GLubytes */
		1793	{
		1794	const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
		1795	GLfloat result[4];
		1796	result[0] = ((raw >> 0) & 0xff) / 255.0F;
		1797	result[1] = ((raw >> 8) & 0xff) / 255.0F;
		1798	result[2] = ((raw >> 16) & 0xff) / 255.0F;
		1799	result[3] = ((raw >> 24) & 0xff) / 255.0F;
		1800	store_vector4(inst, machine, result);
		1801	}
		1802	break;
		1803	case OPCODE_XOR: /* bitwise XOR */
		1804	{
		1805	GLuint a[4], b[4], result[4];
		1806	fetch_vector4ui(&inst->SrcReg[0], machine, a);
		1807	fetch_vector4ui(&inst->SrcReg[1], machine, b);
		1808	result[0] = a[0] ^ b[0];
		1809	result[1] = a[1] ^ b[1];
		1810	result[2] = a[2] ^ b[2];
		1811	result[3] = a[3] ^ b[3];
		1812	store_vector4ui(inst, machine, result);
		1813	}
		1814	break;
		1815	case OPCODE_XPD: /* cross product */
		1816	{
		1817	GLfloat a[4], b[4], result[4];
		1818	fetch_vector4(&inst->SrcReg[0], machine, a);
		1819	fetch_vector4(&inst->SrcReg[1], machine, b);
		1820	result[0] = a[1] * b[2] - a[2] * b[1];
		1821	result[1] = a[2] * b[0] - a[0] * b[2];
		1822	result[2] = a[0] * b[1] - a[1] * b[0];
		1823	result[3] = 1.0;
		1824	store_vector4(inst, machine, result);
		1825	if (DEBUG_PROG) {
		1826	printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
		1827	result[0], result[1], result[2], result[3],
		1828	a[0], a[1], a[2], b[0], b[1], b[2]);
		1829	}
		1830	}
		1831	break;
		1832	case OPCODE_X2D: /* 2-D matrix transform */
		1833	{
		1834	GLfloat a[4], b[4], c[4], result[4];
		1835	fetch_vector4(&inst->SrcReg[0], machine, a);
		1836	fetch_vector4(&inst->SrcReg[1], machine, b);
		1837	fetch_vector4(&inst->SrcReg[2], machine, c);
		1838	result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
		1839	result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
		1840	result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
		1841	result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
		1842	store_vector4(inst, machine, result);
		1843	}
		1844	break;
		1845	case OPCODE_PRINT:
		1846	{
		1847	if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
		1848	GLfloat a[4];
		1849	fetch_vector4(&inst->SrcReg[0], machine, a);
		1850	printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
		1851	a[0], a[1], a[2], a[3]);
		1852	}
		1853	else {
		1854	printf("%s\n", (const char *) inst->Data);
		1855	}
		1856	}
		1857	break;
		1858	case OPCODE_END:
		1859	return GL_TRUE;
		1860	default:
		1861	_mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
		1862	inst->Opcode);
		1863	return GL_TRUE; /* return value doesn't matter */
		1864	}
		1865
		1866	numExec++;
		1867	if (numExec > maxExec) {
		1868	static GLboolean reported = GL_FALSE;
		1869	if (!reported) {
		1870	_mesa_problem(ctx, "Infinite loop detected in fragment program");
		1871	reported = GL_TRUE;
		1872	}
		1873	return GL_TRUE;
		1874	}
		1875
		1876	} /* for pc */
		1877
		1878	return GL_TRUE;
		1879	}

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/Mesa/src/mesa/program/prog_execute.c – Rev 4217