WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/src/mesa/state_tracker/st_glsl_to_tgsi.cpp

Rev	Author	Line No.	Line
4358	Serge	1	/*
		2	* Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
		3	* Copyright (C) 2008 VMware, Inc. All Rights Reserved.
		4	* Copyright © 2010 Intel Corporation
		5	* Copyright © 2011 Bryan Cain
		6	*
		7	* Permission is hereby granted, free of charge, to any person obtaining a
		8	* copy of this software and associated documentation files (the "Software"),
		9	* to deal in the Software without restriction, including without limitation
		10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		11	* and/or sell copies of the Software, and to permit persons to whom the
		12	* Software is furnished to do so, subject to the following conditions:
		13	*
		14	* The above copyright notice and this permission notice (including the next
		15	* paragraph) shall be included in all copies or substantial portions of the
		16	* Software.
		17	*
		18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		19	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		20	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		21	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		22	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		23	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
		24	* DEALINGS IN THE SOFTWARE.
		25	*/
		26
		27	/**
		28	* \file glsl_to_tgsi.cpp
		29	*
		30	* Translate GLSL IR to TGSI.
		31	*/
		32
		33	#include
		34	#include "main/compiler.h"
		35	#include "ir.h"
		36	#include "ir_visitor.h"
		37	#include "ir_expression_flattening.h"
		38	#include "glsl_types.h"
		39	#include "glsl_parser_extras.h"
		40	#include "../glsl/program.h"
		41	#include "ir_optimization.h"
		42	#include "ast.h"
		43
		44	#include "main/mtypes.h"
		45	#include "main/shaderobj.h"
		46	#include "program/hash_table.h"
		47
		48	extern "C" {
		49	#include "main/shaderapi.h"
		50	#include "main/uniforms.h"
		51	#include "program/prog_instruction.h"
		52	#include "program/prog_optimize.h"
		53	#include "program/prog_print.h"
		54	#include "program/program.h"
		55	#include "program/prog_parameter.h"
		56	#include "program/sampler.h"
		57
		58	#include "pipe/p_compiler.h"
		59	#include "pipe/p_context.h"
		60	#include "pipe/p_screen.h"
		61	#include "pipe/p_shader_tokens.h"
		62	#include "pipe/p_state.h"
		63	#include "util/u_math.h"
		64	#include "tgsi/tgsi_ureg.h"
		65	#include "tgsi/tgsi_info.h"
		66	#include "st_context.h"
		67	#include "st_program.h"
		68	#include "st_glsl_to_tgsi.h"
		69	#include "st_mesa_to_tgsi.h"
		70	}
		71
		/**
		 * Register-file value used by this file for immediates; it reuses
		 * PROGRAM_FILE_MAX.
		 */
		#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX

		/** Bitmask with one bit set per constant-like register file. */
		#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
		                           (1 << PROGRAM_ENV_PARAM) |    \
		                           (1 << PROGRAM_STATE_VAR) |    \
		                           (1 << PROGRAM_CONSTANT) |     \
		                           (1 << PROGRAM_UNIFORM))

		/**
		 * Maximum number of temporary registers.
		 *
		 * It is too big for stack allocated arrays -- it will cause stack overflow on
		 * Windows and likely Mac OS X.
		 */
		#define MAX_TEMPS         4096

		/**
		 * Maximum number of arrays
		 */
		#define MAX_ARRAYS        256

		/* will be 4 for GLSL 4.00 */
		#define MAX_GLSL_TEXTURE_OFFSET 1

		/* Forward declarations: the two register classes convert into each other. */
		class st_src_reg;
		class st_dst_reg;

		static int swizzle_for_size(int size);
		99
		100	/**
		101	* This struct is a corresponding struct to TGSI ureg_src.
		102	*/
		103	class st_src_reg {
		104	public:
		105	st_src_reg(gl_register_file file, int index, const glsl_type *type)
		106	{
		107	this->file = file;
		108	this->index = index;
		109	if (type && (type->is_scalar() \|\| type->is_vector() \|\| type->is_matrix()))
		110	this->swizzle = swizzle_for_size(type->vector_elements);
		111	else
		112	this->swizzle = SWIZZLE_XYZW;
		113	this->negate = 0;
		114	this->index2D = 0;
		115	this->type = type ? type->base_type : GLSL_TYPE_ERROR;
		116	this->reladdr = NULL;
		117	}
		118
		119	st_src_reg(gl_register_file file, int index, int type)
		120	{
		121	this->type = type;
		122	this->file = file;
		123	this->index = index;
		124	this->index2D = 0;
		125	this->swizzle = SWIZZLE_XYZW;
		126	this->negate = 0;
		127	this->reladdr = NULL;
		128	}
		129
		130	st_src_reg(gl_register_file file, int index, int type, int index2D)
		131	{
		132	this->type = type;
		133	this->file = file;
		134	this->index = index;
		135	this->index2D = index2D;
		136	this->swizzle = SWIZZLE_XYZW;
		137	this->negate = 0;
		138	this->reladdr = NULL;
		139	}
		140
		141	st_src_reg()
		142	{
		143	this->type = GLSL_TYPE_ERROR;
		144	this->file = PROGRAM_UNDEFINED;
		145	this->index = 0;
		146	this->index2D = 0;
		147	this->swizzle = 0;
		148	this->negate = 0;
		149	this->reladdr = NULL;
		150	}
		151
		152	explicit st_src_reg(st_dst_reg reg);
		153
		154	gl_register_file file; /*< PROGRAM_ from Mesa */
		155	int index; /*< temporary index, VERT_ATTRIB_, VARYING_SLOT_, etc. /
		156	int index2D;
		157	GLuint swizzle; /*< SWIZZLE_XYZWONEZERO swizzles from Mesa. /
		158	int negate; /*< NEGATE_XYZW mask from mesa /
		159	int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
		160	/** Register index should be offset by the integer in this reg. */
		161	st_src_reg *reladdr;
		162	};
		163
		164	class st_dst_reg {
		165	public:
		166	st_dst_reg(gl_register_file file, int writemask, int type)
		167	{
		168	this->file = file;
		169	this->index = 0;
		170	this->writemask = writemask;
		171	this->cond_mask = COND_TR;
		172	this->reladdr = NULL;
		173	this->type = type;
		174	}
		175
		176	st_dst_reg()
		177	{
		178	this->type = GLSL_TYPE_ERROR;
		179	this->file = PROGRAM_UNDEFINED;
		180	this->index = 0;
		181	this->writemask = 0;
		182	this->cond_mask = COND_TR;
		183	this->reladdr = NULL;
		184	}
		185
		186	explicit st_dst_reg(st_src_reg reg);
		187
		188	gl_register_file file; /*< PROGRAM_ from Mesa */
		189	int index; /*< temporary index, VERT_ATTRIB_, VARYING_SLOT_, etc. /
		190	int writemask; /*< Bitfield of WRITEMASK_[XYZW] /
		191	GLuint cond_mask:4;
		192	int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
		193	/** Register index should be offset by the integer in this reg. */
		194	st_src_reg *reladdr;
		195	};
		196
		197	st_src_reg::st_src_reg(st_dst_reg reg)
		198	{
		199	this->type = reg.type;
		200	this->file = reg.file;
		201	this->index = reg.index;
		202	this->swizzle = SWIZZLE_XYZW;
		203	this->negate = 0;
		204	this->reladdr = reg.reladdr;
		205	this->index2D = 0;
		206	}
		207
		208	st_dst_reg::st_dst_reg(st_src_reg reg)
		209	{
		210	this->type = reg.type;
		211	this->file = reg.file;
		212	this->index = reg.index;
		213	this->writemask = WRITEMASK_XYZW;
		214	this->cond_mask = COND_TR;
		215	this->reladdr = reg.reladdr;
		216	}
		217
		218	class glsl_to_tgsi_instruction : public exec_node {
		219	public:
		220	/* Callers of this ralloc-based new need not call delete. It's
		221	* easier to just ralloc_free 'ctx' (or any of its ancestors). */
		222	static void* operator new(size_t size, void *ctx)
		223	{
		224	void *node;
		225
		226	node = rzalloc_size(ctx, size);
		227	assert(node != NULL);
		228
		229	return node;
		230	}
		231
		232	unsigned op;
		233	st_dst_reg dst;
		234	st_src_reg src[3];
		235	/** Pointer to the ir source this tree came from for debugging */
		236	ir_instruction *ir;
		237	GLboolean cond_update;
		238	bool saturate;
		239	int sampler; /*< sampler index /
		240	int tex_target; /*< One of TEXTURE__INDEX */
		241	GLboolean tex_shadow;
		242	struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
		243	unsigned tex_offset_num_offset;
		244	int dead_mask; /*< Used in dead code elimination /
		245
		246	class function_entry function; / Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
		247	};
		248
		249	class variable_storage : public exec_node {
		250	public:
		251	variable_storage(ir_variable *var, gl_register_file file, int index)
		252	: file(file), index(index), var(var)
		253	{
		254	/* empty */
		255	}
		256
		257	gl_register_file file;
		258	int index;
		259	ir_variable var; / variable that maps to this, if any */
		260	};
		261
		262	class immediate_storage : public exec_node {
		263	public:
		264	immediate_storage(gl_constant_value *values, int size, int type)
		265	{
		266	memcpy(this->values, values, size * sizeof(gl_constant_value));
		267	this->size = size;
		268	this->type = type;
		269	}
		270
		271	gl_constant_value values[4];
		272	int size; /*< Number of components (1-4) /
		273	int type; /*< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT /
		274	};
		275
		276	class function_entry : public exec_node {
		277	public:
		278	ir_function_signature *sig;
		279
		280	/**
		281	* identifier of this function signature used by the program.
		282	*
		283	* At the point that TGSI instructions for function calls are
		284	* generated, we don't know the address of the first instruction of
		285	* the function body. So we make the BranchTarget that is called a
		286	* small integer and rewrite them during set_branchtargets().
		287	*/
		288	int sig_id;
		289
		290	/**
		291	* Pointer to first instruction of the function body.
		292	*
		293	* Set during function body emits after main() is processed.
		294	*/
		295	glsl_to_tgsi_instruction *bgn_inst;
		296
		297	/**
		298	* Index of the first instruction of the function body in actual TGSI.
		299	*
		300	* Set after conversion from glsl_to_tgsi_instruction to TGSI.
		301	*/
		302	int inst;
		303
		304	/** Storage for the return value. */
		305	st_src_reg return_reg;
		306	};
		307
		308	struct glsl_to_tgsi_visitor : public ir_visitor {
		309	public:
		310	glsl_to_tgsi_visitor();
		311	~glsl_to_tgsi_visitor();
		312
		313	function_entry *current_function;
		314
		315	struct gl_context *ctx;
		316	struct gl_program *prog;
		317	struct gl_shader_program *shader_program;
		318	struct gl_shader_compiler_options *options;
		319
		320	int next_temp;
		321
		322	unsigned array_sizes[MAX_ARRAYS];
		323	unsigned next_array;
		324
		325	int num_address_regs;
		326	int samplers_used;
		327	bool indirect_addr_consts;
		328
		329	int glsl_version;
		330	bool native_integers;
		331	bool have_sqrt;
		332
		333	variable_storage find_variable_storage(ir_variable var);
		334
		335	int add_constant(gl_register_file file, gl_constant_value values[4],
		336	int size, int datatype, GLuint *swizzle_out);
		337
		338	function_entry get_function_signature(ir_function_signature sig);
		339
		340	st_src_reg get_temp(const glsl_type *type);
		341	void reladdr_to_temp(ir_instruction ir, st_src_reg reg, int *num_reladdr);
		342
		343	st_src_reg st_src_reg_for_float(float val);
		344	st_src_reg st_src_reg_for_int(int val);
		345	st_src_reg st_src_reg_for_type(int type, int val);
		346
		347	/**
		348	* \name Visit methods
		349	*
		350	* As typical for the visitor pattern, there must be one \c visit method for
		351	* each concrete subclass of \c ir_instruction. Virtual base classes within
		352	* the hierarchy should not have \c visit methods.
		353	*/
		354	/@{/
		355	virtual void visit(ir_variable *);
		356	virtual void visit(ir_loop *);
		357	virtual void visit(ir_loop_jump *);
		358	virtual void visit(ir_function_signature *);
		359	virtual void visit(ir_function *);
		360	virtual void visit(ir_expression *);
		361	virtual void visit(ir_swizzle *);
		362	virtual void visit(ir_dereference_variable *);
		363	virtual void visit(ir_dereference_array *);
		364	virtual void visit(ir_dereference_record *);
		365	virtual void visit(ir_assignment *);
		366	virtual void visit(ir_constant *);
		367	virtual void visit(ir_call *);
		368	virtual void visit(ir_return *);
		369	virtual void visit(ir_discard *);
		370	virtual void visit(ir_texture *);
		371	virtual void visit(ir_if *);
		372	/@}/
		373
		374	st_src_reg result;
		375
		376	/** List of variable_storage */
		377	exec_list variables;
		378
		379	/** List of immediate_storage */
		380	exec_list immediates;
		381	unsigned num_immediates;
		382
		383	/** List of function_entry */
		384	exec_list function_signatures;
		385	int next_signature_id;
		386
		387	/** List of glsl_to_tgsi_instruction */
		388	exec_list instructions;
		389
		390	glsl_to_tgsi_instruction emit(ir_instruction ir, unsigned op);
		391
		392	glsl_to_tgsi_instruction emit(ir_instruction ir, unsigned op,
		393	st_dst_reg dst, st_src_reg src0);
		394
		395	glsl_to_tgsi_instruction emit(ir_instruction ir, unsigned op,
		396	st_dst_reg dst, st_src_reg src0, st_src_reg src1);
		397
		398	glsl_to_tgsi_instruction emit(ir_instruction ir, unsigned op,
		399	st_dst_reg dst,
		400	st_src_reg src0, st_src_reg src1, st_src_reg src2);
		401
		402	unsigned get_opcode(ir_instruction *ir, unsigned op,
		403	st_dst_reg dst,
		404	st_src_reg src0, st_src_reg src1);
		405
		406	/**
		407	* Emit the correct dot-product instruction for the type of arguments
		408	*/
		409	glsl_to_tgsi_instruction emit_dp(ir_instruction ir,
		410	st_dst_reg dst,
		411	st_src_reg src0,
		412	st_src_reg src1,
		413	unsigned elements);
		414
		415	void emit_scalar(ir_instruction *ir, unsigned op,
		416	st_dst_reg dst, st_src_reg src0);
		417
		418	void emit_scalar(ir_instruction *ir, unsigned op,
		419	st_dst_reg dst, st_src_reg src0, st_src_reg src1);
		420
		421	void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
		422
		423	void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
		424
		425	void emit_scs(ir_instruction *ir, unsigned op,
		426	st_dst_reg dst, const st_src_reg &src);
		427
		428	bool try_emit_mad(ir_expression *ir,
		429	int mul_operand);
		430	bool try_emit_mad_for_and_not(ir_expression *ir,
		431	int mul_operand);
		432	bool try_emit_sat(ir_expression *ir);
		433
		434	void emit_swz(ir_expression *ir);
		435
		436	bool process_move_condition(ir_rvalue *ir);
		437
		438	void simplify_cmp(void);
		439
		440	void rename_temp_register(int index, int new_index);
		441	int get_first_temp_read(int index);
		442	int get_first_temp_write(int index);
		443	int get_last_temp_read(int index);
		444	int get_last_temp_write(int index);
		445
		446	void copy_propagate(void);
		447	void eliminate_dead_code(void);
		448	int eliminate_dead_code_advanced(void);
		449	void merge_registers(void);
		450	void renumber_registers(void);
		451
		452	void emit_block_mov(ir_assignment ir, const struct glsl_type type,
		453	st_dst_reg l, st_src_reg r);
		454
		455	void *mem_ctx;
		456	};
		457
		458	static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
		459
		460	static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
		461
		462	static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
		463
		464	static void
		465	fail_link(struct gl_shader_program prog, const char fmt, ...) PRINTFLIKE(2, 3);
		466
		467	static void
		468	fail_link(struct gl_shader_program prog, const char fmt, ...)
		469	{
		470	va_list args;
		471	va_start(args, fmt);
		472	ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
		473	va_end(args);
		474
		475	prog->LinkStatus = GL_FALSE;
		476	}
		477
		478	static int
		479	swizzle_for_size(int size)
		480	{
		481	int size_swizzles[4] = {
		482	MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
		483	MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
		484	MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
		485	MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
		486	};
		487
		488	assert((size >= 1) && (size <= 4));
		489	return size_swizzles[size - 1];
		490	}
		491
		492	static bool
		493	is_tex_instruction(unsigned opcode)
		494	{
		495	const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
		496	return info->is_tex;
		497	}
		498
		499	static unsigned
		500	num_inst_dst_regs(unsigned opcode)
		501	{
		502	const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
		503	return info->num_dst;
		504	}
		505
		506	static unsigned
		507	num_inst_src_regs(unsigned opcode)
		508	{
		509	const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
		510	return info->is_tex ? info->num_src - 1 : info->num_src;
		511	}
		512
		513	glsl_to_tgsi_instruction *
		514	glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
		515	st_dst_reg dst,
		516	st_src_reg src0, st_src_reg src1, st_src_reg src2)
		517	{
		518	glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
		519	int num_reladdr = 0, i;
		520
		521	op = get_opcode(ir, op, dst, src0, src1);
		522
		523	/* If we have to do relative addressing, we want to load the ARL
		524	* reg directly for one of the regs, and preload the other reladdr
		525	* sources into temps.
		526	*/
		527	num_reladdr += dst.reladdr != NULL;
		528	num_reladdr += src0.reladdr != NULL;
		529	num_reladdr += src1.reladdr != NULL;
		530	num_reladdr += src2.reladdr != NULL;
		531
		532	reladdr_to_temp(ir, &src2, &num_reladdr);
		533	reladdr_to_temp(ir, &src1, &num_reladdr);
		534	reladdr_to_temp(ir, &src0, &num_reladdr);
		535
		536	if (dst.reladdr) {
		537	emit_arl(ir, address_reg, *dst.reladdr);
		538	num_reladdr--;
		539	}
		540	assert(num_reladdr == 0);
		541
		542	inst->op = op;
		543	inst->dst = dst;
		544	inst->src[0] = src0;
		545	inst->src[1] = src1;
		546	inst->src[2] = src2;
		547	inst->ir = ir;
		548	inst->dead_mask = 0;
		549
		550	inst->function = NULL;
		551
		552	if (op == TGSI_OPCODE_ARL \|\| op == TGSI_OPCODE_UARL)
		553	this->num_address_regs = 1;
		554
		555	/* Update indirect addressing status used by TGSI */
		556	if (dst.reladdr) {
		557	switch(dst.file) {
		558	case PROGRAM_LOCAL_PARAM:
		559	case PROGRAM_ENV_PARAM:
		560	case PROGRAM_STATE_VAR:
		561	case PROGRAM_CONSTANT:
		562	case PROGRAM_UNIFORM:
		563	this->indirect_addr_consts = true;
		564	break;
		565	case PROGRAM_IMMEDIATE:
		566	assert(!"immediates should not have indirect addressing");
		567	break;
		568	default:
		569	break;
		570	}
		571	}
		572	else {
		573	for (i=0; i<3; i++) {
		574	if(inst->src[i].reladdr) {
		575	switch(inst->src[i].file) {
		576	case PROGRAM_LOCAL_PARAM:
		577	case PROGRAM_ENV_PARAM:
		578	case PROGRAM_STATE_VAR:
		579	case PROGRAM_CONSTANT:
		580	case PROGRAM_UNIFORM:
		581	this->indirect_addr_consts = true;
		582	break;
		583	case PROGRAM_IMMEDIATE:
		584	assert(!"immediates should not have indirect addressing");
		585	break;
		586	default:
		587	break;
		588	}
		589	}
		590	}
		591	}
		592
		593	this->instructions.push_tail(inst);
		594
		595	if (native_integers)
		596	try_emit_float_set(ir, op, dst);
		597
		598	return inst;
		599	}
		600
		601
		602	glsl_to_tgsi_instruction *
		603	glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
		604	st_dst_reg dst, st_src_reg src0, st_src_reg src1)
		605	{
		606	return emit(ir, op, dst, src0, src1, undef_src);
		607	}
		608
		609	glsl_to_tgsi_instruction *
		610	glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
		611	st_dst_reg dst, st_src_reg src0)
		612	{
		613	assert(dst.writemask != 0);
		614	return emit(ir, op, dst, src0, undef_src, undef_src);
		615	}
		616
		617	glsl_to_tgsi_instruction *
		618	glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
		619	{
		620	return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
		621	}
		622
		623	/**
		624	* Emits the code to convert the result of float SET instructions to integers.
		625	*/
		626	void
		627	glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
		628	st_dst_reg dst)
		629	{
		630	if ((op == TGSI_OPCODE_SEQ \|\|
		631	op == TGSI_OPCODE_SNE \|\|
		632	op == TGSI_OPCODE_SGE \|\|
		633	op == TGSI_OPCODE_SLT))
		634	{
		635	st_src_reg src = st_src_reg(dst);
		636	src.negate = ~src.negate;
		637	dst.type = GLSL_TYPE_FLOAT;
		638	emit(ir, TGSI_OPCODE_F2I, dst, src);
		639	}
		640	}
		641
		642	/**
		643	* Determines whether to use an integer, unsigned integer, or float opcode
		644	* based on the operands and input opcode, then emits the result.
		645	*/
		646	unsigned
		647	glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
		648	st_dst_reg dst,
		649	st_src_reg src0, st_src_reg src1)
		650	{
		651	int type = GLSL_TYPE_FLOAT;
		652
		653	assert(src0.type != GLSL_TYPE_ARRAY);
		654	assert(src0.type != GLSL_TYPE_STRUCT);
		655	assert(src1.type != GLSL_TYPE_ARRAY);
		656	assert(src1.type != GLSL_TYPE_STRUCT);
		657
		658	if (src0.type == GLSL_TYPE_FLOAT \|\| src1.type == GLSL_TYPE_FLOAT)
		659	type = GLSL_TYPE_FLOAT;
		660	else if (native_integers)
		661	type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
		662
		663	#define case4(c, f, i, u) \
		664	case TGSI_OPCODE_##c: \
		665	if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
		666	else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
		667	else op = TGSI_OPCODE_##f; \
		668	break;
		669	#define case3(f, i, u) case4(f, f, i, u)
		670	#define case2fi(f, i) case4(f, f, i, i)
		671	#define case2iu(i, u) case4(i, LAST, i, u)
		672
		673	switch(op) {
		674	case2fi(ADD, UADD);
		675	case2fi(MUL, UMUL);
		676	case2fi(MAD, UMAD);
		677	case3(DIV, IDIV, UDIV);
		678	case3(MAX, IMAX, UMAX);
		679	case3(MIN, IMIN, UMIN);
		680	case2iu(MOD, UMOD);
		681
		682	case2fi(SEQ, USEQ);
		683	case2fi(SNE, USNE);
		684	case3(SGE, ISGE, USGE);
		685	case3(SLT, ISLT, USLT);
		686
		687	case2iu(ISHR, USHR);
		688
		689	case2fi(SSG, ISSG);
		690	case3(ABS, IABS, IABS);
		691
		692	default: break;
		693	}
		694
		695	assert(op != TGSI_OPCODE_LAST);
		696	return op;
		697	}
		698
		699	glsl_to_tgsi_instruction *
		700	glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
		701	st_dst_reg dst, st_src_reg src0, st_src_reg src1,
		702	unsigned elements)
		703	{
		704	static const unsigned dot_opcodes[] = {
		705	TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
		706	};
		707
		708	return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
		709	}
		710
		711	/**
		712	* Emits TGSI scalar opcodes to produce unique answers across channels.
		713	*
		714	* Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
		715	* channel determines the result across all channels. So to do a vec4
		716	* of this operation, we want to emit a scalar per source channel used
		717	* to produce dest channels.
		718	*/
		719	void
		720	glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
		721	st_dst_reg dst,
		722	st_src_reg orig_src0, st_src_reg orig_src1)
		723	{
		724	int i, j;
		725	int done_mask = ~dst.writemask;
		726
		727	/* TGSI RCP is a scalar operation splatting results to all channels,
		728	* like ARB_fp/vp. So emit as many RCPs as necessary to cover our
		729	* dst channels.
		730	*/
		731	for (i = 0; i < 4; i++) {
		732	GLuint this_mask = (1 << i);
		733	glsl_to_tgsi_instruction *inst;
		734	st_src_reg src0 = orig_src0;
		735	st_src_reg src1 = orig_src1;
		736
		737	if (done_mask & this_mask)
		738	continue;
		739
		740	GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
		741	GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
		742	for (j = i + 1; j < 4; j++) {
		743	/* If there is another enabled component in the destination that is
		744	* derived from the same inputs, generate its value on this pass as
		745	* well.
		746	*/
		747	if (!(done_mask & (1 << j)) &&
		748	GET_SWZ(src0.swizzle, j) == src0_swiz &&
		749	GET_SWZ(src1.swizzle, j) == src1_swiz) {
		750	this_mask \|= (1 << j);
		751	}
		752	}
		753	src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
		754	src0_swiz, src0_swiz);
		755	src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
		756	src1_swiz, src1_swiz);
		757
		758	inst = emit(ir, op, dst, src0, src1);
		759	inst->dst.writemask = this_mask;
		760	done_mask \|= this_mask;
		761	}
		762	}
		763
		764	void
		765	glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
		766	st_dst_reg dst, st_src_reg src0)
		767	{
		768	st_src_reg undef = undef_src;
		769
		770	undef.swizzle = SWIZZLE_XXXX;
		771
		772	emit_scalar(ir, op, dst, src0, undef);
		773	}
		774
		775	void
		776	glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
		777	st_dst_reg dst, st_src_reg src0)
		778	{
		779	int op = TGSI_OPCODE_ARL;
		780
		781	if (src0.type == GLSL_TYPE_INT \|\| src0.type == GLSL_TYPE_UINT)
		782	op = TGSI_OPCODE_UARL;
		783
		784	emit(NULL, op, dst, src0);
		785	}
		786
		787	/**
		788	* Emit an TGSI_OPCODE_SCS instruction
		789	*
		790	* The \c SCS opcode functions a bit differently than the other TGSI opcodes.
		791	* Instead of splatting its result across all four components of the
		792	* destination, it writes one value to the \c x component and another value to
		793	* the \c y component.
		794	*
		795	* \param ir IR instruction being processed
		796	* \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
		797	* on which value is desired.
		798	* \param dst Destination register
		799	* \param src Source register
		800	*/
		801	void
		802	glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
		803	st_dst_reg dst,
		804	const st_src_reg &src)
		805	{
		806	/* Vertex programs cannot use the SCS opcode.
		807	*/
		808	if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
		809	emit_scalar(ir, op, dst, src);
		810	return;
		811	}
		812
		813	const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
		814	const unsigned scs_mask = (1U << component);
		815	int done_mask = ~dst.writemask;
		816	st_src_reg tmp;
		817
		818	assert(op == TGSI_OPCODE_SIN \|\| op == TGSI_OPCODE_COS);
		819
		820	/* If there are compnents in the destination that differ from the component
		821	* that will be written by the SCS instrution, we'll need a temporary.
		822	*/
		823	if (scs_mask != unsigned(dst.writemask)) {
		824	tmp = get_temp(glsl_type::vec4_type);
		825	}
		826
		827	for (unsigned i = 0; i < 4; i++) {
		828	unsigned this_mask = (1U << i);
		829	st_src_reg src0 = src;
		830
		831	if ((done_mask & this_mask) != 0)
		832	continue;
		833
		834	/* The source swizzle specified which component of the source generates
		835	* sine / cosine for the current component in the destination. The SCS
		836	* instruction requires that this value be swizzle to the X component.
		837	* Replace the current swizzle with a swizzle that puts the source in
		838	* the X component.
		839	*/
		840	unsigned src0_swiz = GET_SWZ(src.swizzle, i);
		841
		842	src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
		843	src0_swiz, src0_swiz);
		844	for (unsigned j = i + 1; j < 4; j++) {
		845	/* If there is another enabled component in the destination that is
		846	* derived from the same inputs, generate its value on this pass as
		847	* well.
		848	*/
		849	if (!(done_mask & (1 << j)) &&
		850	GET_SWZ(src0.swizzle, j) == src0_swiz) {
		851	this_mask \|= (1 << j);
		852	}
		853	}
		854
		855	if (this_mask != scs_mask) {
		856	glsl_to_tgsi_instruction *inst;
		857	st_dst_reg tmp_dst = st_dst_reg(tmp);
		858
		859	/* Emit the SCS instruction.
		860	*/
		861	inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
		862	inst->dst.writemask = scs_mask;
		863
		864	/* Move the result of the SCS instruction to the desired location in
		865	* the destination.
		866	*/
		867	tmp.swizzle = MAKE_SWIZZLE4(component, component,
		868	component, component);
		869	inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
		870	inst->dst.writemask = this_mask;
		871	} else {
		872	/* Emit the SCS instruction to write directly to the destination.
		873	*/
		874	glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
		875	inst->dst.writemask = scs_mask;
		876	}
		877
		878	done_mask \|= this_mask;
		879	}
		880	}
		881
		882	int
		883	glsl_to_tgsi_visitor::add_constant(gl_register_file file,
		884	gl_constant_value values[4], int size, int datatype,
		885	GLuint *swizzle_out)
		886	{
		887	if (file == PROGRAM_CONSTANT) {
		888	return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
		889	size, datatype, swizzle_out);
		890	} else {
		891	int index = 0;
		892	immediate_storage *entry;
		893	assert(file == PROGRAM_IMMEDIATE);
		894
		895	/* Search immediate storage to see if we already have an identical
		896	* immediate that we can use instead of adding a duplicate entry.
		897	*/
		898	foreach_iter(exec_list_iterator, iter, this->immediates) {
		899	entry = (immediate_storage *)iter.get();
		900
		901	if (entry->size == size &&
		902	entry->type == datatype &&
		903	!memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
		904	return index;
		905	}
		906	index++;
		907	}
		908
		909	/* Add this immediate to the list. */
		910	entry = new(mem_ctx) immediate_storage(values, size, datatype);
		911	this->immediates.push_tail(entry);
		912	this->num_immediates++;
		913	return index;
		914	}
		915	}
		916
		917	st_src_reg
		918	glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
		919	{
		920	st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
		921	union gl_constant_value uval;
		922
		923	uval.f = val;
		924	src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
		925
		926	return src;
		927	}
		928
		929	st_src_reg
		930	glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
		931	{
		932	st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
		933	union gl_constant_value uval;
		934
		935	assert(native_integers);
		936
		937	uval.i = val;
		938	src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
		939
		940	return src;
		941	}
		942
		943	st_src_reg
		944	glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
		945	{
		946	if (native_integers)
		947	return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
		948	st_src_reg_for_int(val);
		949	else
		950	return st_src_reg_for_float(val);
		951	}
		952
		953	static int
		954	type_size(const struct glsl_type *type)
		955	{
		956	unsigned int i;
		957	int size;
		958
		959	switch (type->base_type) {
		960	case GLSL_TYPE_UINT:
		961	case GLSL_TYPE_INT:
		962	case GLSL_TYPE_FLOAT:
		963	case GLSL_TYPE_BOOL:
		964	if (type->is_matrix()) {
		965	return type->matrix_columns;
		966	} else {
		967	/* Regardless of size of vector, it gets a vec4. This is bad
		968	* packing for things like floats, but otherwise arrays become a
		969	* mess. Hopefully a later pass over the code can pack scalars
		970	* down if appropriate.
		971	*/
		972	return 1;
		973	}
		974	case GLSL_TYPE_ARRAY:
		975	assert(type->length > 0);
		976	return type_size(type->fields.array) * type->length;
		977	case GLSL_TYPE_STRUCT:
		978	size = 0;
		979	for (i = 0; i < type->length; i++) {
		980	size += type_size(type->fields.structure[i].type);
		981	}
		982	return size;
		983	case GLSL_TYPE_SAMPLER:
		984	/* Samplers take up one slot in UNIFORMS[], but they're baked in
		985	* at link time.
		986	*/
		987	return 1;
		988	case GLSL_TYPE_INTERFACE:
		989	case GLSL_TYPE_VOID:
		990	case GLSL_TYPE_ERROR:
		991	assert(!"Invalid type in type_size");
		992	break;
		993	}
		994	return 0;
		995	}
		996
		997	/**
		998	* In the initial pass of codegen, we assign temporary numbers to
		999	* intermediate results. (not SSA -- variable assignments will reuse
		1000	* storage).
		1001	*/
		1002	st_src_reg
		1003	glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
		1004	{
		1005	st_src_reg src;
		1006
		1007	src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
		1008	src.reladdr = NULL;
		1009	src.negate = 0;
		1010
		1011	if (!options->EmitNoIndirectTemp &&
		1012	(type->is_array() \|\| type->is_matrix())) {
		1013
		1014	src.file = PROGRAM_ARRAY;
		1015	src.index = next_array << 16 \| 0x8000;
		1016	array_sizes[next_array] = type_size(type);
		1017	++next_array;
		1018
		1019	} else {
		1020	src.file = PROGRAM_TEMPORARY;
		1021	src.index = next_temp;
		1022	next_temp += type_size(type);
		1023	}
		1024
		1025	if (type->is_array() \|\| type->is_record()) {
		1026	src.swizzle = SWIZZLE_NOOP;
		1027	} else {
		1028	src.swizzle = swizzle_for_size(type->vector_elements);
		1029	}
		1030
		1031	return src;
		1032	}
		1033
		1034	variable_storage *
		1035	glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
		1036	{
		1037
		1038	variable_storage *entry;
		1039
		1040	foreach_iter(exec_list_iterator, iter, this->variables) {
		1041	entry = (variable_storage *)iter.get();
		1042
		1043	if (entry->var == var)
		1044	return entry;
		1045	}
		1046
		1047	return NULL;
		1048	}
		1049
		1050	void
		1051	glsl_to_tgsi_visitor::visit(ir_variable *ir)
		1052	{
		1053	if (strcmp(ir->name, "gl_FragCoord") == 0) {
		1054	struct gl_fragment_program fp = (struct gl_fragment_program )this->prog;
		1055
		1056	fp->OriginUpperLeft = ir->origin_upper_left;
		1057	fp->PixelCenterInteger = ir->pixel_center_integer;
		1058	}
		1059
		1060	if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
		1061	unsigned int i;
		1062	const ir_state_slot *const slots = ir->state_slots;
		1063	assert(ir->state_slots != NULL);
		1064
		1065	/* Check if this statevar's setup in the STATE file exactly
		1066	* matches how we'll want to reference it as a
		1067	* struct/array/whatever. If not, then we need to move it into
		1068	* temporary storage and hope that it'll get copy-propagated
		1069	* out.
		1070	*/
		1071	for (i = 0; i < ir->num_state_slots; i++) {
		1072	if (slots[i].swizzle != SWIZZLE_XYZW) {
		1073	break;
		1074	}
		1075	}
		1076
		1077	variable_storage *storage;
		1078	st_dst_reg dst;
		1079	if (i == ir->num_state_slots) {
		1080	/* We'll set the index later. */
		1081	storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
		1082	this->variables.push_tail(storage);
		1083
		1084	dst = undef_dst;
		1085	} else {
		1086	/* The variable_storage constructor allocates slots based on the size
		1087	* of the type. However, this had better match the number of state
		1088	* elements that we're going to copy into the new temporary.
		1089	*/
		1090	assert((int) ir->num_state_slots == type_size(ir->type));
		1091
		1092	dst = st_dst_reg(get_temp(ir->type));
		1093
		1094	storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
		1095
		1096	this->variables.push_tail(storage);
		1097	}
		1098
		1099
		1100	for (unsigned int i = 0; i < ir->num_state_slots; i++) {
		1101	int index = _mesa_add_state_reference(this->prog->Parameters,
		1102	(gl_state_index *)slots[i].tokens);
		1103
		1104	if (storage->file == PROGRAM_STATE_VAR) {
		1105	if (storage->index == -1) {
		1106	storage->index = index;
		1107	} else {
		1108	assert(index == storage->index + (int)i);
		1109	}
		1110	} else {
		1111	/* We use GLSL_TYPE_FLOAT here regardless of the actual type of
		1112	* the data being moved since MOV does not care about the type of
		1113	* data it is moving, and we don't want to declare registers with
		1114	* array or struct types.
		1115	*/
		1116	st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
		1117	src.swizzle = slots[i].swizzle;
		1118	emit(ir, TGSI_OPCODE_MOV, dst, src);
		1119	/* even a float takes up a whole vec4 reg in a struct/array. */
		1120	dst.index++;
		1121	}
		1122	}
		1123
		1124	if (storage->file == PROGRAM_TEMPORARY &&
		1125	dst.index != storage->index + (int) ir->num_state_slots) {
		1126	fail_link(this->shader_program,
		1127	"failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
		1128	ir->name, dst.index - storage->index,
		1129	type_size(ir->type));
		1130	}
		1131	}
		1132	}
		1133
		1134	void
		1135	glsl_to_tgsi_visitor::visit(ir_loop *ir)
		1136	{
		1137	ir_dereference_variable *counter = NULL;
		1138
		1139	if (ir->counter != NULL)
		1140	counter = new(ir) ir_dereference_variable(ir->counter);
		1141
		1142	if (ir->from != NULL) {
		1143	assert(ir->counter != NULL);
		1144
		1145	ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
		1146
		1147	a->accept(this);
		1148	delete a;
		1149	}
		1150
		1151	emit(NULL, TGSI_OPCODE_BGNLOOP);
		1152
		1153	if (ir->to) {
		1154	ir_expression *e =
		1155	new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
		1156	counter, ir->to);
		1157	ir_if *if_stmt = new(ir) ir_if(e);
		1158
		1159	ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
		1160
		1161	if_stmt->then_instructions.push_tail(brk);
		1162
		1163	if_stmt->accept(this);
		1164
		1165	delete if_stmt;
		1166	delete e;
		1167	delete brk;
		1168	}
		1169
		1170	visit_exec_list(&ir->body_instructions, this);
		1171
		1172	if (ir->increment) {
		1173	ir_expression *e =
		1174	new(ir) ir_expression(ir_binop_add, counter->type,
		1175	counter, ir->increment);
		1176
		1177	ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
		1178
		1179	a->accept(this);
		1180	delete a;
		1181	delete e;
		1182	}
		1183
		1184	emit(NULL, TGSI_OPCODE_ENDLOOP);
		1185	}
		1186
		1187	void
		1188	glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
		1189	{
		1190	switch (ir->mode) {
		1191	case ir_loop_jump::jump_break:
		1192	emit(NULL, TGSI_OPCODE_BRK);
		1193	break;
		1194	case ir_loop_jump::jump_continue:
		1195	emit(NULL, TGSI_OPCODE_CONT);
		1196	break;
		1197	}
		1198	}
		1199
		1200
		1201	void
		1202	glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
		1203	{
		1204	assert(0);
		1205	(void)ir;
		1206	}
		1207
		1208	void
		1209	glsl_to_tgsi_visitor::visit(ir_function *ir)
		1210	{
		1211	/* Ignore function bodies other than main() -- we shouldn't see calls to
		1212	* them since they should all be inlined before we get to glsl_to_tgsi.
		1213	*/
		1214	if (strcmp(ir->name, "main") == 0) {
		1215	const ir_function_signature *sig;
		1216	exec_list empty;
		1217
		1218	sig = ir->matching_signature(&empty);
		1219
		1220	assert(sig);
		1221
		1222	foreach_iter(exec_list_iterator, iter, sig->body) {
		1223	ir_instruction ir = (ir_instruction )iter.get();
		1224
		1225	ir->accept(this);
		1226	}
		1227	}
		1228	}
		1229
		1230	bool
		1231	glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
		1232	{
		1233	int nonmul_operand = 1 - mul_operand;
		1234	st_src_reg a, b, c;
		1235	st_dst_reg result_dst;
		1236
		1237	ir_expression *expr = ir->operands[mul_operand]->as_expression();
		1238	if (!expr \|\| expr->operation != ir_binop_mul)
		1239	return false;
		1240
		1241	expr->operands[0]->accept(this);
		1242	a = this->result;
		1243	expr->operands[1]->accept(this);
		1244	b = this->result;
		1245	ir->operands[nonmul_operand]->accept(this);
		1246	c = this->result;
		1247
		1248	this->result = get_temp(ir->type);
		1249	result_dst = st_dst_reg(this->result);
		1250	result_dst.writemask = (1 << ir->type->vector_elements) - 1;
		1251	emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
		1252
		1253	return true;
		1254	}
		1255
		1256	/**
		1257	* Emit MAD(a, -b, a) instead of AND(a, NOT(b))
		1258	*
		1259	* The logic values are 1.0 for true and 0.0 for false. Logical-and is
		1260	* implemented using multiplication, and logical-or is implemented using
		1261	* addition. Logical-not can be implemented as (true - x), or (1.0 - x).
		1262	* As result, the logical expression (a & !b) can be rewritten as:
		1263	*
		1264	* - a * !b
		1265	* - a * (1 - b)
		1266	* - (a * 1) - (a * b)
		1267	* - a + -(a * b)
		1268	* - a + (a * -b)
		1269	*
		1270	* This final expression can be implemented as a single MAD(a, -b, a)
		1271	* instruction.
		1272	*/
		1273	bool
		1274	glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
		1275	{
		1276	const int other_operand = 1 - try_operand;
		1277	st_src_reg a, b;
		1278
		1279	ir_expression *expr = ir->operands[try_operand]->as_expression();
		1280	if (!expr \|\| expr->operation != ir_unop_logic_not)
		1281	return false;
		1282
		1283	ir->operands[other_operand]->accept(this);
		1284	a = this->result;
		1285	expr->operands[0]->accept(this);
		1286	b = this->result;
		1287
		1288	b.negate = ~b.negate;
		1289
		1290	this->result = get_temp(ir->type);
		1291	emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
		1292
		1293	return true;
		1294	}
		1295
		1296	bool
		1297	glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
		1298	{
		1299	/* Emit saturates in the vertex shader only if SM 3.0 is supported.
		1300	*/
		1301	if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
		1302	!st_context(this->ctx)->has_shader_model3) {
		1303	return false;
		1304	}
		1305
		1306	ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
		1307	if (!sat_src)
		1308	return false;
		1309
		1310	sat_src->accept(this);
		1311	st_src_reg src = this->result;
		1312
		1313	/* If we generated an expression instruction into a temporary in
		1314	* processing the saturate's operand, apply the saturate to that
		1315	* instruction. Otherwise, generate a MOV to do the saturate.
		1316	*
		1317	* Note that we have to be careful to only do this optimization if
		1318	* the instruction in question was what generated src->result. For
		1319	* example, ir_dereference_array might generate a MUL instruction
		1320	* to create the reladdr, and return us a src reg using that
		1321	* reladdr. That MUL result is not the value we're trying to
		1322	* saturate.
		1323	*/
		1324	ir_expression *sat_src_expr = sat_src->as_expression();
		1325	if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul \|\|
		1326	sat_src_expr->operation == ir_binop_add \|\|
		1327	sat_src_expr->operation == ir_binop_dot)) {
		1328	glsl_to_tgsi_instruction *new_inst;
		1329	new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
		1330	new_inst->saturate = true;
		1331	} else {
		1332	this->result = get_temp(ir->type);
		1333	st_dst_reg result_dst = st_dst_reg(this->result);
		1334	result_dst.writemask = (1 << ir->type->vector_elements) - 1;
		1335	glsl_to_tgsi_instruction *inst;
		1336	inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
		1337	inst->saturate = true;
		1338	}
		1339
		1340	return true;
		1341	}
		1342
		1343	void
		1344	glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
		1345	st_src_reg reg, int num_reladdr)
		1346	{
		1347	if (!reg->reladdr)
		1348	return;
		1349
		1350	emit_arl(ir, address_reg, *reg->reladdr);
		1351
		1352	if (*num_reladdr != 1) {
		1353	st_src_reg temp = get_temp(glsl_type::vec4_type);
		1354
		1355	emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
		1356	*reg = temp;
		1357	}
		1358
		1359	(*num_reladdr)--;
		1360	}
		1361
		1362	void
		1363	glsl_to_tgsi_visitor::visit(ir_expression *ir)
		1364	{
		1365	unsigned int operand;
		1366	st_src_reg op[Elements(ir->operands)];
		1367	st_src_reg result_src;
		1368	st_dst_reg result_dst;
		1369
		1370	/* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
		1371	*/
		1372	if (ir->operation == ir_binop_add) {
		1373	if (try_emit_mad(ir, 1))
		1374	return;
		1375	if (try_emit_mad(ir, 0))
		1376	return;
		1377	}
		1378
		1379	/* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
		1380	*/
		1381	if (ir->operation == ir_binop_logic_and) {
		1382	if (try_emit_mad_for_and_not(ir, 1))
		1383	return;
		1384	if (try_emit_mad_for_and_not(ir, 0))
		1385	return;
		1386	}
		1387
		1388	if (try_emit_sat(ir))
		1389	return;
		1390
		1391	if (ir->operation == ir_quadop_vector)
		1392	assert(!"ir_quadop_vector should have been lowered");
		1393
		1394	for (operand = 0; operand < ir->get_num_operands(); operand++) {
		1395	this->result.file = PROGRAM_UNDEFINED;
		1396	ir->operands[operand]->accept(this);
		1397	if (this->result.file == PROGRAM_UNDEFINED) {
		1398	printf("Failed to get tree for expression operand:\n");
		1399	ir->operands[operand]->print();
		1400	printf("\n");
		1401	exit(1);
		1402	}
		1403	op[operand] = this->result;
		1404
		1405	/* Matrix expression operands should have been broken down to vector
		1406	* operations already.
		1407	*/
		1408	assert(!ir->operands[operand]->type->is_matrix());
		1409	}
		1410
		1411	int vector_elements = ir->operands[0]->type->vector_elements;
		1412	if (ir->operands[1]) {
		1413	vector_elements = MAX2(vector_elements,
		1414	ir->operands[1]->type->vector_elements);
		1415	}
		1416
		1417	this->result.file = PROGRAM_UNDEFINED;
		1418
		1419	/* Storage for our result. Ideally for an assignment we'd be using
		1420	* the actual storage for the result here, instead.
		1421	*/
		1422	result_src = get_temp(ir->type);
		1423	/* convenience for the emit functions below. */
		1424	result_dst = st_dst_reg(result_src);
		1425	/* Limit writes to the channels that will be used by result_src later.
		1426	* This does limit this temp's use as a temporary for multi-instruction
		1427	* sequences.
		1428	*/
		1429	result_dst.writemask = (1 << ir->type->vector_elements) - 1;
		1430
		1431	switch (ir->operation) {
		1432	case ir_unop_logic_not:
		1433	if (result_dst.type != GLSL_TYPE_FLOAT)
		1434	emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
		1435	else {
		1436	/* Previously 'SEQ dst, src, 0.0' was used for this. However, many
		1437	* older GPUs implement SEQ using multiple instructions (i915 uses two
		1438	* SGE instructions and a MUL instruction). Since our logic values are
		1439	* 0.0 and 1.0, 1-x also implements !x.
		1440	*/
		1441	op[0].negate = ~op[0].negate;
		1442	emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
		1443	}
		1444	break;
		1445	case ir_unop_neg:
		1446	if (result_dst.type == GLSL_TYPE_INT \|\| result_dst.type == GLSL_TYPE_UINT)
		1447	emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
		1448	else {
		1449	op[0].negate = ~op[0].negate;
		1450	result_src = op[0];
		1451	}
		1452	break;
		1453	case ir_unop_abs:
		1454	emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
		1455	break;
		1456	case ir_unop_sign:
		1457	emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
		1458	break;
		1459	case ir_unop_rcp:
		1460	emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
		1461	break;
		1462
		1463	case ir_unop_exp2:
		1464	emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
		1465	break;
		1466	case ir_unop_exp:
		1467	case ir_unop_log:
		1468	assert(!"not reached: should be handled by ir_explog_to_explog2");
		1469	break;
		1470	case ir_unop_log2:
		1471	emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
		1472	break;
		1473	case ir_unop_sin:
		1474	emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
		1475	break;
		1476	case ir_unop_cos:
		1477	emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
		1478	break;
		1479	case ir_unop_sin_reduced:
		1480	emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
		1481	break;
		1482	case ir_unop_cos_reduced:
		1483	emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
		1484	break;
		1485
		1486	case ir_unop_dFdx:
		1487	emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
		1488	break;
		1489	case ir_unop_dFdy:
		1490	{
		1491	/* The X component contains 1 or -1 depending on whether the framebuffer
		1492	* is a FBO or the window system buffer, respectively.
		1493	* It is then multiplied with the source operand of DDY.
		1494	*/
		1495	static const gl_state_index transform_y_state[STATE_LENGTH]
		1496	= { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
		1497
		1498	unsigned transform_y_index =
		1499	_mesa_add_state_reference(this->prog->Parameters,
		1500	transform_y_state);
		1501
		1502	st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
		1503	transform_y_index,
		1504	glsl_type::vec4_type);
		1505	transform_y.swizzle = SWIZZLE_XXXX;
		1506
		1507	st_src_reg temp = get_temp(glsl_type::vec4_type);
		1508
		1509	emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
		1510	emit(ir, TGSI_OPCODE_DDY, result_dst, temp);
		1511	break;
		1512	}
		1513
		1514	case ir_unop_noise: {
		1515	/* At some point, a motivated person could add a better
		1516	* implementation of noise. Currently not even the nvidia
		1517	* binary drivers do anything more than this. In any case, the
		1518	* place to do this is in the GL state tracker, not the poor
		1519	* driver.
		1520	*/
		1521	emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
		1522	break;
		1523	}
		1524
		1525	case ir_binop_add:
		1526	emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
		1527	break;
		1528	case ir_binop_sub:
		1529	emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
		1530	break;
		1531
		1532	case ir_binop_mul:
		1533	emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
		1534	break;
		1535	case ir_binop_div:
		1536	if (result_dst.type == GLSL_TYPE_FLOAT)
		1537	assert(!"not reached: should be handled by ir_div_to_mul_rcp");
		1538	else
		1539	emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
		1540	break;
		1541	case ir_binop_mod:
		1542	if (result_dst.type == GLSL_TYPE_FLOAT)
		1543	assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
		1544	else
		1545	emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
		1546	break;
		1547
		1548	case ir_binop_less:
		1549	emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
		1550	break;
		1551	case ir_binop_greater:
		1552	emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
		1553	break;
		1554	case ir_binop_lequal:
		1555	emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
		1556	break;
		1557	case ir_binop_gequal:
		1558	emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
		1559	break;
		1560	case ir_binop_equal:
		1561	emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
		1562	break;
		1563	case ir_binop_nequal:
		1564	emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
		1565	break;
		1566	case ir_binop_all_equal:
		1567	/* "==" operator producing a scalar boolean. */
		1568	if (ir->operands[0]->type->is_vector() \|\|
		1569	ir->operands[1]->type->is_vector()) {
		1570	st_src_reg temp = get_temp(native_integers ?
		1571	glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
		1572	glsl_type::vec4_type);
		1573
		1574	if (native_integers) {
		1575	st_dst_reg temp_dst = st_dst_reg(temp);
		1576	st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
		1577
		1578	emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
		1579
		1580	/* Emit 1-3 AND operations to combine the SEQ results. */
		1581	switch (ir->operands[0]->type->vector_elements) {
		1582	case 2:
		1583	break;
		1584	case 3:
		1585	temp_dst.writemask = WRITEMASK_Y;
		1586	temp1.swizzle = SWIZZLE_YYYY;
		1587	temp2.swizzle = SWIZZLE_ZZZZ;
		1588	emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
		1589	break;
		1590	case 4:
		1591	temp_dst.writemask = WRITEMASK_X;
		1592	temp1.swizzle = SWIZZLE_XXXX;
		1593	temp2.swizzle = SWIZZLE_YYYY;
		1594	emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
		1595	temp_dst.writemask = WRITEMASK_Y;
		1596	temp1.swizzle = SWIZZLE_ZZZZ;
		1597	temp2.swizzle = SWIZZLE_WWWW;
		1598	emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
		1599	}
		1600
		1601	temp1.swizzle = SWIZZLE_XXXX;
		1602	temp2.swizzle = SWIZZLE_YYYY;
		1603	emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
		1604	} else {
		1605	emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
		1606
		1607	/* After the dot-product, the value will be an integer on the
		1608	* range [0,4]. Zero becomes 1.0, and positive values become zero.
		1609	*/
		1610	emit_dp(ir, result_dst, temp, temp, vector_elements);
		1611
		1612	/* Negating the result of the dot-product gives values on the range
		1613	* [-4, 0]. Zero becomes 1.0, and negative values become zero.
		1614	* This is achieved using SGE.
		1615	*/
		1616	st_src_reg sge_src = result_src;
		1617	sge_src.negate = ~sge_src.negate;
		1618	emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
		1619	}
		1620	} else {
		1621	emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
		1622	}
		1623	break;
		1624	case ir_binop_any_nequal:
		1625	/* "!=" operator producing a scalar boolean. */
		1626	if (ir->operands[0]->type->is_vector() \|\|
		1627	ir->operands[1]->type->is_vector()) {
		1628	st_src_reg temp = get_temp(native_integers ?
		1629	glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
		1630	glsl_type::vec4_type);
		1631	emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
		1632
		1633	if (native_integers) {
		1634	st_dst_reg temp_dst = st_dst_reg(temp);
		1635	st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
		1636
		1637	/* Emit 1-3 OR operations to combine the SNE results. */
		1638	switch (ir->operands[0]->type->vector_elements) {
		1639	case 2:
		1640	break;
		1641	case 3:
		1642	temp_dst.writemask = WRITEMASK_Y;
		1643	temp1.swizzle = SWIZZLE_YYYY;
		1644	temp2.swizzle = SWIZZLE_ZZZZ;
		1645	emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
		1646	break;
		1647	case 4:
		1648	temp_dst.writemask = WRITEMASK_X;
		1649	temp1.swizzle = SWIZZLE_XXXX;
		1650	temp2.swizzle = SWIZZLE_YYYY;
		1651	emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
		1652	temp_dst.writemask = WRITEMASK_Y;
		1653	temp1.swizzle = SWIZZLE_ZZZZ;
		1654	temp2.swizzle = SWIZZLE_WWWW;
		1655	emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
		1656	}
		1657
		1658	temp1.swizzle = SWIZZLE_XXXX;
		1659	temp2.swizzle = SWIZZLE_YYYY;
		1660	emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
		1661	} else {
		1662	/* After the dot-product, the value will be an integer on the
		1663	* range [0,4]. Zero stays zero, and positive values become 1.0.
		1664	*/
		1665	glsl_to_tgsi_instruction *const dp =
		1666	emit_dp(ir, result_dst, temp, temp, vector_elements);
		1667	if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
		1668	/* The clamping to [0,1] can be done for free in the fragment
		1669	* shader with a saturate.
		1670	*/
		1671	dp->saturate = true;
		1672	} else {
		1673	/* Negating the result of the dot-product gives values on the range
		1674	* [-4, 0]. Zero stays zero, and negative values become 1.0. This
		1675	* achieved using SLT.
		1676	*/
		1677	st_src_reg slt_src = result_src;
		1678	slt_src.negate = ~slt_src.negate;
		1679	emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
		1680	}
		1681	}
		1682	} else {
		1683	emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
		1684	}
		1685	break;
		1686
		1687	case ir_unop_any: {
		1688	assert(ir->operands[0]->type->is_vector());
		1689
		1690	/* After the dot-product, the value will be an integer on the
		1691	* range [0,4]. Zero stays zero, and positive values become 1.0.
		1692	*/
		1693	glsl_to_tgsi_instruction *const dp =
		1694	emit_dp(ir, result_dst, op[0], op[0],
		1695	ir->operands[0]->type->vector_elements);
		1696	if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
		1697	result_dst.type == GLSL_TYPE_FLOAT) {
		1698	/* The clamping to [0,1] can be done for free in the fragment
		1699	* shader with a saturate.
		1700	*/
		1701	dp->saturate = true;
		1702	} else if (result_dst.type == GLSL_TYPE_FLOAT) {
		1703	/* Negating the result of the dot-product gives values on the range
		1704	* [-4, 0]. Zero stays zero, and negative values become 1.0. This
		1705	* is achieved using SLT.
		1706	*/
		1707	st_src_reg slt_src = result_src;
		1708	slt_src.negate = ~slt_src.negate;
		1709	emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
		1710	}
		1711	else {
		1712	/* Use SNE 0 if integers are being used as boolean values. */
		1713	emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
		1714	}
		1715	break;
		1716	}
		1717
		1718	case ir_binop_logic_xor:
		1719	if (native_integers)
		1720	emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
		1721	else
		1722	emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
		1723	break;
		1724
		1725	case ir_binop_logic_or: {
		1726	if (native_integers) {
		1727	/* If integers are used as booleans, we can use an actual "or"
		1728	* instruction.
		1729	*/
		1730	assert(native_integers);
		1731	emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
		1732	} else {
		1733	/* After the addition, the value will be an integer on the
		1734	* range [0,2]. Zero stays zero, and positive values become 1.0.
		1735	*/
		1736	glsl_to_tgsi_instruction *add =
		1737	emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
		1738	if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
		1739	/* The clamping to [0,1] can be done for free in the fragment
		1740	* shader with a saturate if floats are being used as boolean values.
		1741	*/
		1742	add->saturate = true;
		1743	} else {
		1744	/* Negating the result of the addition gives values on the range
		1745	* [-2, 0]. Zero stays zero, and negative values become 1.0. This
		1746	* is achieved using SLT.
		1747	*/
		1748	st_src_reg slt_src = result_src;
		1749	slt_src.negate = ~slt_src.negate;
		1750	emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
		1751	}
		1752	}
		1753	break;
		1754	}
		1755
		1756	case ir_binop_logic_and:
		1757	/* If native integers are disabled, the bool args are stored as float 0.0
		1758	* or 1.0, so "mul" gives us "and". If they're enabled, just use the
		1759	* actual AND opcode.
		1760	*/
		1761	if (native_integers)
		1762	emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
		1763	else
		1764	emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
		1765	break;
		1766
		1767	case ir_binop_dot:
		1768	assert(ir->operands[0]->type->is_vector());
		1769	assert(ir->operands[0]->type == ir->operands[1]->type);
		1770	emit_dp(ir, result_dst, op[0], op[1],
		1771	ir->operands[0]->type->vector_elements);
		1772	break;
		1773
		1774	case ir_unop_sqrt:
		1775	if (have_sqrt) {
		1776	emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
		1777	}
		1778	else {
		1779	/* sqrt(x) = x * rsq(x). */
		1780	emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
		1781	emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
		1782	/* For incoming channels <= 0, set the result to 0. */
		1783	op[0].negate = ~op[0].negate;
		1784	emit(ir, TGSI_OPCODE_CMP, result_dst,
		1785	op[0], result_src, st_src_reg_for_float(0.0));
		1786	}
		1787	break;
		1788	case ir_unop_rsq:
		1789	emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
		1790	break;
		1791	case ir_unop_i2f:
		1792	if (native_integers) {
		1793	emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
		1794	break;
		1795	}
		1796	/* fallthrough to next case otherwise */
		1797	case ir_unop_b2f:
		1798	if (native_integers) {
		1799	emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
		1800	break;
		1801	}
		1802	/* fallthrough to next case otherwise */
		1803	case ir_unop_i2u:
		1804	case ir_unop_u2i:
		1805	/* Converting between signed and unsigned integers is a no-op. */
		1806	result_src = op[0];
		1807	break;
		1808	case ir_unop_b2i:
		1809	if (native_integers) {
		1810	/* Booleans are stored as integers using ~0 for true and 0 for false.
		1811	* GLSL requires that int(bool) return 1 for true and 0 for false.
		1812	* This conversion is done with AND, but it could be done with NEG.
		1813	*/
		1814	emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
		1815	} else {
		1816	/* Booleans and integers are both stored as floats when native
		1817	* integers are disabled.
		1818	*/
		1819	result_src = op[0];
		1820	}
		1821	break;
		1822	case ir_unop_f2i:
		1823	if (native_integers)
		1824	emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
		1825	else
		1826	emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
		1827	break;
		1828	case ir_unop_f2u:
		1829	if (native_integers)
		1830	emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
		1831	else
		1832	emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
		1833	break;
		1834	case ir_unop_bitcast_f2i:
		1835	result_src = op[0];
		1836	result_src.type = GLSL_TYPE_INT;
		1837	break;
		1838	case ir_unop_bitcast_f2u:
		1839	result_src = op[0];
		1840	result_src.type = GLSL_TYPE_UINT;
		1841	break;
		1842	case ir_unop_bitcast_i2f:
		1843	case ir_unop_bitcast_u2f:
		1844	result_src = op[0];
		1845	result_src.type = GLSL_TYPE_FLOAT;
		1846	break;
		1847	case ir_unop_f2b:
		1848	emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
		1849	break;
		1850	case ir_unop_i2b:
		1851	if (native_integers)
		1852	emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
		1853	else
		1854	emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
		1855	break;
		1856	case ir_unop_trunc:
		1857	emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
		1858	break;
		1859	case ir_unop_ceil:
		1860	emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
		1861	break;
		1862	case ir_unop_floor:
		1863	emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
		1864	break;
		1865	case ir_unop_round_even:
		1866	emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
		1867	break;
		1868	case ir_unop_fract:
		1869	emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
		1870	break;
		1871
		1872	case ir_binop_min:
		1873	emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
		1874	break;
		1875	case ir_binop_max:
		1876	emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
		1877	break;
		1878	case ir_binop_pow:
		1879	emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
		1880	break;
		1881
		1882	case ir_unop_bit_not:
		1883	if (native_integers) {
		1884	emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
		1885	break;
		1886	}
		1887	case ir_unop_u2f:
		1888	if (native_integers) {
		1889	emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
		1890	break;
		1891	}
		1892	case ir_binop_lshift:
		1893	if (native_integers) {
		1894	emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
		1895	break;
		1896	}
		1897	case ir_binop_rshift:
		1898	if (native_integers) {
		1899	emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
		1900	break;
		1901	}
		1902	case ir_binop_bit_and:
		1903	if (native_integers) {
		1904	emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
		1905	break;
		1906	}
		1907	case ir_binop_bit_xor:
		1908	if (native_integers) {
		1909	emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
		1910	break;
		1911	}
		1912	case ir_binop_bit_or:
		1913	if (native_integers) {
		1914	emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
		1915	break;
		1916	}
		1917
		1918	assert(!"GLSL 1.30 features unsupported");
		1919	break;
		1920
		1921	case ir_binop_ubo_load: {
		1922	ir_constant *uniform_block = ir->operands[0]->as_constant();
		1923	ir_constant *const_offset_ir = ir->operands[1]->as_constant();
		1924	unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
		1925	st_src_reg index_reg = get_temp(glsl_type::uint_type);
		1926	st_src_reg cbuf;
		1927
		1928	cbuf.type = glsl_type::vec4_type->base_type;
		1929	cbuf.file = PROGRAM_CONSTANT;
		1930	cbuf.index = 0;
		1931	cbuf.index2D = uniform_block->value.u[0] + 1;
		1932	cbuf.reladdr = NULL;
		1933	cbuf.negate = 0;
		1934
		1935	assert(ir->type->is_vector() \|\| ir->type->is_scalar());
		1936
		1937	if (const_offset_ir) {
		1938	index_reg = st_src_reg_for_int(const_offset / 16);
		1939	} else {
		1940	emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4));
		1941	}
		1942
		1943	cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
		1944	cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
		1945	const_offset % 16 / 4,
		1946	const_offset % 16 / 4,
		1947	const_offset % 16 / 4);
		1948
		1949	cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
		1950	memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
		1951
		1952	if (ir->type->base_type == GLSL_TYPE_BOOL) {
		1953	emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
		1954	} else {
		1955	emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
		1956	}
		1957	break;
		1958	}
		1959	case ir_triop_lrp:
		1960	/* note: we have to reorder the three args here */
		1961	emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
		1962	break;
		1963	case ir_unop_pack_snorm_2x16:
		1964	case ir_unop_pack_unorm_2x16:
		1965	case ir_unop_pack_half_2x16:
		1966	case ir_unop_pack_snorm_4x8:
		1967	case ir_unop_pack_unorm_4x8:
		1968	case ir_unop_unpack_snorm_2x16:
		1969	case ir_unop_unpack_unorm_2x16:
		1970	case ir_unop_unpack_half_2x16:
		1971	case ir_unop_unpack_half_2x16_split_x:
		1972	case ir_unop_unpack_half_2x16_split_y:
		1973	case ir_unop_unpack_snorm_4x8:
		1974	case ir_unop_unpack_unorm_4x8:
		1975	case ir_binop_pack_half_2x16_split:
		1976	case ir_unop_bitfield_reverse:
		1977	case ir_unop_bit_count:
		1978	case ir_unop_find_msb:
		1979	case ir_unop_find_lsb:
		1980	case ir_binop_bfm:
		1981	case ir_triop_bfi:
		1982	case ir_triop_bitfield_extract:
		1983	case ir_quadop_bitfield_insert:
		1984	case ir_quadop_vector:
		1985	case ir_binop_vector_extract:
		1986	case ir_triop_vector_insert:
		1987	/* This operation is not supported, or should have already been handled.
		1988	*/
		1989	assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
		1990	break;
		1991	}
		1992
		1993	this->result = result_src;
		1994	}
		1995
		1996
		1997	void
		1998	glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
		1999	{
		2000	st_src_reg src;
		2001	int i;
		2002	int swizzle[4];
		2003
		2004	/* Note that this is only swizzles in expressions, not those on the left
		2005	* hand side of an assignment, which do write masking. See ir_assignment
		2006	* for that.
		2007	*/
		2008
		2009	ir->val->accept(this);
		2010	src = this->result;
		2011	assert(src.file != PROGRAM_UNDEFINED);
		2012
		2013	for (i = 0; i < 4; i++) {
		2014	if (i < ir->type->vector_elements) {
		2015	switch (i) {
		2016	case 0:
		2017	swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
		2018	break;
		2019	case 1:
		2020	swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
		2021	break;
		2022	case 2:
		2023	swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
		2024	break;
		2025	case 3:
		2026	swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
		2027	break;
		2028	}
		2029	} else {
		2030	/* If the type is smaller than a vec4, replicate the last
		2031	* channel out.
		2032	*/
		2033	swizzle[i] = swizzle[ir->type->vector_elements - 1];
		2034	}
		2035	}
		2036
		2037	src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
		2038
		2039	this->result = src;
		2040	}
		2041
		2042	void
		2043	glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
		2044	{
		2045	variable_storage *entry = find_variable_storage(ir->var);
		2046	ir_variable *var = ir->var;
		2047
		2048	if (!entry) {
		2049	switch (var->mode) {
		2050	case ir_var_uniform:
		2051	entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
		2052	var->location);
		2053	this->variables.push_tail(entry);
		2054	break;
		2055	case ir_var_shader_in:
		2056	/* The linker assigns locations for varyings and attributes,
		2057	* including deprecated builtins (like gl_Color), user-assign
		2058	* generic attributes (glBindVertexLocation), and
		2059	* user-defined varyings.
		2060	*/
		2061	assert(var->location != -1);
		2062	entry = new(mem_ctx) variable_storage(var,
		2063	PROGRAM_INPUT,
		2064	var->location);
		2065	break;
		2066	case ir_var_shader_out:
		2067	assert(var->location != -1);
		2068	entry = new(mem_ctx) variable_storage(var,
		2069	PROGRAM_OUTPUT,
		2070	var->location + var->index);
		2071	break;
		2072	case ir_var_system_value:
		2073	entry = new(mem_ctx) variable_storage(var,
		2074	PROGRAM_SYSTEM_VALUE,
		2075	var->location);
		2076	break;
		2077	case ir_var_auto:
		2078	case ir_var_temporary:
		2079	st_src_reg src = get_temp(var->type);
		2080
		2081	entry = new(mem_ctx) variable_storage(var, src.file, src.index);
		2082	this->variables.push_tail(entry);
		2083
		2084	break;
		2085	}
		2086
		2087	if (!entry) {
		2088	printf("Failed to make storage for %s\n", var->name);
		2089	exit(1);
		2090	}
		2091	}
		2092
		2093	this->result = st_src_reg(entry->file, entry->index, var->type);
		2094	if (!native_integers)
		2095	this->result.type = GLSL_TYPE_FLOAT;
		2096	}
		2097
		2098	void
		2099	glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
		2100	{
		2101	ir_constant *index;
		2102	st_src_reg src;
		2103	int element_size = type_size(ir->type);
		2104
		2105	index = ir->array_index->constant_expression_value();
		2106
		2107	ir->array->accept(this);
		2108	src = this->result;
		2109
		2110	if (index) {
		2111	src.index += index->value.i[0] * element_size;
		2112	} else {
		2113	/* Variable index array dereference. It eats the "vec4" of the
		2114	* base of the array and an index that offsets the TGSI register
		2115	* index.
		2116	*/
		2117	ir->array_index->accept(this);
		2118
		2119	st_src_reg index_reg;
		2120
		2121	if (element_size == 1) {
		2122	index_reg = this->result;
		2123	} else {
		2124	index_reg = get_temp(native_integers ?
		2125	glsl_type::int_type : glsl_type::float_type);
		2126
		2127	emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
		2128	this->result, st_src_reg_for_type(index_reg.type, element_size));
		2129	}
		2130
		2131	/* If there was already a relative address register involved, add the
		2132	* new and the old together to get the new offset.
		2133	*/
		2134	if (src.reladdr != NULL) {
		2135	st_src_reg accum_reg = get_temp(native_integers ?
		2136	glsl_type::int_type : glsl_type::float_type);
		2137
		2138	emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
		2139	index_reg, *src.reladdr);
		2140
		2141	index_reg = accum_reg;
		2142	}
		2143
		2144	src.reladdr = ralloc(mem_ctx, st_src_reg);
		2145	memcpy(src.reladdr, &index_reg, sizeof(index_reg));
		2146	}
		2147
		2148	/* If the type is smaller than a vec4, replicate the last channel out. */
		2149	if (ir->type->is_scalar() \|\| ir->type->is_vector())
		2150	src.swizzle = swizzle_for_size(ir->type->vector_elements);
		2151	else
		2152	src.swizzle = SWIZZLE_NOOP;
		2153
		2154	/* Change the register type to the element type of the array. */
		2155	src.type = ir->type->base_type;
		2156
		2157	this->result = src;
		2158	}
		2159
		2160	void
		2161	glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
		2162	{
		2163	unsigned int i;
		2164	const glsl_type *struct_type = ir->record->type;
		2165	int offset = 0;
		2166
		2167	ir->record->accept(this);
		2168
		2169	for (i = 0; i < struct_type->length; i++) {
		2170	if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
		2171	break;
		2172	offset += type_size(struct_type->fields.structure[i].type);
		2173	}
		2174
		2175	/* If the type is smaller than a vec4, replicate the last channel out. */
		2176	if (ir->type->is_scalar() \|\| ir->type->is_vector())
		2177	this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
		2178	else
		2179	this->result.swizzle = SWIZZLE_NOOP;
		2180
		2181	this->result.index += offset;
		2182	this->result.type = ir->type->base_type;
		2183	}
		2184
		2185	/**
		2186	* We want to be careful in assignment setup to hit the actual storage
		2187	* instead of potentially using a temporary like we might with the
		2188	* ir_dereference handler.
		2189	*/
		2190	static st_dst_reg
		2191	get_assignment_lhs(ir_dereference ir, glsl_to_tgsi_visitor v)
		2192	{
		2193	/* The LHS must be a dereference. If the LHS is a variable indexed array
		2194	* access of a vector, it must be separated into a series conditional moves
		2195	* before reaching this point (see ir_vec_index_to_cond_assign).
		2196	*/
		2197	assert(ir->as_dereference());
		2198	ir_dereference_array *deref_array = ir->as_dereference_array();
		2199	if (deref_array) {
		2200	assert(!deref_array->array->type->is_vector());
		2201	}
		2202
		2203	/* Use the rvalue deref handler for the most part. We'll ignore
		2204	* swizzles in it and write swizzles using writemask, though.
		2205	*/
		2206	ir->accept(v);
		2207	return st_dst_reg(v->result);
		2208	}
		2209
		2210	/**
		2211	* Process the condition of a conditional assignment
		2212	*
		2213	* Examines the condition of a conditional assignment to generate the optimal
		2214	* first operand of a \c CMP instruction. If the condition is a relational
		2215	* operator with 0 (e.g., \c ir_binop_less), the value being compared will be
		2216	* used as the source for the \c CMP instruction. Otherwise the comparison
		2217	* is processed to a boolean result, and the boolean result is used as the
		2218	* operand to the CMP instruction.
		2219	*/
		2220	bool
		2221	glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
		2222	{
		2223	ir_rvalue *src_ir = ir;
		2224	bool negate = true;
		2225	bool switch_order = false;
		2226
		2227	ir_expression *const expr = ir->as_expression();
		2228	if ((expr != NULL) && (expr->get_num_operands() == 2)) {
		2229	bool zero_on_left = false;
		2230
		2231	if (expr->operands[0]->is_zero()) {
		2232	src_ir = expr->operands[1];
		2233	zero_on_left = true;
		2234	} else if (expr->operands[1]->is_zero()) {
		2235	src_ir = expr->operands[0];
		2236	zero_on_left = false;
		2237	}
		2238
		2239	/* a is - 0 + - 0 +
		2240	* (a < 0) T F F ( a < 0) T F F
		2241	* (0 < a) F F T (-a < 0) F F T
		2242	* (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
		2243	* (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
		2244	* (a > 0) F F T (-a < 0) F F T
		2245	* (0 > a) T F F ( a < 0) T F F
		2246	* (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
		2247	* (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
		2248	*
		2249	* Note that exchanging the order of 0 and 'a' in the comparison simply
		2250	* means that the value of 'a' should be negated.
		2251	*/
		2252	if (src_ir != ir) {
		2253	switch (expr->operation) {
		2254	case ir_binop_less:
		2255	switch_order = false;
		2256	negate = zero_on_left;
		2257	break;
		2258
		2259	case ir_binop_greater:
		2260	switch_order = false;
		2261	negate = !zero_on_left;
		2262	break;
		2263
		2264	case ir_binop_lequal:
		2265	switch_order = true;
		2266	negate = !zero_on_left;
		2267	break;
		2268
		2269	case ir_binop_gequal:
		2270	switch_order = true;
		2271	negate = zero_on_left;
		2272	break;
		2273
		2274	default:
		2275	/* This isn't the right kind of comparison afterall, so make sure
		2276	* the whole condition is visited.
		2277	*/
		2278	src_ir = ir;
		2279	break;
		2280	}
		2281	}
		2282	}
		2283
		2284	src_ir->accept(this);
		2285
		2286	/* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
		2287	* condition we produced is 0.0 or 1.0. By flipping the sign, we can
		2288	* choose which value TGSI_OPCODE_CMP produces without an extra instruction
		2289	* computing the condition.
		2290	*/
		2291	if (negate)
		2292	this->result.negate = ~this->result.negate;
		2293
		2294	return switch_order;
		2295	}
		2296
		2297	void
		2298	glsl_to_tgsi_visitor::emit_block_mov(ir_assignment ir, const struct glsl_type type,
		2299	st_dst_reg l, st_src_reg r)
		2300	{
		2301	if (type->base_type == GLSL_TYPE_STRUCT) {
		2302	for (unsigned int i = 0; i < type->length; i++) {
		2303	emit_block_mov(ir, type->fields.structure[i].type, l, r);
		2304	}
		2305	return;
		2306	}
		2307
		2308	if (type->is_array()) {
		2309	for (unsigned int i = 0; i < type->length; i++) {
		2310	emit_block_mov(ir, type->fields.array, l, r);
		2311	}
		2312	return;
		2313	}
		2314
		2315	if (type->is_matrix()) {
		2316	const struct glsl_type *vec_type;
		2317
		2318	vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
		2319	type->vector_elements, 1);
		2320
		2321	for (int i = 0; i < type->matrix_columns; i++) {
		2322	emit_block_mov(ir, vec_type, l, r);
		2323	}
		2324	return;
		2325	}
		2326
		2327	assert(type->is_scalar() \|\| type->is_vector());
		2328
		2329	r->type = type->base_type;
		2330	emit(ir, TGSI_OPCODE_MOV, l, r);
		2331	l->index++;
		2332	r->index++;
		2333	}
		2334
		2335	void
		2336	glsl_to_tgsi_visitor::visit(ir_assignment *ir)
		2337	{
		2338	st_dst_reg l;
		2339	st_src_reg r;
		2340	int i;
		2341
		2342	ir->rhs->accept(this);
		2343	r = this->result;
		2344
		2345	l = get_assignment_lhs(ir->lhs, this);
		2346
		2347	/* FINISHME: This should really set to the correct maximal writemask for each
		2348	* FINISHME: component written (in the loops below). This case can only
		2349	* FINISHME: occur for matrices, arrays, and structures.
		2350	*/
		2351	if (ir->write_mask == 0) {
		2352	assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
		2353	l.writemask = WRITEMASK_XYZW;
		2354	} else if (ir->lhs->type->is_scalar() &&
		2355	ir->lhs->variable_referenced()->mode == ir_var_shader_out) {
		2356	/* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
		2357	* FINISHME: W component of fragment shader output zero, work correctly.
		2358	*/
		2359	l.writemask = WRITEMASK_XYZW;
		2360	} else {
		2361	int swizzles[4];
		2362	int first_enabled_chan = 0;
		2363	int rhs_chan = 0;
		2364
		2365	l.writemask = ir->write_mask;
		2366
		2367	for (int i = 0; i < 4; i++) {
		2368	if (l.writemask & (1 << i)) {
		2369	first_enabled_chan = GET_SWZ(r.swizzle, i);
		2370	break;
		2371	}
		2372	}
		2373
		2374	/* Swizzle a small RHS vector into the channels being written.
		2375	*
		2376	* glsl ir treats write_mask as dictating how many channels are
		2377	* present on the RHS while TGSI treats write_mask as just
		2378	* showing which channels of the vec4 RHS get written.
		2379	*/
		2380	for (int i = 0; i < 4; i++) {
		2381	if (l.writemask & (1 << i))
		2382	swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
		2383	else
		2384	swizzles[i] = first_enabled_chan;
		2385	}
		2386	r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
		2387	swizzles[2], swizzles[3]);
		2388	}
		2389
		2390	assert(l.file != PROGRAM_UNDEFINED);
		2391	assert(r.file != PROGRAM_UNDEFINED);
		2392
		2393	if (ir->condition) {
		2394	const bool switch_order = this->process_move_condition(ir->condition);
		2395	st_src_reg condition = this->result;
		2396
		2397	for (i = 0; i < type_size(ir->lhs->type); i++) {
		2398	st_src_reg l_src = st_src_reg(l);
		2399	st_src_reg condition_temp = condition;
		2400	l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
		2401
		2402	if (native_integers) {
		2403	/* This is necessary because TGSI's CMP instruction expects the
		2404	* condition to be a float, and we store booleans as integers.
		2405	* TODO: really want to avoid i2f path and use UCMP. Requires
		2406	* changes to process_move_condition though too.
		2407	*/
		2408	condition_temp = get_temp(glsl_type::vec4_type);
		2409	condition.negate = 0;
		2410	emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
		2411	condition_temp.swizzle = condition.swizzle;
		2412	}
		2413
		2414	if (switch_order) {
		2415	emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
		2416	} else {
		2417	emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
		2418	}
		2419
		2420	l.index++;
		2421	r.index++;
		2422	}
		2423	} else if (ir->rhs->as_expression() &&
		2424	this->instructions.get_tail() &&
		2425	ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
		2426	type_size(ir->lhs->type) == 1 &&
		2427	l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
		2428	/* To avoid emitting an extra MOV when assigning an expression to a
		2429	* variable, emit the last instruction of the expression again, but
		2430	* replace the destination register with the target of the assignment.
		2431	* Dead code elimination will remove the original instruction.
		2432	*/
		2433	glsl_to_tgsi_instruction inst, new_inst;
		2434	inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
		2435	new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
		2436	new_inst->saturate = inst->saturate;
		2437	inst->dead_mask = inst->dst.writemask;
		2438	} else {
		2439	emit_block_mov(ir, ir->rhs->type, &l, &r);
		2440	}
		2441	}
		2442
		2443
		2444	void
		2445	glsl_to_tgsi_visitor::visit(ir_constant *ir)
		2446	{
		2447	st_src_reg src;
		2448	GLfloat stack_vals[4] = { 0 };
		2449	gl_constant_value values = (gl_constant_value ) stack_vals;
		2450	GLenum gl_type = GL_NONE;
		2451	unsigned int i;
		2452	static int in_array = 0;
		2453	gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
		2454
		2455	/* Unfortunately, 4 floats is all we can get into
		2456	* _mesa_add_typed_unnamed_constant. So, make a temp to store an
		2457	* aggregate constant and move each constant value into it. If we
		2458	* get lucky, copy propagation will eliminate the extra moves.
		2459	*/
		2460	if (ir->type->base_type == GLSL_TYPE_STRUCT) {
		2461	st_src_reg temp_base = get_temp(ir->type);
		2462	st_dst_reg temp = st_dst_reg(temp_base);
		2463
		2464	foreach_iter(exec_list_iterator, iter, ir->components) {
		2465	ir_constant field_value = (ir_constant )iter.get();
		2466	int size = type_size(field_value->type);
		2467
		2468	assert(size > 0);
		2469
		2470	field_value->accept(this);
		2471	src = this->result;
		2472
		2473	for (i = 0; i < (unsigned int)size; i++) {
		2474	emit(ir, TGSI_OPCODE_MOV, temp, src);
		2475
		2476	src.index++;
		2477	temp.index++;
		2478	}
		2479	}
		2480	this->result = temp_base;
		2481	return;
		2482	}
		2483
		2484	if (ir->type->is_array()) {
		2485	st_src_reg temp_base = get_temp(ir->type);
		2486	st_dst_reg temp = st_dst_reg(temp_base);
		2487	int size = type_size(ir->type->fields.array);
		2488
		2489	assert(size > 0);
		2490	in_array++;
		2491
		2492	for (i = 0; i < ir->type->length; i++) {
		2493	ir->array_elements[i]->accept(this);
		2494	src = this->result;
		2495	for (int j = 0; j < size; j++) {
		2496	emit(ir, TGSI_OPCODE_MOV, temp, src);
		2497
		2498	src.index++;
		2499	temp.index++;
		2500	}
		2501	}
		2502	this->result = temp_base;
		2503	in_array--;
		2504	return;
		2505	}
		2506
		2507	if (ir->type->is_matrix()) {
		2508	st_src_reg mat = get_temp(ir->type);
		2509	st_dst_reg mat_column = st_dst_reg(mat);
		2510
		2511	for (i = 0; i < ir->type->matrix_columns; i++) {
		2512	assert(ir->type->base_type == GLSL_TYPE_FLOAT);
		2513	values = (gl_constant_value ) &ir->value.f[i ir->type->vector_elements];
		2514
		2515	src = st_src_reg(file, -1, ir->type->base_type);
		2516	src.index = add_constant(file,
		2517	values,
		2518	ir->type->vector_elements,
		2519	GL_FLOAT,
		2520	&src.swizzle);
		2521	emit(ir, TGSI_OPCODE_MOV, mat_column, src);
		2522
		2523	mat_column.index++;
		2524	}
		2525
		2526	this->result = mat;
		2527	return;
		2528	}
		2529
		2530	switch (ir->type->base_type) {
		2531	case GLSL_TYPE_FLOAT:
		2532	gl_type = GL_FLOAT;
		2533	for (i = 0; i < ir->type->vector_elements; i++) {
		2534	values[i].f = ir->value.f[i];
		2535	}
		2536	break;
		2537	case GLSL_TYPE_UINT:
		2538	gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
		2539	for (i = 0; i < ir->type->vector_elements; i++) {
		2540	if (native_integers)
		2541	values[i].u = ir->value.u[i];
		2542	else
		2543	values[i].f = ir->value.u[i];
		2544	}
		2545	break;
		2546	case GLSL_TYPE_INT:
		2547	gl_type = native_integers ? GL_INT : GL_FLOAT;
		2548	for (i = 0; i < ir->type->vector_elements; i++) {
		2549	if (native_integers)
		2550	values[i].i = ir->value.i[i];
		2551	else
		2552	values[i].f = ir->value.i[i];
		2553	}
		2554	break;
		2555	case GLSL_TYPE_BOOL:
		2556	gl_type = native_integers ? GL_BOOL : GL_FLOAT;
		2557	for (i = 0; i < ir->type->vector_elements; i++) {
		2558	if (native_integers)
		2559	values[i].u = ir->value.b[i] ? ~0 : 0;
		2560	else
		2561	values[i].f = ir->value.b[i];
		2562	}
		2563	break;
		2564	default:
		2565	assert(!"Non-float/uint/int/bool constant");
		2566	}
		2567
		2568	this->result = st_src_reg(file, -1, ir->type);
		2569	this->result.index = add_constant(file,
		2570	values,
		2571	ir->type->vector_elements,
		2572	gl_type,
		2573	&this->result.swizzle);
		2574	}
		2575
		2576	function_entry *
		2577	glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
		2578	{
		2579	function_entry *entry;
		2580
		2581	foreach_iter(exec_list_iterator, iter, this->function_signatures) {
		2582	entry = (function_entry *)iter.get();
		2583
		2584	if (entry->sig == sig)
		2585	return entry;
		2586	}
		2587
		2588	entry = ralloc(mem_ctx, function_entry);
		2589	entry->sig = sig;
		2590	entry->sig_id = this->next_signature_id++;
		2591	entry->bgn_inst = NULL;
		2592
		2593	/* Allocate storage for all the parameters. */
		2594	foreach_iter(exec_list_iterator, iter, sig->parameters) {
		2595	ir_variable param = (ir_variable )iter.get();
		2596	variable_storage *storage;
		2597
		2598	storage = find_variable_storage(param);
		2599	assert(!storage);
		2600
		2601	st_src_reg src = get_temp(param->type);
		2602
		2603	storage = new(mem_ctx) variable_storage(param, src.file, src.index);
		2604	this->variables.push_tail(storage);
		2605	}
		2606
		2607	if (!sig->return_type->is_void()) {
		2608	entry->return_reg = get_temp(sig->return_type);
		2609	} else {
		2610	entry->return_reg = undef_src;
		2611	}
		2612
		2613	this->function_signatures.push_tail(entry);
		2614	return entry;
		2615	}
		2616
		2617	void
		2618	glsl_to_tgsi_visitor::visit(ir_call *ir)
		2619	{
		2620	glsl_to_tgsi_instruction *call_inst;
		2621	ir_function_signature *sig = ir->callee;
		2622	function_entry *entry = get_function_signature(sig);
		2623	int i;
		2624
		2625	/* Process in parameters. */
		2626	exec_list_iterator sig_iter = sig->parameters.iterator();
		2627	foreach_iter(exec_list_iterator, iter, *ir) {
		2628	ir_rvalue param_rval = (ir_rvalue )iter.get();
		2629	ir_variable param = (ir_variable )sig_iter.get();
		2630
		2631	if (param->mode == ir_var_function_in \|\|
		2632	param->mode == ir_var_function_inout) {
		2633	variable_storage *storage = find_variable_storage(param);
		2634	assert(storage);
		2635
		2636	param_rval->accept(this);
		2637	st_src_reg r = this->result;
		2638
		2639	st_dst_reg l;
		2640	l.file = storage->file;
		2641	l.index = storage->index;
		2642	l.reladdr = NULL;
		2643	l.writemask = WRITEMASK_XYZW;
		2644	l.cond_mask = COND_TR;
		2645
		2646	for (i = 0; i < type_size(param->type); i++) {
		2647	emit(ir, TGSI_OPCODE_MOV, l, r);
		2648	l.index++;
		2649	r.index++;
		2650	}
		2651	}
		2652
		2653	sig_iter.next();
		2654	}
		2655	assert(!sig_iter.has_next());
		2656
		2657	/* Emit call instruction */
		2658	call_inst = emit(ir, TGSI_OPCODE_CAL);
		2659	call_inst->function = entry;
		2660
		2661	/* Process out parameters. */
		2662	sig_iter = sig->parameters.iterator();
		2663	foreach_iter(exec_list_iterator, iter, *ir) {
		2664	ir_rvalue param_rval = (ir_rvalue )iter.get();
		2665	ir_variable param = (ir_variable )sig_iter.get();
		2666
		2667	if (param->mode == ir_var_function_out \|\|
		2668	param->mode == ir_var_function_inout) {
		2669	variable_storage *storage = find_variable_storage(param);
		2670	assert(storage);
		2671
		2672	st_src_reg r;
		2673	r.file = storage->file;
		2674	r.index = storage->index;
		2675	r.reladdr = NULL;
		2676	r.swizzle = SWIZZLE_NOOP;
		2677	r.negate = 0;
		2678
		2679	param_rval->accept(this);
		2680	st_dst_reg l = st_dst_reg(this->result);
		2681
		2682	for (i = 0; i < type_size(param->type); i++) {
		2683	emit(ir, TGSI_OPCODE_MOV, l, r);
		2684	l.index++;
		2685	r.index++;
		2686	}
		2687	}
		2688
		2689	sig_iter.next();
		2690	}
		2691	assert(!sig_iter.has_next());
		2692
		2693	/* Process return value. */
		2694	this->result = entry->return_reg;
		2695	}
		2696
		2697	void
		2698	glsl_to_tgsi_visitor::visit(ir_texture *ir)
		2699	{
		2700	st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset, sample_index;
		2701	st_dst_reg result_dst, coord_dst, cube_sc_dst;
		2702	glsl_to_tgsi_instruction *inst = NULL;
		2703	unsigned opcode = TGSI_OPCODE_NOP;
		2704	const glsl_type *sampler_type = ir->sampler->type;
		2705	bool is_cube_array = false;
		2706
		2707	/* if we are a cube array sampler */
		2708	if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
		2709	sampler_type->sampler_array)) {
		2710	is_cube_array = true;
		2711	}
		2712
		2713	if (ir->coordinate) {
		2714	ir->coordinate->accept(this);
		2715
		2716	/* Put our coords in a temp. We'll need to modify them for shadow,
		2717	* projection, or LOD, so the only case we'd use it as is is if
		2718	* we're doing plain old texturing. The optimization passes on
		2719	* glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
		2720	*/
		2721	coord = get_temp(glsl_type::vec4_type);
		2722	coord_dst = st_dst_reg(coord);
		2723	coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
		2724	emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
		2725	}
		2726
		2727	if (ir->projector) {
		2728	ir->projector->accept(this);
		2729	projector = this->result;
		2730	}
		2731
		2732	/* Storage for our result. Ideally for an assignment we'd be using
		2733	* the actual storage for the result here, instead.
		2734	*/
		2735	result_src = get_temp(ir->type);
		2736	result_dst = st_dst_reg(result_src);
		2737
		2738	switch (ir->op) {
		2739	case ir_tex:
		2740	opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
		2741	if (ir->offset) {
		2742	ir->offset->accept(this);
		2743	offset = this->result;
		2744	}
		2745	break;
		2746	case ir_txb:
		2747	opcode = is_cube_array ? TGSI_OPCODE_TXB2 : TGSI_OPCODE_TXB;
		2748	ir->lod_info.bias->accept(this);
		2749	lod_info = this->result;
		2750	if (ir->offset) {
		2751	ir->offset->accept(this);
		2752	offset = this->result;
		2753	}
		2754	break;
		2755	case ir_txl:
		2756	opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
		2757	ir->lod_info.lod->accept(this);
		2758	lod_info = this->result;
		2759	if (ir->offset) {
		2760	ir->offset->accept(this);
		2761	offset = this->result;
		2762	}
		2763	break;
		2764	case ir_txd:
		2765	opcode = TGSI_OPCODE_TXD;
		2766	ir->lod_info.grad.dPdx->accept(this);
		2767	dx = this->result;
		2768	ir->lod_info.grad.dPdy->accept(this);
		2769	dy = this->result;
		2770	if (ir->offset) {
		2771	ir->offset->accept(this);
		2772	offset = this->result;
		2773	}
		2774	break;
		2775	case ir_txs:
		2776	opcode = TGSI_OPCODE_TXQ;
		2777	ir->lod_info.lod->accept(this);
		2778	lod_info = this->result;
		2779	break;
		2780	case ir_txf:
		2781	opcode = TGSI_OPCODE_TXF;
		2782	ir->lod_info.lod->accept(this);
		2783	lod_info = this->result;
		2784	if (ir->offset) {
		2785	ir->offset->accept(this);
		2786	offset = this->result;
		2787	}
		2788	break;
		2789	case ir_txf_ms:
		2790	opcode = TGSI_OPCODE_TXF;
		2791	ir->lod_info.sample_index->accept(this);
		2792	sample_index = this->result;
		2793	break;
		2794	case ir_lod:
		2795	assert(!"Unexpected ir_lod opcode");
		2796	break;
		2797	}
		2798
		2799	if (ir->projector) {
		2800	if (opcode == TGSI_OPCODE_TEX) {
		2801	/* Slot the projector in as the last component of the coord. */
		2802	coord_dst.writemask = WRITEMASK_W;
		2803	emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
		2804	coord_dst.writemask = WRITEMASK_XYZW;
		2805	opcode = TGSI_OPCODE_TXP;
		2806	} else {
		2807	st_src_reg coord_w = coord;
		2808	coord_w.swizzle = SWIZZLE_WWWW;
		2809
		2810	/* For the other TEX opcodes there's no projective version
		2811	* since the last slot is taken up by LOD info. Do the
		2812	* projective divide now.
		2813	*/
		2814	coord_dst.writemask = WRITEMASK_W;
		2815	emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
		2816
		2817	/* In the case where we have to project the coordinates "by hand,"
		2818	* the shadow comparator value must also be projected.
		2819	*/
		2820	st_src_reg tmp_src = coord;
		2821	if (ir->shadow_comparitor) {
		2822	/* Slot the shadow value in as the second to last component of the
		2823	* coord.
		2824	*/
		2825	ir->shadow_comparitor->accept(this);
		2826
		2827	tmp_src = get_temp(glsl_type::vec4_type);
		2828	st_dst_reg tmp_dst = st_dst_reg(tmp_src);
		2829
		2830	/* Projective division not allowed for array samplers. */
		2831	assert(!sampler_type->sampler_array);
		2832
		2833	tmp_dst.writemask = WRITEMASK_Z;
		2834	emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
		2835
		2836	tmp_dst.writemask = WRITEMASK_XY;
		2837	emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
		2838	}
		2839
		2840	coord_dst.writemask = WRITEMASK_XYZ;
		2841	emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
		2842
		2843	coord_dst.writemask = WRITEMASK_XYZW;
		2844	coord.swizzle = SWIZZLE_XYZW;
		2845	}
		2846	}
		2847
		2848	/* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
		2849	* comparator was put in the correct place (and projected) by the code,
		2850	* above, that handles by-hand projection.
		2851	*/
		2852	if (ir->shadow_comparitor && (!ir->projector \|\| opcode == TGSI_OPCODE_TXP)) {
		2853	/* Slot the shadow value in as the second to last component of the
		2854	* coord.
		2855	*/
		2856	ir->shadow_comparitor->accept(this);
		2857
		2858	if (is_cube_array) {
		2859	cube_sc = get_temp(glsl_type::float_type);
		2860	cube_sc_dst = st_dst_reg(cube_sc);
		2861	cube_sc_dst.writemask = WRITEMASK_X;
		2862	emit(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
		2863	cube_sc_dst.writemask = WRITEMASK_X;
		2864	}
		2865	else {
		2866	if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
		2867	sampler_type->sampler_array) \|\|
		2868	sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
		2869	coord_dst.writemask = WRITEMASK_W;
		2870	} else {
		2871	coord_dst.writemask = WRITEMASK_Z;
		2872	}
		2873
		2874	emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
		2875	coord_dst.writemask = WRITEMASK_XYZW;
		2876	}
		2877	}
		2878
		2879	if (ir->op == ir_txf_ms) {
		2880	coord_dst.writemask = WRITEMASK_W;
		2881	emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
		2882	coord_dst.writemask = WRITEMASK_XYZW;
		2883	} else if (opcode == TGSI_OPCODE_TXL \|\| opcode == TGSI_OPCODE_TXB \|\|
		2884	opcode == TGSI_OPCODE_TXF) {
		2885	/* TGSI stores LOD or LOD bias in the last channel of the coords. */
		2886	coord_dst.writemask = WRITEMASK_W;
		2887	emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
		2888	coord_dst.writemask = WRITEMASK_XYZW;
		2889	}
		2890
		2891	if (opcode == TGSI_OPCODE_TXD)
		2892	inst = emit(ir, opcode, result_dst, coord, dx, dy);
		2893	else if (opcode == TGSI_OPCODE_TXQ)
		2894	inst = emit(ir, opcode, result_dst, lod_info);
		2895	else if (opcode == TGSI_OPCODE_TXF) {
		2896	inst = emit(ir, opcode, result_dst, coord);
		2897	} else if (opcode == TGSI_OPCODE_TXL2 \|\| opcode == TGSI_OPCODE_TXB2) {
		2898	inst = emit(ir, opcode, result_dst, coord, lod_info);
		2899	} else if (opcode == TGSI_OPCODE_TEX2) {
		2900	inst = emit(ir, opcode, result_dst, coord, cube_sc);
		2901	} else
		2902	inst = emit(ir, opcode, result_dst, coord);
		2903
		2904	if (ir->shadow_comparitor)
		2905	inst->tex_shadow = GL_TRUE;
		2906
		2907	inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
		2908	this->shader_program,
		2909	this->prog);
		2910
		2911	if (ir->offset) {
		2912	inst->tex_offset_num_offset = 1;
		2913	inst->tex_offsets[0].Index = offset.index;
		2914	inst->tex_offsets[0].File = offset.file;
		2915	inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
		2916	inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
		2917	inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
		2918	}
		2919
		2920	switch (sampler_type->sampler_dimensionality) {
		2921	case GLSL_SAMPLER_DIM_1D:
		2922	inst->tex_target = (sampler_type->sampler_array)
		2923	? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
		2924	break;
		2925	case GLSL_SAMPLER_DIM_2D:
		2926	inst->tex_target = (sampler_type->sampler_array)
		2927	? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
		2928	break;
		2929	case GLSL_SAMPLER_DIM_3D:
		2930	inst->tex_target = TEXTURE_3D_INDEX;
		2931	break;
		2932	case GLSL_SAMPLER_DIM_CUBE:
		2933	inst->tex_target = (sampler_type->sampler_array)
		2934	? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
		2935	break;
		2936	case GLSL_SAMPLER_DIM_RECT:
		2937	inst->tex_target = TEXTURE_RECT_INDEX;
		2938	break;
		2939	case GLSL_SAMPLER_DIM_BUF:
		2940	inst->tex_target = TEXTURE_BUFFER_INDEX;
		2941	break;
		2942	case GLSL_SAMPLER_DIM_EXTERNAL:
		2943	inst->tex_target = TEXTURE_EXTERNAL_INDEX;
		2944	break;
		2945	case GLSL_SAMPLER_DIM_MS:
		2946	inst->tex_target = (sampler_type->sampler_array)
		2947	? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
		2948	break;
		2949	default:
		2950	assert(!"Should not get here.");
		2951	}
		2952
		2953	this->result = result_src;
		2954	}
		2955
		2956	void
		2957	glsl_to_tgsi_visitor::visit(ir_return *ir)
		2958	{
		2959	if (ir->get_value()) {
		2960	st_dst_reg l;
		2961	int i;
		2962
		2963	assert(current_function);
		2964
		2965	ir->get_value()->accept(this);
		2966	st_src_reg r = this->result;
		2967
		2968	l = st_dst_reg(current_function->return_reg);
		2969
		2970	for (i = 0; i < type_size(current_function->sig->return_type); i++) {
		2971	emit(ir, TGSI_OPCODE_MOV, l, r);
		2972	l.index++;
		2973	r.index++;
		2974	}
		2975	}
		2976
		2977	emit(ir, TGSI_OPCODE_RET);
		2978	}
		2979
		2980	void
		2981	glsl_to_tgsi_visitor::visit(ir_discard *ir)
		2982	{
		2983	if (ir->condition) {
		2984	ir->condition->accept(this);
		2985	this->result.negate = ~this->result.negate;
		2986	emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, this->result);
		2987	} else {
		2988	/* unconditional kil */
		2989	emit(ir, TGSI_OPCODE_KILL);
		2990	}
		2991	}
		2992
		2993	void
		2994	glsl_to_tgsi_visitor::visit(ir_if *ir)
		2995	{
		2996	unsigned if_opcode;
		2997	glsl_to_tgsi_instruction *if_inst;
		2998
		2999	ir->condition->accept(this);
		3000	assert(this->result.file != PROGRAM_UNDEFINED);
		3001
		3002	if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
		3003
		3004	if_inst = emit(ir->condition, if_opcode, undef_dst, this->result);
		3005
		3006	this->instructions.push_tail(if_inst);
		3007
		3008	visit_exec_list(&ir->then_instructions, this);
		3009
		3010	if (!ir->else_instructions.is_empty()) {
		3011	emit(ir->condition, TGSI_OPCODE_ELSE);
		3012	visit_exec_list(&ir->else_instructions, this);
		3013	}
		3014
		3015	if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
		3016	}
		3017
		3018	glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
		3019	{
		3020	result.file = PROGRAM_UNDEFINED;
		3021	next_temp = 1;
		3022	next_array = 0;
		3023	next_signature_id = 1;
		3024	num_immediates = 0;
		3025	current_function = NULL;
		3026	num_address_regs = 0;
		3027	samplers_used = 0;
		3028	indirect_addr_consts = false;
		3029	glsl_version = 0;
		3030	native_integers = false;
		3031	mem_ctx = ralloc_context(NULL);
		3032	ctx = NULL;
		3033	prog = NULL;
		3034	shader_program = NULL;
		3035	options = NULL;
		3036	}
		3037
		3038	glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
		3039	{
		3040	ralloc_free(mem_ctx);
		3041	}
		3042
		3043	extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
		3044	{
		3045	delete v;
		3046	}
		3047
		3048
		3049	/**
		3050	* Count resources used by the given gpu program (number of texture
		3051	* samplers, etc).
		3052	*/
		3053	static void
		3054	count_resources(glsl_to_tgsi_visitor v, gl_program prog)
		3055	{
		3056	v->samplers_used = 0;
		3057
		3058	foreach_iter(exec_list_iterator, iter, v->instructions) {
		3059	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3060
		3061	if (is_tex_instruction(inst->op)) {
		3062	v->samplers_used \|= 1 << inst->sampler;
		3063
		3064	if (inst->tex_shadow) {
		3065	prog->ShadowSamplers \|= 1 << inst->sampler;
		3066	}
		3067	}
		3068	}
		3069
		3070	prog->SamplersUsed = v->samplers_used;
		3071
		3072	if (v->shader_program != NULL)
		3073	_mesa_update_shader_textures_used(v->shader_program, prog);
		3074	}
		3075
		3076	static void
		3077	set_uniform_initializer(struct gl_context ctx, void mem_ctx,
		3078	struct gl_shader_program *shader_program,
		3079	const char name, const glsl_type type,
		3080	ir_constant *val)
		3081	{
		3082	if (type->is_record()) {
		3083	ir_constant *field_constant;
		3084
		3085	field_constant = (ir_constant *)val->components.get_head();
		3086
		3087	for (unsigned int i = 0; i < type->length; i++) {
		3088	const glsl_type *field_type = type->fields.structure[i].type;
		3089	const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
		3090	type->fields.structure[i].name);
		3091	set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
		3092	field_type, field_constant);
		3093	field_constant = (ir_constant *)field_constant->next;
		3094	}
		3095	return;
		3096	}
		3097
		3098	unsigned offset;
		3099	unsigned index = _mesa_get_uniform_location(ctx, shader_program, name,
		3100	&offset);
		3101	if (offset == GL_INVALID_INDEX) {
		3102	fail_link(shader_program,
		3103	"Couldn't find uniform for initializer %s\n", name);
		3104	return;
		3105	}
		3106	int loc = _mesa_uniform_merge_location_offset(shader_program, index, offset);
		3107
		3108	for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
		3109	ir_constant *element;
		3110	const glsl_type *element_type;
		3111	if (type->is_array()) {
		3112	element = val->array_elements[i];
		3113	element_type = type->fields.array;
		3114	} else {
		3115	element = val;
		3116	element_type = type;
		3117	}
		3118
		3119	void *values;
		3120
		3121	if (element_type->base_type == GLSL_TYPE_BOOL) {
		3122	int *conv = ralloc_array(mem_ctx, int, element_type->components());
		3123	for (unsigned int j = 0; j < element_type->components(); j++) {
		3124	conv[j] = element->value.b[j];
		3125	}
		3126	values = (void *)conv;
		3127	element_type = glsl_type::get_instance(GLSL_TYPE_INT,
		3128	element_type->vector_elements,
		3129	1);
		3130	} else {
		3131	values = &element->value;
		3132	}
		3133
		3134	if (element_type->is_matrix()) {
		3135	_mesa_uniform_matrix(ctx, shader_program,
		3136	element_type->matrix_columns,
		3137	element_type->vector_elements,
		3138	loc, 1, GL_FALSE, (GLfloat *)values);
		3139	} else {
		3140	_mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
		3141	values, element_type->gl_type);
		3142	}
		3143
		3144	loc++;
		3145	}
		3146	}
		3147
		3148	/**
		3149	* Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
		3150	* are read from the given src in this instruction
		3151	*/
		3152	static int
		3153	get_src_arg_mask(st_dst_reg dst, st_src_reg src)
		3154	{
		3155	int read_mask = 0, comp;
		3156
		3157	/* Now, given the src swizzle and the written channels, find which
		3158	* components are actually read
		3159	*/
		3160	for (comp = 0; comp < 4; ++comp) {
		3161	const unsigned coord = GET_SWZ(src.swizzle, comp);
		3162	ASSERT(coord < 4);
		3163	if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
		3164	read_mask \|= 1 << coord;
		3165	}
		3166
		3167	return read_mask;
		3168	}
		3169
		3170	/**
		3171	* This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
		3172	* instruction is the first instruction to write to register T0. There are
		3173	* several lowering passes done in GLSL IR (e.g. branches and
		3174	* relative addressing) that create a large number of conditional assignments
		3175	* that ir_to_mesa converts to CMP instructions like the one mentioned above.
		3176	*
		3177	* Here is why this conversion is safe:
		3178	* CMP T0, T1 T2 T0 can be expanded to:
		3179	* if (T1 < 0.0)
		3180	* MOV T0, T2;
		3181	* else
		3182	* MOV T0, T0;
		3183	*
		3184	* If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
		3185	* as the original program. If (T1 < 0.0) evaluates to false, executing
		3186	* MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
		3187	* Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
		3188	* because any instruction that was going to read from T0 after this was going
		3189	* to read a garbage value anyway.
		3190	*/
		3191	void
		3192	glsl_to_tgsi_visitor::simplify_cmp(void)
		3193	{
		3194	unsigned *tempWrites;
		3195	unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
		3196
		3197	tempWrites = new unsigned[MAX_TEMPS];
		3198	if (!tempWrites) {
		3199	return;
		3200	}
		3201	memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
		3202	memset(outputWrites, 0, sizeof(outputWrites));
		3203
		3204	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3205	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3206	unsigned prevWriteMask = 0;
		3207
		3208	/* Give up if we encounter relative addressing or flow control. */
		3209	if (inst->dst.reladdr \|\|
		3210	tgsi_get_opcode_info(inst->op)->is_branch \|\|
		3211	inst->op == TGSI_OPCODE_BGNSUB \|\|
		3212	inst->op == TGSI_OPCODE_CONT \|\|
		3213	inst->op == TGSI_OPCODE_END \|\|
		3214	inst->op == TGSI_OPCODE_ENDSUB \|\|
		3215	inst->op == TGSI_OPCODE_RET) {
		3216	break;
		3217	}
		3218
		3219	if (inst->dst.file == PROGRAM_OUTPUT) {
		3220	assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
		3221	prevWriteMask = outputWrites[inst->dst.index];
		3222	outputWrites[inst->dst.index] \|= inst->dst.writemask;
		3223	} else if (inst->dst.file == PROGRAM_TEMPORARY) {
		3224	assert(inst->dst.index < MAX_TEMPS);
		3225	prevWriteMask = tempWrites[inst->dst.index];
		3226	tempWrites[inst->dst.index] \|= inst->dst.writemask;
		3227	} else
		3228	continue;
		3229
		3230	/* For a CMP to be considered a conditional write, the destination
		3231	* register and source register two must be the same. */
		3232	if (inst->op == TGSI_OPCODE_CMP
		3233	&& !(inst->dst.writemask & prevWriteMask)
		3234	&& inst->src[2].file == inst->dst.file
		3235	&& inst->src[2].index == inst->dst.index
		3236	&& inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
		3237
		3238	inst->op = TGSI_OPCODE_MOV;
		3239	inst->src[0] = inst->src[1];
		3240	}
		3241	}
		3242
		3243	delete [] tempWrites;
		3244	}
		3245
		3246	/* Replaces all references to a temporary register index with another index. */
		3247	void
		3248	glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
		3249	{
		3250	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3251	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3252	unsigned j;
		3253
		3254	for (j=0; j < num_inst_src_regs(inst->op); j++) {
		3255	if (inst->src[j].file == PROGRAM_TEMPORARY &&
		3256	inst->src[j].index == index) {
		3257	inst->src[j].index = new_index;
		3258	}
		3259	}
		3260
		3261	if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
		3262	inst->dst.index = new_index;
		3263	}
		3264	}
		3265	}
		3266
		3267	int
		3268	glsl_to_tgsi_visitor::get_first_temp_read(int index)
		3269	{
		3270	int depth = 0; /* loop depth */
		3271	int loop_start = -1; /* index of the first active BGNLOOP (if any) */
		3272	unsigned i = 0, j;
		3273
		3274	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3275	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3276
		3277	for (j=0; j < num_inst_src_regs(inst->op); j++) {
		3278	if (inst->src[j].file == PROGRAM_TEMPORARY &&
		3279	inst->src[j].index == index) {
		3280	return (depth == 0) ? i : loop_start;
		3281	}
		3282	}
		3283
		3284	if (inst->op == TGSI_OPCODE_BGNLOOP) {
		3285	if(depth++ == 0)
		3286	loop_start = i;
		3287	} else if (inst->op == TGSI_OPCODE_ENDLOOP) {
		3288	if (--depth == 0)
		3289	loop_start = -1;
		3290	}
		3291	assert(depth >= 0);
		3292
		3293	i++;
		3294	}
		3295
		3296	return -1;
		3297	}
		3298
		3299	int
		3300	glsl_to_tgsi_visitor::get_first_temp_write(int index)
		3301	{
		3302	int depth = 0; /* loop depth */
		3303	int loop_start = -1; /* index of the first active BGNLOOP (if any) */
		3304	int i = 0;
		3305
		3306	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3307	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3308
		3309	if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
		3310	return (depth == 0) ? i : loop_start;
		3311	}
		3312
		3313	if (inst->op == TGSI_OPCODE_BGNLOOP) {
		3314	if(depth++ == 0)
		3315	loop_start = i;
		3316	} else if (inst->op == TGSI_OPCODE_ENDLOOP) {
		3317	if (--depth == 0)
		3318	loop_start = -1;
		3319	}
		3320	assert(depth >= 0);
		3321
		3322	i++;
		3323	}
		3324
		3325	return -1;
		3326	}
		3327
		3328	int
		3329	glsl_to_tgsi_visitor::get_last_temp_read(int index)
		3330	{
		3331	int depth = 0; /* loop depth */
		3332	int last = -1; /* index of last instruction that reads the temporary */
		3333	unsigned i = 0, j;
		3334
		3335	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3336	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3337
		3338	for (j=0; j < num_inst_src_regs(inst->op); j++) {
		3339	if (inst->src[j].file == PROGRAM_TEMPORARY &&
		3340	inst->src[j].index == index) {
		3341	last = (depth == 0) ? i : -2;
		3342	}
		3343	}
		3344
		3345	if (inst->op == TGSI_OPCODE_BGNLOOP)
		3346	depth++;
		3347	else if (inst->op == TGSI_OPCODE_ENDLOOP)
		3348	if (--depth == 0 && last == -2)
		3349	last = i;
		3350	assert(depth >= 0);
		3351
		3352	i++;
		3353	}
		3354
		3355	assert(last >= -1);
		3356	return last;
		3357	}
		3358
		3359	int
		3360	glsl_to_tgsi_visitor::get_last_temp_write(int index)
		3361	{
		3362	int depth = 0; /* loop depth */
		3363	int last = -1; /* index of last instruction that writes to the temporary */
		3364	int i = 0;
		3365
		3366	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3367	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3368
		3369	if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
		3370	last = (depth == 0) ? i : -2;
		3371
		3372	if (inst->op == TGSI_OPCODE_BGNLOOP)
		3373	depth++;
		3374	else if (inst->op == TGSI_OPCODE_ENDLOOP)
		3375	if (--depth == 0 && last == -2)
		3376	last = i;
		3377	assert(depth >= 0);
		3378
		3379	i++;
		3380	}
		3381
		3382	assert(last >= -1);
		3383	return last;
		3384	}
		3385
		3386	/*
		3387	* On a basic block basis, tracks available PROGRAM_TEMPORARY register
		3388	* channels for copy propagation and updates following instructions to
		3389	* use the original versions.
		3390	*
		3391	* The glsl_to_tgsi_visitor lazily produces code assuming that this pass
		3392	* will occur. As an example, a TXP production before this pass:
		3393	*
		3394	* 0: MOV TEMP[1], INPUT[4].xyyy;
		3395	* 1: MOV TEMP[1].w, INPUT[4].wwww;
		3396	* 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
		3397	*
		3398	* and after:
		3399	*
		3400	* 0: MOV TEMP[1], INPUT[4].xyyy;
		3401	* 1: MOV TEMP[1].w, INPUT[4].wwww;
		3402	* 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
		3403	*
		3404	* which allows for dead code elimination on TEMP[1]'s writes.
		3405	*/
		3406	void
		3407	glsl_to_tgsi_visitor::copy_propagate(void)
		3408	{
		3409	glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
		3410	glsl_to_tgsi_instruction *,
		3411	this->next_temp * 4);
		3412	int acp_level = rzalloc_array(mem_ctx, int, this->next_temp 4);
		3413	int level = 0;
		3414
		3415	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3416	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3417
		3418	assert(inst->dst.file != PROGRAM_TEMPORARY
		3419	\|\| inst->dst.index < this->next_temp);
		3420
		3421	/* First, do any copy propagation possible into the src regs. */
		3422	for (int r = 0; r < 3; r++) {
		3423	glsl_to_tgsi_instruction *first = NULL;
		3424	bool good = true;
		3425	int acp_base = inst->src[r].index * 4;
		3426
		3427	if (inst->src[r].file != PROGRAM_TEMPORARY \|\|
		3428	inst->src[r].reladdr)
		3429	continue;
		3430
		3431	/* See if we can find entries in the ACP consisting of MOVs
		3432	* from the same src register for all the swizzled channels
		3433	* of this src register reference.
		3434	*/
		3435	for (int i = 0; i < 4; i++) {
		3436	int src_chan = GET_SWZ(inst->src[r].swizzle, i);
		3437	glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
		3438
		3439	if (!copy_chan) {
		3440	good = false;
		3441	break;
		3442	}
		3443
		3444	assert(acp_level[acp_base + src_chan] <= level);
		3445
		3446	if (!first) {
		3447	first = copy_chan;
		3448	} else {
		3449	if (first->src[0].file != copy_chan->src[0].file \|\|
		3450	first->src[0].index != copy_chan->src[0].index) {
		3451	good = false;
		3452	break;
		3453	}
		3454	}
		3455	}
		3456
		3457	if (good) {
		3458	/* We've now validated that we can copy-propagate to
		3459	* replace this src register reference. Do it.
		3460	*/
		3461	inst->src[r].file = first->src[0].file;
		3462	inst->src[r].index = first->src[0].index;
		3463
		3464	int swizzle = 0;
		3465	for (int i = 0; i < 4; i++) {
		3466	int src_chan = GET_SWZ(inst->src[r].swizzle, i);
		3467	glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
		3468	swizzle \|= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
		3469	(3 * i));
		3470	}
		3471	inst->src[r].swizzle = swizzle;
		3472	}
		3473	}
		3474
		3475	switch (inst->op) {
		3476	case TGSI_OPCODE_BGNLOOP:
		3477	case TGSI_OPCODE_ENDLOOP:
		3478	/* End of a basic block, clear the ACP entirely. */
		3479	memset(acp, 0, sizeof(acp) this->next_temp * 4);
		3480	break;
		3481
		3482	case TGSI_OPCODE_IF:
		3483	case TGSI_OPCODE_UIF:
		3484	++level;
		3485	break;
		3486
		3487	case TGSI_OPCODE_ENDIF:
		3488	case TGSI_OPCODE_ELSE:
		3489	/* Clear all channels written inside the block from the ACP, but
		3490	* leaving those that were not touched.
		3491	*/
		3492	for (int r = 0; r < this->next_temp; r++) {
		3493	for (int c = 0; c < 4; c++) {
		3494	if (!acp[4 * r + c])
		3495	continue;
		3496
		3497	if (acp_level[4 * r + c] >= level)
		3498	acp[4 * r + c] = NULL;
		3499	}
		3500	}
		3501	if (inst->op == TGSI_OPCODE_ENDIF)
		3502	--level;
		3503	break;
		3504
		3505	default:
		3506	/* Continuing the block, clear any written channels from
		3507	* the ACP.
		3508	*/
		3509	if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
		3510	/* Any temporary might be written, so no copy propagation
		3511	* across this instruction.
		3512	*/
		3513	memset(acp, 0, sizeof(acp) this->next_temp * 4);
		3514	} else if (inst->dst.file == PROGRAM_OUTPUT &&
		3515	inst->dst.reladdr) {
		3516	/* Any output might be written, so no copy propagation
		3517	* from outputs across this instruction.
		3518	*/
		3519	for (int r = 0; r < this->next_temp; r++) {
		3520	for (int c = 0; c < 4; c++) {
		3521	if (!acp[4 * r + c])
		3522	continue;
		3523
		3524	if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
		3525	acp[4 * r + c] = NULL;
		3526	}
		3527	}
		3528	} else if (inst->dst.file == PROGRAM_TEMPORARY \|\|
		3529	inst->dst.file == PROGRAM_OUTPUT) {
		3530	/* Clear where it's used as dst. */
		3531	if (inst->dst.file == PROGRAM_TEMPORARY) {
		3532	for (int c = 0; c < 4; c++) {
		3533	if (inst->dst.writemask & (1 << c)) {
		3534	acp[4 * inst->dst.index + c] = NULL;
		3535	}
		3536	}
		3537	}
		3538
		3539	/* Clear where it's used as src. */
		3540	for (int r = 0; r < this->next_temp; r++) {
		3541	for (int c = 0; c < 4; c++) {
		3542	if (!acp[4 * r + c])
		3543	continue;
		3544
		3545	int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
		3546
		3547	if (acp[4 * r + c]->src[0].file == inst->dst.file &&
		3548	acp[4 * r + c]->src[0].index == inst->dst.index &&
		3549	inst->dst.writemask & (1 << src_chan))
		3550	{
		3551	acp[4 * r + c] = NULL;
		3552	}
		3553	}
		3554	}
		3555	}
		3556	break;
		3557	}
		3558
		3559	/* If this is a copy, add it to the ACP. */
		3560	if (inst->op == TGSI_OPCODE_MOV &&
		3561	inst->dst.file == PROGRAM_TEMPORARY &&
		3562	!(inst->dst.file == inst->src[0].file &&
		3563	inst->dst.index == inst->src[0].index) &&
		3564	!inst->dst.reladdr &&
		3565	!inst->saturate &&
		3566	!inst->src[0].reladdr &&
		3567	!inst->src[0].negate) {
		3568	for (int i = 0; i < 4; i++) {
		3569	if (inst->dst.writemask & (1 << i)) {
		3570	acp[4 * inst->dst.index + i] = inst;
		3571	acp_level[4 * inst->dst.index + i] = level;
		3572	}
		3573	}
		3574	}
		3575	}
		3576
		3577	ralloc_free(acp_level);
		3578	ralloc_free(acp);
		3579	}
		3580
		3581	/*
		3582	* Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
		3583	*
		3584	* The glsl_to_tgsi_visitor lazily produces code assuming that this pass
		3585	* will occur. As an example, a TXP production after copy propagation but
		3586	* before this pass:
		3587	*
		3588	* 0: MOV TEMP[1], INPUT[4].xyyy;
		3589	* 1: MOV TEMP[1].w, INPUT[4].wwww;
		3590	* 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
		3591	*
		3592	* and after this pass:
		3593	*
		3594	* 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
		3595	*
		3596	* FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
		3597	* FIXME: doesn't eliminate all dead code inside of loops; it steps around them
		3598	*/
		3599	void
		3600	glsl_to_tgsi_visitor::eliminate_dead_code(void)
		3601	{
		3602	int i;
		3603
		3604	for (i=0; i < this->next_temp; i++) {
		3605	int last_read = get_last_temp_read(i);
		3606	int j = 0;
		3607
		3608	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3609	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3610
		3611	if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
		3612	j > last_read)
		3613	{
		3614	iter.remove();
		3615	delete inst;
		3616	}
		3617
		3618	j++;
		3619	}
		3620	}
		3621	}
		3622
		3623	/*
		3624	* On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
		3625	* code elimination. This is less primitive than eliminate_dead_code(), as it
		3626	* is per-channel and can detect consecutive writes without a read between them
		3627	* as dead code. However, there is some dead code that can be eliminated by
		3628	* eliminate_dead_code() but not this function - for example, this function
		3629	* cannot eliminate an instruction writing to a register that is never read and
		3630	* is the only instruction writing to that register.
		3631	*
		3632	* The glsl_to_tgsi_visitor lazily produces code assuming that this pass
		3633	* will occur.
		3634	*/
		3635	int
		3636	glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
		3637	{
		3638	glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
		3639	glsl_to_tgsi_instruction *,
		3640	this->next_temp * 4);
		3641	int write_level = rzalloc_array(mem_ctx, int, this->next_temp 4);
		3642	int level = 0;
		3643	int removed = 0;
		3644
		3645	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3646	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3647
		3648	assert(inst->dst.file != PROGRAM_TEMPORARY
		3649	\|\| inst->dst.index < this->next_temp);
		3650
		3651	switch (inst->op) {
		3652	case TGSI_OPCODE_BGNLOOP:
		3653	case TGSI_OPCODE_ENDLOOP:
		3654	case TGSI_OPCODE_CONT:
		3655	case TGSI_OPCODE_BRK:
		3656	/* End of a basic block, clear the write array entirely.
		3657	*
		3658	* This keeps us from killing dead code when the writes are
		3659	* on either side of a loop, even when the register isn't touched
		3660	* inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit
		3661	* dead code of this type, so it shouldn't make a difference as long as
		3662	* the dead code elimination pass in the GLSL compiler does its job.
		3663	*/
		3664	memset(writes, 0, sizeof(writes) this->next_temp * 4);
		3665	break;
		3666
		3667	case TGSI_OPCODE_ENDIF:
		3668	case TGSI_OPCODE_ELSE:
		3669	/* Promote the recorded level of all channels written inside the
		3670	* preceding if or else block to the level above the if/else block.
		3671	*/
		3672	for (int r = 0; r < this->next_temp; r++) {
		3673	for (int c = 0; c < 4; c++) {
		3674	if (!writes[4 * r + c])
		3675	continue;
		3676
		3677	if (write_level[4 * r + c] == level)
		3678	write_level[4 * r + c] = level-1;
		3679	}
		3680	}
		3681
		3682	if(inst->op == TGSI_OPCODE_ENDIF)
		3683	--level;
		3684
		3685	break;
		3686
		3687	case TGSI_OPCODE_IF:
		3688	case TGSI_OPCODE_UIF:
		3689	++level;
		3690	/* fallthrough to default case to mark the condition as read */
		3691
		3692	default:
		3693	/* Continuing the block, clear any channels from the write array that
		3694	* are read by this instruction.
		3695	*/
		3696	for (unsigned i = 0; i < Elements(inst->src); i++) {
		3697	if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
		3698	/* Any temporary might be read, so no dead code elimination
		3699	* across this instruction.
		3700	*/
		3701	memset(writes, 0, sizeof(writes) this->next_temp * 4);
		3702	} else if (inst->src[i].file == PROGRAM_TEMPORARY) {
		3703	/* Clear where it's used as src. */
		3704	int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
		3705	src_chans \|= 1 << GET_SWZ(inst->src[i].swizzle, 1);
		3706	src_chans \|= 1 << GET_SWZ(inst->src[i].swizzle, 2);
		3707	src_chans \|= 1 << GET_SWZ(inst->src[i].swizzle, 3);
		3708
		3709	for (int c = 0; c < 4; c++) {
		3710	if (src_chans & (1 << c)) {
		3711	writes[4 * inst->src[i].index + c] = NULL;
		3712	}
		3713	}
		3714	}
		3715	}
		3716	break;
		3717	}
		3718
		3719	/* If this instruction writes to a temporary, add it to the write array.
		3720	* If there is already an instruction in the write array for one or more
		3721	* of the channels, flag that channel write as dead.
		3722	*/
		3723	if (inst->dst.file == PROGRAM_TEMPORARY &&
		3724	!inst->dst.reladdr &&
		3725	!inst->saturate) {
		3726	for (int c = 0; c < 4; c++) {
		3727	if (inst->dst.writemask & (1 << c)) {
		3728	if (writes[4 * inst->dst.index + c]) {
		3729	if (write_level[4 * inst->dst.index + c] < level)
		3730	continue;
		3731	else
		3732	writes[4 * inst->dst.index + c]->dead_mask \|= (1 << c);
		3733	}
		3734	writes[4 * inst->dst.index + c] = inst;
		3735	write_level[4 * inst->dst.index + c] = level;
		3736	}
		3737	}
		3738	}
		3739	}
		3740
		3741	/* Anything still in the write array at this point is dead code. */
		3742	for (int r = 0; r < this->next_temp; r++) {
		3743	for (int c = 0; c < 4; c++) {
		3744	glsl_to_tgsi_instruction inst = writes[4 r + c];
		3745	if (inst)
		3746	inst->dead_mask \|= (1 << c);
		3747	}
		3748	}
		3749
		3750	/* Now actually remove the instructions that are completely dead and update
		3751	* the writemask of other instructions with dead channels.
		3752	*/
		3753	foreach_iter(exec_list_iterator, iter, this->instructions) {
		3754	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3755
		3756	if (!inst->dead_mask \|\| !inst->dst.writemask)
		3757	continue;
		3758	else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
		3759	iter.remove();
		3760	delete inst;
		3761	removed++;
		3762	} else
		3763	inst->dst.writemask &= ~(inst->dead_mask);
		3764	}
		3765
		3766	ralloc_free(write_level);
		3767	ralloc_free(writes);
		3768
		3769	return removed;
		3770	}
		3771
		3772	/* Merges temporary registers together where possible to reduce the number of
		3773	* registers needed to run a program.
		3774	*
		3775	* Produces optimal code only after copy propagation and dead code elimination
		3776	* have been run. */
		3777	void
		3778	glsl_to_tgsi_visitor::merge_registers(void)
		3779	{
		3780	int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
		3781	int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
		3782	int i, j;
		3783
		3784	/* Read the indices of the last read and first write to each temp register
		3785	* into an array so that we don't have to traverse the instruction list as
		3786	* much. */
		3787	for (i=0; i < this->next_temp; i++) {
		3788	last_reads[i] = get_last_temp_read(i);
		3789	first_writes[i] = get_first_temp_write(i);
		3790	}
		3791
		3792	/* Start looking for registers with non-overlapping usages that can be
		3793	* merged together. */
		3794	for (i=0; i < this->next_temp; i++) {
		3795	/* Don't touch unused registers. */
		3796	if (last_reads[i] < 0 \|\| first_writes[i] < 0) continue;
		3797
		3798	for (j=0; j < this->next_temp; j++) {
		3799	/* Don't touch unused registers. */
		3800	if (last_reads[j] < 0 \|\| first_writes[j] < 0) continue;
		3801
		3802	/* We can merge the two registers if the first write to j is after or
		3803	* in the same instruction as the last read from i. Note that the
		3804	* register at index i will always be used earlier or at the same time
		3805	* as the register at index j. */
		3806	if (first_writes[i] <= first_writes[j] &&
		3807	last_reads[i] <= first_writes[j])
		3808	{
		3809	rename_temp_register(j, i); /* Replace all references to j with i.*/
		3810
		3811	/* Update the first_writes and last_reads arrays with the new
		3812	* values for the merged register index, and mark the newly unused
		3813	* register index as such. */
		3814	last_reads[i] = last_reads[j];
		3815	first_writes[j] = -1;
		3816	last_reads[j] = -1;
		3817	}
		3818	}
		3819	}
		3820
		3821	ralloc_free(last_reads);
		3822	ralloc_free(first_writes);
		3823	}
		3824
		3825	/* Reassign indices to temporary registers by reusing unused indices created
		3826	* by optimization passes. */
		3827	void
		3828	glsl_to_tgsi_visitor::renumber_registers(void)
		3829	{
		3830	int i = 0;
		3831	int new_index = 0;
		3832
		3833	for (i=0; i < this->next_temp; i++) {
		3834	if (get_first_temp_read(i) < 0) continue;
		3835	if (i != new_index)
		3836	rename_temp_register(i, new_index);
		3837	new_index++;
		3838	}
		3839
		3840	this->next_temp = new_index;
		3841	}
		3842
		3843	/**
		3844	* Returns a fragment program which implements the current pixel transfer ops.
		3845	* Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
		3846	*/
		3847	extern "C" void
		3848	get_pixel_transfer_visitor(struct st_fragment_program *fp,
		3849	glsl_to_tgsi_visitor *original,
		3850	int scale_and_bias, int pixel_maps)
		3851	{
		3852	glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
		3853	struct st_context *st = st_context(original->ctx);
		3854	struct gl_program *prog = &fp->Base.Base;
		3855	struct gl_program_parameter_list *params = _mesa_new_parameter_list();
		3856	st_src_reg coord, src0;
		3857	st_dst_reg dst0;
		3858	glsl_to_tgsi_instruction *inst;
		3859
		3860	/* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
		3861	v->ctx = original->ctx;
		3862	v->prog = prog;
		3863	v->shader_program = NULL;
		3864	v->glsl_version = original->glsl_version;
		3865	v->native_integers = original->native_integers;
		3866	v->options = original->options;
		3867	v->next_temp = original->next_temp;
		3868	v->num_address_regs = original->num_address_regs;
		3869	v->samplers_used = prog->SamplersUsed = original->samplers_used;
		3870	v->indirect_addr_consts = original->indirect_addr_consts;
		3871	memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
		3872	v->num_immediates = original->num_immediates;
		3873
		3874	/*
		3875	* Get initial pixel color from the texture.
		3876	* TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
		3877	*/
		3878	coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
		3879	src0 = v->get_temp(glsl_type::vec4_type);
		3880	dst0 = st_dst_reg(src0);
		3881	inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
		3882	inst->sampler = 0;
		3883	inst->tex_target = TEXTURE_2D_INDEX;
		3884
		3885	prog->InputsRead \|= VARYING_BIT_TEX0;
		3886	prog->SamplersUsed \|= (1 << 0); /* mark sampler 0 as used */
		3887	v->samplers_used \|= (1 << 0);
		3888
		3889	if (scale_and_bias) {
		3890	static const gl_state_index scale_state[STATE_LENGTH] =
		3891	{ STATE_INTERNAL, STATE_PT_SCALE,
		3892	(gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
		3893	static const gl_state_index bias_state[STATE_LENGTH] =
		3894	{ STATE_INTERNAL, STATE_PT_BIAS,
		3895	(gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
		3896	GLint scale_p, bias_p;
		3897	st_src_reg scale, bias;
		3898
		3899	scale_p = _mesa_add_state_reference(params, scale_state);
		3900	bias_p = _mesa_add_state_reference(params, bias_state);
		3901
		3902	/* MAD colorTemp, colorTemp, scale, bias; */
		3903	scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
		3904	bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
		3905	inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
		3906	}
		3907
		3908	if (pixel_maps) {
		3909	st_src_reg temp = v->get_temp(glsl_type::vec4_type);
		3910	st_dst_reg temp_dst = st_dst_reg(temp);
		3911
		3912	assert(st->pixel_xfer.pixelmap_texture);
		3913
		3914	/* With a little effort, we can do four pixel map look-ups with
		3915	* two TEX instructions:
		3916	*/
		3917
		3918	/* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
		3919	temp_dst.writemask = WRITEMASK_XY; /* write R,G */
		3920	inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
		3921	inst->sampler = 1;
		3922	inst->tex_target = TEXTURE_2D_INDEX;
		3923
		3924	/* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
		3925	src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
		3926	temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
		3927	inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
		3928	inst->sampler = 1;
		3929	inst->tex_target = TEXTURE_2D_INDEX;
		3930
		3931	prog->SamplersUsed \|= (1 << 1); /* mark sampler 1 as used */
		3932	v->samplers_used \|= (1 << 1);
		3933
		3934	/* MOV colorTemp, temp; */
		3935	inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
		3936	}
		3937
		3938	/* Now copy the instructions from the original glsl_to_tgsi_visitor into the
		3939	* new visitor. */
		3940	foreach_iter(exec_list_iterator, iter, original->instructions) {
		3941	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		3942	glsl_to_tgsi_instruction *newinst;
		3943	st_src_reg src_regs[3];
		3944
		3945	if (inst->dst.file == PROGRAM_OUTPUT)
		3946	prog->OutputsWritten \|= BITFIELD64_BIT(inst->dst.index);
		3947
		3948	for (int i=0; i<3; i++) {
		3949	src_regs[i] = inst->src[i];
		3950	if (src_regs[i].file == PROGRAM_INPUT &&
		3951	src_regs[i].index == VARYING_SLOT_COL0)
		3952	{
		3953	src_regs[i].file = PROGRAM_TEMPORARY;
		3954	src_regs[i].index = src0.index;
		3955	}
		3956	else if (src_regs[i].file == PROGRAM_INPUT)
		3957	prog->InputsRead \|= BITFIELD64_BIT(src_regs[i].index);
		3958	}
		3959
		3960	newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
		3961	newinst->tex_target = inst->tex_target;
		3962	}
		3963
		3964	/* Make modifications to fragment program info. */
		3965	prog->Parameters = _mesa_combine_parameter_lists(params,
		3966	original->prog->Parameters);
		3967	_mesa_free_parameter_list(params);
		3968	count_resources(v, prog);
		3969	fp->glsl_to_tgsi = v;
		3970	}
		3971
		3972	/**
		3973	* Make fragment program for glBitmap:
		3974	* Sample the texture and kill the fragment if the bit is 0.
		3975	* This program will be combined with the user's fragment program.
		3976	*
		3977	* Based on make_bitmap_fragment_program in st_cb_bitmap.c.
		3978	*/
		3979	extern "C" void
		3980	get_bitmap_visitor(struct st_fragment_program *fp,
		3981	glsl_to_tgsi_visitor *original, int samplerIndex)
		3982	{
		3983	glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
		3984	struct st_context *st = st_context(original->ctx);
		3985	struct gl_program *prog = &fp->Base.Base;
		3986	st_src_reg coord, src0;
		3987	st_dst_reg dst0;
		3988	glsl_to_tgsi_instruction *inst;
		3989
		3990	/* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
		3991	v->ctx = original->ctx;
		3992	v->prog = prog;
		3993	v->shader_program = NULL;
		3994	v->glsl_version = original->glsl_version;
		3995	v->native_integers = original->native_integers;
		3996	v->options = original->options;
		3997	v->next_temp = original->next_temp;
		3998	v->num_address_regs = original->num_address_regs;
		3999	v->samplers_used = prog->SamplersUsed = original->samplers_used;
		4000	v->indirect_addr_consts = original->indirect_addr_consts;
		4001	memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
		4002	v->num_immediates = original->num_immediates;
		4003
		4004	/* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
		4005	coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
		4006	src0 = v->get_temp(glsl_type::vec4_type);
		4007	dst0 = st_dst_reg(src0);
		4008	inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
		4009	inst->sampler = samplerIndex;
		4010	inst->tex_target = TEXTURE_2D_INDEX;
		4011
		4012	prog->InputsRead \|= VARYING_BIT_TEX0;
		4013	prog->SamplersUsed \|= (1 << samplerIndex); /* mark sampler as used */
		4014	v->samplers_used \|= (1 << samplerIndex);
		4015
		4016	/* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
		4017	src0.negate = NEGATE_XYZW;
		4018	if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
		4019	src0.swizzle = SWIZZLE_XXXX;
		4020	inst = v->emit(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
		4021
		4022	/* Now copy the instructions from the original glsl_to_tgsi_visitor into the
		4023	* new visitor. */
		4024	foreach_iter(exec_list_iterator, iter, original->instructions) {
		4025	glsl_to_tgsi_instruction inst = (glsl_to_tgsi_instruction )iter.get();
		4026	glsl_to_tgsi_instruction *newinst;
		4027	st_src_reg src_regs[3];
		4028
		4029	if (inst->dst.file == PROGRAM_OUTPUT)
		4030	prog->OutputsWritten \|= BITFIELD64_BIT(inst->dst.index);
		4031
		4032	for (int i=0; i<3; i++) {
		4033	src_regs[i] = inst->src[i];
		4034	if (src_regs[i].file == PROGRAM_INPUT)
		4035	prog->InputsRead \|= BITFIELD64_BIT(src_regs[i].index);
		4036	}
		4037
		4038	newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
		4039	newinst->tex_target = inst->tex_target;
		4040	}
		4041
		4042	/* Make modifications to fragment program info. */
		4043	prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
		4044	count_resources(v, prog);
		4045	fp->glsl_to_tgsi = v;
		4046	}
		4047
		4048	/* ------------------------- TGSI conversion stuff -------------------------- */
		/**
		 * One branch recorded by get_label() for later patching.
		 */
		struct label {
		   /* Branch target value passed to get_label(). */
		   unsigned branch_target;

		   /* Slot whose address get_label() hands back to its caller. */
		   unsigned token;
		};
		4053
		4054	/**
		4055	* Intermediate state used during shader translation.
		4056	*/
		4057	struct st_translate {
		4058	struct ureg_program *ureg;
		4059
		4060	struct ureg_dst temps[MAX_TEMPS];
		4061	struct ureg_dst arrays[MAX_ARRAYS];
		4062	struct ureg_src *constants;
		4063	struct ureg_src *immediates;
		4064	struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
		4065	struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
		4066	struct ureg_dst address[1];
		4067	struct ureg_src samplers[PIPE_MAX_SAMPLERS];
		4068	struct ureg_src systemValues[SYSTEM_VALUE_MAX];
		4069
		4070	unsigned array_sizes[MAX_ARRAYS];
		4071
		4072	const GLuint *inputMapping;
		4073	const GLuint *outputMapping;
		4074
		4075	/* For every instruction that contains a label (eg CALL), keep
		4076	* details so that we can go back afterwards and emit the correct
		4077	* tgsi instruction number for each label.
		4078	*/
		4079	struct label *labels;
		4080	unsigned labels_size;
		4081	unsigned labels_count;
		4082
		4083	/* Keep a record of the tgsi instruction number that each mesa
		4084	* instruction starts at, will be used to fix up labels after
		4085	* translation.
		4086	*/
		4087	unsigned *insn;
		4088	unsigned insn_size;
		4089	unsigned insn_count;
		4090
		4091	unsigned procType; /*< TGSI_PROCESSOR_VERTEX/FRAGMENT /
		4092
		4093	boolean error;
		4094	};
		4095
		4096	/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
		4097	static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
		4098	TGSI_SEMANTIC_FACE,
		4099	TGSI_SEMANTIC_VERTEXID,
		4100	TGSI_SEMANTIC_INSTANCEID
		4101	};
		4102
		4103	/**
		4104	* Make note of a branch to a label in the TGSI code.
		4105	* After we've emitted all instructions, we'll go over the list
		4106	* of labels built here and patch the TGSI code with the actual
		4107	* location of each label.
		4108	*/
		4109	static unsigned get_label(struct st_translate t, unsigned branch_target)
		4110	{
		4111	unsigned i;
		4112
		4113	if (t->labels_count + 1 >= t->labels_size) {
		4114	t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
		4115	t->labels = (struct label *)realloc(t->labels,
		4116	t->labels_size * sizeof(struct label));
		4117	if (t->labels == NULL) {
		4118	static unsigned dummy;
		4119	t->error = TRUE;
		4120	return &dummy;
		4121	}
		4122	}
		4123
		4124	i = t->labels_count++;
		4125	t->labels[i].branch_target = branch_target;
		4126	return &t->labels[i].token;
		4127	}
		4128
		4129	/**
		4130	* Called prior to emitting the TGSI code for each instruction.
		4131	* Allocate additional space for instructions if needed.
		4132	* Update the insn[] array so the next glsl_to_tgsi_instruction points to
		4133	* the next TGSI instruction.
		4134	*/
		4135	static void set_insn_start(struct st_translate *t, unsigned start)
		4136	{
		4137	if (t->insn_count + 1 >= t->insn_size) {
		4138	t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
		4139	t->insn = (unsigned )realloc(t->insn, t->insn_size sizeof(t->insn[0]));
		4140	if (t->insn == NULL) {
		4141	t->error = TRUE;
		4142	return;
		4143	}
		4144	}
		4145
		4146	t->insn[t->insn_count++] = start;
		4147	}
		4148
		4149	/**
		4150	* Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
		4151	*/
		4152	static struct ureg_src
		4153	emit_immediate(struct st_translate *t,
		4154	gl_constant_value values[4],
		4155	int type, int size)
		4156	{
		4157	struct ureg_program *ureg = t->ureg;
		4158
		4159	switch(type)
		4160	{
		4161	case GL_FLOAT:
		4162	return ureg_DECL_immediate(ureg, &values[0].f, size);
		4163	case GL_INT:
		4164	return ureg_DECL_immediate_int(ureg, &values[0].i, size);
		4165	case GL_UNSIGNED_INT:
		4166	case GL_BOOL:
		4167	return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
		4168	default:
		4169	assert(!"should not get here - type must be float, int, uint, or bool");
		4170	return ureg_src_undef();
		4171	}
		4172	}
		4173
		4174	/**
		4175	* Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
		4176	*/
		4177	static struct ureg_dst
		4178	dst_register(struct st_translate *t,
		4179	gl_register_file file,
		4180	GLuint index)
		4181	{
		4182	unsigned array;
		4183
		4184	switch(file) {
		4185	case PROGRAM_UNDEFINED:
		4186	return ureg_dst_undef();
		4187
		4188	case PROGRAM_TEMPORARY:
		4189	assert(index >= 0);
		4190	assert(index < (int) Elements(t->temps));
		4191
		4192	if (ureg_dst_is_undef(t->temps[index]))
		4193	t->temps[index] = ureg_DECL_local_temporary(t->ureg);
		4194
		4195	return t->temps[index];
		4196
		4197	case PROGRAM_ARRAY:
		4198	array = index >> 16;
		4199
		4200	assert(array >= 0);
		4201	assert(array < (int) Elements(t->arrays));
		4202
		4203	if (ureg_dst_is_undef(t->arrays[array]))
		4204	t->arrays[array] = ureg_DECL_array_temporary(
		4205	t->ureg, t->array_sizes[array], TRUE);
		4206
		4207	return ureg_dst_array_offset(t->arrays[array],
		4208	(int)(index & 0xFFFF) - 0x8000);
		4209
		4210	case PROGRAM_OUTPUT:
		4211	if (t->procType == TGSI_PROCESSOR_VERTEX)
		4212	assert(index < VARYING_SLOT_MAX);
		4213	else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
		4214	assert(index < FRAG_RESULT_MAX);
		4215	else
		4216	assert(index < VARYING_SLOT_MAX);
		4217
		4218	assert(t->outputMapping[index] < Elements(t->outputs));
		4219
		4220	return t->outputs[t->outputMapping[index]];
		4221
		4222	case PROGRAM_ADDRESS:
		4223	return t->address[index];
		4224
		4225	default:
		4226	assert(!"unknown dst register file");
		4227	return ureg_dst_undef();
		4228	}
		4229	}
		4230
		4231	/**
		4232	* Map a glsl_to_tgsi src register to a TGSI ureg_src register.
		4233	*/
		4234	static struct ureg_src
		4235	src_register(struct st_translate *t,
		4236	gl_register_file file,
		4237	GLint index, GLint index2D)
		4238	{
		4239	switch(file) {
		4240	case PROGRAM_UNDEFINED:
		4241	return ureg_src_undef();
		4242
		4243	case PROGRAM_TEMPORARY:
		4244	case PROGRAM_ARRAY:
		4245	return ureg_src(dst_register(t, file, index));
		4246
		4247	case PROGRAM_ENV_PARAM:
		4248	case PROGRAM_LOCAL_PARAM:
		4249	case PROGRAM_UNIFORM:
		4250	assert(index >= 0);
		4251	return t->constants[index];
		4252	case PROGRAM_STATE_VAR:
		4253	case PROGRAM_CONSTANT: /* ie, immediate */
		4254	if (index2D) {
		4255	struct ureg_src src;
		4256	src = ureg_src_register(TGSI_FILE_CONSTANT, 0);
		4257	src.Dimension = 1;
		4258	src.DimensionIndex = index2D;
		4259	return src;
		4260	} else if (index < 0)
		4261	return ureg_DECL_constant(t->ureg, 0);
		4262	else
		4263	return t->constants[index];
		4264
		4265	case PROGRAM_IMMEDIATE:
		4266	return t->immediates[index];
		4267
		4268	case PROGRAM_INPUT:
		4269	assert(t->inputMapping[index] < Elements(t->inputs));
		4270	return t->inputs[t->inputMapping[index]];
		4271
		4272	case PROGRAM_OUTPUT:
		4273	assert(t->outputMapping[index] < Elements(t->outputs));
		4274	return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
		4275
		4276	case PROGRAM_ADDRESS:
		4277	return ureg_src(t->address[index]);
		4278
		4279	case PROGRAM_SYSTEM_VALUE:
		4280	assert(index < (int) Elements(t->systemValues));
		4281	return t->systemValues[index];
		4282
		4283	default:
		4284	assert(!"unknown src register file");
		4285	return ureg_src_undef();
		4286	}
		4287	}
		4288
		4289	/**
		4290	* Create a TGSI ureg_dst register from an st_dst_reg.
		4291	*/
		4292	static struct ureg_dst
		4293	translate_dst(struct st_translate *t,
		4294	const st_dst_reg *dst_reg,
		4295	bool saturate, bool clamp_color)
		4296	{
		4297	struct ureg_dst dst = dst_register(t,
		4298	dst_reg->file,
		4299	dst_reg->index);
		4300
		4301	dst = ureg_writemask(dst, dst_reg->writemask);
		4302
		4303	if (saturate)
		4304	dst = ureg_saturate(dst);
		4305	else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
		4306	/* Clamp colors for ARB_color_buffer_float. */
		4307	switch (t->procType) {
		4308	case TGSI_PROCESSOR_VERTEX:
		4309	/* XXX if the geometry shader is present, this must be done there
		4310	* instead of here. */
		4311	if (dst_reg->index == VARYING_SLOT_COL0 \|\|
		4312	dst_reg->index == VARYING_SLOT_COL1 \|\|
		4313	dst_reg->index == VARYING_SLOT_BFC0 \|\|
		4314	dst_reg->index == VARYING_SLOT_BFC1) {
		4315	dst = ureg_saturate(dst);
		4316	}
		4317	break;
		4318
		4319	case TGSI_PROCESSOR_FRAGMENT:
		4320	if (dst_reg->index >= FRAG_RESULT_COLOR) {
		4321	dst = ureg_saturate(dst);
		4322	}
		4323	break;
		4324	}
		4325	}
		4326
		4327	if (dst_reg->reladdr != NULL) {
		4328	assert(dst_reg->file != PROGRAM_TEMPORARY);
		4329	dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
		4330	}
		4331
		4332	return dst;
		4333	}
		4334
		4335	/**
		4336	* Create a TGSI ureg_src register from an st_src_reg.
		4337	*/
		4338	static struct ureg_src
		4339	translate_src(struct st_translate t, const st_src_reg src_reg)
		4340	{
		4341	struct ureg_src src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D);
		4342
		4343	src = ureg_swizzle(src,
		4344	GET_SWZ(src_reg->swizzle, 0) & 0x3,
		4345	GET_SWZ(src_reg->swizzle, 1) & 0x3,
		4346	GET_SWZ(src_reg->swizzle, 2) & 0x3,
		4347	GET_SWZ(src_reg->swizzle, 3) & 0x3);
		4348
		4349	if ((src_reg->negate & 0xf) == NEGATE_XYZW)
		4350	src = ureg_negate(src);
		4351
		4352	if (src_reg->reladdr != NULL) {
		4353	assert(src_reg->file != PROGRAM_TEMPORARY);
		4354	src = ureg_src_indirect(src, ureg_src(t->address[0]));
		4355	}
		4356
		4357	return src;
		4358	}
		4359
		4360	static struct tgsi_texture_offset
		4361	translate_tex_offset(struct st_translate *t,
		4362	const struct tgsi_texture_offset *in_offset)
		4363	{
		4364	struct tgsi_texture_offset offset;
		4365	struct ureg_src imm_src;
		4366
		4367	assert(in_offset->File == PROGRAM_IMMEDIATE);
		4368	imm_src = t->immediates[in_offset->Index];
		4369
		4370	offset.File = imm_src.File;
		4371	offset.Index = imm_src.Index;
		4372	offset.SwizzleX = imm_src.SwizzleX;
		4373	offset.SwizzleY = imm_src.SwizzleY;
		4374	offset.SwizzleZ = imm_src.SwizzleZ;
		4375	offset.File = TGSI_FILE_IMMEDIATE;
		4376	offset.Padding = 0;
		4377
		4378	return offset;
		4379	}
		4380
		4381	static void
		4382	compile_tgsi_instruction(struct st_translate *t,
		4383	const glsl_to_tgsi_instruction *inst,
		4384	bool clamp_dst_color_output)
		4385	{
		4386	struct ureg_program *ureg = t->ureg;
		4387	GLuint i;
		4388	struct ureg_dst dst[1];
		4389	struct ureg_src src[4];
		4390	struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
		4391
		4392	unsigned num_dst;
		4393	unsigned num_src;
		4394	unsigned tex_target;
		4395
		4396	num_dst = num_inst_dst_regs(inst->op);
		4397	num_src = num_inst_src_regs(inst->op);
		4398
		4399	if (num_dst)
		4400	dst[0] = translate_dst(t,
		4401	&inst->dst,
		4402	inst->saturate,
		4403	clamp_dst_color_output);
		4404
		4405	for (i = 0; i < num_src; i++)
		4406	src[i] = translate_src(t, &inst->src[i]);
		4407
		4408	switch(inst->op) {
		4409	case TGSI_OPCODE_BGNLOOP:
		4410	case TGSI_OPCODE_CAL:
		4411	case TGSI_OPCODE_ELSE:
		4412	case TGSI_OPCODE_ENDLOOP:
		4413	case TGSI_OPCODE_IF:
		4414	case TGSI_OPCODE_UIF:
		4415	assert(num_dst == 0);
		4416	ureg_label_insn(ureg,
		4417	inst->op,
		4418	src, num_src,
		4419	get_label(t,
		4420	inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
		4421	return;
		4422
		4423	case TGSI_OPCODE_TEX:
		4424	case TGSI_OPCODE_TXB:
		4425	case TGSI_OPCODE_TXD:
		4426	case TGSI_OPCODE_TXL:
		4427	case TGSI_OPCODE_TXP:
		4428	case TGSI_OPCODE_TXQ:
		4429	case TGSI_OPCODE_TXF:
		4430	case TGSI_OPCODE_TEX2:
		4431	case TGSI_OPCODE_TXB2:
		4432	case TGSI_OPCODE_TXL2:
		4433	src[num_src++] = t->samplers[inst->sampler];
		4434	for (i = 0; i < inst->tex_offset_num_offset; i++) {
		4435	texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
		4436	}
		4437	tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
		4438
		4439	ureg_tex_insn(ureg,
		4440	inst->op,
		4441	dst, num_dst,
		4442	tex_target,
		4443	texoffsets, inst->tex_offset_num_offset,
		4444	src, num_src);
		4445	return;
		4446
		4447	case TGSI_OPCODE_SCS:
		4448	dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
		4449	ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
		4450	break;
		4451
		4452	default:
		4453	ureg_insn(ureg,
		4454	inst->op,
		4455	dst, num_dst,
		4456	src, num_src);
		4457	break;
		4458	}
		4459	}
		4460
		4461	/**
		4462	* Emit the TGSI instructions for inverting and adjusting WPOS.
		4463	* This code is unavoidable because it also depends on whether
		4464	* a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
		4465	*/
		4466	static void
		4467	emit_wpos_adjustment( struct st_translate *t,
		4468	const struct gl_program *program,
		4469	boolean invert,
		4470	GLfloat adjX, GLfloat adjY[2])
		4471	{
		4472	struct ureg_program *ureg = t->ureg;
		4473
		4474	/* Fragment program uses fragment position input.
		4475	* Need to replace instances of INPUT[WPOS] with temp T
		4476	* where T = INPUT[WPOS] by y is inverted.
		4477	*/
		4478	static const gl_state_index wposTransformState[STATE_LENGTH]
		4479	= { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
		4480	(gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
		4481
		4482	/* XXX: note we are modifying the incoming shader here! Need to
		4483	* do this before emitting the constant decls below, or this
		4484	* will be missed:
		4485	*/
		4486	unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
		4487	wposTransformState);
		4488
		4489	struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
		4490	struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
		4491	struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];
		4492
		4493	/* First, apply the coordinate shift: */
		4494	if (adjX \|\| adjY[0] \|\| adjY[1]) {
		4495	if (adjY[0] != adjY[1]) {
		4496	/* Adjust the y coordinate by adjY[1] or adjY[0] respectively
		4497	* depending on whether inversion is actually going to be applied
		4498	* or not, which is determined by testing against the inversion
		4499	* state variable used below, which will be either +1 or -1.
		4500	*/
		4501	struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
		4502
		4503	ureg_CMP(ureg, adj_temp,
		4504	ureg_scalar(wpostrans, invert ? 2 : 0),
		4505	ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
		4506	ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
		4507	ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
		4508	} else {
		4509	ureg_ADD(ureg, wpos_temp, wpos_input,
		4510	ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
		4511	}
		4512	wpos_input = ureg_src(wpos_temp);
		4513	} else {
		4514	/* MOV wpos_temp, input[wpos]
		4515	*/
		4516	ureg_MOV( ureg, wpos_temp, wpos_input );
		4517	}
		4518
		4519	/* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
		4520	* inversion/identity, or the other way around if we're drawing to an FBO.
		4521	*/
		4522	if (invert) {
		4523	/* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
		4524	*/
		4525	ureg_MAD( ureg,
		4526	ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
		4527	wpos_input,
		4528	ureg_scalar(wpostrans, 0),
		4529	ureg_scalar(wpostrans, 1));
		4530	} else {
		4531	/* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
		4532	*/
		4533	ureg_MAD( ureg,
		4534	ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
		4535	wpos_input,
		4536	ureg_scalar(wpostrans, 2),
		4537	ureg_scalar(wpostrans, 3));
		4538	}
		4539
		4540	/* Use wpos_temp as position input from here on:
		4541	*/
		4542	t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
		4543	}
		4544
		4545
		4546	/**
		4547	* Emit fragment position/ooordinate code.
		4548	*/
		4549	static void
		4550	emit_wpos(struct st_context *st,
		4551	struct st_translate *t,
		4552	const struct gl_program *program,
		4553	struct ureg_program *ureg)
		4554	{
		4555	const struct gl_fragment_program *fp =
		4556	(const struct gl_fragment_program *) program;
		4557	struct pipe_screen *pscreen = st->pipe->screen;
		4558	GLfloat adjX = 0.0f;
		4559	GLfloat adjY[2] = { 0.0f, 0.0f };
		4560	boolean invert = FALSE;
		4561
		4562	/* Query the pixel center conventions supported by the pipe driver and set
		4563	* adjX, adjY to help out if it cannot handle the requested one internally.
		4564	*
		4565	* The bias of the y-coordinate depends on whether y-inversion takes place
		4566	* (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
		4567	* drawing to an FBO (causes additional inversion), and whether the the pipe
		4568	* driver origin and the requested origin differ (the latter condition is
		4569	* stored in the 'invert' variable).
		4570	*
		4571	* For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
		4572	*
		4573	* center shift only:
		4574	* i -> h: +0.5
		4575	* h -> i: -0.5
		4576	*
		4577	* inversion only:
		4578	* l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
		4579	* l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
		4580	* u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
		4581	* u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
		4582	*
		4583	* inversion and center shift:
		4584	* l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
		4585	* l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
		4586	* u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
		4587	* u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
		4588	*/
		4589	if (fp->OriginUpperLeft) {
		4590	/* Fragment shader wants origin in upper-left */
		4591	if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
		4592	/* the driver supports upper-left origin */
		4593	}
		4594	else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
		4595	/* the driver supports lower-left origin, need to invert Y */
		4596	ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
		4597	invert = TRUE;
		4598	}
		4599	else
		4600	assert(0);
		4601	}
		4602	else {
		4603	/* Fragment shader wants origin in lower-left */
		4604	if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
		4605	/* the driver supports lower-left origin */
		4606	ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
		4607	else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
		4608	/* the driver supports upper-left origin, need to invert Y */
		4609	invert = TRUE;
		4610	else
		4611	assert(0);
		4612	}
		4613
		4614	if (fp->PixelCenterInteger) {
		4615	/* Fragment shader wants pixel center integer */
		4616	if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
		4617	/* the driver supports pixel center integer */
		4618	adjY[1] = 1.0f;
		4619	ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
		4620	}
		4621	else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
		4622	/* the driver supports pixel center half integer, need to bias X,Y */
		4623	adjX = -0.5f;
		4624	adjY[0] = -0.5f;
		4625	adjY[1] = 0.5f;
		4626	}
		4627	else
		4628	assert(0);
		4629	}
		4630	else {
		4631	/* Fragment shader wants pixel center half integer */
		4632	if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
		4633	/* the driver supports pixel center half integer */
		4634	}
		4635	else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
		4636	/* the driver supports pixel center integer, need to bias X,Y */
		4637	adjX = adjY[0] = adjY[1] = 0.5f;
		4638	ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
		4639	}
		4640	else
		4641	assert(0);
		4642	}
		4643
		4644	/* we invert after adjustment so that we avoid the MOV to temporary,
		4645	* and reuse the adjustment ADD instead */
		4646	emit_wpos_adjustment(t, program, invert, adjX, adjY);
		4647	}
		4648
		4649	/**
		4650	* OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
		4651	* TGSI uses +1 for front, -1 for back.
		4652	* This function converts the TGSI value to the GL value. Simply clamping/
		4653	* saturating the value to [0,1] does the job.
		4654	*/
		4655	static void
		4656	emit_face_var(struct st_translate *t)
		4657	{
		4658	struct ureg_program *ureg = t->ureg;
		4659	struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
		4660	struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
		4661
		4662	/* MOV_SAT face_temp, input[face] */
		4663	face_temp = ureg_saturate(face_temp);
		4664	ureg_MOV(ureg, face_temp, face_input);
		4665
		4666	/* Use face_temp as face input from here on: */
		4667	t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
		4668	}
		4669
		4670	static void
		4671	emit_edgeflags(struct st_translate *t)
		4672	{
		4673	struct ureg_program *ureg = t->ureg;
		4674	struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
		4675	struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
		4676
		4677	ureg_MOV(ureg, edge_dst, edge_src);
		4678	}
		4679
		4680	/**
		4681	* Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
		4682	* \param program the program to translate
		4683	* \param numInputs number of input registers used
		4684	* \param inputMapping maps Mesa fragment program inputs to TGSI generic
		4685	* input indexes
		4686	* \param inputSemanticName the TGSI_SEMANTIC flag for each input
		4687	* \param inputSemanticIndex the semantic index (ex: which texcoord) for
		4688	* each input
		4689	* \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
		4690	* \param numOutputs number of output registers used
		4691	* \param outputMapping maps Mesa fragment program outputs to TGSI
		4692	* generic outputs
		4693	* \param outputSemanticName the TGSI_SEMANTIC flag for each output
		4694	* \param outputSemanticIndex the semantic index (ex: which texcoord) for
		4695	* each output
		4696	*
		4697	* \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
		4698	*/
		4699	extern "C" enum pipe_error
		4700	st_translate_program(
		4701	struct gl_context *ctx,
		4702	uint procType,
		4703	struct ureg_program *ureg,
		4704	glsl_to_tgsi_visitor *program,
		4705	const struct gl_program *proginfo,
		4706	GLuint numInputs,
		4707	const GLuint inputMapping[],
		4708	const ubyte inputSemanticName[],
		4709	const ubyte inputSemanticIndex[],
		4710	const GLuint interpMode[],
		4711	const GLboolean is_centroid[],
		4712	GLuint numOutputs,
		4713	const GLuint outputMapping[],
		4714	const ubyte outputSemanticName[],
		4715	const ubyte outputSemanticIndex[],
		4716	boolean passthrough_edgeflags,
		4717	boolean clamp_color)
		4718	{
		4719	struct st_translate *t;
		4720	unsigned i;
		4721	enum pipe_error ret = PIPE_OK;
		4722
		4723	assert(numInputs <= Elements(t->inputs));
		4724	assert(numOutputs <= Elements(t->outputs));
		4725
		4726	t = CALLOC_STRUCT(st_translate);
		4727	if (!t) {
		4728	ret = PIPE_ERROR_OUT_OF_MEMORY;
		4729	goto out;
		4730	}
		4731
		4732	memset(t, 0, sizeof *t);
		4733
		4734	t->procType = procType;
		4735	t->inputMapping = inputMapping;
		4736	t->outputMapping = outputMapping;
		4737	t->ureg = ureg;
		4738
		4739	if (program->shader_program) {
		4740	for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
		4741	struct gl_uniform_storage *const storage =
		4742	&program->shader_program->UniformStorage[i];
		4743
		4744	_mesa_uniform_detach_all_driver_storage(storage);
		4745	}
		4746	}
		4747
		4748	/*
		4749	* Declare input attributes.
		4750	*/
		4751	if (procType == TGSI_PROCESSOR_FRAGMENT) {
		4752	for (i = 0; i < numInputs; i++) {
		4753	t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
		4754	inputSemanticName[i],
		4755	inputSemanticIndex[i],
		4756	interpMode[i], 0,
		4757	is_centroid[i]);
		4758	}
		4759
		4760	if (proginfo->InputsRead & VARYING_BIT_POS) {
		4761	/* Must do this after setting up t->inputs, and before
		4762	* emitting constant references, below:
		4763	*/
		4764	emit_wpos(st_context(ctx), t, proginfo, ureg);
		4765	}
		4766
		4767	if (proginfo->InputsRead & VARYING_BIT_FACE)
		4768	emit_face_var(t);
		4769
		4770	/*
		4771	* Declare output attributes.
		4772	*/
		4773	for (i = 0; i < numOutputs; i++) {
		4774	switch (outputSemanticName[i]) {
		4775	case TGSI_SEMANTIC_POSITION:
		4776	t->outputs[i] = ureg_DECL_output(ureg,
		4777	TGSI_SEMANTIC_POSITION, /* Z/Depth */
		4778	outputSemanticIndex[i]);
		4779	t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
		4780	break;
		4781	case TGSI_SEMANTIC_STENCIL:
		4782	t->outputs[i] = ureg_DECL_output(ureg,
		4783	TGSI_SEMANTIC_STENCIL, /* Stencil */
		4784	outputSemanticIndex[i]);
		4785	t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
		4786	break;
		4787	case TGSI_SEMANTIC_COLOR:
		4788	t->outputs[i] = ureg_DECL_output(ureg,
		4789	TGSI_SEMANTIC_COLOR,
		4790	outputSemanticIndex[i]);
		4791	break;
		4792	default:
		4793	assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
		4794	ret = PIPE_ERROR_BAD_INPUT;
		4795	goto out;
		4796	}
		4797	}
		4798	}
		4799	else if (procType == TGSI_PROCESSOR_GEOMETRY) {
		4800	for (i = 0; i < numInputs; i++) {
		4801	t->inputs[i] = ureg_DECL_gs_input(ureg,
		4802	i,
		4803	inputSemanticName[i],
		4804	inputSemanticIndex[i]);
		4805	}
		4806
		4807	for (i = 0; i < numOutputs; i++) {
		4808	t->outputs[i] = ureg_DECL_output(ureg,
		4809	outputSemanticName[i],
		4810	outputSemanticIndex[i]);
		4811	}
		4812	}
		4813	else {
		4814	assert(procType == TGSI_PROCESSOR_VERTEX);
		4815
		4816	for (i = 0; i < numInputs; i++) {
		4817	t->inputs[i] = ureg_DECL_vs_input(ureg, i);
		4818	}
		4819
		4820	for (i = 0; i < numOutputs; i++) {
		4821	t->outputs[i] = ureg_DECL_output(ureg,
		4822	outputSemanticName[i],
		4823	outputSemanticIndex[i]);
		4824	}
		4825	if (passthrough_edgeflags)
		4826	emit_edgeflags(t);
		4827	}
		4828
		4829	/* Declare address register.
		4830	*/
		4831	if (program->num_address_regs > 0) {
		4832	assert(program->num_address_regs == 1);
		4833	t->address[0] = ureg_DECL_address(ureg);
		4834	}
		4835
		4836	/* Declare misc input registers
		4837	*/
		4838	{
		4839	GLbitfield sysInputs = proginfo->SystemValuesRead;
		4840	unsigned numSys = 0;
		4841	for (i = 0; sysInputs; i++) {
		4842	if (sysInputs & (1 << i)) {
		4843	unsigned semName = mesa_sysval_to_semantic[i];
		4844	t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
		4845	if (semName == TGSI_SEMANTIC_INSTANCEID \|\|
		4846	semName == TGSI_SEMANTIC_VERTEXID) {
		4847	/* From Gallium perspective, these system values are always
		4848	* integer, and require native integer support. However, if
		4849	* native integer is supported on the vertex stage but not the
		4850	* pixel stage (e.g, i915g + draw), Mesa will generate IR that
		4851	* assumes these system values are floats. To resolve the
		4852	* inconsistency, we insert a U2F.
		4853	*/
		4854	struct st_context *st = st_context(ctx);
		4855	struct pipe_screen *pscreen = st->pipe->screen;
		4856	assert(procType == TGSI_PROCESSOR_VERTEX);
		4857	assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
		4858	if (!ctx->Const.NativeIntegers) {
		4859	struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
		4860	ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
		4861	t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
		4862	}
		4863	}
		4864	numSys++;
		4865	sysInputs &= ~(1 << i);
		4866	}
		4867	}
		4868	}
		4869
		4870	/* Copy over array sizes
		4871	*/
		4872	memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
		4873
		4874	/* Emit constants and uniforms. TGSI uses a single index space for these,
		4875	* so we put all the translated regs in t->constants.
		4876	*/
		4877	if (proginfo->Parameters) {
		4878	t->constants = (struct ureg_src *)
		4879	calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
		4880	if (t->constants == NULL) {
		4881	ret = PIPE_ERROR_OUT_OF_MEMORY;
		4882	goto out;
		4883	}
		4884
		4885	for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
		4886	switch (proginfo->Parameters->Parameters[i].Type) {
		4887	case PROGRAM_ENV_PARAM:
		4888	case PROGRAM_LOCAL_PARAM:
		4889	case PROGRAM_STATE_VAR:
		4890	case PROGRAM_UNIFORM:
		4891	t->constants[i] = ureg_DECL_constant(ureg, i);
		4892	break;
		4893
		4894	/* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
		4895	* addressing of the const buffer.
		4896	* FIXME: Be smarter and recognize param arrays:
		4897	* indirect addressing is only valid within the referenced
		4898	* array.
		4899	*/
		4900	case PROGRAM_CONSTANT:
		4901	if (program->indirect_addr_consts)
		4902	t->constants[i] = ureg_DECL_constant(ureg, i);
		4903	else
		4904	t->constants[i] = emit_immediate(t,
		4905	proginfo->Parameters->ParameterValues[i],
		4906	proginfo->Parameters->Parameters[i].DataType,
		4907	4);
		4908	break;
		4909	default:
		4910	break;
		4911	}
		4912	}
		4913	}
		4914
		4915	if (program->shader_program) {
		4916	unsigned num_ubos = program->shader_program->NumUniformBlocks;
		4917
		4918	for (i = 0; i < num_ubos; i++) {
		4919	ureg_DECL_constant2D(t->ureg, 0, program->shader_program->UniformBlocks[i].UniformBufferSize / 4, i + 1);
		4920	}
		4921	}
		4922
		4923	/* Emit immediate values.
		4924	*/
		4925	t->immediates = (struct ureg_src *)
		4926	calloc(program->num_immediates, sizeof(struct ureg_src));
		4927	if (t->immediates == NULL) {
		4928	ret = PIPE_ERROR_OUT_OF_MEMORY;
		4929	goto out;
		4930	}
		4931	i = 0;
		4932	foreach_iter(exec_list_iterator, iter, program->immediates) {
		4933	immediate_storage imm = (immediate_storage )iter.get();
		4934	assert(i < program->num_immediates);
		4935	t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
		4936	}
		4937	assert(i == program->num_immediates);
		4938
		4939	/* texture samplers */
		4940	for (i = 0; i < ctx->Const.FragmentProgram.MaxTextureImageUnits; i++) {
		4941	if (program->samplers_used & (1 << i)) {
		4942	t->samplers[i] = ureg_DECL_sampler(ureg, i);
		4943	}
		4944	}
		4945
		4946	/* Emit each instruction in turn:
		4947	*/
		4948	foreach_iter(exec_list_iterator, iter, program->instructions) {
		4949	set_insn_start(t, ureg_get_instruction_number(ureg));
		4950	compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
		4951	clamp_color);
		4952	}
		4953
		4954	/* Fix up all emitted labels:
		4955	*/
		4956	for (i = 0; i < t->labels_count; i++) {
		4957	ureg_fixup_label(ureg, t->labels[i].token,
		4958	t->insn[t->labels[i].branch_target]);
		4959	}
		4960
		4961	if (program->shader_program) {
		4962	/* This has to be done last. Any operation that can cause
		4963	* prog->ParameterValues to get reallocated (e.g., anything that adds a
		4964	* program constant) has to happen before creating this linkage.
		4965	*/
		4966	for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
		4967	if (program->shader_program->_LinkedShaders[i] == NULL)
		4968	continue;
		4969
		4970	_mesa_associate_uniform_storage(ctx, program->shader_program,
		4971	program->shader_program->_LinkedShaders[i]->Program->Parameters);
		4972	}
		4973	}
		4974
		4975	out:
		4976	if (t) {
		4977	free(t->insn);
		4978	free(t->labels);
		4979	free(t->constants);
		4980	free(t->immediates);
		4981
		4982	if (t->error) {
		4983	debug_printf("%s: translate error flag set\n", __FUNCTION__);
		4984	}
		4985
		4986	free(t);
		4987	}
		4988
		4989	return ret;
		4990	}
		4991	/* ----------------------------- End TGSI code ------------------------------ */
		4992
		4993	/**
		4994	* Convert a shader's GLSL IR into a Mesa gl_program, although without
		4995	* generating Mesa IR.
		4996	*/
		4997	static struct gl_program *
		4998	get_mesa_program(struct gl_context *ctx,
		4999	struct gl_shader_program *shader_program,
		5000	struct gl_shader *shader)
		5001	{
		5002	glsl_to_tgsi_visitor* v;
		5003	struct gl_program *prog;
		5004	GLenum target;
		5005	bool progress;
		5006	struct gl_shader_compiler_options *options =
		5007	&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
		5008	struct pipe_screen *pscreen = ctx->st->pipe->screen;
		5009	unsigned ptarget;
		5010
		5011	switch (shader->Type) {
		5012	case GL_VERTEX_SHADER:
		5013	target = GL_VERTEX_PROGRAM_ARB;
		5014	ptarget = PIPE_SHADER_VERTEX;
		5015	break;
		5016	case GL_FRAGMENT_SHADER:
		5017	target = GL_FRAGMENT_PROGRAM_ARB;
		5018	ptarget = PIPE_SHADER_FRAGMENT;
		5019	break;
		5020	case GL_GEOMETRY_SHADER:
		5021	target = GL_GEOMETRY_PROGRAM_NV;
		5022	ptarget = PIPE_SHADER_GEOMETRY;
		5023	break;
		5024	default:
		5025	assert(!"should not be reached");
		5026	return NULL;
		5027	}
		5028
		5029	validate_ir_tree(shader->ir);
		5030
		5031	prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
		5032	if (!prog)
		5033	return NULL;
		5034	prog->Parameters = _mesa_new_parameter_list();
		5035	v = new glsl_to_tgsi_visitor();
		5036	v->ctx = ctx;
		5037	v->prog = prog;
		5038	v->shader_program = shader_program;
		5039	v->options = options;
		5040	v->glsl_version = ctx->Const.GLSLVersion;
		5041	v->native_integers = ctx->Const.NativeIntegers;
		5042
		5043	v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
		5044	PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
		5045
		5046	_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
		5047	prog->Parameters);
		5048
		5049	/* Remove reads from output registers. */
		5050	lower_output_reads(shader->ir);
		5051
		5052	/* Emit intermediate IR for main(). */
		5053	visit_exec_list(shader->ir, v);
		5054
		5055	/* Now emit bodies for any functions that were used. */
		5056	do {
		5057	progress = GL_FALSE;
		5058
		5059	foreach_iter(exec_list_iterator, iter, v->function_signatures) {
		5060	function_entry entry = (function_entry )iter.get();
		5061
		5062	if (!entry->bgn_inst) {
		5063	v->current_function = entry;
		5064
		5065	entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
		5066	entry->bgn_inst->function = entry;
		5067
		5068	visit_exec_list(&entry->sig->body, v);
		5069
		5070	glsl_to_tgsi_instruction *last;
		5071	last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
		5072	if (last->op != TGSI_OPCODE_RET)
		5073	v->emit(NULL, TGSI_OPCODE_RET);
		5074
		5075	glsl_to_tgsi_instruction *end;
		5076	end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
		5077	end->function = entry;
		5078
		5079	progress = GL_TRUE;
		5080	}
		5081	}
		5082	} while (progress);
		5083
		5084	#if 0
		5085	/* Print out some information (for debugging purposes) used by the
		5086	* optimization passes. */
		5087	for (i=0; i < v->next_temp; i++) {
		5088	int fr = v->get_first_temp_read(i);
		5089	int fw = v->get_first_temp_write(i);
		5090	int lr = v->get_last_temp_read(i);
		5091	int lw = v->get_last_temp_write(i);
		5092
		5093	printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
		5094	assert(fw <= fr);
		5095	}
		5096	#endif
		5097
		5098	/* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
		5099	v->simplify_cmp();
		5100	v->copy_propagate();
		5101	while (v->eliminate_dead_code_advanced());
		5102
		5103	v->eliminate_dead_code();
		5104	v->merge_registers();
		5105	v->renumber_registers();
		5106
		5107	/* Write the END instruction. */
		5108	v->emit(NULL, TGSI_OPCODE_END);
		5109
		5110	if (ctx->Shader.Flags & GLSL_DUMP) {
		5111	printf("\n");
		5112	printf("GLSL IR for linked %s program %d:\n",
		5113	_mesa_glsl_shader_target_name(shader->Type),
		5114	shader_program->Name);
		5115	_mesa_print_ir(shader->ir, NULL);
		5116	printf("\n");
		5117	printf("\n");
		5118	fflush(stdout);
		5119	}
		5120
		5121	prog->Instructions = NULL;
		5122	prog->NumInstructions = 0;
		5123
		5124	do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
		5125	count_resources(v, prog);
		5126
		5127	_mesa_reference_program(ctx, &shader->Program, prog);
		5128
		5129	/* This has to be done last. Any operation the can cause
		5130	* prog->ParameterValues to get reallocated (e.g., anything that adds a
		5131	* program constant) has to happen before creating this linkage.
		5132	*/
		5133	_mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
		5134	if (!shader_program->LinkStatus) {
		5135	return NULL;
		5136	}
		5137
		5138	struct st_vertex_program *stvp;
		5139	struct st_fragment_program *stfp;
		5140	struct st_geometry_program *stgp;
		5141
		5142	switch (shader->Type) {
		5143	case GL_VERTEX_SHADER:
		5144	stvp = (struct st_vertex_program *)prog;
		5145	stvp->glsl_to_tgsi = v;
		5146	break;
		5147	case GL_FRAGMENT_SHADER:
		5148	stfp = (struct st_fragment_program *)prog;
		5149	stfp->glsl_to_tgsi = v;
		5150	break;
		5151	case GL_GEOMETRY_SHADER:
		5152	stgp = (struct st_geometry_program *)prog;
		5153	stgp->glsl_to_tgsi = v;
		5154	break;
		5155	default:
		5156	assert(!"should not be reached");
		5157	return NULL;
		5158	}
		5159
		5160	return prog;
		5161	}
		5162
		5163	extern "C" {
		5164
		5165	struct gl_shader *
		5166	st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
		5167	{
		5168	struct gl_shader *shader;
		5169	assert(type == GL_FRAGMENT_SHADER \|\| type == GL_VERTEX_SHADER \|\|
		5170	type == GL_GEOMETRY_SHADER_ARB);
		5171	shader = rzalloc(NULL, struct gl_shader);
		5172	if (shader) {
		5173	shader->Type = type;
		5174	shader->Name = name;
		5175	_mesa_init_shader(ctx, shader);
		5176	}
		5177	return shader;
		5178	}
		5179
		5180	struct gl_shader_program *
		5181	st_new_shader_program(struct gl_context *ctx, GLuint name)
		5182	{
		5183	struct gl_shader_program *shProg;
		5184	shProg = rzalloc(NULL, struct gl_shader_program);
		5185	if (shProg) {
		5186	shProg->Name = name;
		5187	_mesa_init_shader_program(ctx, shProg);
		5188	}
		5189	return shProg;
		5190	}
		5191
		5192	/**
		5193	* Link a shader.
		5194	* Called via ctx->Driver.LinkShader()
		5195	* This actually involves converting GLSL IR into an intermediate TGSI-like IR
		5196	* with code lowering and other optimizations.
		5197	*/
		5198	GLboolean
		5199	st_link_shader(struct gl_context ctx, struct gl_shader_program prog)
		5200	{
		5201	assert(prog->LinkStatus);
		5202
		5203	for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
		5204	if (prog->_LinkedShaders[i] == NULL)
		5205	continue;
		5206
		5207	bool progress;
		5208	exec_list *ir = prog->_LinkedShaders[i]->ir;
		5209	const struct gl_shader_compiler_options *options =
		5210	&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
		5211
		5212	/* If there are forms of indirect addressing that the driver
		5213	* cannot handle, perform the lowering pass.
		5214	*/
		5215	if (options->EmitNoIndirectInput \|\| options->EmitNoIndirectOutput \|\|
		5216	options->EmitNoIndirectTemp \|\| options->EmitNoIndirectUniform) {
		5217	lower_variable_index_to_cond_assign(ir,
		5218	options->EmitNoIndirectInput,
		5219	options->EmitNoIndirectOutput,
		5220	options->EmitNoIndirectTemp,
		5221	options->EmitNoIndirectUniform);
		5222	}
		5223
		5224	if (ctx->Extensions.ARB_shading_language_packing) {
		5225	unsigned lower_inst = LOWER_PACK_SNORM_2x16 \|
		5226	LOWER_UNPACK_SNORM_2x16 \|
		5227	LOWER_PACK_UNORM_2x16 \|
		5228	LOWER_UNPACK_UNORM_2x16 \|
		5229	LOWER_PACK_SNORM_4x8 \|
		5230	LOWER_UNPACK_SNORM_4x8 \|
		5231	LOWER_UNPACK_UNORM_4x8 \|
		5232	LOWER_PACK_UNORM_4x8 \|
		5233	LOWER_PACK_HALF_2x16 \|
		5234	LOWER_UNPACK_HALF_2x16;
		5235
		5236	lower_packing_builtins(ir, lower_inst);
		5237	}
		5238
		5239	do_mat_op_to_vec(ir);
		5240	lower_instructions(ir,
		5241	MOD_TO_FRACT \|
		5242	DIV_TO_MUL_RCP \|
		5243	EXP_TO_EXP2 \|
		5244	LOG_TO_LOG2 \|
		5245	(options->EmitNoPow ? POW_TO_EXP2 : 0) \|
		5246	(!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
		5247
		5248	lower_ubo_reference(prog->_LinkedShaders[i], ir);
		5249	do_vec_index_to_cond_assign(ir);
		5250	lower_vector_insert(ir, true);
		5251	lower_quadop_vector(ir, false);
		5252	lower_noise(ir);
		5253	if (options->MaxIfDepth == 0) {
		5254	lower_discard(ir);
		5255	}
		5256
		5257	do {
		5258	progress = false;
		5259
		5260	progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) \|\| progress;
		5261
		5262	progress = do_common_optimization(ir, true, true,
		5263	options->MaxUnrollIterations, options)
		5264	\|\| progress;
		5265
		5266	progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) \|\| progress;
		5267
		5268	} while (progress);
		5269
		5270	validate_ir_tree(ir);
		5271	}
		5272
		5273	for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
		5274	struct gl_program *linked_prog;
		5275
		5276	if (prog->_LinkedShaders[i] == NULL)
		5277	continue;
		5278
		5279	linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
		5280
		5281	if (linked_prog) {
		5282	_mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
		5283	linked_prog);
		5284	if (!ctx->Driver.ProgramStringNotify(ctx,
		5285	_mesa_program_index_to_target(i),
		5286	linked_prog)) {
		5287	_mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
		5288	NULL);
		5289	_mesa_reference_program(ctx, &linked_prog, NULL);
		5290	return GL_FALSE;
		5291	}
		5292	}
		5293
		5294	_mesa_reference_program(ctx, &linked_prog, NULL);
		5295	}
		5296
		5297	return GL_TRUE;
		5298	}
		5299
		5300	void
		5301	st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
		5302	const GLuint outputMapping[],
		5303	struct pipe_stream_output_info *so)
		5304	{
		5305	unsigned i;
		5306	struct gl_transform_feedback_info *info =
		5307	&glsl_to_tgsi->shader_program->LinkedTransformFeedback;
		5308
		5309	for (i = 0; i < info->NumOutputs; i++) {
		5310	so->output[i].register_index =
		5311	outputMapping[info->Outputs[i].OutputRegister];
		5312	so->output[i].start_component = info->Outputs[i].ComponentOffset;
		5313	so->output[i].num_components = info->Outputs[i].NumComponents;
		5314	so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
		5315	so->output[i].dst_offset = info->Outputs[i].DstOffset;
		5316	}
		5317
		5318	for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
		5319	so->stride[i] = info->BufferStride[i];
		5320	}
		5321	so->num_outputs = info->NumOutputs;
		5322	}
		5323
		5324	} /* extern "C" */

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/src/mesa/state_tracker/st_glsl_to_tgsi.cpp – Rev 5063