WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/src/mesa/program/ir_to_mesa.cpp

Rev	Author	Line No.	Line
4358	Serge	1	/*
		2	* Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
		3	* Copyright (C) 2008 VMware, Inc. All Rights Reserved.
		4	* Copyright © 2010 Intel Corporation
		5	*
		6	* Permission is hereby granted, free of charge, to any person obtaining a
		7	* copy of this software and associated documentation files (the "Software"),
		8	* to deal in the Software without restriction, including without limitation
		9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		10	* and/or sell copies of the Software, and to permit persons to whom the
		11	* Software is furnished to do so, subject to the following conditions:
		12	*
		13	* The above copyright notice and this permission notice (including the next
		14	* paragraph) shall be included in all copies or substantial portions of the
		15	* Software.
		16	*
		17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		18	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		20	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		21	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		22	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
		23	* DEALINGS IN THE SOFTWARE.
		24	*/
		25
		26	/**
		27	* \file ir_to_mesa.cpp
		28	*
		29	* Translate GLSL IR to Mesa's gl_program representation.
		30	*/
		31
		32	#include <stdio.h>
		33	#include "main/compiler.h"
		34	#include "ir.h"
		35	#include "ir_visitor.h"
		36	#include "ir_expression_flattening.h"
		37	#include "ir_uniform.h"
		38	#include "glsl_types.h"
		39	#include "glsl_parser_extras.h"
		40	#include "../glsl/program.h"
		41	#include "ir_optimization.h"
		42	#include "ast.h"
		43	#include "linker.h"
		44
		45	#include "main/mtypes.h"
		46	#include "main/shaderobj.h"
		47	#include "program/hash_table.h"
		48
		49	extern "C" {
		50	#include "main/shaderapi.h"
		51	#include "main/uniforms.h"
		52	#include "program/prog_instruction.h"
		53	#include "program/prog_optimize.h"
		54	#include "program/prog_print.h"
		55	#include "program/program.h"
		56	#include "program/prog_parameter.h"
		57	#include "program/sampler.h"
		58	}
		59
		60	class src_reg;
		61	class dst_reg;
		62
		63	static int swizzle_for_size(int size);
		64
		65	/**
		66	* This struct is a corresponding struct to Mesa prog_src_register, with
		67	* wider fields.
		68	*/
		69	class src_reg {
		70	public:
		71	src_reg(gl_register_file file, int index, const glsl_type *type)
		72	{
		73	this->file = file;
		74	this->index = index;
		75	if (type && (type->is_scalar() \|\| type->is_vector() \|\| type->is_matrix()))
		76	this->swizzle = swizzle_for_size(type->vector_elements);
		77	else
		78	this->swizzle = SWIZZLE_XYZW;
		79	this->negate = 0;
		80	this->reladdr = NULL;
		81	}
		82
		83	src_reg()
		84	{
		85	this->file = PROGRAM_UNDEFINED;
		86	this->index = 0;
		87	this->swizzle = 0;
		88	this->negate = 0;
		89	this->reladdr = NULL;
		90	}
		91
		92	explicit src_reg(dst_reg reg);
		93
		94	gl_register_file file; /*< PROGRAM_ from Mesa */
		95	int index; /*< temporary index, VERT_ATTRIB_, VARYING_SLOT_, etc. /
		96	GLuint swizzle; /*< SWIZZLE_XYZWONEZERO swizzles from Mesa. /
		97	int negate; /*< NEGATE_XYZW mask from mesa /
		98	/** Register index should be offset by the integer in this reg. */
		99	src_reg *reladdr;
		100	};
		101
		102	class dst_reg {
		103	public:
		104	dst_reg(gl_register_file file, int writemask)
		105	{
		106	this->file = file;
		107	this->index = 0;
		108	this->writemask = writemask;
		109	this->cond_mask = COND_TR;
		110	this->reladdr = NULL;
		111	}
		112
		113	dst_reg()
		114	{
		115	this->file = PROGRAM_UNDEFINED;
		116	this->index = 0;
		117	this->writemask = 0;
		118	this->cond_mask = COND_TR;
		119	this->reladdr = NULL;
		120	}
		121
		122	explicit dst_reg(src_reg reg);
		123
		124	gl_register_file file; /*< PROGRAM_ from Mesa */
		125	int index; /*< temporary index, VERT_ATTRIB_, VARYING_SLOT_, etc. /
		126	int writemask; /*< Bitfield of WRITEMASK_[XYZW] /
		127	GLuint cond_mask:4;
		128	/** Register index should be offset by the integer in this reg. */
		129	src_reg *reladdr;
		130	};
		131
		132	src_reg::src_reg(dst_reg reg)
		133	{
		134	this->file = reg.file;
		135	this->index = reg.index;
		136	this->swizzle = SWIZZLE_XYZW;
		137	this->negate = 0;
		138	this->reladdr = reg.reladdr;
		139	}
		140
		141	dst_reg::dst_reg(src_reg reg)
		142	{
		143	this->file = reg.file;
		144	this->index = reg.index;
		145	this->writemask = WRITEMASK_XYZW;
		146	this->cond_mask = COND_TR;
		147	this->reladdr = reg.reladdr;
		148	}
		149
		150	class ir_to_mesa_instruction : public exec_node {
		151	public:
		152	/* Callers of this ralloc-based new need not call delete. It's
		153	* easier to just ralloc_free 'ctx' (or any of its ancestors). */
		154	static void* operator new(size_t size, void *ctx)
		155	{
		156	void *node;
		157
		158	node = rzalloc_size(ctx, size);
		159	assert(node != NULL);
		160
		161	return node;
		162	}
		163
		164	enum prog_opcode op;
		165	dst_reg dst;
		166	src_reg src[3];
		167	/** Pointer to the ir source this tree came from for debugging */
		168	ir_instruction *ir;
		169	GLboolean cond_update;
		170	bool saturate;
		171	int sampler; /*< sampler index /
		172	int tex_target; /*< One of TEXTURE__INDEX */
		173	GLboolean tex_shadow;
		174	};
		175
		176	class variable_storage : public exec_node {
		177	public:
		178	variable_storage(ir_variable *var, gl_register_file file, int index)
		179	: file(file), index(index), var(var)
		180	{
		181	/* empty */
		182	}
		183
		184	gl_register_file file;
		185	int index;
		186	ir_variable var; / variable that maps to this, if any */
		187	};
		188
		189	class function_entry : public exec_node {
		190	public:
		191	ir_function_signature *sig;
		192
		193	/**
		194	* identifier of this function signature used by the program.
		195	*
		196	* At the point that Mesa instructions for function calls are
		197	* generated, we don't know the address of the first instruction of
		198	* the function body. So we make the BranchTarget that is called a
		199	* small integer and rewrite them during set_branchtargets().
		200	*/
		201	int sig_id;
		202
		203	/**
		204	* Pointer to first instruction of the function body.
		205	*
		206	* Set during function body emits after main() is processed.
		207	*/
		208	ir_to_mesa_instruction *bgn_inst;
		209
		210	/**
		211	* Index of the first instruction of the function body in actual
		212	* Mesa IR.
		213	*
		214	* Set after convertion from ir_to_mesa_instruction to prog_instruction.
		215	*/
		216	int inst;
		217
		218	/** Storage for the return value. */
		219	src_reg return_reg;
		220	};
		221
		222	class ir_to_mesa_visitor : public ir_visitor {
		223	public:
		224	ir_to_mesa_visitor();
		225	~ir_to_mesa_visitor();
		226
		227	function_entry *current_function;
		228
		229	struct gl_context *ctx;
		230	struct gl_program *prog;
		231	struct gl_shader_program *shader_program;
		232	struct gl_shader_compiler_options *options;
		233
		234	int next_temp;
		235
		236	variable_storage find_variable_storage(ir_variable var);
		237
		238	src_reg get_temp(const glsl_type *type);
		239	void reladdr_to_temp(ir_instruction ir, src_reg reg, int *num_reladdr);
		240
		241	src_reg src_reg_for_float(float val);
		242
		243	/**
		244	* \name Visit methods
		245	*
		246	* As typical for the visitor pattern, there must be one \c visit method for
		247	* each concrete subclass of \c ir_instruction. Virtual base classes within
		248	* the hierarchy should not have \c visit methods.
		249	*/
		250	/@{/
		251	virtual void visit(ir_variable *);
		252	virtual void visit(ir_loop *);
		253	virtual void visit(ir_loop_jump *);
		254	virtual void visit(ir_function_signature *);
		255	virtual void visit(ir_function *);
		256	virtual void visit(ir_expression *);
		257	virtual void visit(ir_swizzle *);
		258	virtual void visit(ir_dereference_variable *);
		259	virtual void visit(ir_dereference_array *);
		260	virtual void visit(ir_dereference_record *);
		261	virtual void visit(ir_assignment *);
		262	virtual void visit(ir_constant *);
		263	virtual void visit(ir_call *);
		264	virtual void visit(ir_return *);
		265	virtual void visit(ir_discard *);
		266	virtual void visit(ir_texture *);
		267	virtual void visit(ir_if *);
		268	/@}/
		269
		270	src_reg result;
		271
		272	/** List of variable_storage */
		273	exec_list variables;
		274
		275	/** List of function_entry */
		276	exec_list function_signatures;
		277	int next_signature_id;
		278
		279	/** List of ir_to_mesa_instruction */
		280	exec_list instructions;
		281
		282	ir_to_mesa_instruction emit(ir_instruction ir, enum prog_opcode op);
		283
		284	ir_to_mesa_instruction emit(ir_instruction ir, enum prog_opcode op,
		285	dst_reg dst, src_reg src0);
		286
		287	ir_to_mesa_instruction emit(ir_instruction ir, enum prog_opcode op,
		288	dst_reg dst, src_reg src0, src_reg src1);
		289
		290	ir_to_mesa_instruction emit(ir_instruction ir, enum prog_opcode op,
		291	dst_reg dst,
		292	src_reg src0, src_reg src1, src_reg src2);
		293
		294	/**
		295	* Emit the correct dot-product instruction for the type of arguments
		296	*/
		297	ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
		298	dst_reg dst,
		299	src_reg src0,
		300	src_reg src1,
		301	unsigned elements);
		302
		303	void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		304	dst_reg dst, src_reg src0);
		305
		306	void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		307	dst_reg dst, src_reg src0, src_reg src1);
		308
		309	void emit_scs(ir_instruction *ir, enum prog_opcode op,
		310	dst_reg dst, const src_reg &src);
		311
		312	bool try_emit_mad(ir_expression *ir,
		313	int mul_operand);
		314	bool try_emit_mad_for_and_not(ir_expression *ir,
		315	int mul_operand);
		316	bool try_emit_sat(ir_expression *ir);
		317
		318	void emit_swz(ir_expression *ir);
		319
		320	bool process_move_condition(ir_rvalue *ir);
		321
		322	void copy_propagate(void);
		323
		324	void *mem_ctx;
		325	};
		326
		327	static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);
		328
		329	static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
		330
		331	static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
		332
		333	static int
		334	swizzle_for_size(int size)
		335	{
		336	static const int size_swizzles[4] = {
		337	MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
		338	MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
		339	MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
		340	MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
		341	};
		342
		343	assert((size >= 1) && (size <= 4));
		344	return size_swizzles[size - 1];
		345	}
		346
		347	ir_to_mesa_instruction *
		348	ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
		349	dst_reg dst,
		350	src_reg src0, src_reg src1, src_reg src2)
		351	{
		352	ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
		353	int num_reladdr = 0;
		354
		355	/* If we have to do relative addressing, we want to load the ARL
		356	* reg directly for one of the regs, and preload the other reladdr
		357	* sources into temps.
		358	*/
		359	num_reladdr += dst.reladdr != NULL;
		360	num_reladdr += src0.reladdr != NULL;
		361	num_reladdr += src1.reladdr != NULL;
		362	num_reladdr += src2.reladdr != NULL;
		363
		364	reladdr_to_temp(ir, &src2, &num_reladdr);
		365	reladdr_to_temp(ir, &src1, &num_reladdr);
		366	reladdr_to_temp(ir, &src0, &num_reladdr);
		367
		368	if (dst.reladdr) {
		369	emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
		370	num_reladdr--;
		371	}
		372	assert(num_reladdr == 0);
		373
		374	inst->op = op;
		375	inst->dst = dst;
		376	inst->src[0] = src0;
		377	inst->src[1] = src1;
		378	inst->src[2] = src2;
		379	inst->ir = ir;
		380
		381	this->instructions.push_tail(inst);
		382
		383	return inst;
		384	}
		385
		386
		387	ir_to_mesa_instruction *
		388	ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
		389	dst_reg dst, src_reg src0, src_reg src1)
		390	{
		391	return emit(ir, op, dst, src0, src1, undef_src);
		392	}
		393
		394	ir_to_mesa_instruction *
		395	ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
		396	dst_reg dst, src_reg src0)
		397	{
		398	assert(dst.writemask != 0);
		399	return emit(ir, op, dst, src0, undef_src, undef_src);
		400	}
		401
		402	ir_to_mesa_instruction *
		403	ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
		404	{
		405	return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
		406	}
		407
		408	ir_to_mesa_instruction *
		409	ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
		410	dst_reg dst, src_reg src0, src_reg src1,
		411	unsigned elements)
		412	{
		413	static const gl_inst_opcode dot_opcodes[] = {
		414	OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
		415	};
		416
		417	return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
		418	}
		419
		420	/**
		421	* Emits Mesa scalar opcodes to produce unique answers across channels.
		422	*
		423	* Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
		424	* channel determines the result across all channels. So to do a vec4
		425	* of this operation, we want to emit a scalar per source channel used
		426	* to produce dest channels.
		427	*/
		428	void
		429	ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
		430	dst_reg dst,
		431	src_reg orig_src0, src_reg orig_src1)
		432	{
		433	int i, j;
		434	int done_mask = ~dst.writemask;
		435
		436	/* Mesa RCP is a scalar operation splatting results to all channels,
		437	* like ARB_fp/vp. So emit as many RCPs as necessary to cover our
		438	* dst channels.
		439	*/
		440	for (i = 0; i < 4; i++) {
		441	GLuint this_mask = (1 << i);
		442	ir_to_mesa_instruction *inst;
		443	src_reg src0 = orig_src0;
		444	src_reg src1 = orig_src1;
		445
		446	if (done_mask & this_mask)
		447	continue;
		448
		449	GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
		450	GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
		451	for (j = i + 1; j < 4; j++) {
		452	/* If there is another enabled component in the destination that is
		453	* derived from the same inputs, generate its value on this pass as
		454	* well.
		455	*/
		456	if (!(done_mask & (1 << j)) &&
		457	GET_SWZ(src0.swizzle, j) == src0_swiz &&
		458	GET_SWZ(src1.swizzle, j) == src1_swiz) {
		459	this_mask \|= (1 << j);
		460	}
		461	}
		462	src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
		463	src0_swiz, src0_swiz);
		464	src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
		465	src1_swiz, src1_swiz);
		466
		467	inst = emit(ir, op, dst, src0, src1);
		468	inst->dst.writemask = this_mask;
		469	done_mask \|= this_mask;
		470	}
		471	}
		472
		473	void
		474	ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
		475	dst_reg dst, src_reg src0)
		476	{
		477	src_reg undef = undef_src;
		478
		479	undef.swizzle = SWIZZLE_XXXX;
		480
		481	emit_scalar(ir, op, dst, src0, undef);
		482	}
		483
		484	/**
		485	* Emit an OPCODE_SCS instruction
		486	*
		487	* The \c SCS opcode functions a bit differently than the other Mesa (or
		488	* ARB_fragment_program) opcodes. Instead of splatting its result across all
		489	* four components of the destination, it writes one value to the \c x
		490	* component and another value to the \c y component.
		491	*
		492	* \param ir IR instruction being processed
		493	* \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which
		494	* value is desired.
		495	* \param dst Destination register
		496	* \param src Source register
		497	*/
		498	void
		499	ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
		500	dst_reg dst,
		501	const src_reg &src)
		502	{
		503	/* Vertex programs cannot use the SCS opcode.
		504	*/
		505	if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
		506	emit_scalar(ir, op, dst, src);
		507	return;
		508	}
		509
		510	const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
		511	const unsigned scs_mask = (1U << component);
		512	int done_mask = ~dst.writemask;
		513	src_reg tmp;
		514
		515	assert(op == OPCODE_SIN \|\| op == OPCODE_COS);
		516
		517	/* If there are compnents in the destination that differ from the component
		518	* that will be written by the SCS instrution, we'll need a temporary.
		519	*/
		520	if (scs_mask != unsigned(dst.writemask)) {
		521	tmp = get_temp(glsl_type::vec4_type);
		522	}
		523
		524	for (unsigned i = 0; i < 4; i++) {
		525	unsigned this_mask = (1U << i);
		526	src_reg src0 = src;
		527
		528	if ((done_mask & this_mask) != 0)
		529	continue;
		530
		531	/* The source swizzle specified which component of the source generates
		532	* sine / cosine for the current component in the destination. The SCS
		533	* instruction requires that this value be swizzle to the X component.
		534	* Replace the current swizzle with a swizzle that puts the source in
		535	* the X component.
		536	*/
		537	unsigned src0_swiz = GET_SWZ(src.swizzle, i);
		538
		539	src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
		540	src0_swiz, src0_swiz);
		541	for (unsigned j = i + 1; j < 4; j++) {
		542	/* If there is another enabled component in the destination that is
		543	* derived from the same inputs, generate its value on this pass as
		544	* well.
		545	*/
		546	if (!(done_mask & (1 << j)) &&
		547	GET_SWZ(src0.swizzle, j) == src0_swiz) {
		548	this_mask \|= (1 << j);
		549	}
		550	}
		551
		552	if (this_mask != scs_mask) {
		553	ir_to_mesa_instruction *inst;
		554	dst_reg tmp_dst = dst_reg(tmp);
		555
		556	/* Emit the SCS instruction.
		557	*/
		558	inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
		559	inst->dst.writemask = scs_mask;
		560
		561	/* Move the result of the SCS instruction to the desired location in
		562	* the destination.
		563	*/
		564	tmp.swizzle = MAKE_SWIZZLE4(component, component,
		565	component, component);
		566	inst = emit(ir, OPCODE_SCS, dst, tmp);
		567	inst->dst.writemask = this_mask;
		568	} else {
		569	/* Emit the SCS instruction to write directly to the destination.
		570	*/
		571	ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
		572	inst->dst.writemask = scs_mask;
		573	}
		574
		575	done_mask \|= this_mask;
		576	}
		577	}
		578
		579	src_reg
		580	ir_to_mesa_visitor::src_reg_for_float(float val)
		581	{
		582	src_reg src(PROGRAM_CONSTANT, -1, NULL);
		583
		584	src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
		585	(const gl_constant_value *)&val, 1, &src.swizzle);
		586
		587	return src;
		588	}
		589
		590	static int
		591	type_size(const struct glsl_type *type)
		592	{
		593	unsigned int i;
		594	int size;
		595
		596	switch (type->base_type) {
		597	case GLSL_TYPE_UINT:
		598	case GLSL_TYPE_INT:
		599	case GLSL_TYPE_FLOAT:
		600	case GLSL_TYPE_BOOL:
		601	if (type->is_matrix()) {
		602	return type->matrix_columns;
		603	} else {
		604	/* Regardless of size of vector, it gets a vec4. This is bad
		605	* packing for things like floats, but otherwise arrays become a
		606	* mess. Hopefully a later pass over the code can pack scalars
		607	* down if appropriate.
		608	*/
		609	return 1;
		610	}
		611	case GLSL_TYPE_ARRAY:
		612	assert(type->length > 0);
		613	return type_size(type->fields.array) * type->length;
		614	case GLSL_TYPE_STRUCT:
		615	size = 0;
		616	for (i = 0; i < type->length; i++) {
		617	size += type_size(type->fields.structure[i].type);
		618	}
		619	return size;
		620	case GLSL_TYPE_SAMPLER:
		621	/* Samplers take up one slot in UNIFORMS[], but they're baked in
		622	* at link time.
		623	*/
		624	return 1;
		625	case GLSL_TYPE_VOID:
		626	case GLSL_TYPE_ERROR:
		627	case GLSL_TYPE_INTERFACE:
		628	assert(!"Invalid type in type_size");
		629	break;
		630	}
		631
		632	return 0;
		633	}
		634
		635	/**
		636	* In the initial pass of codegen, we assign temporary numbers to
		637	* intermediate results. (not SSA -- variable assignments will reuse
		638	* storage). Actual register allocation for the Mesa VM occurs in a
		639	* pass over the Mesa IR later.
		640	*/
		641	src_reg
		642	ir_to_mesa_visitor::get_temp(const glsl_type *type)
		643	{
		644	src_reg src;
		645
		646	src.file = PROGRAM_TEMPORARY;
		647	src.index = next_temp;
		648	src.reladdr = NULL;
		649	next_temp += type_size(type);
		650
		651	if (type->is_array() \|\| type->is_record()) {
		652	src.swizzle = SWIZZLE_NOOP;
		653	} else {
		654	src.swizzle = swizzle_for_size(type->vector_elements);
		655	}
		656	src.negate = 0;
		657
		658	return src;
		659	}
		660
		661	variable_storage *
		662	ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
		663	{
		664
		665	variable_storage *entry;
		666
		667	foreach_iter(exec_list_iterator, iter, this->variables) {
		668	entry = (variable_storage *)iter.get();
		669
		670	if (entry->var == var)
		671	return entry;
		672	}
		673
		674	return NULL;
		675	}
		676
		677	void
		678	ir_to_mesa_visitor::visit(ir_variable *ir)
		679	{
		680	if (strcmp(ir->name, "gl_FragCoord") == 0) {
		681	struct gl_fragment_program fp = (struct gl_fragment_program )this->prog;
		682
		683	fp->OriginUpperLeft = ir->origin_upper_left;
		684	fp->PixelCenterInteger = ir->pixel_center_integer;
		685	}
		686
		687	if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
		688	unsigned int i;
		689	const ir_state_slot *const slots = ir->state_slots;
		690	assert(ir->state_slots != NULL);
		691
		692	/* Check if this statevar's setup in the STATE file exactly
		693	* matches how we'll want to reference it as a
		694	* struct/array/whatever. If not, then we need to move it into
		695	* temporary storage and hope that it'll get copy-propagated
		696	* out.
		697	*/
		698	for (i = 0; i < ir->num_state_slots; i++) {
		699	if (slots[i].swizzle != SWIZZLE_XYZW) {
		700	break;
		701	}
		702	}
		703
		704	variable_storage *storage;
		705	dst_reg dst;
		706	if (i == ir->num_state_slots) {
		707	/* We'll set the index later. */
		708	storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
		709	this->variables.push_tail(storage);
		710
		711	dst = undef_dst;
		712	} else {
		713	/* The variable_storage constructor allocates slots based on the size
		714	* of the type. However, this had better match the number of state
		715	* elements that we're going to copy into the new temporary.
		716	*/
		717	assert((int) ir->num_state_slots == type_size(ir->type));
		718
		719	storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
		720	this->next_temp);
		721	this->variables.push_tail(storage);
		722	this->next_temp += type_size(ir->type);
		723
		724	dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
		725	}
		726
		727
		728	for (unsigned int i = 0; i < ir->num_state_slots; i++) {
		729	int index = _mesa_add_state_reference(this->prog->Parameters,
		730	(gl_state_index *)slots[i].tokens);
		731
		732	if (storage->file == PROGRAM_STATE_VAR) {
		733	if (storage->index == -1) {
		734	storage->index = index;
		735	} else {
		736	assert(index == storage->index + (int)i);
		737	}
		738	} else {
		739	src_reg src(PROGRAM_STATE_VAR, index, NULL);
		740	src.swizzle = slots[i].swizzle;
		741	emit(ir, OPCODE_MOV, dst, src);
		742	/* even a float takes up a whole vec4 reg in a struct/array. */
		743	dst.index++;
		744	}
		745	}
		746
		747	if (storage->file == PROGRAM_TEMPORARY &&
		748	dst.index != storage->index + (int) ir->num_state_slots) {
		749	linker_error(this->shader_program,
		750	"failed to load builtin uniform `%s' "
		751	"(%d/%d regs loaded)\n",
		752	ir->name, dst.index - storage->index,
		753	type_size(ir->type));
		754	}
		755	}
		756	}
		757
		758	void
		759	ir_to_mesa_visitor::visit(ir_loop *ir)
		760	{
		761	ir_dereference_variable *counter = NULL;
		762
		763	if (ir->counter != NULL)
		764	counter = new(mem_ctx) ir_dereference_variable(ir->counter);
		765
		766	if (ir->from != NULL) {
		767	assert(ir->counter != NULL);
		768
		769	ir_assignment *a =
		770	new(mem_ctx) ir_assignment(counter, ir->from, NULL);
		771
		772	a->accept(this);
		773	}
		774
		775	emit(NULL, OPCODE_BGNLOOP);
		776
		777	if (ir->to) {
		778	ir_expression *e =
		779	new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type,
		780	counter, ir->to);
		781	ir_if *if_stmt = new(mem_ctx) ir_if(e);
		782
		783	ir_loop_jump *brk =
		784	new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
		785
		786	if_stmt->then_instructions.push_tail(brk);
		787
		788	if_stmt->accept(this);
		789	}
		790
		791	visit_exec_list(&ir->body_instructions, this);
		792
		793	if (ir->increment) {
		794	ir_expression *e =
		795	new(mem_ctx) ir_expression(ir_binop_add, counter->type,
		796	counter, ir->increment);
		797
		798	ir_assignment *a =
		799	new(mem_ctx) ir_assignment(counter, e, NULL);
		800
		801	a->accept(this);
		802	}
		803
		804	emit(NULL, OPCODE_ENDLOOP);
		805	}
		806
		807	void
		808	ir_to_mesa_visitor::visit(ir_loop_jump *ir)
		809	{
		810	switch (ir->mode) {
		811	case ir_loop_jump::jump_break:
		812	emit(NULL, OPCODE_BRK);
		813	break;
		814	case ir_loop_jump::jump_continue:
		815	emit(NULL, OPCODE_CONT);
		816	break;
		817	}
		818	}
		819
		820
		821	void
		822	ir_to_mesa_visitor::visit(ir_function_signature *ir)
		823	{
		824	assert(0);
		825	(void)ir;
		826	}
		827
		828	void
		829	ir_to_mesa_visitor::visit(ir_function *ir)
		830	{
		831	/* Ignore function bodies other than main() -- we shouldn't see calls to
		832	* them since they should all be inlined before we get to ir_to_mesa.
		833	*/
		834	if (strcmp(ir->name, "main") == 0) {
		835	const ir_function_signature *sig;
		836	exec_list empty;
		837
		838	sig = ir->matching_signature(&empty);
		839
		840	assert(sig);
		841
		842	foreach_iter(exec_list_iterator, iter, sig->body) {
		843	ir_instruction ir = (ir_instruction )iter.get();
		844
		845	ir->accept(this);
		846	}
		847	}
		848	}
		849
		850	bool
		851	ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
		852	{
		853	int nonmul_operand = 1 - mul_operand;
		854	src_reg a, b, c;
		855
		856	ir_expression *expr = ir->operands[mul_operand]->as_expression();
		857	if (!expr \|\| expr->operation != ir_binop_mul)
		858	return false;
		859
		860	expr->operands[0]->accept(this);
		861	a = this->result;
		862	expr->operands[1]->accept(this);
		863	b = this->result;
		864	ir->operands[nonmul_operand]->accept(this);
		865	c = this->result;
		866
		867	this->result = get_temp(ir->type);
		868	emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
		869
		870	return true;
		871	}
		872
		873	/**
		874	* Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
		875	*
		876	* The logic values are 1.0 for true and 0.0 for false. Logical-and is
		877	* implemented using multiplication, and logical-or is implemented using
		878	* addition. Logical-not can be implemented as (true - x), or (1.0 - x).
		879	* As result, the logical expression (a & !b) can be rewritten as:
		880	*
		881	* - a * !b
		882	* - a * (1 - b)
		883	* - (a * 1) - (a * b)
		884	* - a + -(a * b)
		885	* - a + (a * -b)
		886	*
		887	* This final expression can be implemented as a single MAD(a, -b, a)
		888	* instruction.
		889	*/
		890	bool
		891	ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
		892	{
		893	const int other_operand = 1 - try_operand;
		894	src_reg a, b;
		895
		896	ir_expression *expr = ir->operands[try_operand]->as_expression();
		897	if (!expr \|\| expr->operation != ir_unop_logic_not)
		898	return false;
		899
		900	ir->operands[other_operand]->accept(this);
		901	a = this->result;
		902	expr->operands[0]->accept(this);
		903	b = this->result;
		904
		905	b.negate = ~b.negate;
		906
		907	this->result = get_temp(ir->type);
		908	emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
		909
		910	return true;
		911	}
		912
		913	bool
		914	ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
		915	{
		916	/* Saturates were only introduced to vertex programs in
		917	* NV_vertex_program3, so don't give them to drivers in the VP.
		918	*/
		919	if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
		920	return false;
		921
		922	ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
		923	if (!sat_src)
		924	return false;
		925
		926	sat_src->accept(this);
		927	src_reg src = this->result;
		928
		929	/* If we generated an expression instruction into a temporary in
		930	* processing the saturate's operand, apply the saturate to that
		931	* instruction. Otherwise, generate a MOV to do the saturate.
		932	*
		933	* Note that we have to be careful to only do this optimization if
		934	* the instruction in question was what generated src->result. For
		935	* example, ir_dereference_array might generate a MUL instruction
		936	* to create the reladdr, and return us a src reg using that
		937	* reladdr. That MUL result is not the value we're trying to
		938	* saturate.
		939	*/
		940	ir_expression *sat_src_expr = sat_src->as_expression();
		941	ir_to_mesa_instruction *new_inst;
		942	new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
		943	if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul \|\|
		944	sat_src_expr->operation == ir_binop_add \|\|
		945	sat_src_expr->operation == ir_binop_dot)) {
		946	new_inst->saturate = true;
		947	} else {
		948	this->result = get_temp(ir->type);
		949	ir_to_mesa_instruction *inst;
		950	inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
		951	inst->saturate = true;
		952	}
		953
		954	return true;
		955	}
		956
		957	void
		958	ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
		959	src_reg reg, int num_reladdr)
		960	{
		961	if (!reg->reladdr)
		962	return;
		963
		964	emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
		965
		966	if (*num_reladdr != 1) {
		967	src_reg temp = get_temp(glsl_type::vec4_type);
		968
		969	emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
		970	*reg = temp;
		971	}
		972
		973	(*num_reladdr)--;
		974	}
		975
		976	void
		977	ir_to_mesa_visitor::emit_swz(ir_expression *ir)
		978	{
		979	/* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
		980	* This means that each of the operands is either an immediate value of -1,
		981	* 0, or 1, or is a component from one source register (possibly with
		982	* negation).
		983	*/
		984	uint8_t components[4] = { 0 };
		985	bool negate[4] = { false };
		986	ir_variable *var = NULL;
		987
		988	for (unsigned i = 0; i < ir->type->vector_elements; i++) {
		989	ir_rvalue *op = ir->operands[i];
		990
		991	assert(op->type->is_scalar());
		992
		993	while (op != NULL) {
		994	switch (op->ir_type) {
		995	case ir_type_constant: {
		996
		997	assert(op->type->is_scalar());
		998
		999	const ir_constant *const c = op->as_constant();
		1000	if (c->is_one()) {
		1001	components[i] = SWIZZLE_ONE;
		1002	} else if (c->is_zero()) {
		1003	components[i] = SWIZZLE_ZERO;
		1004	} else if (c->is_negative_one()) {
		1005	components[i] = SWIZZLE_ONE;
		1006	negate[i] = true;
		1007	} else {
		1008	assert(!"SWZ constant must be 0.0 or 1.0.");
		1009	}
		1010
		1011	op = NULL;
		1012	break;
		1013	}
		1014
		1015	case ir_type_dereference_variable: {
		1016	ir_dereference_variable *const deref =
		1017	(ir_dereference_variable *) op;
		1018
		1019	assert((var == NULL) \|\| (deref->var == var));
		1020	components[i] = SWIZZLE_X;
		1021	var = deref->var;
		1022	op = NULL;
		1023	break;
		1024	}
		1025
		1026	case ir_type_expression: {
		1027	ir_expression const expr = (ir_expression ) op;
		1028
		1029	assert(expr->operation == ir_unop_neg);
		1030	negate[i] = true;
		1031
		1032	op = expr->operands[0];
		1033	break;
		1034	}
		1035
		1036	case ir_type_swizzle: {
		1037	ir_swizzle const swiz = (ir_swizzle ) op;
		1038
		1039	components[i] = swiz->mask.x;
		1040	op = swiz->val;
		1041	break;
		1042	}
		1043
		1044	default:
		1045	assert(!"Should not get here.");
		1046	return;
		1047	}
		1048	}
		1049	}
		1050
		1051	assert(var != NULL);
		1052
		1053	ir_dereference_variable *const deref =
		1054	new(mem_ctx) ir_dereference_variable(var);
		1055
		1056	this->result.file = PROGRAM_UNDEFINED;
		1057	deref->accept(this);
		1058	if (this->result.file == PROGRAM_UNDEFINED) {
		1059	printf("Failed to get tree for expression operand:\n");
		1060	deref->print();
		1061	printf("\n");
		1062	exit(1);
		1063	}
		1064
		1065	src_reg src;
		1066
		1067	src = this->result;
		1068	src.swizzle = MAKE_SWIZZLE4(components[0],
		1069	components[1],
		1070	components[2],
		1071	components[3]);
		1072	src.negate = ((unsigned(negate[0]) << 0)
		1073	\| (unsigned(negate[1]) << 1)
		1074	\| (unsigned(negate[2]) << 2)
		1075	\| (unsigned(negate[3]) << 3));
		1076
		1077	/* Storage for our result. Ideally for an assignment we'd be using the
		1078	* actual storage for the result here, instead.
		1079	*/
		1080	const src_reg result_src = get_temp(ir->type);
		1081	dst_reg result_dst = dst_reg(result_src);
		1082
		1083	/* Limit writes to the channels that will be used by result_src later.
		1084	* This does limit this temp's use as a temporary for multi-instruction
		1085	* sequences.
		1086	*/
		1087	result_dst.writemask = (1 << ir->type->vector_elements) - 1;
		1088
		1089	emit(ir, OPCODE_SWZ, result_dst, src);
		1090	this->result = result_src;
		1091	}
		1092
		1093	void
		1094	ir_to_mesa_visitor::visit(ir_expression *ir)
		1095	{
		1096	unsigned int operand;
		1097	src_reg op[Elements(ir->operands)];
		1098	src_reg result_src;
		1099	dst_reg result_dst;
		1100
		1101	/* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
		1102	*/
		1103	if (ir->operation == ir_binop_add) {
		1104	if (try_emit_mad(ir, 1))
		1105	return;
		1106	if (try_emit_mad(ir, 0))
		1107	return;
		1108	}
		1109
		1110	/* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
		1111	*/
		1112	if (ir->operation == ir_binop_logic_and) {
		1113	if (try_emit_mad_for_and_not(ir, 1))
		1114	return;
		1115	if (try_emit_mad_for_and_not(ir, 0))
		1116	return;
		1117	}
		1118
		1119	if (try_emit_sat(ir))
		1120	return;
		1121
		1122	if (ir->operation == ir_quadop_vector) {
		1123	this->emit_swz(ir);
		1124	return;
		1125	}
		1126
		1127	for (operand = 0; operand < ir->get_num_operands(); operand++) {
		1128	this->result.file = PROGRAM_UNDEFINED;
		1129	ir->operands[operand]->accept(this);
		1130	if (this->result.file == PROGRAM_UNDEFINED) {
		1131	printf("Failed to get tree for expression operand:\n");
		1132	ir->operands[operand]->print();
		1133	printf("\n");
		1134	exit(1);
		1135	}
		1136	op[operand] = this->result;
		1137
		1138	/* Matrix expression operands should have been broken down to vector
		1139	* operations already.
		1140	*/
		1141	assert(!ir->operands[operand]->type->is_matrix());
		1142	}
		1143
		1144	int vector_elements = ir->operands[0]->type->vector_elements;
		1145	if (ir->operands[1]) {
		1146	vector_elements = MAX2(vector_elements,
		1147	ir->operands[1]->type->vector_elements);
		1148	}
		1149
		1150	this->result.file = PROGRAM_UNDEFINED;
		1151
		1152	/* Storage for our result. Ideally for an assignment we'd be using
		1153	* the actual storage for the result here, instead.
		1154	*/
		1155	result_src = get_temp(ir->type);
		1156	/* convenience for the emit functions below. */
		1157	result_dst = dst_reg(result_src);
		1158	/* Limit writes to the channels that will be used by result_src later.
		1159	* This does limit this temp's use as a temporary for multi-instruction
		1160	* sequences.
		1161	*/
		1162	result_dst.writemask = (1 << ir->type->vector_elements) - 1;
		1163
		1164	switch (ir->operation) {
		1165	case ir_unop_logic_not:
		1166	/* Previously 'SEQ dst, src, 0.0' was used for this. However, many
		1167	* older GPUs implement SEQ using multiple instructions (i915 uses two
		1168	* SGE instructions and a MUL instruction). Since our logic values are
		1169	* 0.0 and 1.0, 1-x also implements !x.
		1170	*/
		1171	op[0].negate = ~op[0].negate;
		1172	emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
		1173	break;
		1174	case ir_unop_neg:
		1175	op[0].negate = ~op[0].negate;
		1176	result_src = op[0];
		1177	break;
		1178	case ir_unop_abs:
		1179	emit(ir, OPCODE_ABS, result_dst, op[0]);
		1180	break;
		1181	case ir_unop_sign:
		1182	emit(ir, OPCODE_SSG, result_dst, op[0]);
		1183	break;
		1184	case ir_unop_rcp:
		1185	emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
		1186	break;
		1187
		1188	case ir_unop_exp2:
		1189	emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
		1190	break;
		1191	case ir_unop_exp:
		1192	case ir_unop_log:
		1193	assert(!"not reached: should be handled by ir_explog_to_explog2");
		1194	break;
		1195	case ir_unop_log2:
		1196	emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
		1197	break;
		1198	case ir_unop_sin:
		1199	emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
		1200	break;
		1201	case ir_unop_cos:
		1202	emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
		1203	break;
		1204	case ir_unop_sin_reduced:
		1205	emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
		1206	break;
		1207	case ir_unop_cos_reduced:
		1208	emit_scs(ir, OPCODE_COS, result_dst, op[0]);
		1209	break;
		1210
		1211	case ir_unop_dFdx:
		1212	emit(ir, OPCODE_DDX, result_dst, op[0]);
		1213	break;
		1214	case ir_unop_dFdy:
		1215	emit(ir, OPCODE_DDY, result_dst, op[0]);
		1216	break;
		1217
		1218	case ir_unop_noise: {
		1219	const enum prog_opcode opcode =
		1220	prog_opcode(OPCODE_NOISE1
		1221	+ (ir->operands[0]->type->vector_elements) - 1);
		1222	assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
		1223
		1224	emit(ir, opcode, result_dst, op[0]);
		1225	break;
		1226	}
		1227
		1228	case ir_binop_add:
		1229	emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
		1230	break;
		1231	case ir_binop_sub:
		1232	emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
		1233	break;
		1234
		1235	case ir_binop_mul:
		1236	emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
		1237	break;
		1238	case ir_binop_div:
		1239	assert(!"not reached: should be handled by ir_div_to_mul_rcp");
		1240	break;
		1241	case ir_binop_mod:
		1242	/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
		1243	assert(ir->type->is_integer());
		1244	emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
		1245	break;
		1246
		1247	case ir_binop_less:
		1248	emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
		1249	break;
		1250	case ir_binop_greater:
		1251	emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
		1252	break;
		1253	case ir_binop_lequal:
		1254	emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
		1255	break;
		1256	case ir_binop_gequal:
		1257	emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
		1258	break;
		1259	case ir_binop_equal:
		1260	emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
		1261	break;
		1262	case ir_binop_nequal:
		1263	emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
		1264	break;
		1265	case ir_binop_all_equal:
		1266	/* "==" operator producing a scalar boolean. */
		1267	if (ir->operands[0]->type->is_vector() \|\|
		1268	ir->operands[1]->type->is_vector()) {
		1269	src_reg temp = get_temp(glsl_type::vec4_type);
		1270	emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
		1271
		1272	/* After the dot-product, the value will be an integer on the
		1273	* range [0,4]. Zero becomes 1.0, and positive values become zero.
		1274	*/
		1275	emit_dp(ir, result_dst, temp, temp, vector_elements);
		1276
		1277	/* Negating the result of the dot-product gives values on the range
		1278	* [-4, 0]. Zero becomes 1.0, and negative values become zero. This
		1279	* achieved using SGE.
		1280	*/
		1281	src_reg sge_src = result_src;
		1282	sge_src.negate = ~sge_src.negate;
		1283	emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
		1284	} else {
		1285	emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
		1286	}
		1287	break;
		1288	case ir_binop_any_nequal:
		1289	/* "!=" operator producing a scalar boolean. */
		1290	if (ir->operands[0]->type->is_vector() \|\|
		1291	ir->operands[1]->type->is_vector()) {
		1292	src_reg temp = get_temp(glsl_type::vec4_type);
		1293	emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
		1294
		1295	/* After the dot-product, the value will be an integer on the
		1296	* range [0,4]. Zero stays zero, and positive values become 1.0.
		1297	*/
		1298	ir_to_mesa_instruction *const dp =
		1299	emit_dp(ir, result_dst, temp, temp, vector_elements);
		1300	if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
		1301	/* The clamping to [0,1] can be done for free in the fragment
		1302	* shader with a saturate.
		1303	*/
		1304	dp->saturate = true;
		1305	} else {
		1306	/* Negating the result of the dot-product gives values on the range
		1307	* [-4, 0]. Zero stays zero, and negative values become 1.0. This
		1308	* achieved using SLT.
		1309	*/
		1310	src_reg slt_src = result_src;
		1311	slt_src.negate = ~slt_src.negate;
		1312	emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
		1313	}
		1314	} else {
		1315	emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
		1316	}
		1317	break;
		1318
		1319	case ir_unop_any: {
		1320	assert(ir->operands[0]->type->is_vector());
		1321
		1322	/* After the dot-product, the value will be an integer on the
		1323	* range [0,4]. Zero stays zero, and positive values become 1.0.
		1324	*/
		1325	ir_to_mesa_instruction *const dp =
		1326	emit_dp(ir, result_dst, op[0], op[0],
		1327	ir->operands[0]->type->vector_elements);
		1328	if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
		1329	/* The clamping to [0,1] can be done for free in the fragment
		1330	* shader with a saturate.
		1331	*/
		1332	dp->saturate = true;
		1333	} else {
		1334	/* Negating the result of the dot-product gives values on the range
		1335	* [-4, 0]. Zero stays zero, and negative values become 1.0. This
		1336	* is achieved using SLT.
		1337	*/
		1338	src_reg slt_src = result_src;
		1339	slt_src.negate = ~slt_src.negate;
		1340	emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
		1341	}
		1342	break;
		1343	}
		1344
		1345	case ir_binop_logic_xor:
		1346	emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
		1347	break;
		1348
		1349	case ir_binop_logic_or: {
		1350	/* After the addition, the value will be an integer on the
		1351	* range [0,2]. Zero stays zero, and positive values become 1.0.
		1352	*/
		1353	ir_to_mesa_instruction *add =
		1354	emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
		1355	if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
		1356	/* The clamping to [0,1] can be done for free in the fragment
		1357	* shader with a saturate.
		1358	*/
		1359	add->saturate = true;
		1360	} else {
		1361	/* Negating the result of the addition gives values on the range
		1362	* [-2, 0]. Zero stays zero, and negative values become 1.0. This
		1363	* is achieved using SLT.
		1364	*/
		1365	src_reg slt_src = result_src;
		1366	slt_src.negate = ~slt_src.negate;
		1367	emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
		1368	}
		1369	break;
		1370	}
		1371
		1372	case ir_binop_logic_and:
		1373	/* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
		1374	emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
		1375	break;
		1376
		1377	case ir_binop_dot:
		1378	assert(ir->operands[0]->type->is_vector());
		1379	assert(ir->operands[0]->type == ir->operands[1]->type);
		1380	emit_dp(ir, result_dst, op[0], op[1],
		1381	ir->operands[0]->type->vector_elements);
		1382	break;
		1383
		1384	case ir_unop_sqrt:
		1385	/* sqrt(x) = x * rsq(x). */
		1386	emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
		1387	emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
		1388	/* For incoming channels <= 0, set the result to 0. */
		1389	op[0].negate = ~op[0].negate;
		1390	emit(ir, OPCODE_CMP, result_dst,
		1391	op[0], result_src, src_reg_for_float(0.0));
		1392	break;
		1393	case ir_unop_rsq:
		1394	emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
		1395	break;
		1396	case ir_unop_i2f:
		1397	case ir_unop_u2f:
		1398	case ir_unop_b2f:
		1399	case ir_unop_b2i:
		1400	case ir_unop_i2u:
		1401	case ir_unop_u2i:
		1402	/* Mesa IR lacks types, ints are stored as truncated floats. */
		1403	result_src = op[0];
		1404	break;
		1405	case ir_unop_f2i:
		1406	case ir_unop_f2u:
		1407	emit(ir, OPCODE_TRUNC, result_dst, op[0]);
		1408	break;
		1409	case ir_unop_f2b:
		1410	case ir_unop_i2b:
		1411	emit(ir, OPCODE_SNE, result_dst,
		1412	op[0], src_reg_for_float(0.0));
		1413	break;
		1414	case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway
		1415	case ir_unop_bitcast_f2u:
		1416	case ir_unop_bitcast_i2f:
		1417	case ir_unop_bitcast_u2f:
		1418	break;
		1419	case ir_unop_trunc:
		1420	emit(ir, OPCODE_TRUNC, result_dst, op[0]);
		1421	break;
		1422	case ir_unop_ceil:
		1423	op[0].negate = ~op[0].negate;
		1424	emit(ir, OPCODE_FLR, result_dst, op[0]);
		1425	result_src.negate = ~result_src.negate;
		1426	break;
		1427	case ir_unop_floor:
		1428	emit(ir, OPCODE_FLR, result_dst, op[0]);
		1429	break;
		1430	case ir_unop_fract:
		1431	emit(ir, OPCODE_FRC, result_dst, op[0]);
		1432	break;
		1433	case ir_unop_pack_snorm_2x16:
		1434	case ir_unop_pack_snorm_4x8:
		1435	case ir_unop_pack_unorm_2x16:
		1436	case ir_unop_pack_unorm_4x8:
		1437	case ir_unop_pack_half_2x16:
		1438	case ir_unop_unpack_snorm_2x16:
		1439	case ir_unop_unpack_snorm_4x8:
		1440	case ir_unop_unpack_unorm_2x16:
		1441	case ir_unop_unpack_unorm_4x8:
		1442	case ir_unop_unpack_half_2x16:
		1443	case ir_unop_unpack_half_2x16_split_x:
		1444	case ir_unop_unpack_half_2x16_split_y:
		1445	case ir_binop_pack_half_2x16_split:
		1446	case ir_unop_bitfield_reverse:
		1447	case ir_unop_bit_count:
		1448	case ir_unop_find_msb:
		1449	case ir_unop_find_lsb:
		1450	assert(!"not supported");
		1451	break;
		1452	case ir_binop_min:
		1453	emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
		1454	break;
		1455	case ir_binop_max:
		1456	emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
		1457	break;
		1458	case ir_binop_pow:
		1459	emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
		1460	break;
		1461
		1462	/* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
		1463	* hardware backends have no way to avoid Mesa IR generation
		1464	* even if they don't use it, we need to emit "something" and
		1465	* continue.
		1466	*/
		1467	case ir_binop_lshift:
		1468	case ir_binop_rshift:
		1469	case ir_binop_bit_and:
		1470	case ir_binop_bit_xor:
		1471	case ir_binop_bit_or:
		1472	emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
		1473	break;
		1474
		1475	case ir_unop_bit_not:
		1476	case ir_unop_round_even:
		1477	emit(ir, OPCODE_MOV, result_dst, op[0]);
		1478	break;
		1479
		1480	case ir_binop_ubo_load:
		1481	assert(!"not supported");
		1482	break;
		1483
		1484	case ir_triop_lrp:
		1485	/* ir_triop_lrp operands are (x, y, a) while
		1486	* OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program.
		1487	*/
		1488	emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
		1489	break;
		1490
		1491	case ir_binop_vector_extract:
		1492	case ir_binop_bfm:
		1493	case ir_triop_bfi:
		1494	case ir_triop_bitfield_extract:
		1495	case ir_triop_vector_insert:
		1496	case ir_quadop_bitfield_insert:
		1497	assert(!"not supported");
		1498	break;
		1499
		1500	case ir_quadop_vector:
		1501	/* This operation should have already been handled.
		1502	*/
		1503	assert(!"Should not get here.");
		1504	break;
		1505	}
		1506
		1507	this->result = result_src;
		1508	}
		1509
		1510
		1511	void
		1512	ir_to_mesa_visitor::visit(ir_swizzle *ir)
		1513	{
		1514	src_reg src;
		1515	int i;
		1516	int swizzle[4];
		1517
		1518	/* Note that this is only swizzles in expressions, not those on the left
		1519	* hand side of an assignment, which do write masking. See ir_assignment
		1520	* for that.
		1521	*/
		1522
		1523	ir->val->accept(this);
		1524	src = this->result;
		1525	assert(src.file != PROGRAM_UNDEFINED);
		1526
		1527	for (i = 0; i < 4; i++) {
		1528	if (i < ir->type->vector_elements) {
		1529	switch (i) {
		1530	case 0:
		1531	swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
		1532	break;
		1533	case 1:
		1534	swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
		1535	break;
		1536	case 2:
		1537	swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
		1538	break;
		1539	case 3:
		1540	swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
		1541	break;
		1542	}
		1543	} else {
		1544	/* If the type is smaller than a vec4, replicate the last
		1545	* channel out.
		1546	*/
		1547	swizzle[i] = swizzle[ir->type->vector_elements - 1];
		1548	}
		1549	}
		1550
		1551	src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
		1552
		1553	this->result = src;
		1554	}
		1555
		1556	void
		1557	ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
		1558	{
		1559	variable_storage *entry = find_variable_storage(ir->var);
		1560	ir_variable *var = ir->var;
		1561
		1562	if (!entry) {
		1563	switch (var->mode) {
		1564	case ir_var_uniform:
		1565	entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
		1566	var->location);
		1567	this->variables.push_tail(entry);
		1568	break;
		1569	case ir_var_shader_in:
		1570	/* The linker assigns locations for varyings and attributes,
		1571	* including deprecated builtins (like gl_Color),
		1572	* user-assigned generic attributes (glBindVertexLocation),
		1573	* and user-defined varyings.
		1574	*/
		1575	assert(var->location != -1);
		1576	entry = new(mem_ctx) variable_storage(var,
		1577	PROGRAM_INPUT,
		1578	var->location);
		1579	break;
		1580	case ir_var_shader_out:
		1581	assert(var->location != -1);
		1582	entry = new(mem_ctx) variable_storage(var,
		1583	PROGRAM_OUTPUT,
		1584	var->location);
		1585	break;
		1586	case ir_var_system_value:
		1587	entry = new(mem_ctx) variable_storage(var,
		1588	PROGRAM_SYSTEM_VALUE,
		1589	var->location);
		1590	break;
		1591	case ir_var_auto:
		1592	case ir_var_temporary:
		1593	entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
		1594	this->next_temp);
		1595	this->variables.push_tail(entry);
		1596
		1597	next_temp += type_size(var->type);
		1598	break;
		1599	}
		1600
		1601	if (!entry) {
		1602	printf("Failed to make storage for %s\n", var->name);
		1603	exit(1);
		1604	}
		1605	}
		1606
		1607	this->result = src_reg(entry->file, entry->index, var->type);
		1608	}
		1609
		1610	void
		1611	ir_to_mesa_visitor::visit(ir_dereference_array *ir)
		1612	{
		1613	ir_constant *index;
		1614	src_reg src;
		1615	int element_size = type_size(ir->type);
		1616
		1617	index = ir->array_index->constant_expression_value();
		1618
		1619	ir->array->accept(this);
		1620	src = this->result;
		1621
		1622	if (index) {
		1623	src.index += index->value.i[0] * element_size;
		1624	} else {
		1625	/* Variable index array dereference. It eats the "vec4" of the
		1626	* base of the array and an index that offsets the Mesa register
		1627	* index.
		1628	*/
		1629	ir->array_index->accept(this);
		1630
		1631	src_reg index_reg;
		1632
		1633	if (element_size == 1) {
		1634	index_reg = this->result;
		1635	} else {
		1636	index_reg = get_temp(glsl_type::float_type);
		1637
		1638	emit(ir, OPCODE_MUL, dst_reg(index_reg),
		1639	this->result, src_reg_for_float(element_size));
		1640	}
		1641
		1642	/* If there was already a relative address register involved, add the
		1643	* new and the old together to get the new offset.
		1644	*/
		1645	if (src.reladdr != NULL) {
		1646	src_reg accum_reg = get_temp(glsl_type::float_type);
		1647
		1648	emit(ir, OPCODE_ADD, dst_reg(accum_reg),
		1649	index_reg, *src.reladdr);
		1650
		1651	index_reg = accum_reg;
		1652	}
		1653
		1654	src.reladdr = ralloc(mem_ctx, src_reg);
		1655	memcpy(src.reladdr, &index_reg, sizeof(index_reg));
		1656	}
		1657
		1658	/* If the type is smaller than a vec4, replicate the last channel out. */
		1659	if (ir->type->is_scalar() \|\| ir->type->is_vector())
		1660	src.swizzle = swizzle_for_size(ir->type->vector_elements);
		1661	else
		1662	src.swizzle = SWIZZLE_NOOP;
		1663
		1664	this->result = src;
		1665	}
		1666
		1667	void
		1668	ir_to_mesa_visitor::visit(ir_dereference_record *ir)
		1669	{
		1670	unsigned int i;
		1671	const glsl_type *struct_type = ir->record->type;
		1672	int offset = 0;
		1673
		1674	ir->record->accept(this);
		1675
		1676	for (i = 0; i < struct_type->length; i++) {
		1677	if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
		1678	break;
		1679	offset += type_size(struct_type->fields.structure[i].type);
		1680	}
		1681
		1682	/* If the type is smaller than a vec4, replicate the last channel out. */
		1683	if (ir->type->is_scalar() \|\| ir->type->is_vector())
		1684	this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
		1685	else
		1686	this->result.swizzle = SWIZZLE_NOOP;
		1687
		1688	this->result.index += offset;
		1689	}
		1690
		1691	/**
		1692	* We want to be careful in assignment setup to hit the actual storage
		1693	* instead of potentially using a temporary like we might with the
		1694	* ir_dereference handler.
		1695	*/
		1696	static dst_reg
		1697	get_assignment_lhs(ir_dereference ir, ir_to_mesa_visitor v)
		1698	{
		1699	/* The LHS must be a dereference. If the LHS is a variable indexed array
		1700	* access of a vector, it must be separated into a series conditional moves
		1701	* before reaching this point (see ir_vec_index_to_cond_assign).
		1702	*/
		1703	assert(ir->as_dereference());
		1704	ir_dereference_array *deref_array = ir->as_dereference_array();
		1705	if (deref_array) {
		1706	assert(!deref_array->array->type->is_vector());
		1707	}
		1708
		1709	/* Use the rvalue deref handler for the most part. We'll ignore
		1710	* swizzles in it and write swizzles using writemask, though.
		1711	*/
		1712	ir->accept(v);
		1713	return dst_reg(v->result);
		1714	}
		1715
		1716	/**
		1717	* Process the condition of a conditional assignment
		1718	*
		1719	* Examines the condition of a conditional assignment to generate the optimal
		1720	* first operand of a \c CMP instruction. If the condition is a relational
		1721	* operator with 0 (e.g., \c ir_binop_less), the value being compared will be
		1722	* used as the source for the \c CMP instruction. Otherwise the comparison
		1723	* is processed to a boolean result, and the boolean result is used as the
		1724	* operand to the CMP instruction.
		1725	*/
		1726	bool
		1727	ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
		1728	{
		1729	ir_rvalue *src_ir = ir;
		1730	bool negate = true;
		1731	bool switch_order = false;
		1732
		1733	ir_expression *const expr = ir->as_expression();
		1734	if ((expr != NULL) && (expr->get_num_operands() == 2)) {
		1735	bool zero_on_left = false;
		1736
		1737	if (expr->operands[0]->is_zero()) {
		1738	src_ir = expr->operands[1];
		1739	zero_on_left = true;
		1740	} else if (expr->operands[1]->is_zero()) {
		1741	src_ir = expr->operands[0];
		1742	zero_on_left = false;
		1743	}
		1744
		1745	/* a is - 0 + - 0 +
		1746	* (a < 0) T F F ( a < 0) T F F
		1747	* (0 < a) F F T (-a < 0) F F T
		1748	* (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
		1749	* (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
		1750	* (a > 0) F F T (-a < 0) F F T
		1751	* (0 > a) T F F ( a < 0) T F F
		1752	* (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
		1753	* (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
		1754	*
		1755	* Note that exchanging the order of 0 and 'a' in the comparison simply
		1756	* means that the value of 'a' should be negated.
		1757	*/
		1758	if (src_ir != ir) {
		1759	switch (expr->operation) {
		1760	case ir_binop_less:
		1761	switch_order = false;
		1762	negate = zero_on_left;
		1763	break;
		1764
		1765	case ir_binop_greater:
		1766	switch_order = false;
		1767	negate = !zero_on_left;
		1768	break;
		1769
		1770	case ir_binop_lequal:
		1771	switch_order = true;
		1772	negate = !zero_on_left;
		1773	break;
		1774
		1775	case ir_binop_gequal:
		1776	switch_order = true;
		1777	negate = zero_on_left;
		1778	break;
		1779
		1780	default:
		1781	/* This isn't the right kind of comparison afterall, so make sure
		1782	* the whole condition is visited.
		1783	*/
		1784	src_ir = ir;
		1785	break;
		1786	}
		1787	}
		1788	}
		1789
		1790	src_ir->accept(this);
		1791
		1792	/* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
		1793	* condition we produced is 0.0 or 1.0. By flipping the sign, we can
		1794	* choose which value OPCODE_CMP produces without an extra instruction
		1795	* computing the condition.
		1796	*/
		1797	if (negate)
		1798	this->result.negate = ~this->result.negate;
		1799
		1800	return switch_order;
		1801	}
		1802
		1803	void
		1804	ir_to_mesa_visitor::visit(ir_assignment *ir)
		1805	{
		1806	dst_reg l;
		1807	src_reg r;
		1808	int i;
		1809
		1810	ir->rhs->accept(this);
		1811	r = this->result;
		1812
		1813	l = get_assignment_lhs(ir->lhs, this);
		1814
		1815	/* FINISHME: This should really set to the correct maximal writemask for each
		1816	* FINISHME: component written (in the loops below). This case can only
		1817	* FINISHME: occur for matrices, arrays, and structures.
		1818	*/
		1819	if (ir->write_mask == 0) {
		1820	assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
		1821	l.writemask = WRITEMASK_XYZW;
		1822	} else if (ir->lhs->type->is_scalar()) {
		1823	/* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
		1824	* FINISHME: W component of fragment shader output zero, work correctly.
		1825	*/
		1826	l.writemask = WRITEMASK_XYZW;
		1827	} else {
		1828	int swizzles[4];
		1829	int first_enabled_chan = 0;
		1830	int rhs_chan = 0;
		1831
		1832	assert(ir->lhs->type->is_vector());
		1833	l.writemask = ir->write_mask;
		1834
		1835	for (int i = 0; i < 4; i++) {
		1836	if (l.writemask & (1 << i)) {
		1837	first_enabled_chan = GET_SWZ(r.swizzle, i);
		1838	break;
		1839	}
		1840	}
		1841
		1842	/* Swizzle a small RHS vector into the channels being written.
		1843	*
		1844	* glsl ir treats write_mask as dictating how many channels are
		1845	* present on the RHS while Mesa IR treats write_mask as just
		1846	* showing which channels of the vec4 RHS get written.
		1847	*/
		1848	for (int i = 0; i < 4; i++) {
		1849	if (l.writemask & (1 << i))
		1850	swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
		1851	else
		1852	swizzles[i] = first_enabled_chan;
		1853	}
		1854	r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
		1855	swizzles[2], swizzles[3]);
		1856	}
		1857
		1858	assert(l.file != PROGRAM_UNDEFINED);
		1859	assert(r.file != PROGRAM_UNDEFINED);
		1860
		1861	if (ir->condition) {
		1862	const bool switch_order = this->process_move_condition(ir->condition);
		1863	src_reg condition = this->result;
		1864
		1865	for (i = 0; i < type_size(ir->lhs->type); i++) {
		1866	if (switch_order) {
		1867	emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
		1868	} else {
		1869	emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
		1870	}
		1871
		1872	l.index++;
		1873	r.index++;
		1874	}
		1875	} else {
		1876	for (i = 0; i < type_size(ir->lhs->type); i++) {
		1877	emit(ir, OPCODE_MOV, l, r);
		1878	l.index++;
		1879	r.index++;
		1880	}
		1881	}
		1882	}
		1883
		1884
		1885	void
		1886	ir_to_mesa_visitor::visit(ir_constant *ir)
		1887	{
		1888	src_reg src;
		1889	GLfloat stack_vals[4] = { 0 };
		1890	GLfloat *values = stack_vals;
		1891	unsigned int i;
		1892
		1893	/* Unfortunately, 4 floats is all we can get into
		1894	* _mesa_add_unnamed_constant. So, make a temp to store an
		1895	* aggregate constant and move each constant value into it. If we
		1896	* get lucky, copy propagation will eliminate the extra moves.
		1897	*/
		1898
		1899	if (ir->type->base_type == GLSL_TYPE_STRUCT) {
		1900	src_reg temp_base = get_temp(ir->type);
		1901	dst_reg temp = dst_reg(temp_base);
		1902
		1903	foreach_iter(exec_list_iterator, iter, ir->components) {
		1904	ir_constant field_value = (ir_constant )iter.get();
		1905	int size = type_size(field_value->type);
		1906
		1907	assert(size > 0);
		1908
		1909	field_value->accept(this);
		1910	src = this->result;
		1911
		1912	for (i = 0; i < (unsigned int)size; i++) {
		1913	emit(ir, OPCODE_MOV, temp, src);
		1914
		1915	src.index++;
		1916	temp.index++;
		1917	}
		1918	}
		1919	this->result = temp_base;
		1920	return;
		1921	}
		1922
		1923	if (ir->type->is_array()) {
		1924	src_reg temp_base = get_temp(ir->type);
		1925	dst_reg temp = dst_reg(temp_base);
		1926	int size = type_size(ir->type->fields.array);
		1927
		1928	assert(size > 0);
		1929
		1930	for (i = 0; i < ir->type->length; i++) {
		1931	ir->array_elements[i]->accept(this);
		1932	src = this->result;
		1933	for (int j = 0; j < size; j++) {
		1934	emit(ir, OPCODE_MOV, temp, src);
		1935
		1936	src.index++;
		1937	temp.index++;
		1938	}
		1939	}
		1940	this->result = temp_base;
		1941	return;
		1942	}
		1943
		1944	if (ir->type->is_matrix()) {
		1945	src_reg mat = get_temp(ir->type);
		1946	dst_reg mat_column = dst_reg(mat);
		1947
		1948	for (i = 0; i < ir->type->matrix_columns; i++) {
		1949	assert(ir->type->base_type == GLSL_TYPE_FLOAT);
		1950	values = &ir->value.f[i * ir->type->vector_elements];
		1951
		1952	src = src_reg(PROGRAM_CONSTANT, -1, NULL);
		1953	src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
		1954	(gl_constant_value *) values,
		1955	ir->type->vector_elements,
		1956	&src.swizzle);
		1957	emit(ir, OPCODE_MOV, mat_column, src);
		1958
		1959	mat_column.index++;
		1960	}
		1961
		1962	this->result = mat;
		1963	return;
		1964	}
		1965
		1966	src.file = PROGRAM_CONSTANT;
		1967	switch (ir->type->base_type) {
		1968	case GLSL_TYPE_FLOAT:
		1969	values = &ir->value.f[0];
		1970	break;
		1971	case GLSL_TYPE_UINT:
		1972	for (i = 0; i < ir->type->vector_elements; i++) {
		1973	values[i] = ir->value.u[i];
		1974	}
		1975	break;
		1976	case GLSL_TYPE_INT:
		1977	for (i = 0; i < ir->type->vector_elements; i++) {
		1978	values[i] = ir->value.i[i];
		1979	}
		1980	break;
		1981	case GLSL_TYPE_BOOL:
		1982	for (i = 0; i < ir->type->vector_elements; i++) {
		1983	values[i] = ir->value.b[i];
		1984	}
		1985	break;
		1986	default:
		1987	assert(!"Non-float/uint/int/bool constant");
		1988	}
		1989
		1990	this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
		1991	this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
		1992	(gl_constant_value *) values,
		1993	ir->type->vector_elements,
		1994	&this->result.swizzle);
		1995	}
		1996
		/**
		 * Visit a function call.
		 *
		 * No code is emitted for calls.  Reaching this visitor trips an assertion
		 * whose message states that all calls should have been inlined already.
		 */
		void
		ir_to_mesa_visitor::visit(ir_call *ir)
		{
		   assert(!"ir_to_mesa: All function calls should have been inlined by now.");
		}
		2002
		2003	void
		2004	ir_to_mesa_visitor::visit(ir_texture *ir)
		2005	{
		2006	src_reg result_src, coord, lod_info, projector, dx, dy;
		2007	dst_reg result_dst, coord_dst;
		2008	ir_to_mesa_instruction *inst = NULL;
		2009	prog_opcode opcode = OPCODE_NOP;
		2010
		2011	if (ir->op == ir_txs)
		2012	this->result = src_reg_for_float(0.0);
		2013	else
		2014	ir->coordinate->accept(this);
		2015
		2016	/* Put our coords in a temp. We'll need to modify them for shadow,
		2017	* projection, or LOD, so the only case we'd use it as is is if
		2018	* we're doing plain old texturing. Mesa IR optimization should
		2019	* handle cleaning up our mess in that case.
		2020	*/
		2021	coord = get_temp(glsl_type::vec4_type);
		2022	coord_dst = dst_reg(coord);
		2023	emit(ir, OPCODE_MOV, coord_dst, this->result);
		2024
		2025	if (ir->projector) {
		2026	ir->projector->accept(this);
		2027	projector = this->result;
		2028	}
		2029
		2030	/* Storage for our result. Ideally for an assignment we'd be using
		2031	* the actual storage for the result here, instead.
		2032	*/
		2033	result_src = get_temp(glsl_type::vec4_type);
		2034	result_dst = dst_reg(result_src);
		2035
		2036	switch (ir->op) {
		2037	case ir_tex:
		2038	case ir_txs:
		2039	opcode = OPCODE_TEX;
		2040	break;
		2041	case ir_txb:
		2042	opcode = OPCODE_TXB;
		2043	ir->lod_info.bias->accept(this);
		2044	lod_info = this->result;
		2045	break;
		2046	case ir_txf:
		2047	/* Pretend to be TXL so the sampler, coordinate, lod are available */
		2048	case ir_txl:
		2049	opcode = OPCODE_TXL;
		2050	ir->lod_info.lod->accept(this);
		2051	lod_info = this->result;
		2052	break;
		2053	case ir_txd:
		2054	opcode = OPCODE_TXD;
		2055	ir->lod_info.grad.dPdx->accept(this);
		2056	dx = this->result;
		2057	ir->lod_info.grad.dPdy->accept(this);
		2058	dy = this->result;
		2059	break;
		2060	case ir_txf_ms:
		2061	assert(!"Unexpected ir_txf_ms opcode");
		2062	break;
		2063	case ir_lod:
		2064	assert(!"Unexpected ir_lod opcode");
		2065	break;
		2066	}
		2067
		2068	const glsl_type *sampler_type = ir->sampler->type;
		2069
		2070	if (ir->projector) {
		2071	if (opcode == OPCODE_TEX) {
		2072	/* Slot the projector in as the last component of the coord. */
		2073	coord_dst.writemask = WRITEMASK_W;
		2074	emit(ir, OPCODE_MOV, coord_dst, projector);
		2075	coord_dst.writemask = WRITEMASK_XYZW;
		2076	opcode = OPCODE_TXP;
		2077	} else {
		2078	src_reg coord_w = coord;
		2079	coord_w.swizzle = SWIZZLE_WWWW;
		2080
		2081	/* For the other TEX opcodes there's no projective version
		2082	* since the last slot is taken up by lod info. Do the
		2083	* projective divide now.
		2084	*/
		2085	coord_dst.writemask = WRITEMASK_W;
		2086	emit(ir, OPCODE_RCP, coord_dst, projector);
		2087
		2088	/* In the case where we have to project the coordinates "by hand,"
		2089	* the shadow comparitor value must also be projected.
		2090	*/
		2091	src_reg tmp_src = coord;
		2092	if (ir->shadow_comparitor) {
		2093	/* Slot the shadow value in as the second to last component of the
		2094	* coord.
		2095	*/
		2096	ir->shadow_comparitor->accept(this);
		2097
		2098	tmp_src = get_temp(glsl_type::vec4_type);
		2099	dst_reg tmp_dst = dst_reg(tmp_src);
		2100
		2101	/* Projective division not allowed for array samplers. */
		2102	assert(!sampler_type->sampler_array);
		2103
		2104	tmp_dst.writemask = WRITEMASK_Z;
		2105	emit(ir, OPCODE_MOV, tmp_dst, this->result);
		2106
		2107	tmp_dst.writemask = WRITEMASK_XY;
		2108	emit(ir, OPCODE_MOV, tmp_dst, coord);
		2109	}
		2110
		2111	coord_dst.writemask = WRITEMASK_XYZ;
		2112	emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
		2113
		2114	coord_dst.writemask = WRITEMASK_XYZW;
		2115	coord.swizzle = SWIZZLE_XYZW;
		2116	}
		2117	}
		2118
		2119	/* If projection is done and the opcode is not OPCODE_TXP, then the shadow
		2120	* comparitor was put in the correct place (and projected) by the code,
		2121	* above, that handles by-hand projection.
		2122	*/
		2123	if (ir->shadow_comparitor && (!ir->projector \|\| opcode == OPCODE_TXP)) {
		2124	/* Slot the shadow value in as the second to last component of the
		2125	* coord.
		2126	*/
		2127	ir->shadow_comparitor->accept(this);
		2128
		2129	/* XXX This will need to be updated for cubemap array samplers. */
		2130	if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
		2131	sampler_type->sampler_array) {
		2132	coord_dst.writemask = WRITEMASK_W;
		2133	} else {
		2134	coord_dst.writemask = WRITEMASK_Z;
		2135	}
		2136
		2137	emit(ir, OPCODE_MOV, coord_dst, this->result);
		2138	coord_dst.writemask = WRITEMASK_XYZW;
		2139	}
		2140
		2141	if (opcode == OPCODE_TXL \|\| opcode == OPCODE_TXB) {
		2142	/* Mesa IR stores lod or lod bias in the last channel of the coords. */
		2143	coord_dst.writemask = WRITEMASK_W;
		2144	emit(ir, OPCODE_MOV, coord_dst, lod_info);
		2145	coord_dst.writemask = WRITEMASK_XYZW;
		2146	}
		2147
		2148	if (opcode == OPCODE_TXD)
		2149	inst = emit(ir, opcode, result_dst, coord, dx, dy);
		2150	else
		2151	inst = emit(ir, opcode, result_dst, coord);
		2152
		2153	if (ir->shadow_comparitor)
		2154	inst->tex_shadow = GL_TRUE;
		2155
		2156	inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
		2157	this->shader_program,
		2158	this->prog);
		2159
		2160	switch (sampler_type->sampler_dimensionality) {
		2161	case GLSL_SAMPLER_DIM_1D:
		2162	inst->tex_target = (sampler_type->sampler_array)
		2163	? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
		2164	break;
		2165	case GLSL_SAMPLER_DIM_2D:
		2166	inst->tex_target = (sampler_type->sampler_array)
		2167	? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
		2168	break;
		2169	case GLSL_SAMPLER_DIM_3D:
		2170	inst->tex_target = TEXTURE_3D_INDEX;
		2171	break;
		2172	case GLSL_SAMPLER_DIM_CUBE:
		2173	inst->tex_target = TEXTURE_CUBE_INDEX;
		2174	break;
		2175	case GLSL_SAMPLER_DIM_RECT:
		2176	inst->tex_target = TEXTURE_RECT_INDEX;
		2177	break;
		2178	case GLSL_SAMPLER_DIM_BUF:
		2179	assert(!"FINISHME: Implement ARB_texture_buffer_object");
		2180	break;
		2181	case GLSL_SAMPLER_DIM_EXTERNAL:
		2182	inst->tex_target = TEXTURE_EXTERNAL_INDEX;
		2183	break;
		2184	default:
		2185	assert(!"Should not get here.");
		2186	}
		2187
		2188	this->result = result_src;
		2189	}
		2190
		2191	void
		2192	ir_to_mesa_visitor::visit(ir_return *ir)
		2193	{
		2194	/* Non-void functions should have been inlined. We may still emit RETs
		2195	* from main() unless the EmitNoMainReturn option is set.
		2196	*/
		2197	assert(!ir->get_value());
		2198	emit(ir, OPCODE_RET);
		2199	}
		2200
		2201	void
		2202	ir_to_mesa_visitor::visit(ir_discard *ir)
		2203	{
		2204	if (ir->condition) {
		2205	ir->condition->accept(this);
		2206	this->result.negate = ~this->result.negate;
		2207	emit(ir, OPCODE_KIL, undef_dst, this->result);
		2208	} else {
		2209	emit(ir, OPCODE_KIL_NV);
		2210	}
		2211	}
		2212
		2213	void
		2214	ir_to_mesa_visitor::visit(ir_if *ir)
		2215	{
		2216	ir_to_mesa_instruction cond_inst, if_inst;
		2217	ir_to_mesa_instruction *prev_inst;
		2218
		2219	prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
		2220
		2221	ir->condition->accept(this);
		2222	assert(this->result.file != PROGRAM_UNDEFINED);
		2223
		2224	if (this->options->EmitCondCodes) {
		2225	cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
		2226
		2227	/* See if we actually generated any instruction for generating
		2228	* the condition. If not, then cook up a move to a temp so we
		2229	* have something to set cond_update on.
		2230	*/
		2231	if (cond_inst == prev_inst) {
		2232	src_reg temp = get_temp(glsl_type::bool_type);
		2233	cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result);
		2234	}
		2235	cond_inst->cond_update = GL_TRUE;
		2236
		2237	if_inst = emit(ir->condition, OPCODE_IF);
		2238	if_inst->dst.cond_mask = COND_NE;
		2239	} else {
		2240	if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
		2241	}
		2242
		2243	this->instructions.push_tail(if_inst);
		2244
		2245	visit_exec_list(&ir->then_instructions, this);
		2246
		2247	if (!ir->else_instructions.is_empty()) {
		2248	emit(ir->condition, OPCODE_ELSE);
		2249	visit_exec_list(&ir->else_instructions, this);
		2250	}
		2251
		2252	if_inst = emit(ir->condition, OPCODE_ENDIF);
		2253	}
		2254
		2255	ir_to_mesa_visitor::ir_to_mesa_visitor()
		2256	{
		2257	result.file = PROGRAM_UNDEFINED;
		2258	next_temp = 1;
		2259	next_signature_id = 1;
		2260	current_function = NULL;
		2261	mem_ctx = ralloc_context(NULL);
		2262	}
		2263
		2264	ir_to_mesa_visitor::~ir_to_mesa_visitor()
		2265	{
		2266	ralloc_free(mem_ctx);
		2267	}
		2268
		2269	static struct prog_src_register
		2270	mesa_src_reg_from_ir_src_reg(src_reg reg)
		2271	{
		2272	struct prog_src_register mesa_reg;
		2273
		2274	mesa_reg.File = reg.file;
		2275	assert(reg.index < (1 << INST_INDEX_BITS));
		2276	mesa_reg.Index = reg.index;
		2277	mesa_reg.Swizzle = reg.swizzle;
		2278	mesa_reg.RelAddr = reg.reladdr != NULL;
		2279	mesa_reg.Negate = reg.negate;
		2280	mesa_reg.Abs = 0;
		2281	mesa_reg.HasIndex2 = GL_FALSE;
		2282	mesa_reg.RelAddr2 = 0;
		2283	mesa_reg.Index2 = 0;
		2284
		2285	return mesa_reg;
		2286	}
		2287
		2288	static void
		2289	set_branchtargets(ir_to_mesa_visitor *v,
		2290	struct prog_instruction *mesa_instructions,
		2291	int num_instructions)
		2292	{
		2293	int if_count = 0, loop_count = 0;
		2294	int if_stack, loop_stack;
		2295	int if_stack_pos = 0, loop_stack_pos = 0;
		2296	int i, j;
		2297
		2298	for (i = 0; i < num_instructions; i++) {
		2299	switch (mesa_instructions[i].Opcode) {
		2300	case OPCODE_IF:
		2301	if_count++;
		2302	break;
		2303	case OPCODE_BGNLOOP:
		2304	loop_count++;
		2305	break;
		2306	case OPCODE_BRK:
		2307	case OPCODE_CONT:
		2308	mesa_instructions[i].BranchTarget = -1;
		2309	break;
		2310	default:
		2311	break;
		2312	}
		2313	}
		2314
		2315	if_stack = rzalloc_array(v->mem_ctx, int, if_count);
		2316	loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
		2317
		2318	for (i = 0; i < num_instructions; i++) {
		2319	switch (mesa_instructions[i].Opcode) {
		2320	case OPCODE_IF:
		2321	if_stack[if_stack_pos] = i;
		2322	if_stack_pos++;
		2323	break;
		2324	case OPCODE_ELSE:
		2325	mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
		2326	if_stack[if_stack_pos - 1] = i;
		2327	break;
		2328	case OPCODE_ENDIF:
		2329	mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
		2330	if_stack_pos--;
		2331	break;
		2332	case OPCODE_BGNLOOP:
		2333	loop_stack[loop_stack_pos] = i;
		2334	loop_stack_pos++;
		2335	break;
		2336	case OPCODE_ENDLOOP:
		2337	loop_stack_pos--;
		2338	/* Rewrite any breaks/conts at this nesting level (haven't
		2339	* already had a BranchTarget assigned) to point to the end
		2340	* of the loop.
		2341	*/
		2342	for (j = loop_stack[loop_stack_pos]; j < i; j++) {
		2343	if (mesa_instructions[j].Opcode == OPCODE_BRK \|\|
		2344	mesa_instructions[j].Opcode == OPCODE_CONT) {
		2345	if (mesa_instructions[j].BranchTarget == -1) {
		2346	mesa_instructions[j].BranchTarget = i;
		2347	}
		2348	}
		2349	}
		2350	/* The loop ends point at each other. */
		2351	mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
		2352	mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
		2353	break;
		2354	case OPCODE_CAL:
		2355	foreach_iter(exec_list_iterator, iter, v->function_signatures) {
		2356	function_entry entry = (function_entry )iter.get();
		2357
		2358	if (entry->sig_id == mesa_instructions[i].BranchTarget) {
		2359	mesa_instructions[i].BranchTarget = entry->inst;
		2360	break;
		2361	}
		2362	}
		2363	break;
		2364	default:
		2365	break;
		2366	}
		2367	}
		2368	}
		2369
		2370	static void
		2371	print_program(struct prog_instruction *mesa_instructions,
		2372	ir_instruction **mesa_instruction_annotation,
		2373	int num_instructions)
		2374	{
		2375	ir_instruction *last_ir = NULL;
		2376	int i;
		2377	int indent = 0;
		2378
		2379	for (i = 0; i < num_instructions; i++) {
		2380	struct prog_instruction *mesa_inst = mesa_instructions + i;
		2381	ir_instruction *ir = mesa_instruction_annotation[i];
		2382
		2383	fprintf(stdout, "%3d: ", i);
		2384
		2385	if (last_ir != ir && ir) {
		2386	int j;
		2387
		2388	for (j = 0; j < indent; j++) {
		2389	fprintf(stdout, " ");
		2390	}
		2391	ir->print();
		2392	printf("\n");
		2393	last_ir = ir;
		2394
		2395	fprintf(stdout, " "); /* line number spacing. */
		2396	}
		2397
		2398	indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
		2399	PROG_PRINT_DEBUG, NULL);
		2400	}
		2401	}
		2402
		2403	class add_uniform_to_shader : public program_resource_visitor {
		2404	public:
		2405	add_uniform_to_shader(struct gl_shader_program *shader_program,
		2406	struct gl_program_parameter_list *params,
		2407	gl_shader_type shader_type)
		2408	: shader_program(shader_program), params(params), idx(-1),
		2409	shader_type(shader_type)
		2410	{
		2411	/* empty */
		2412	}
		2413
		2414	void process(ir_variable *var)
		2415	{
		2416	this->idx = -1;
		2417	this->program_resource_visitor::process(var);
		2418
		2419	var->location = this->idx;
		2420	}
		2421
		2422	private:
		2423	virtual void visit_field(const glsl_type type, const char name,
		2424	bool row_major);
		2425
		2426	struct gl_shader_program *shader_program;
		2427	struct gl_program_parameter_list *params;
		2428	int idx;
		2429	gl_shader_type shader_type;
		2430	};
		2431
		2432	void
		2433	add_uniform_to_shader::visit_field(const glsl_type type, const char name,
		2434	bool row_major)
		2435	{
		2436	unsigned int size;
		2437
		2438	(void) row_major;
		2439
		2440	if (type->is_vector() \|\| type->is_scalar()) {
		2441	size = type->vector_elements;
		2442	} else {
		2443	size = type_size(type) * 4;
		2444	}
		2445
		2446	gl_register_file file;
		2447	if (type->is_sampler() \|\|
		2448	(type->is_array() && type->fields.array->is_sampler())) {
		2449	file = PROGRAM_SAMPLER;
		2450	} else {
		2451	file = PROGRAM_UNIFORM;
		2452	}
		2453
		2454	int index = _mesa_lookup_parameter_index(params, -1, name);
		2455	if (index < 0) {
		2456	index = _mesa_add_parameter(params, file, name, size, type->gl_type,
		2457	NULL, NULL);
		2458
		2459	/* Sampler uniform values are stored in prog->SamplerUnits,
		2460	* and the entry in that array is selected by this index we
		2461	* store in ParameterValues[].
		2462	*/
		2463	if (file == PROGRAM_SAMPLER) {
		2464	unsigned location;
		2465	const bool found =
		2466	this->shader_program->UniformHash->get(location,
		2467	params->Parameters[index].Name);
		2468	assert(found);
		2469
		2470	if (!found)
		2471	return;
		2472
		2473	struct gl_uniform_storage *storage =
		2474	&this->shader_program->UniformStorage[location];
		2475
		2476	assert(storage->sampler[shader_type].active);
		2477
		2478	for (unsigned int j = 0; j < size / 4; j++)
		2479	params->ParameterValues[index + j][0].f =
		2480	storage->sampler[shader_type].index + j;
		2481	}
		2482	}
		2483
		2484	/* The first part of the uniform that's processed determines the base
		2485	* location of the whole uniform (for structures).
		2486	*/
		2487	if (this->idx < 0)
		2488	this->idx = index;
		2489	}
		2490
		2491	/**
		2492	* Generate the program parameters list for the user uniforms in a shader
		2493	*
		2494	* \param shader_program Linked shader program. This is only used to
		2495	* emit possible link errors to the info log.
		2496	* \param sh Shader whose uniforms are to be processed.
		2497	* \param params Parameter list to be filled in.
		2498	*/
		2499	void
		2500	_mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
		2501	*shader_program,
		2502	struct gl_shader *sh,
		2503	struct gl_program_parameter_list
		2504	*params)
		2505	{
		2506	add_uniform_to_shader add(shader_program, params,
		2507	_mesa_shader_type_to_index(sh->Type));
		2508
		2509	foreach_list(node, sh->ir) {
		2510	ir_variable var = ((ir_instruction ) node)->as_variable();
		2511
		2512	if ((var == NULL) \|\| (var->mode != ir_var_uniform)
		2513	\|\| var->is_in_uniform_block() \|\| (strncmp(var->name, "gl_", 3) == 0))
		2514	continue;
		2515
		2516	add.process(var);
		2517	}
		2518	}
		2519
		2520	void
		2521	_mesa_associate_uniform_storage(struct gl_context *ctx,
		2522	struct gl_shader_program *shader_program,
		2523	struct gl_program_parameter_list *params)
		2524	{
		2525	/* After adding each uniform to the parameter list, connect the storage for
		2526	* the parameter with the tracking structure used by the API for the
		2527	* uniform.
		2528	*/
		2529	unsigned last_location = unsigned(~0);
		2530	for (unsigned i = 0; i < params->NumParameters; i++) {
		2531	if (params->Parameters[i].Type != PROGRAM_UNIFORM)
		2532	continue;
		2533
		2534	unsigned location;
		2535	const bool found =
		2536	shader_program->UniformHash->get(location, params->Parameters[i].Name);
		2537	assert(found);
		2538
		2539	if (!found)
		2540	continue;
		2541
		2542	if (location != last_location) {
		2543	struct gl_uniform_storage *storage =
		2544	&shader_program->UniformStorage[location];
		2545	enum gl_uniform_driver_format format = uniform_native;
		2546
		2547	unsigned columns = 0;
		2548	switch (storage->type->base_type) {
		2549	case GLSL_TYPE_UINT:
		2550	assert(ctx->Const.NativeIntegers);
		2551	format = uniform_native;
		2552	columns = 1;
		2553	break;
		2554	case GLSL_TYPE_INT:
		2555	format =
		2556	(ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
		2557	columns = 1;
		2558	break;
		2559	case GLSL_TYPE_FLOAT:
		2560	format = uniform_native;
		2561	columns = storage->type->matrix_columns;
		2562	break;
		2563	case GLSL_TYPE_BOOL:
		2564	if (ctx->Const.NativeIntegers) {
		2565	format = (ctx->Const.UniformBooleanTrue == 1)
		2566	? uniform_bool_int_0_1 : uniform_bool_int_0_not0;
		2567	} else {
		2568	format = uniform_bool_float;
		2569	}
		2570	columns = 1;
		2571	break;
		2572	case GLSL_TYPE_SAMPLER:
		2573	format = uniform_native;
		2574	columns = 1;
		2575	break;
		2576	case GLSL_TYPE_ARRAY:
		2577	case GLSL_TYPE_VOID:
		2578	case GLSL_TYPE_STRUCT:
		2579	case GLSL_TYPE_ERROR:
		2580	case GLSL_TYPE_INTERFACE:
		2581	assert(!"Should not get here.");
		2582	break;
		2583	}
		2584
		2585	_mesa_uniform_attach_driver_storage(storage,
		2586	4 * sizeof(float) * columns,
		2587	4 * sizeof(float),
		2588	format,
		2589	¶ms->ParameterValues[i]);
		2590
		2591	/* After attaching the driver's storage to the uniform, propagate any
		2592	* data from the linker's backing store. This will cause values from
		2593	* initializers in the source code to be copied over.
		2594	*/
		2595	_mesa_propagate_uniforms_to_driver_storage(storage,
		2596	0,
		2597	MAX2(1, storage->array_elements));
		2598
		2599	last_location = location;
		2600	}
		2601	}
		2602	}
		2603
		2604	/*
		2605	* On a basic block basis, tracks available PROGRAM_TEMPORARY register
		2606	* channels for copy propagation and updates following instructions to
		2607	* use the original versions.
		2608	*
		2609	* The ir_to_mesa_visitor lazily produces code assuming that this pass
		2610	* will occur. As an example, a TXP production before this pass:
		2611	*
		2612	* 0: MOV TEMP[1], INPUT[4].xyyy;
		2613	* 1: MOV TEMP[1].w, INPUT[4].wwww;
		2614	* 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
		2615	*
		2616	* and after:
		2617	*
		2618	* 0: MOV TEMP[1], INPUT[4].xyyy;
		2619	* 1: MOV TEMP[1].w, INPUT[4].wwww;
		2620	* 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
		2621	*
		2622	* which allows for dead code elimination on TEMP[1]'s writes.
		2623	*/
		2624	void
		2625	ir_to_mesa_visitor::copy_propagate(void)
		2626	{
		2627	ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
		2628	ir_to_mesa_instruction *,
		2629	this->next_temp * 4);
		2630	int acp_level = rzalloc_array(mem_ctx, int, this->next_temp 4);
		2631	int level = 0;
		2632
		2633	foreach_iter(exec_list_iterator, iter, this->instructions) {
		2634	ir_to_mesa_instruction inst = (ir_to_mesa_instruction )iter.get();
		2635
		2636	assert(inst->dst.file != PROGRAM_TEMPORARY
		2637	\|\| inst->dst.index < this->next_temp);
		2638
		2639	/* First, do any copy propagation possible into the src regs. */
		2640	for (int r = 0; r < 3; r++) {
		2641	ir_to_mesa_instruction *first = NULL;
		2642	bool good = true;
		2643	int acp_base = inst->src[r].index * 4;
		2644
		2645	if (inst->src[r].file != PROGRAM_TEMPORARY \|\|
		2646	inst->src[r].reladdr)
		2647	continue;
		2648
		2649	/* See if we can find entries in the ACP consisting of MOVs
		2650	* from the same src register for all the swizzled channels
		2651	* of this src register reference.
		2652	*/
		2653	for (int i = 0; i < 4; i++) {
		2654	int src_chan = GET_SWZ(inst->src[r].swizzle, i);
		2655	ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
		2656
		2657	if (!copy_chan) {
		2658	good = false;
		2659	break;
		2660	}
		2661
		2662	assert(acp_level[acp_base + src_chan] <= level);
		2663
		2664	if (!first) {
		2665	first = copy_chan;
		2666	} else {
		2667	if (first->src[0].file != copy_chan->src[0].file \|\|
		2668	first->src[0].index != copy_chan->src[0].index) {
		2669	good = false;
		2670	break;
		2671	}
		2672	}
		2673	}
		2674
		2675	if (good) {
		2676	/* We've now validated that we can copy-propagate to
		2677	* replace this src register reference. Do it.
		2678	*/
		2679	inst->src[r].file = first->src[0].file;
		2680	inst->src[r].index = first->src[0].index;
		2681
		2682	int swizzle = 0;
		2683	for (int i = 0; i < 4; i++) {
		2684	int src_chan = GET_SWZ(inst->src[r].swizzle, i);
		2685	ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
		2686	swizzle \|= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
		2687	(3 * i));
		2688	}
		2689	inst->src[r].swizzle = swizzle;
		2690	}
		2691	}
		2692
		2693	switch (inst->op) {
		2694	case OPCODE_BGNLOOP:
		2695	case OPCODE_ENDLOOP:
		2696	/* End of a basic block, clear the ACP entirely. */
		2697	memset(acp, 0, sizeof(acp) this->next_temp * 4);
		2698	break;
		2699
		2700	case OPCODE_IF:
		2701	++level;
		2702	break;
		2703
		2704	case OPCODE_ENDIF:
		2705	case OPCODE_ELSE:
		2706	/* Clear all channels written inside the block from the ACP, but
		2707	* leaving those that were not touched.
		2708	*/
		2709	for (int r = 0; r < this->next_temp; r++) {
		2710	for (int c = 0; c < 4; c++) {
		2711	if (!acp[4 * r + c])
		2712	continue;
		2713
		2714	if (acp_level[4 * r + c] >= level)
		2715	acp[4 * r + c] = NULL;
		2716	}
		2717	}
		2718	if (inst->op == OPCODE_ENDIF)
		2719	--level;
		2720	break;
		2721
		2722	default:
		2723	/* Continuing the block, clear any written channels from
		2724	* the ACP.
		2725	*/
		2726	if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
		2727	/* Any temporary might be written, so no copy propagation
		2728	* across this instruction.
		2729	*/
		2730	memset(acp, 0, sizeof(acp) this->next_temp * 4);
		2731	} else if (inst->dst.file == PROGRAM_OUTPUT &&
		2732	inst->dst.reladdr) {
		2733	/* Any output might be written, so no copy propagation
		2734	* from outputs across this instruction.
		2735	*/
		2736	for (int r = 0; r < this->next_temp; r++) {
		2737	for (int c = 0; c < 4; c++) {
		2738	if (!acp[4 * r + c])
		2739	continue;
		2740
		2741	if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
		2742	acp[4 * r + c] = NULL;
		2743	}
		2744	}
		2745	} else if (inst->dst.file == PROGRAM_TEMPORARY \|\|
		2746	inst->dst.file == PROGRAM_OUTPUT) {
		2747	/* Clear where it's used as dst. */
		2748	if (inst->dst.file == PROGRAM_TEMPORARY) {
		2749	for (int c = 0; c < 4; c++) {
		2750	if (inst->dst.writemask & (1 << c)) {
		2751	acp[4 * inst->dst.index + c] = NULL;
		2752	}
		2753	}
		2754	}
		2755
		2756	/* Clear where it's used as src. */
		2757	for (int r = 0; r < this->next_temp; r++) {
		2758	for (int c = 0; c < 4; c++) {
		2759	if (!acp[4 * r + c])
		2760	continue;
		2761
		2762	int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
		2763
		2764	if (acp[4 * r + c]->src[0].file == inst->dst.file &&
		2765	acp[4 * r + c]->src[0].index == inst->dst.index &&
		2766	inst->dst.writemask & (1 << src_chan))
		2767	{
		2768	acp[4 * r + c] = NULL;
		2769	}
		2770	}
		2771	}
		2772	}
		2773	break;
		2774	}
		2775
		2776	/* If this is a copy, add it to the ACP. */
		2777	if (inst->op == OPCODE_MOV &&
		2778	inst->dst.file == PROGRAM_TEMPORARY &&
		2779	!(inst->dst.file == inst->src[0].file &&
		2780	inst->dst.index == inst->src[0].index) &&
		2781	!inst->dst.reladdr &&
		2782	!inst->saturate &&
		2783	!inst->src[0].reladdr &&
		2784	!inst->src[0].negate) {
		2785	for (int i = 0; i < 4; i++) {
		2786	if (inst->dst.writemask & (1 << i)) {
		2787	acp[4 * inst->dst.index + i] = inst;
		2788	acp_level[4 * inst->dst.index + i] = level;
		2789	}
		2790	}
		2791	}
		2792	}
		2793
		2794	ralloc_free(acp_level);
		2795	ralloc_free(acp);
		2796	}
		2797
		2798
		2799	/**
		2800	* Convert a shader's GLSL IR into a Mesa gl_program.
		2801	*/
		2802	static struct gl_program *
		2803	get_mesa_program(struct gl_context *ctx,
		2804	struct gl_shader_program *shader_program,
		2805	struct gl_shader *shader)
		2806	{
		2807	ir_to_mesa_visitor v;
		2808	struct prog_instruction mesa_instructions, mesa_inst;
		2809	ir_instruction **mesa_instruction_annotation;
		2810	int i;
		2811	struct gl_program *prog;
		2812	GLenum target;
		2813	const char *target_string = _mesa_glsl_shader_target_name(shader->Type);
		2814	struct gl_shader_compiler_options *options =
		2815	&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
		2816
		2817	switch (shader->Type) {
		2818	case GL_VERTEX_SHADER:
		2819	target = GL_VERTEX_PROGRAM_ARB;
		2820	break;
		2821	case GL_FRAGMENT_SHADER:
		2822	target = GL_FRAGMENT_PROGRAM_ARB;
		2823	break;
		2824	case GL_GEOMETRY_SHADER:
		2825	target = GL_GEOMETRY_PROGRAM_NV;
		2826	break;
		2827	default:
		2828	assert(!"should not be reached");
		2829	return NULL;
		2830	}
		2831
		2832	validate_ir_tree(shader->ir);
		2833
		2834	prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
		2835	if (!prog)
		2836	return NULL;
		2837	prog->Parameters = _mesa_new_parameter_list();
		2838	v.ctx = ctx;
		2839	v.prog = prog;
		2840	v.shader_program = shader_program;
		2841	v.options = options;
		2842
		2843	_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
		2844	prog->Parameters);
		2845
		2846	/* Emit Mesa IR for main(). */
		2847	visit_exec_list(shader->ir, &v);
		2848	v.emit(NULL, OPCODE_END);
		2849
		2850	prog->NumTemporaries = v.next_temp;
		2851
		2852	int num_instructions = 0;
		2853	foreach_iter(exec_list_iterator, iter, v.instructions) {
		2854	num_instructions++;
		2855	}
		2856
		2857	mesa_instructions =
		2858	(struct prog_instruction *)calloc(num_instructions,
		2859	sizeof(*mesa_instructions));
		2860	mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
		2861	num_instructions);
		2862
		2863	v.copy_propagate();
		2864
		2865	/* Convert ir_mesa_instructions into prog_instructions.
		2866	*/
		2867	mesa_inst = mesa_instructions;
		2868	i = 0;
		2869	foreach_iter(exec_list_iterator, iter, v.instructions) {
		2870	const ir_to_mesa_instruction inst = (ir_to_mesa_instruction )iter.get();
		2871
		2872	mesa_inst->Opcode = inst->op;
		2873	mesa_inst->CondUpdate = inst->cond_update;
		2874	if (inst->saturate)
		2875	mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
		2876	mesa_inst->DstReg.File = inst->dst.file;
		2877	mesa_inst->DstReg.Index = inst->dst.index;
		2878	mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
		2879	mesa_inst->DstReg.WriteMask = inst->dst.writemask;
		2880	mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
		2881	mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
		2882	mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
		2883	mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
		2884	mesa_inst->TexSrcUnit = inst->sampler;
		2885	mesa_inst->TexSrcTarget = inst->tex_target;
		2886	mesa_inst->TexShadow = inst->tex_shadow;
		2887	mesa_instruction_annotation[i] = inst->ir;
		2888
		2889	/* Set IndirectRegisterFiles. */
		2890	if (mesa_inst->DstReg.RelAddr)
		2891	prog->IndirectRegisterFiles \|= 1 << mesa_inst->DstReg.File;
		2892
		2893	/* Update program's bitmask of indirectly accessed register files */
		2894	for (unsigned src = 0; src < 3; src++)
		2895	if (mesa_inst->SrcReg[src].RelAddr)
		2896	prog->IndirectRegisterFiles \|= 1 << mesa_inst->SrcReg[src].File;
		2897
		2898	switch (mesa_inst->Opcode) {
		2899	case OPCODE_IF:
		2900	if (options->MaxIfDepth == 0) {
		2901	linker_warning(shader_program,
		2902	"Couldn't flatten if-statement. "
		2903	"This will likely result in software "
		2904	"rasterization.\n");
		2905	}
		2906	break;
		2907	case OPCODE_BGNLOOP:
		2908	if (options->EmitNoLoops) {
		2909	linker_warning(shader_program,
		2910	"Couldn't unroll loop. "
		2911	"This will likely result in software "
		2912	"rasterization.\n");
		2913	}
		2914	break;
		2915	case OPCODE_CONT:
		2916	if (options->EmitNoCont) {
		2917	linker_warning(shader_program,
		2918	"Couldn't lower continue-statement. "
		2919	"This will likely result in software "
		2920	"rasterization.\n");
		2921	}
		2922	break;
		2923	case OPCODE_ARL:
		2924	prog->NumAddressRegs = 1;
		2925	break;
		2926	default:
		2927	break;
		2928	}
		2929
		2930	mesa_inst++;
		2931	i++;
		2932
		2933	if (!shader_program->LinkStatus)
		2934	break;
		2935	}
		2936
		2937	if (!shader_program->LinkStatus) {
		2938	goto fail_exit;
		2939	}
		2940
		2941	set_branchtargets(&v, mesa_instructions, num_instructions);
		2942
		2943	if (ctx->Shader.Flags & GLSL_DUMP) {
		2944	printf("\n");
		2945	printf("GLSL IR for linked %s program %d:\n", target_string,
		2946	shader_program->Name);
		2947	_mesa_print_ir(shader->ir, NULL);
		2948	printf("\n");
		2949	printf("\n");
		2950	printf("Mesa IR for linked %s program %d:\n", target_string,
		2951	shader_program->Name);
		2952	print_program(mesa_instructions, mesa_instruction_annotation,
		2953	num_instructions);
		2954	}
		2955
		2956	prog->Instructions = mesa_instructions;
		2957	prog->NumInstructions = num_instructions;
		2958
		2959	/* Setting this to NULL prevents a possible double free in the fail_exit
		2960	* path (far below).
		2961	*/
		2962	mesa_instructions = NULL;
		2963
		2964	do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
		2965
		2966	prog->SamplersUsed = shader->active_samplers;
		2967	prog->ShadowSamplers = shader->shadow_samplers;
		2968	_mesa_update_shader_textures_used(shader_program, prog);
		2969
		2970	/* Set the gl_FragDepth layout. */
		2971	if (target == GL_FRAGMENT_PROGRAM_ARB) {
		2972	struct gl_fragment_program fp = (struct gl_fragment_program )prog;
		2973	fp->FragDepthLayout = shader_program->FragDepthLayout;
		2974	}
		2975
		2976	_mesa_reference_program(ctx, &shader->Program, prog);
		2977
		2978	if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
		2979	_mesa_optimize_program(ctx, prog);
		2980	}
		2981
		2982	/* This has to be done last. Any operation that can cause
		2983	* prog->ParameterValues to get reallocated (e.g., anything that adds a
		2984	* program constant) has to happen before creating this linkage.
		2985	*/
		2986	_mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
		2987	if (!shader_program->LinkStatus) {
		2988	goto fail_exit;
		2989	}
		2990
		2991	return prog;
		2992
		2993	fail_exit:
		2994	free(mesa_instructions);
		2995	_mesa_reference_program(ctx, &shader->Program, NULL);
		2996	return NULL;
		2997	}
		2998
		2999	extern "C" {
		3000
		3001	/**
		3002	* Link a shader.
		3003	* Called via ctx->Driver.LinkShader()
		3004	* This actually involves converting GLSL IR into Mesa gl_programs with
		3005	* code lowering and other optimizations.
		3006	*/
		3007	GLboolean
		3008	_mesa_ir_link_shader(struct gl_context ctx, struct gl_shader_program prog)
		3009	{
		3010	assert(prog->LinkStatus);
		3011
		3012	for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
		3013	if (prog->_LinkedShaders[i] == NULL)
		3014	continue;
		3015
		3016	bool progress;
		3017	exec_list *ir = prog->_LinkedShaders[i]->ir;
		3018	const struct gl_shader_compiler_options *options =
		3019	&ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
		3020
		3021	do {
		3022	progress = false;
		3023
		3024	/* Lowering */
		3025	do_mat_op_to_vec(ir);
		3026	lower_instructions(ir, (MOD_TO_FRACT \| DIV_TO_MUL_RCP \| EXP_TO_EXP2
		3027	\| LOG_TO_LOG2 \| INT_DIV_TO_MUL_RCP
		3028	\| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
		3029
		3030	progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) \|\| progress;
		3031
		3032	progress = do_common_optimization(ir, true, true,
		3033	options->MaxUnrollIterations,
		3034	options)
		3035	\|\| progress;
		3036
		3037	progress = lower_quadop_vector(ir, true) \|\| progress;
		3038
		3039	if (options->MaxIfDepth == 0)
		3040	progress = lower_discard(ir) \|\| progress;
		3041
		3042	progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) \|\| progress;
		3043
		3044	if (options->EmitNoNoise)
		3045	progress = lower_noise(ir) \|\| progress;
		3046
		3047	/* If there are forms of indirect addressing that the driver
		3048	* cannot handle, perform the lowering pass.
		3049	*/
		3050	if (options->EmitNoIndirectInput \|\| options->EmitNoIndirectOutput
		3051	\|\| options->EmitNoIndirectTemp \|\| options->EmitNoIndirectUniform)
		3052	progress =
		3053	lower_variable_index_to_cond_assign(ir,
		3054	options->EmitNoIndirectInput,
		3055	options->EmitNoIndirectOutput,
		3056	options->EmitNoIndirectTemp,
		3057	options->EmitNoIndirectUniform)
		3058	\|\| progress;
		3059
		3060	progress = do_vec_index_to_cond_assign(ir) \|\| progress;
		3061	progress = lower_vector_insert(ir, true) \|\| progress;
		3062	} while (progress);
		3063
		3064	validate_ir_tree(ir);
		3065	}
		3066
		3067	for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
		3068	struct gl_program *linked_prog;
		3069
		3070	if (prog->_LinkedShaders[i] == NULL)
		3071	continue;
		3072
		3073	linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
		3074
		3075	if (linked_prog) {
		3076	if (i == MESA_SHADER_VERTEX) {
		3077	((struct gl_vertex_program *)linked_prog)->UsesClipDistance
		3078	= prog->Vert.UsesClipDistance;
		3079	}
		3080
		3081	_mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
		3082	linked_prog);
		3083	if (!ctx->Driver.ProgramStringNotify(ctx,
		3084	_mesa_program_index_to_target(i),
		3085	linked_prog)) {
		3086	return GL_FALSE;
		3087	}
		3088	}
		3089
		3090	_mesa_reference_program(ctx, &linked_prog, NULL);
		3091	}
		3092
		3093	return prog->LinkStatus;
		3094	}
		3095
		3096	/**
		3097	* Link a GLSL shader program. Called via glLinkProgram().
		3098	*/
		3099	void
		3100	_mesa_glsl_link_shader(struct gl_context ctx, struct gl_shader_program prog)
		3101	{
		3102	unsigned int i;
		3103
		3104	_mesa_clear_shader_program_data(ctx, prog);
		3105
		3106	prog->LinkStatus = GL_TRUE;
		3107
		3108	for (i = 0; i < prog->NumShaders; i++) {
		3109	if (!prog->Shaders[i]->CompileStatus) {
		3110	linker_error(prog, "linking with uncompiled shader");
		3111	prog->LinkStatus = GL_FALSE;
		3112	}
		3113	}
		3114
		3115	if (prog->LinkStatus) {
		3116	link_shaders(ctx, prog);
		3117	}
		3118
		3119	if (prog->LinkStatus) {
		3120	if (!ctx->Driver.LinkShader(ctx, prog)) {
		3121	prog->LinkStatus = GL_FALSE;
		3122	}
		3123	}
		3124
		3125	if (ctx->Shader.Flags & GLSL_DUMP) {
		3126	if (!prog->LinkStatus) {
		3127	printf("GLSL shader program %d failed to link\n", prog->Name);
		3128	}
		3129
		3130	if (prog->InfoLog && prog->InfoLog[0] != 0) {
		3131	printf("GLSL shader program %d info log:\n", prog->Name);
		3132	printf("%s\n", prog->InfoLog);
		3133	}
		3134	}
		3135	}
		3136
		3137	} /* extern "C" */

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/src/mesa/program/ir_to_mesa.cpp – Rev 4826