WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/mesa-10.6.0/src/glsl/lower_packed_varyings.cpp

Rev	Author	Line No.	Line
5564	serge	1	/*
		2	* Copyright © 2011 Intel Corporation
		3	*
		4	* Permission is hereby granted, free of charge, to any person obtaining a
		5	* copy of this software and associated documentation files (the "Software"),
		6	* to deal in the Software without restriction, including without limitation
		7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		8	* and/or sell copies of the Software, and to permit persons to whom the
		9	* Software is furnished to do so, subject to the following conditions:
		10	*
		11	* The above copyright notice and this permission notice (including the next
		12	* paragraph) shall be included in all copies or substantial portions of the
		13	* Software.
		14	*
		15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
		21	* DEALINGS IN THE SOFTWARE.
		22	*/
		23
		24	/**
		25	* \file lower_packed_varyings.cpp
		26	*
		27	* This lowering pass generates GLSL code that manually packs varyings into
		28	* vec4 slots, for the benefit of back-ends that don't support packed varyings
		29	* natively.
		30	*
		31	* For example, the following shader:
		32	*
		33	* out mat3x2 foo; // location=4, location_frac=0
		34	* out vec3 bar[2]; // location=5, location_frac=2
		35	*
		36	* main()
		37	* {
		38	* ...
		39	* }
		40	*
		41	* Is rewritten to:
		42	*
		43	* mat3x2 foo;
		44	* vec3 bar[2];
		45	* out vec4 packed4; // location=4, location_frac=0
		46	* out vec4 packed5; // location=5, location_frac=0
		47	* out vec4 packed6; // location=6, location_frac=0
		48	*
		49	* main()
		50	* {
		51	* ...
		52	* packed4.xy = foo[0];
		53	* packed4.zw = foo[1];
		54	* packed5.xy = foo[2];
		55	* packed5.zw = bar[0].xy;
		56	* packed6.x = bar[0].z;
		57	* packed6.yzw = bar[1];
		58	* }
		59	*
		60	* This lowering pass properly handles "double parking" of a varying vector
		61	* across two varying slots. For example, in the code above, two of the
		62	* components of bar[0] are stored in packed5, and the remaining component is
		63	* stored in packed6.
		64	*
		65	* Note that in theory, the extra instructions may cause some loss of
		66	* performance. However, hopefully in most cases the performance loss will
		67	* either be absorbed by a later optimization pass, or it will be offset by
		68	* memory bandwidth savings (because fewer varyings are used).
		69	*
		70	* This lowering pass also packs flat floats, ints, and uints together, by
		71	* using ivec4 as the base type of flat "varyings", and using appropriate
		72	* casts to convert floats and uints into ints.
		73	*
		74	* This lowering pass also handles varyings whose type is a struct or an array
		75	* of struct. Structs are packed in order and with no gaps, so there may be a
		76	* performance penalty due to structure elements being double-parked.
		77	*
		78	* Lowering of geometry shader inputs is slightly more complex, since geometry
		79	* inputs are always arrays, so we need to lower arrays to arrays. For
		80	* example, the following input:
		81	*
		82	* in struct Foo {
		83	* float f;
		84	* vec3 v;
		85	* vec2 a[2];
		86	* } arr[3]; // location=4, location_frac=0
		87	*
		88	* Would get lowered like this if it occurred in a fragment shader:
		89	*
		90	* struct Foo {
		91	* float f;
		92	* vec3 v;
		93	* vec2 a[2];
		94	* } arr[3];
		95	* in vec4 packed4; // location=4, location_frac=0
		96	* in vec4 packed5; // location=5, location_frac=0
		97	* in vec4 packed6; // location=6, location_frac=0
		98	* in vec4 packed7; // location=7, location_frac=0
		99	* in vec4 packed8; // location=8, location_frac=0
		100	* in vec4 packed9; // location=9, location_frac=0
		101	*
		102	* main()
		103	* {
		104	* arr[0].f = packed4.x;
		105	* arr[0].v = packed4.yzw;
		106	* arr[0].a[0] = packed5.xy;
		107	* arr[0].a[1] = packed5.zw;
		108	* arr[1].f = packed6.x;
		109	* arr[1].v = packed6.yzw;
		110	* arr[1].a[0] = packed7.xy;
		111	* arr[1].a[1] = packed7.zw;
		112	* arr[2].f = packed8.x;
		113	* arr[2].v = packed8.yzw;
		114	* arr[2].a[0] = packed9.xy;
		115	* arr[2].a[1] = packed9.zw;
		116	* ...
		117	* }
		118	*
		119	* But it would get lowered like this if it occurred in a geometry shader:
		120	*
		121	* struct Foo {
		122	* float f;
		123	* vec3 v;
		124	* vec2 a[2];
		125	* } arr[3];
		126	* in vec4 packed4[3]; // location=4, location_frac=0
		127	* in vec4 packed5[3]; // location=5, location_frac=0
		128	*
		129	* main()
		130	* {
		131	* arr[0].f = packed4[0].x;
		132	* arr[0].v = packed4[0].yzw;
		133	* arr[0].a[0] = packed5[0].xy;
		134	* arr[0].a[1] = packed5[0].zw;
		135	* arr[1].f = packed4[1].x;
		136	* arr[1].v = packed4[1].yzw;
		137	* arr[1].a[0] = packed5[1].xy;
		138	* arr[1].a[1] = packed5[1].zw;
		139	* arr[2].f = packed4[2].x;
		140	* arr[2].v = packed4[2].yzw;
		141	* arr[2].a[0] = packed5[2].xy;
		142	* arr[2].a[1] = packed5[2].zw;
		143	* ...
		144	* }
		145	*/
		146
		147	#include "glsl_symbol_table.h"
		148	#include "ir.h"
		149	#include "ir_builder.h"
		150	#include "ir_optimization.h"
		151	#include "program/prog_instruction.h"
		152
		153	using namespace ir_builder;
		154
		155	namespace {
		156
		157	/**
		158	* Visitor that performs varying packing. For each varying declared in the
		159	* shader, this visitor determines whether it needs to be packed. If so, it
		160	* demotes it to an ordinary global, creates new packed varyings, and
		161	* generates assignments to convert between the original varying and the
		162	* packed varying.
		163	*/
		164	class lower_packed_varyings_visitor
		165	{
		166	public:
		167	lower_packed_varyings_visitor(void *mem_ctx, unsigned locations_used,
		168	ir_variable_mode mode,
		169	unsigned gs_input_vertices,
		170	exec_list *out_instructions,
		171	exec_list *out_variables);
		172
		173	void run(exec_list *instructions);
		174
		175	private:
		176	void bitwise_assign_pack(ir_rvalue lhs, ir_rvalue rhs);
		177	void bitwise_assign_unpack(ir_rvalue lhs, ir_rvalue rhs);
		178	unsigned lower_rvalue(ir_rvalue *rvalue, unsigned fine_location,
		179	ir_variable unpacked_var, const char name,
		180	bool gs_input_toplevel, unsigned vertex_index);
		181	unsigned lower_arraylike(ir_rvalue *rvalue, unsigned array_size,
		182	unsigned fine_location,
		183	ir_variable unpacked_var, const char name,
		184	bool gs_input_toplevel, unsigned vertex_index);
		185	ir_dereference *get_packed_varying_deref(unsigned location,
		186	ir_variable *unpacked_var,
		187	const char *name,
		188	unsigned vertex_index);
		189	bool needs_lowering(ir_variable *var);
		190
		191	/**
		192	* Memory context used to allocate new instructions for the shader.
		193	*/
		194	void * const mem_ctx;
		195
		196	/**
		197	* Number of generic varying slots which are used by this shader. This is
		198	* used to allocate temporary intermediate data structures. If any varying
		199	* used by this shader has a location greater than or equal to
		200	* VARYING_SLOT_VAR0 + locations_used, an assertion will fire.
		201	*/
		202	const unsigned locations_used;
		203
		204	/**
		205	* Array of pointers to the packed varyings that have been created for each
		206	* generic varying slot. NULL entries in this array indicate varying slots
		207	* for which a packed varying has not been created yet.
		208	*/
		209	ir_variable **packed_varyings;
		210
		211	/**
		212	* Type of varying which is being lowered in this pass (either
		213	* ir_var_shader_in or ir_var_shader_out).
		214	*/
		215	const ir_variable_mode mode;
		216
		217	/**
		218	* If we are currently lowering geometry shader inputs, the number of input
		219	* vertices the geometry shader accepts. Otherwise zero.
		220	*/
		221	const unsigned gs_input_vertices;
		222
		223	/**
		224	* Exec list into which the visitor should insert the packing instructions.
		225	* Caller provides this list; it should insert the instructions into the
		226	* appropriate place in the shader once the visitor has finished running.
		227	*/
		228	exec_list *out_instructions;
		229
		230	/**
		231	* Exec list into which the visitor should insert any new variables.
		232	*/
		233	exec_list *out_variables;
		234	};
		235
		236	} /* anonymous namespace */
		237
		238	lower_packed_varyings_visitor::lower_packed_varyings_visitor(
		239	void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
		240	unsigned gs_input_vertices, exec_list *out_instructions,
		241	exec_list *out_variables)
		242	: mem_ctx(mem_ctx),
		243	locations_used(locations_used),
		244	packed_varyings((ir_variable **)
		245	rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
		246	locations_used)),
		247	mode(mode),
		248	gs_input_vertices(gs_input_vertices),
		249	out_instructions(out_instructions),
		250	out_variables(out_variables)
		251	{
		252	}
		253
		254	void
		255	lower_packed_varyings_visitor::run(exec_list *instructions)
		256	{
		257	foreach_in_list(ir_instruction, node, instructions) {
		258	ir_variable *var = node->as_variable();
		259	if (var == NULL)
		260	continue;
		261
		262	if (var->data.mode != this->mode \|\|
		263	var->data.location < VARYING_SLOT_VAR0 \|\|
		264	!this->needs_lowering(var))
		265	continue;
		266
		267	/* This lowering pass is only capable of packing floats and ints
		268	* together when their interpolation mode is "flat". Therefore, to be
		269	* safe, caller should ensure that integral varyings always use flat
		270	* interpolation, even when this is not required by GLSL.
		271	*/
		272	assert(var->data.interpolation == INTERP_QUALIFIER_FLAT \|\|
		273	!var->type->contains_integer());
		274
		275	/* Change the old varying into an ordinary global. */
		276	assert(var->data.mode != ir_var_temporary);
		277	var->data.mode = ir_var_auto;
		278
		279	/* Create a reference to the old varying. */
		280	ir_dereference_variable *deref
		281	= new(this->mem_ctx) ir_dereference_variable(var);
		282
		283	/* Recursively pack or unpack it. */
		284	this->lower_rvalue(deref, var->data.location * 4 + var->data.location_frac, var,
		285	var->name, this->gs_input_vertices != 0, 0);
		286	}
		287	}
		288
		/* Swizzle mask naming the components (z, w, z, w).  bitwise_assign_unpack()
		 * passes it with a component count of 2 to pick the z/w half of a packed
		 * four-component value.
		 */
		289	#define SWIZZLE_ZWZW MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W)
		290
		291	/**
		292	* Make an ir_assignment from \c rhs to \c lhs, performing appropriate
		293	* bitcasts if necessary to match up types.
		294	*
		295	* This function is called when packing varyings.
		296	*/
		297	void
		298	lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs,
		299	ir_rvalue *rhs)
		300	{
		301	if (lhs->type->base_type != rhs->type->base_type) {
		302	/* Since we only mix types in flat varyings, and we always store flat
		303	* varyings as type ivec4, we need only produce conversions from (uint
		304	* or float) to int.
		305	*/
		306	assert(lhs->type->base_type == GLSL_TYPE_INT);
		307	switch (rhs->type->base_type) {
		308	case GLSL_TYPE_UINT:
		309	rhs = new(this->mem_ctx)
		310	ir_expression(ir_unop_u2i, lhs->type, rhs);
		311	break;
		312	case GLSL_TYPE_FLOAT:
		313	rhs = new(this->mem_ctx)
		314	ir_expression(ir_unop_bitcast_f2i, lhs->type, rhs);
		315	break;
		316	case GLSL_TYPE_DOUBLE:
		317	assert(rhs->type->vector_elements <= 2);
		318	if (rhs->type->vector_elements == 2) {
		319	ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", ir_var_temporary);
		320
		321	assert(lhs->type->vector_elements == 4);
		322	this->out_variables->push_tail(t);
		323	this->out_instructions->push_tail(
		324	assign(t, u2i(expr(ir_unop_unpack_double_2x32, swizzle_x(rhs->clone(mem_ctx, NULL)))), 0x3));
		325	this->out_instructions->push_tail(
		326	assign(t, u2i(expr(ir_unop_unpack_double_2x32, swizzle_y(rhs))), 0xc));
		327	rhs = deref(t).val;
		328	} else {
		329	rhs = u2i(expr(ir_unop_unpack_double_2x32, rhs));
		330	}
		331	break;
		332	default:
		333	assert(!"Unexpected type conversion while lowering varyings");
		334	break;
		335	}
		336	}
		337	this->out_instructions->push_tail(new (this->mem_ctx) ir_assignment(lhs, rhs));
		338	}
		339
		340
		341	/**
		342	* Make an ir_assignment from \c rhs to \c lhs, performing appropriate
		343	* bitcasts if necessary to match up types.
		344	*
		345	* This function is called when unpacking varyings.
		346	*/
		347	void
		348	lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs,
		349	ir_rvalue *rhs)
		350	{
		351	if (lhs->type->base_type != rhs->type->base_type) {
		352	/* Since we only mix types in flat varyings, and we always store flat
		353	* varyings as type ivec4, we need only produce conversions from int to
		354	* (uint or float).
		355	*/
		356	assert(rhs->type->base_type == GLSL_TYPE_INT);
		357	switch (lhs->type->base_type) {
		358	case GLSL_TYPE_UINT:
		359	rhs = new(this->mem_ctx)
		360	ir_expression(ir_unop_i2u, lhs->type, rhs);
		361	break;
		362	case GLSL_TYPE_FLOAT:
		363	rhs = new(this->mem_ctx)
		364	ir_expression(ir_unop_bitcast_i2f, lhs->type, rhs);
		365	break;
		366	case GLSL_TYPE_DOUBLE:
		367	assert(lhs->type->vector_elements <= 2);
		368	if (lhs->type->vector_elements == 2) {
		369	ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", ir_var_temporary);
		370	assert(rhs->type->vector_elements == 4);
		371	this->out_variables->push_tail(t);
		372	this->out_instructions->push_tail(
		373	assign(t, expr(ir_unop_pack_double_2x32, i2u(swizzle_xy(rhs->clone(mem_ctx, NULL)))), 0x1));
		374	this->out_instructions->push_tail(
		375	assign(t, expr(ir_unop_pack_double_2x32, i2u(swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2))), 0x2));
		376	rhs = deref(t).val;
		377	} else {
		378	rhs = expr(ir_unop_pack_double_2x32, i2u(rhs));
		379	}
		380	break;
		381	default:
		382	assert(!"Unexpected type conversion while lowering varyings");
		383	break;
		384	}
		385	}
		386	this->out_instructions->push_tail(new(this->mem_ctx) ir_assignment(lhs, rhs));
		387	}
		388
		389
		390	/**
		391	* Recursively pack or unpack the given varying (or portion of a varying) by
		392	* traversing all of its constituent vectors.
		393	*
		394	* \param fine_location is the location where the first constituent vector
		395	* should be packed--the word "fine" indicates that this location is expressed
		396	* in multiples of a float, rather than multiples of a vec4 as is used
		397	* elsewhere in Mesa.
		398	*
		399	* \param gs_input_toplevel should be set to true if we are lowering geometry
		400	* shader inputs, and we are currently lowering the whole input variable
		401	* (i.e. we are lowering the array whose index selects the vertex).
		402	*
		403	* \param vertex_index: if we are lowering geometry shader inputs, and the
		404	* level of the array that we are currently lowering is not the top level,
		405	* then this indicates which vertex we are currently lowering. Otherwise it
		406	* is ignored.
		407	*
		408	* \return the location where the next constituent vector (after this one)
		409	* should be packed.
		410	*/
		411	unsigned
		412	lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
		413	unsigned fine_location,
		414	ir_variable *unpacked_var,
		415	const char *name,
		416	bool gs_input_toplevel,
		417	unsigned vertex_index)
		418	{
		419	unsigned dmul = rvalue->type->is_double() ? 2 : 1;
		420	/* When gs_input_toplevel is set, we should be looking at a geometry shader
		421	* input array.
		422	*/
		423	assert(!gs_input_toplevel \|\| rvalue->type->is_array());
		424
		425	if (rvalue->type->is_record()) {
		426	for (unsigned i = 0; i < rvalue->type->length; i++) {
		427	if (i != 0)
		428	rvalue = rvalue->clone(this->mem_ctx, NULL);
		429	const char *field_name = rvalue->type->fields.structure[i].name;
		430	ir_dereference_record *dereference_record = new(this->mem_ctx)
		431	ir_dereference_record(rvalue, field_name);
		432	char *deref_name
		433	= ralloc_asprintf(this->mem_ctx, "%s.%s", name, field_name);
		434	fine_location = this->lower_rvalue(dereference_record, fine_location,
		435	unpacked_var, deref_name, false,
		436	vertex_index);
		437	}
		438	return fine_location;
		439	} else if (rvalue->type->is_array()) {
		440	/* Arrays are packed/unpacked by considering each array element in
		441	* sequence.
		442	*/
		443	return this->lower_arraylike(rvalue, rvalue->type->array_size(),
		444	fine_location, unpacked_var, name,
		445	gs_input_toplevel, vertex_index);
		446	} else if (rvalue->type->is_matrix()) {
		447	/* Matrices are packed/unpacked by considering each column vector in
		448	* sequence.
		449	*/
		450	return this->lower_arraylike(rvalue, rvalue->type->matrix_columns,
		451	fine_location, unpacked_var, name,
		452	false, vertex_index);
		453	} else if (rvalue->type->vector_elements * dmul +
		454	fine_location % 4 > 4) {
		455	/* This vector is going to be "double parked" across two varying slots,
		456	* so handle it as two separate assignments. For doubles, a dvec3/dvec4
		457	* can end up being spread over 3 slots. However the second splitting
		458	* will happen later, here we just always want to split into 2.
		459	*/
		460	unsigned left_components, right_components;
		461	unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
		462	unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
		463	char left_swizzle_name[4] = { 0, 0, 0, 0 };
		464	char right_swizzle_name[4] = { 0, 0, 0, 0 };
		465
		466	left_components = 4 - fine_location % 4;
		467	if (rvalue->type->is_double()) {
		468	/* We might actually end up with 0 left components! */
		469	left_components /= 2;
		470	}
		471	right_components = rvalue->type->vector_elements - left_components;
		472
		473	for (unsigned i = 0; i < left_components; i++) {
		474	left_swizzle_values[i] = i;
		475	left_swizzle_name[i] = "xyzw"[i];
		476	}
		477	for (unsigned i = 0; i < right_components; i++) {
		478	right_swizzle_values[i] = i + left_components;
		479	right_swizzle_name[i] = "xyzw"[i + left_components];
		480	}
		481	ir_swizzle *left_swizzle = new(this->mem_ctx)
		482	ir_swizzle(rvalue, left_swizzle_values, left_components);
		483	ir_swizzle *right_swizzle = new(this->mem_ctx)
		484	ir_swizzle(rvalue->clone(this->mem_ctx, NULL), right_swizzle_values,
		485	right_components);
		486	char *left_name
		487	= ralloc_asprintf(this->mem_ctx, "%s.%s", name, left_swizzle_name);
		488	char *right_name
		489	= ralloc_asprintf(this->mem_ctx, "%s.%s", name, right_swizzle_name);
		490	if (left_components)
		491	fine_location = this->lower_rvalue(left_swizzle, fine_location,
		492	unpacked_var, left_name, false,
		493	vertex_index);
		494	else
		495	/* Top up the fine location to the next slot */
		496	fine_location++;
		497	return this->lower_rvalue(right_swizzle, fine_location, unpacked_var,
		498	right_name, false, vertex_index);
		499	} else {
		500	/* No special handling is necessary; pack the rvalue into the
		501	* varying.
		502	*/
		503	unsigned swizzle_values[4] = { 0, 0, 0, 0 };
		504	unsigned components = rvalue->type->vector_elements * dmul;
		505	unsigned location = fine_location / 4;
		506	unsigned location_frac = fine_location % 4;
		507	for (unsigned i = 0; i < components; ++i)
		508	swizzle_values[i] = i + location_frac;
		509	ir_dereference *packed_deref =
		510	this->get_packed_varying_deref(location, unpacked_var, name,
		511	vertex_index);
		512	ir_swizzle *swizzle = new(this->mem_ctx)
		513	ir_swizzle(packed_deref, swizzle_values, components);
		514	if (this->mode == ir_var_shader_out) {
		515	this->bitwise_assign_pack(swizzle, rvalue);
		516	} else {
		517	this->bitwise_assign_unpack(rvalue, swizzle);
		518	}
		519	return fine_location + components;
		520	}
		521	}
		522
		523	/**
		524	* Recursively pack or unpack a varying for which we need to iterate over its
		525	* constituent elements, accessing each one using an ir_dereference_array.
		526	* This takes care of both arrays and matrices, since ir_dereference_array
		527	* treats a matrix like an array of its column vectors.
		528	*
		529	* \param gs_input_toplevel should be set to true if we are lowering geometry
		530	* shader inputs, and we are currently lowering the whole input variable
		531	* (i.e. we are lowering the array whose index selects the vertex).
		532	*
		533	* \param vertex_index: if we are lowering geometry shader inputs, and the
		534	* level of the array that we are currently lowering is not the top level,
		535	* then this indicates which vertex we are currently lowering. Otherwise it
		536	* is ignored.
		537	*/
		538	unsigned
		539	lower_packed_varyings_visitor::lower_arraylike(ir_rvalue *rvalue,
		540	unsigned array_size,
		541	unsigned fine_location,
		542	ir_variable *unpacked_var,
		543	const char *name,
		544	bool gs_input_toplevel,
		545	unsigned vertex_index)
		546	{
		547	for (unsigned i = 0; i < array_size; i++) {
		548	if (i != 0)
		549	rvalue = rvalue->clone(this->mem_ctx, NULL);
		550	ir_constant *constant = new(this->mem_ctx) ir_constant(i);
		551	ir_dereference_array *dereference_array = new(this->mem_ctx)
		552	ir_dereference_array(rvalue, constant);
		553	if (gs_input_toplevel) {
		554	/* Geometry shader inputs are a special case. Instead of storing
		555	* each element of the array at a different location, all elements
		556	* are at the same location, but with a different vertex index.
		557	*/
		558	(void) this->lower_rvalue(dereference_array, fine_location,
		559	unpacked_var, name, false, i);
		560	} else {
		561	char *subscripted_name
		562	= ralloc_asprintf(this->mem_ctx, "%s[%d]", name, i);
		563	fine_location =
		564	this->lower_rvalue(dereference_array, fine_location,
		565	unpacked_var, subscripted_name,
		566	false, vertex_index);
		567	}
		568	}
		569	return fine_location;
		570	}
		571
		572	/**
		573	* Retrieve the packed varying corresponding to the given varying location.
		574	* If no packed varying has been created for the given varying location yet,
		575	* create it and add it to the shader before returning it.
		576	*
		577	* The newly created varying inherits its interpolation parameters from \c
		578	* unpacked_var. Its base type is ivec4 if we are lowering a flat varying,
		579	* vec4 otherwise.
		580	*
		581	* \param vertex_index: if we are lowering geometry shader inputs, then this
		582	* indicates which vertex we are currently lowering. Otherwise it is ignored.
		583	*/
		584	ir_dereference *
		585	lower_packed_varyings_visitor::get_packed_varying_deref(
		586	unsigned location, ir_variable unpacked_var, const char name,
		587	unsigned vertex_index)
		588	{
		589	unsigned slot = location - VARYING_SLOT_VAR0;
		590	assert(slot < locations_used);
		591	if (this->packed_varyings[slot] == NULL) {
		592	char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
		593	const glsl_type *packed_type;
		594	if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT)
		595	packed_type = glsl_type::ivec4_type;
		596	else
		597	packed_type = glsl_type::vec4_type;
		598	if (this->gs_input_vertices != 0) {
		599	packed_type =
		600	glsl_type::get_array_instance(packed_type,
		601	this->gs_input_vertices);
		602	}
		603	ir_variable *packed_var = new(this->mem_ctx)
		604	ir_variable(packed_type, packed_name, this->mode);
		605	if (this->gs_input_vertices != 0) {
		606	/* Prevent update_array_sizes() from messing with the size of the
		607	* array.
		608	*/
		609	packed_var->data.max_array_access = this->gs_input_vertices - 1;
		610	}
		611	packed_var->data.centroid = unpacked_var->data.centroid;
		612	packed_var->data.sample = unpacked_var->data.sample;
		613	packed_var->data.interpolation = unpacked_var->data.interpolation;
		614	packed_var->data.location = location;
		615	unpacked_var->insert_before(packed_var);
		616	this->packed_varyings[slot] = packed_var;
		617	} else {
		618	/* For geometry shader inputs, only update the packed variable name the
		619	* first time we visit each component.
		620	*/
		621	if (this->gs_input_vertices == 0 \|\| vertex_index == 0) {
		622	ralloc_asprintf_append((char **) &this->packed_varyings[slot]->name,
		623	",%s", name);
		624	}
		625	}
		626
		627	ir_dereference *deref = new(this->mem_ctx)
		628	ir_dereference_variable(this->packed_varyings[slot]);
		629	if (this->gs_input_vertices != 0) {
		630	/* When lowering GS inputs, the packed variable is an array, so we need
		631	* to dereference it using vertex_index.
		632	*/
		633	ir_constant *constant = new(this->mem_ctx) ir_constant(vertex_index);
		634	deref = new(this->mem_ctx) ir_dereference_array(deref, constant);
		635	}
		636	return deref;
		637	}
		638
		639	bool
		640	lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
		641	{
		642	/* Things composed of vec4's and varyings with explicitly assigned
		643	* locations don't need lowering. Everything else does.
		644	*/
		645	if (var->data.explicit_location)
		646	return false;
		647
		648	const glsl_type *type = var->type->without_array();
		649	if (type->vector_elements == 4 && !type->is_double())
		650	return false;
		651	return true;
		652	}
		653
		654
		655	/**
		656	* Visitor that splices varying packing code before every use of EmitVertex()
		657	* in a geometry shader.
		658	*/
		659	class lower_packed_varyings_gs_splicer : public ir_hierarchical_visitor
		660	{
		661	public:
		662	explicit lower_packed_varyings_gs_splicer(void *mem_ctx,
		663	const exec_list *instructions);
		664
		665	virtual ir_visitor_status visit_leave(ir_emit_vertex *ev);
		666
		667	private:
		668	/**
		669	* Memory context used to allocate new instructions for the shader.
		670	*/
		671	void * const mem_ctx;
		672
		673	/**
		674	* Instructions that should be spliced into place before each EmitVertex()
		675	* call.
		676	*/
		677	const exec_list *instructions;
		678	};
		679
		680
		681	lower_packed_varyings_gs_splicer::lower_packed_varyings_gs_splicer(
		682	void mem_ctx, const exec_list instructions)
		683	: mem_ctx(mem_ctx), instructions(instructions)
		684	{
		685	}
		686
		687
		688	ir_visitor_status
		689	lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev)
		690	{
		691	foreach_in_list(ir_instruction, ir, this->instructions) {
		692	ev->insert_before(ir->clone(this->mem_ctx, NULL));
		693	}
		694	return visit_continue;
		695	}
		696
		697
		698	void
		699	lower_packed_varyings(void *mem_ctx, unsigned locations_used,
		700	ir_variable_mode mode, unsigned gs_input_vertices,
		701	gl_shader *shader)
		702	{
		703	exec_list *instructions = shader->ir;
		704	ir_function *main_func = shader->symbols->get_function("main");
		705	exec_list void_parameters;
		706	ir_function_signature *main_func_sig
		707	= main_func->matching_signature(NULL, &void_parameters, false);
		708	exec_list new_instructions, new_variables;
		709	lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
		710	gs_input_vertices,
		711	&new_instructions,
		712	&new_variables);
		713	visitor.run(instructions);
		714	if (mode == ir_var_shader_out) {
		715	if (shader->Stage == MESA_SHADER_GEOMETRY) {
		716	/* For geometry shaders, outputs need to be lowered before each call
		717	* to EmitVertex()
		718	*/
		719	lower_packed_varyings_gs_splicer splicer(mem_ctx, &new_instructions);
		720
		721	/* Add all the variables in first. */
		722	main_func_sig->body.head->insert_before(&new_variables);
		723
		724	/* Now update all the EmitVertex instances */
		725	splicer.run(instructions);
		726	} else {
		727	/* For other shader types, outputs need to be lowered at the end of
		728	* main()
		729	*/
		730	main_func_sig->body.append_list(&new_variables);
		731	main_func_sig->body.append_list(&new_instructions);
		732	}
		733	} else {
		734	/* Shader inputs need to be lowered at the beginning of main() */
		735	main_func_sig->body.head->insert_before(&new_instructions);
		736	main_func_sig->body.head->insert_before(&new_variables);
		737	}
		738	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/mesa-10.6.0/src/glsl/lower_packed_varyings.cpp – Rev 5571