/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir.h"

/*
 * Implements a pass that lowers vector phi nodes to scalar phi nodes when
 * we don't think it will hurt anything.
 */
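
/*
 * As an illustration (pseudo-NIR, not real syntax), a vec2 phi such as
 *
 *    vec2 ssa_5 = phi(block_1: ssa_2, block_2: ssa_4)
 *
 * becomes one scalar phi per component, fed by single-component movs
 * inserted in each predecessor, plus a vec2 to recombine the results:
 *
 *    float ssa_6 = phi(block_1: ssa_2.x, block_2: ssa_4.x)
 *    float ssa_7 = phi(block_1: ssa_2.y, block_2: ssa_4.y)
 *    vec2 ssa_8 = vec2(ssa_6, ssa_7)
 */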

struct lower_phis_to_scalar_state {
   void *mem_ctx;
   void *dead_ctx;

   /* Hash table marking which phi nodes are scalarizable. The keys are
    * pointers to phi instructions and the entry data is either NULL for
    * not scalarizable or non-NULL for scalarizable.
    */
   struct hash_table *phi_table;
};

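/* Forward-declared because should_lower_phi and is_phi_src_scalarizable
 * are mutually recursive: a phi source that is itself a phi is
 * scalarizable exactly when we decide to lower that phi.
 */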
static bool
should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state);

static bool
is_phi_src_scalarizable(nir_phi_src *src,
                        struct lower_phis_to_scalar_state *state)
{
   /* We don't know what to do with non-SSA sources */
   if (!src->src.is_ssa)
      return false;

   nir_instr *src_instr = src->src.ssa->parent_instr;
   switch (src_instr->type) {
   case nir_instr_type_alu: {
      nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);

      /* ALU operations with output_size == 0 should be scalarized. We
       * will also see a bunch of vecN operations from scalarizing ALU
       * operations and, since they can easily be copy-propagated, they
       * are ok too.
       */
      return nir_op_infos[src_alu->op].output_size == 0 ||
             src_alu->op == nir_op_vec2 ||
             src_alu->op == nir_op_vec3 ||
             src_alu->op == nir_op_vec4;
   }

   case nir_instr_type_phi:
      /* A phi is scalarizable if we're going to lower it */
      return should_lower_phi(nir_instr_as_phi(src_instr), state);

   case nir_instr_type_load_const:
      /* These are trivially scalarizable */
      return true;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr);

      switch (src_intrin->intrinsic) {
      case nir_intrinsic_load_var:
         return src_intrin->variables[0]->var->data.mode == nir_var_shader_in ||
                src_intrin->variables[0]->var->data.mode == nir_var_uniform;

      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
      case nir_intrinsic_load_uniform:
      case nir_intrinsic_load_uniform_indirect:
      case nir_intrinsic_load_ubo:
      case nir_intrinsic_load_ubo_indirect:
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_input_indirect:
         return true;

      default:
         break;
      }
   }

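   /* Intrinsics not handled above fall through to the default case
    * below and are treated as not scalarizable.
    */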
   default:
      /* We can't scalarize this type of instruction */
      return false;
   }
}

/**
 * Determines if the given phi node should be lowered. The only phi nodes
 * we will scalarize at the moment are those where all of the sources are
 * scalarizable.
 *
 * The reason for this comes down to coalescing. Since phi sources can't
 * swizzle, swizzles on phis have to be resolved by inserting a mov right
 * before the phi. The choice then becomes between movs to pick off
 * components for a scalar phi or potentially movs to recombine components
 * for a vector phi. The problem is that the movs generated to pick off
 * the components are almost uncoalescable. We can't coalesce them in NIR
 * because we need them to pick off components and we can't coalesce them
 * in the backend because the source register is a vector and the
 * destination is a scalar that may be used at other places in the program.
 * On the other hand, if we have a bunch of scalars going into a vector
 * phi, the situation is much better. In this case, if the SSA def is
 * generated in the predecessor block to the corresponding phi source, the
 * backend code will be an ALU op into a temporary and then a mov into the
 * given vector component; this move can almost certainly be coalesced
 * away.
 */
static bool
should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state)
{
   /* Already scalar */
   if (phi->dest.ssa.num_components == 1)
      return false;

   struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi);
   if (entry)
      return entry->data != NULL;

   /* Insert an entry and mark it as scalarizable for now. That way
    * we don't recurse forever and a cycle in the dependence graph
    * won't automatically make us fail to scalarize.
    */
   entry = _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1);

   bool scalarizable = true;

   nir_foreach_phi_src(phi, src) {
      scalarizable = is_phi_src_scalarizable(src, state);
      if (!scalarizable)
         break;
   }

   /* The hash table entry for 'phi' may have changed while recursing the
    * dependence graph: _mesa_hash_table_insert() can rehash the table and
    * invalidate the entry pointer we got above, so re-fetch it before
    * recording the final answer.
    */
   entry = _mesa_hash_table_search(state->phi_table, phi);
   assert(entry);

   entry->data = (void *)(intptr_t)scalarizable;

   return scalarizable;
}

static bool
lower_phis_to_scalar_block(nir_block *block, void *void_state)
{
   struct lower_phis_to_scalar_state *state = void_state;

   /* Find the last phi node in the block */
   nir_phi_instr *last_phi = NULL;
   nir_foreach_instr(block, instr) {
      if (instr->type != nir_instr_type_phi)
         break;

      last_phi = nir_instr_as_phi(instr);
   }

   /* We have to handle the phi nodes in their own pass due to the way
    * we're modifying the linked list of instructions.
    */
   nir_foreach_instr_safe(block, instr) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);

      if (!should_lower_phi(phi, state))
         continue;

      /* Create a vecN operation to combine the results. Most of these
       * will be redundant, but copy propagation should clean them up for
       * us. No need to add the complexity here.
       */
      nir_op vec_op;
      switch (phi->dest.ssa.num_components) {
      case 2: vec_op = nir_op_vec2; break;
      case 3: vec_op = nir_op_vec3; break;
      case 4: vec_op = nir_op_vec4; break;
      default: unreachable("Invalid number of components");
      }

      nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
      nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
                        phi->dest.ssa.num_components, NULL);
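      /* Enable writes to every destination component, e.g. 0xf for a vec4 */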
      vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;

      for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
         nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx);
         nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL);

         vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa);

         nir_foreach_phi_src(phi, src) {
            /* We need to insert a mov to grab the i'th component of src */
            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
                                                      nir_op_imov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
            mov->dest.write_mask = 1;
            nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
            mov->src[0].swizzle[0] = i;

            /* Insert at the end of the predecessor but before the jump
             * (a jump, if present, must be the last instruction in its
             * block).
             */
            nir_instr *pred_last_instr = nir_block_last_instr(src->pred);
            if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump)
               nir_instr_insert_before(pred_last_instr, &mov->instr);
            else
               nir_instr_insert_after_block(src->pred, &mov->instr);

            nir_phi_src *new_src = ralloc(new_phi, nir_phi_src);
            new_src->pred = src->pred;
            new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);

            exec_list_push_tail(&new_phi->srcs, &new_src->node);
         }

         nir_instr_insert_before(&phi->instr, &new_phi->instr);
      }

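      /* The vecN is not a phi, so it must come after all of the block's
       * phi nodes; insert it right after the last one.
       */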
      nir_instr_insert_after(&last_phi->instr, &vec->instr);

      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
                               nir_src_for_ssa(&vec->dest.dest.ssa),
                               state->mem_ctx);

      ralloc_steal(state->dead_ctx, phi);
      nir_instr_remove(&phi->instr);

      /* We're using the safe iterator and inserting all the newly
       * scalarized phi nodes before their non-scalarized version so that's
       * ok. However, we are also inserting vec operations after the last
       * phi node so, once we get here, we can't trust even the safe
       * iterator to stop properly. We have to break manually.
       */
      if (instr == &last_phi->instr)
         break;
   }

   return true;
}
||
257 | |||
258 | static void |
||
259 | lower_phis_to_scalar_impl(nir_function_impl *impl) |
||
260 | { |
||
261 | struct lower_phis_to_scalar_state state; |
||
262 | |||
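   /* New instructions are allocated out of the shader itself (the ralloc
    * parent of the impl), while the phi table and any dead phis live in
    * dead_ctx so they can all be freed at once when the pass finishes.
    */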
   state.mem_ctx = ralloc_parent(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.phi_table = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);

   nir_foreach_block(impl, lower_phis_to_scalar_block, &state);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   ralloc_free(state.dead_ctx);
}

/** A pass that lowers vector phi nodes to scalar
 *
 * This pass loops through the blocks and looks for vector phi nodes it
 * can lower to scalar phi nodes. Not all phi nodes are lowered. For
 * instance, if one of the sources is a non-scalarizable vector, then we
 * don't bother lowering because that would generate hard-to-coalesce movs.
 */
void
nir_lower_phis_to_scalar(nir_shader *shader)
{
   nir_foreach_overload(shader, overload) {
      if (overload->impl)
         lower_phis_to_scalar_impl(overload->impl);
   }
}
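
/*
 * Typical usage (illustrative; the surrounding driver code is hypothetical):
 * run the pass once over the shader, then follow it with copy propagation
 * to clean up the movs and vecN operations it creates, e.g.
 *
 *    nir_lower_phis_to_scalar(shader);
 *    nir_copy_prop(shader);
 */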