Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright © 2014 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Jason Ekstrand (jason@jlekstrand.net) |
||
25 | * |
||
26 | */ |
||
27 | |||
28 | #include "nir.h" |
||
29 | #include "nir_vla.h" |
||
30 | |||
31 | /* |
||
32 | * This file implements an out-of-SSA pass as described in "Revisiting |
||
33 | * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by |
||
34 | * Boissinot et. al. |
||
35 | */ |
||
36 | |||
/* Pass-wide state threaded through every per-block callback. */
struct from_ssa_state {
   /* ralloc context of the shader (ralloc_parent(impl)); used for
    * allocations that must outlive this pass.
    */
   void *mem_ctx;
   /* Scratch context; everything on it is freed at the end of the pass. */
   void *dead_ctx;
   /* Maps nir_ssa_def * -> merge_node * (see get_merge_node). */
   struct hash_table *merge_node_table;
   /* Instruction currently being rewritten; set by
    * resolve_registers_block, read by rewrite_ssa_def.
    */
   nir_instr *instr;
   /* The function implementation being converted out of SSA. */
   nir_function_impl *impl;
};
||
44 | |||
45 | /* Returns true if a dominates b */ |
||
46 | static bool |
||
47 | ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b) |
||
48 | { |
||
49 | if (a->live_index == 0) { |
||
50 | /* SSA undefs always dominate */ |
||
51 | return true; |
||
52 | } else if (b->live_index < a->live_index) { |
||
53 | return false; |
||
54 | } else if (a->parent_instr->block == b->parent_instr->block) { |
||
55 | return a->live_index <= b->live_index; |
||
56 | } else { |
||
57 | return nir_block_dominates(a->parent_instr->block, |
||
58 | b->parent_instr->block); |
||
59 | } |
||
60 | } |
||
61 | |||
62 | |||
/* The following data structure, which I have named merge_set is a way of
 * representing a set of non-interfering registers.  This is based on the
 * concept of a "dominance forest" presented in "Fast Copy Coalescing and
 * Live-Range Identification" by Budimlic et. al. but the implementation
 * concept is taken from "Revisiting Out-of-SSA Translation for
 * Correctness, Code Quality, and Efficiency" by Boissinot et. al..
 *
 * Each SSA definition is associated with a merge_node and the association
 * is represented by a combination of a hash table and the "def" parameter
 * in the merge_node structure.  The merge_set stores a linked list of
 * merge_node's in dominance order of the ssa definitions.  (Since the
 * liveness analysis pass indexes the SSA values in dominance order for us,
 * this is an easy thing to keep up.)  It is assumed that no pair of the
 * nodes in a given set interfere.  Merging two sets or checking for
 * interference can be done in a single linear-time merge-sort walk of the
 * two lists of nodes.
 */
struct merge_set;

typedef struct {
   /* Link in merge_set::nodes; list is kept in dominance order. */
   struct exec_node node;
   /* The set this node currently belongs to (updated on merge). */
   struct merge_set *set;
   /* The SSA definition this node represents. */
   nir_ssa_def *def;
} merge_node;

typedef struct merge_set {
   /* merge_nodes sorted by def->live_index (dominance order). */
   struct exec_list nodes;
   /* Number of nodes in the set. */
   unsigned size;
   /* Register assigned to the whole set, or NULL if none yet
    * (see rewrite_ssa_def).
    */
   nir_register *reg;
} merge_set;
||
93 | |||
#if 0
/* Debug helper: prints a merge set as an indented dominance tree to fp,
 * one ssa def per line, indented by the number of entries currently on
 * the dominator stack.  Compiled out; kept around for debugging.
 */
static void
merge_set_dump(merge_set *set, FILE *fp)
{
   /* Stack of defs dominating the current node; list order guarantees a
    * parent is always seen before its children.
    */
   nir_ssa_def *dom[set->size];
   int dom_idx = -1;

   foreach_list_typed(merge_node, node, node, &set->nodes) {
      /* Pop defs that do not dominate this node. */
      while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def))
         dom_idx--;

      for (int i = 0; i <= dom_idx; i++)
         fprintf(fp, " ");

      if (node->def->name)
         fprintf(fp, "ssa_%d /* %s */\n", node->def->index, node->def->name);
      else
         fprintf(fp, "ssa_%d\n", node->def->index);

      dom[++dom_idx] = node->def;
   }
}
#endif
||
117 | |||
118 | static merge_node * |
||
119 | get_merge_node(nir_ssa_def *def, struct from_ssa_state *state) |
||
120 | { |
||
121 | struct hash_entry *entry = |
||
122 | _mesa_hash_table_search(state->merge_node_table, def); |
||
123 | if (entry) |
||
124 | return entry->data; |
||
125 | |||
126 | merge_set *set = ralloc(state->dead_ctx, merge_set); |
||
127 | exec_list_make_empty(&set->nodes); |
||
128 | set->size = 1; |
||
129 | set->reg = NULL; |
||
130 | |||
131 | merge_node *node = ralloc(state->dead_ctx, merge_node); |
||
132 | node->set = set; |
||
133 | node->def = def; |
||
134 | exec_list_push_head(&set->nodes, &node->node); |
||
135 | |||
136 | _mesa_hash_table_insert(state->merge_node_table, def, node); |
||
137 | |||
138 | return node; |
||
139 | } |
||
140 | |||
141 | static bool |
||
142 | merge_nodes_interfere(merge_node *a, merge_node *b) |
||
143 | { |
||
144 | return nir_ssa_defs_interfere(a->def, b->def); |
||
145 | } |
||
146 | |||
/* Merges b into a
 *
 * Performs a merge-sort-style walk of the two dominance-ordered node
 * lists, splicing every node of b into a's list at the position that
 * keeps the combined list sorted by live_index.  On return b's list is
 * empty, b->size is zero, and every node that was in b points at a.
 */
static merge_set *
merge_merge_sets(merge_set *a, merge_set *b)
{
   struct exec_node *an = exec_list_get_head(&a->nodes);
   struct exec_node *bn = exec_list_get_head(&b->nodes);
   while (!exec_node_is_tail_sentinel(bn)) {
      merge_node *a_node = exec_node_data(merge_node, an, node);
      merge_node *b_node = exec_node_data(merge_node, bn, node);

      /* b's head sorts before a's current node (or we're past the end of
       * a's list): move it into a's list right here.  Note a_node is only
       * dereferenced after the sentinel check short-circuits.
       */
      if (exec_node_is_tail_sentinel(an) ||
          a_node->def->live_index > b_node->def->live_index) {
         struct exec_node *next = bn->next;
         exec_node_remove(bn);
         exec_node_insert_node_before(an, bn);
         /* The moved node now belongs to a. */
         exec_node_data(merge_node, bn, node)->set = a;
         bn = next;
      } else {
         an = an->next;
      }
   }

   a->size += b->size;
   b->size = 0;

   return a;
}
||
174 | |||
/* Checks for any interference between two merge sets
 *
 * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
 * Translation for Correctness, Code Quality, and Efficiency" by
 * Boissinot et. al.
 *
 * Both lists are walked as one merged sequence in dominance order while a
 * stack of dominating nodes is maintained; each visited node only needs
 * an interference check against its nearest dominator on the stack.
 */
static bool
merge_sets_interfere(merge_set *a, merge_set *b)
{
   /* Dominator stack; at most every node of both sets can be on it. */
   NIR_VLA(merge_node *, dom, a->size + b->size);
   int dom_idx = -1;

   struct exec_node *an = exec_list_get_head(&a->nodes);
   struct exec_node *bn = exec_list_get_head(&b->nodes);
   while (!exec_node_is_tail_sentinel(an) ||
          !exec_node_is_tail_sentinel(bn)) {

      /* Take whichever list head has the smaller live_index so nodes are
       * visited in global dominance order (merge-sort step).
       */
      merge_node *current;
      if (exec_node_is_tail_sentinel(an)) {
         current = exec_node_data(merge_node, bn, node);
         bn = bn->next;
      } else if (exec_node_is_tail_sentinel(bn)) {
         current = exec_node_data(merge_node, an, node);
         an = an->next;
      } else {
         merge_node *a_node = exec_node_data(merge_node, an, node);
         merge_node *b_node = exec_node_data(merge_node, bn, node);

         if (a_node->def->live_index <= b_node->def->live_index) {
            current = a_node;
            an = an->next;
         } else {
            current = b_node;
            bn = bn->next;
         }
      }

      /* Pop stack entries that do not dominate the current node. */
      while (dom_idx >= 0 &&
             !ssa_def_dominates(dom[dom_idx]->def, current->def))
         dom_idx--;

      /* Checking against the nearest dominator suffices (Algorithm 2). */
      if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx]))
         return true;

      dom[++dom_idx] = current;
   }

   return false;
}
||
224 | |||
225 | static bool |
||
226 | add_parallel_copy_to_end_of_block(nir_block *block, void *void_state) |
||
227 | { |
||
228 | struct from_ssa_state *state = void_state; |
||
229 | |||
230 | bool need_end_copy = false; |
||
231 | if (block->successors[0]) { |
||
232 | nir_instr *instr = nir_block_first_instr(block->successors[0]); |
||
233 | if (instr && instr->type == nir_instr_type_phi) |
||
234 | need_end_copy = true; |
||
235 | } |
||
236 | |||
237 | if (block->successors[1]) { |
||
238 | nir_instr *instr = nir_block_first_instr(block->successors[1]); |
||
239 | if (instr && instr->type == nir_instr_type_phi) |
||
240 | need_end_copy = true; |
||
241 | } |
||
242 | |||
243 | if (need_end_copy) { |
||
244 | /* If one of our successors has at least one phi node, we need to |
||
245 | * create a parallel copy at the end of the block but before the jump |
||
246 | * (if there is one). |
||
247 | */ |
||
248 | nir_parallel_copy_instr *pcopy = |
||
249 | nir_parallel_copy_instr_create(state->dead_ctx); |
||
250 | |||
251 | nir_instr *last_instr = nir_block_last_instr(block); |
||
252 | if (last_instr && last_instr->type == nir_instr_type_jump) { |
||
253 | nir_instr_insert_before(last_instr, &pcopy->instr); |
||
254 | } else { |
||
255 | nir_instr_insert_after_block(block, &pcopy->instr); |
||
256 | } |
||
257 | } |
||
258 | |||
259 | return true; |
||
260 | } |
||
261 | |||
262 | static nir_parallel_copy_instr * |
||
263 | get_parallel_copy_at_end_of_block(nir_block *block) |
||
264 | { |
||
265 | nir_instr *last_instr = nir_block_last_instr(block); |
||
266 | if (last_instr == NULL) |
||
267 | return NULL; |
||
268 | |||
269 | /* The last instruction may be a jump in which case the parallel copy is |
||
270 | * right before it. |
||
271 | */ |
||
272 | if (last_instr->type == nir_instr_type_jump) |
||
273 | last_instr = nir_instr_prev(last_instr); |
||
274 | |||
275 | if (last_instr && last_instr->type == nir_instr_type_parallel_copy) |
||
276 | return nir_instr_as_parallel_copy(last_instr); |
||
277 | else |
||
278 | return NULL; |
||
279 | } |
||
280 | |||
/** Isolate phi nodes with parallel copies
 *
 * In order to solve the dependency problems with the sources and
 * destinations of phi nodes, we first isolate them by adding parallel
 * copies to the beginnings and ends of basic blocks.  For every block with
 * phi nodes, we add a parallel copy immediately following the last phi
 * node that copies the destinations of all of the phi nodes to new SSA
 * values.  We also add a parallel copy to the end of every block that has
 * a successor with phi nodes that, for each phi node in each successor,
 * copies the corresponding source of the phi node and adjusts the phi to
 * use the destination of the parallel copy.
 *
 * In SSA form, each value has exactly one definition.  What this does is
 * ensure that each value used in a phi also has exactly one use.  The
 * destinations of phis are only used by the parallel copy immediately
 * following the phi nodes.  Thanks to the parallel copy at the end of
 * the predecessor block, the sources of phi nodes are the only use of
 * that value.  This allows us to immediately assign all the sources and
 * destinations of any given phi node to the same register without worrying
 * about interference at all.  We do coalescing to get rid of the parallel
 * copies where possible.
 *
 * Before this pass can be run, we have to iterate over the blocks with
 * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
 * the ends of blocks exist.  We can create the ones at the beginnings as
 * we go, but the ones at the ends of blocks need to be created ahead of
 * time because of potential back-edges in the CFG.
 */
static bool
isolate_phi_nodes_block(nir_block *block, void *void_state)
{
   struct from_ssa_state *state = void_state;

   /* Find the last phi in the block (phis are always at the top). */
   nir_instr *last_phi_instr = NULL;
   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi)
         break;

      last_phi_instr = instr;
   }

   /* If we don't have any phi's, then there's nothing for us to do. */
   if (last_phi_instr == NULL)
      return true;

   /* If we have phi nodes, we need to create a parallel copy at the
    * start of this block but after the phi nodes.
    */
   nir_parallel_copy_instr *block_pcopy =
      nir_parallel_copy_instr_create(state->dead_ctx);
   nir_instr_insert_after(last_phi_instr, &block_pcopy->instr);

   nir_foreach_instr(block, instr) {
      /* Phi nodes only ever come at the start of a block */
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      assert(phi->dest.is_ssa);
      nir_foreach_phi_src(phi, src) {
         /* For each source, add an entry to the predecessor's
          * end-of-block copy and point the phi source at that entry's
          * destination instead of the original value.
          */
         nir_parallel_copy_instr *pcopy =
            get_parallel_copy_at_end_of_block(src->pred);
         assert(pcopy);

         nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
                                                  nir_parallel_copy_entry);
         nir_ssa_dest_init(&pcopy->instr, &entry->dest,
                           phi->dest.ssa.num_components, src->src.ssa->name);
         exec_list_push_tail(&pcopy->entries, &entry->node);

         assert(src->src.is_ssa);
         nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src);

         nir_instr_rewrite_src(&phi->instr, &src->src,
                               nir_src_for_ssa(&entry->dest.ssa));
      }

      /* Route the phi destination through the start-of-block copy: all
       * former users of the phi now read the copy's destination, and the
       * copy's source is the phi itself.
       */
      nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
                                               nir_parallel_copy_entry);
      nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
                        phi->dest.ssa.num_components, phi->dest.ssa.name);
      exec_list_push_tail(&block_pcopy->entries, &entry->node);

      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
                               nir_src_for_ssa(&entry->dest.ssa),
                               state->mem_ctx);

      nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
                            nir_src_for_ssa(&phi->dest.ssa));
   }

   return true;
}
||
375 | |||
376 | static bool |
||
377 | coalesce_phi_nodes_block(nir_block *block, void *void_state) |
||
378 | { |
||
379 | struct from_ssa_state *state = void_state; |
||
380 | |||
381 | nir_foreach_instr(block, instr) { |
||
382 | /* Phi nodes only ever come at the start of a block */ |
||
383 | if (instr->type != nir_instr_type_phi) |
||
384 | break; |
||
385 | |||
386 | nir_phi_instr *phi = nir_instr_as_phi(instr); |
||
387 | |||
388 | assert(phi->dest.is_ssa); |
||
389 | merge_node *dest_node = get_merge_node(&phi->dest.ssa, state); |
||
390 | |||
391 | nir_foreach_phi_src(phi, src) { |
||
392 | assert(src->src.is_ssa); |
||
393 | merge_node *src_node = get_merge_node(src->src.ssa, state); |
||
394 | if (src_node->set != dest_node->set) |
||
395 | merge_merge_sets(dest_node->set, src_node->set); |
||
396 | } |
||
397 | } |
||
398 | |||
399 | return true; |
||
400 | } |
||
401 | |||
402 | static void |
||
403 | aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, |
||
404 | struct from_ssa_state *state) |
||
405 | { |
||
406 | nir_foreach_parallel_copy_entry(pcopy, entry) { |
||
407 | if (!entry->src.is_ssa) |
||
408 | continue; |
||
409 | |||
410 | /* Since load_const instructions are SSA only, we can't replace their |
||
411 | * destinations with registers and, therefore, can't coalesce them. |
||
412 | */ |
||
413 | if (entry->src.ssa->parent_instr->type == nir_instr_type_load_const) |
||
414 | continue; |
||
415 | |||
416 | /* Don't try and coalesce these */ |
||
417 | if (entry->dest.ssa.num_components != entry->src.ssa->num_components) |
||
418 | continue; |
||
419 | |||
420 | merge_node *src_node = get_merge_node(entry->src.ssa, state); |
||
421 | merge_node *dest_node = get_merge_node(&entry->dest.ssa, state); |
||
422 | |||
423 | if (src_node->set == dest_node->set) |
||
424 | continue; |
||
425 | |||
426 | if (!merge_sets_interfere(src_node->set, dest_node->set)) |
||
427 | merge_merge_sets(src_node->set, dest_node->set); |
||
428 | } |
||
429 | } |
||
430 | |||
431 | static bool |
||
432 | aggressive_coalesce_block(nir_block *block, void *void_state) |
||
433 | { |
||
434 | struct from_ssa_state *state = void_state; |
||
435 | |||
436 | nir_parallel_copy_instr *start_pcopy = NULL; |
||
437 | nir_foreach_instr(block, instr) { |
||
438 | /* Phi nodes only ever come at the start of a block */ |
||
439 | if (instr->type != nir_instr_type_phi) { |
||
440 | if (instr->type != nir_instr_type_parallel_copy) |
||
441 | break; /* The parallel copy must be right after the phis */ |
||
442 | |||
443 | start_pcopy = nir_instr_as_parallel_copy(instr); |
||
444 | |||
445 | aggressive_coalesce_parallel_copy(start_pcopy, state); |
||
446 | |||
447 | break; |
||
448 | } |
||
449 | } |
||
450 | |||
451 | nir_parallel_copy_instr *end_pcopy = |
||
452 | get_parallel_copy_at_end_of_block(block); |
||
453 | |||
454 | if (end_pcopy && end_pcopy != start_pcopy) |
||
455 | aggressive_coalesce_parallel_copy(end_pcopy, state); |
||
456 | |||
457 | return true; |
||
458 | } |
||
459 | |||
/* Rewrites a single SSA def (and the nir_dest wrapping it) to a register.
 *
 * Invoked via nir_foreach_ssa_def for every def of state->instr.  Defs in
 * a phi web use the web's shared register; load_const defs are left in
 * SSA form; everything else gets a fresh local register.
 */
static bool
rewrite_ssa_def(nir_ssa_def *def, void *void_state)
{
   struct from_ssa_state *state = void_state;
   nir_register *reg;

   struct hash_entry *entry =
      _mesa_hash_table_search(state->merge_node_table, def);
   if (entry) {
      /* In this case, we're part of a phi web.  Use the web's register. */
      merge_node *node = (merge_node *)entry->data;

      /* If it doesn't have a register yet, create one.  Note that all of
       * the things in the merge set should be the same so it doesn't
       * matter which node's definition we use.
       */
      if (node->set->reg == NULL) {
         node->set->reg = nir_local_reg_create(state->impl);
         node->set->reg->name = def->name;
         node->set->reg->num_components = def->num_components;
         node->set->reg->num_array_elems = 0;
      }

      reg = node->set->reg;
   } else {
      /* We leave load_const SSA values alone.  They act as immediates to
       * the backend.  If it got coalesced into a phi, that's ok.
       */
      if (def->parent_instr->type == nir_instr_type_load_const)
         return true;

      reg = nir_local_reg_create(state->impl);
      reg->name = def->name;
      reg->num_components = def->num_components;
      reg->num_array_elems = 0;

      /* This register comes from an SSA definition that is defined and not
       * part of a phi-web.  Therefore, we know it has a single unique
       * definition that dominates all of its uses; we can copy the
       * parent_instr from the SSA def safely.
       */
      if (def->parent_instr->type != nir_instr_type_ssa_undef)
         reg->parent_instr = def->parent_instr;
   }

   /* Point every use of the def at the register instead. */
   nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx);
   assert(list_empty(&def->uses) && list_empty(&def->if_uses));

   /* Undefs have no real destination to rewrite; the instruction itself
    * is removed by resolve_registers_block.
    */
   if (def->parent_instr->type == nir_instr_type_ssa_undef)
      return true;

   assert(def->parent_instr->type != nir_instr_type_load_const);

   /* At this point we know a priori that this SSA def is part of a
    * nir_dest.  We can use exec_node_data to get the dest pointer.
    */
   nir_dest *dest = exec_node_data(nir_dest, def, ssa);

   *dest = nir_dest_for_reg(reg);
   dest->reg.parent_instr = state->instr;
   list_addtail(&dest->reg.def_link, &reg->defs);

   return true;
}
||
524 | |||
525 | /* Resolves ssa definitions to registers. While we're at it, we also |
||
526 | * remove phi nodes and ssa_undef instructions |
||
527 | */ |
||
528 | static bool |
||
529 | resolve_registers_block(nir_block *block, void *void_state) |
||
530 | { |
||
531 | struct from_ssa_state *state = void_state; |
||
532 | |||
533 | nir_foreach_instr_safe(block, instr) { |
||
534 | state->instr = instr; |
||
535 | nir_foreach_ssa_def(instr, rewrite_ssa_def, state); |
||
536 | |||
537 | if (instr->type == nir_instr_type_ssa_undef || |
||
538 | instr->type == nir_instr_type_phi) { |
||
539 | nir_instr_remove(instr); |
||
540 | ralloc_steal(state->dead_ctx, instr); |
||
541 | } |
||
542 | } |
||
543 | state->instr = NULL; |
||
544 | |||
545 | return true; |
||
546 | } |
||
547 | |||
548 | static void |
||
549 | emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src, |
||
550 | void *mem_ctx) |
||
551 | { |
||
552 | assert(!dest_src.is_ssa && |
||
553 | dest_src.reg.indirect == NULL && |
||
554 | dest_src.reg.base_offset == 0); |
||
555 | |||
556 | if (src.is_ssa) |
||
557 | assert(src.ssa->num_components >= dest_src.reg.reg->num_components); |
||
558 | else |
||
559 | assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components); |
||
560 | |||
561 | nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov); |
||
562 | nir_src_copy(&mov->src[0].src, &src, mem_ctx); |
||
563 | mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg); |
||
564 | mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1; |
||
565 | |||
566 | nir_instr_insert_before(&pcopy->instr, &mov->instr); |
||
567 | } |
||
568 | |||
/* Resolves a single parallel copy operation into a sequence of mov's
 *
 * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
 * Correctness, Code Quality, and Efficiency" by Boissinot et. al..
 * However, I never got the algorithm to work as written, so this version
 * is slightly modified.
 *
 * The algorithm works by playing this little shell game with the values.
 * We start by recording where every source value is and which source value
 * each destination value should receive.  We then grab any copy whose
 * destination is "empty", i.e. not used as a source, and do the following:
 *  - Find where its source value currently lives
 *  - Emit the move instruction
 *  - Set the location of the source value to the destination
 *  - Mark the location containing the source value
 *  - Mark the destination as no longer needing to be copied
 *
 * When we run out of "empty" destinations, we have a cycle and so we
 * create a temporary register, copy to that register, and mark the value
 * we copied as living in that temporary.  Now, the cycle is broken, so we
 * can continue with the above steps.
 */
static void
resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
                      struct from_ssa_state *state)
{
   /* Count the entries that actually move data anywhere. */
   unsigned num_copies = 0;
   nir_foreach_parallel_copy_entry(pcopy, entry) {
      /* Sources may be SSA */
      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
         continue;

      num_copies++;
   }

   if (num_copies == 0) {
      /* Hooray, we don't need any copies! */
      nir_instr_remove(&pcopy->instr);
      return;
   }

   /* The register/source corresponding to the given index */
   NIR_VLA_ZERO(nir_src, values, num_copies * 2);

   /* The current location of a given piece of data.  We will use -1 for "null" */
   NIR_VLA_FILL(int, loc, num_copies * 2, -1);

   /* The piece of data that the given piece of data is to be copied from.  We will use -1 for "null" */
   NIR_VLA_FILL(int, pred, num_copies * 2, -1);

   /* The destinations we have yet to properly fill */
   NIR_VLA(int, to_do, num_copies * 2);
   int to_do_idx = -1;

   /* Now we set everything up:
    *  - All values get assigned a temporary index
    *  - Current locations are set from sources
    *  - Predecessors are recorded from sources and destinations
    */
   int num_vals = 0;
   nir_foreach_parallel_copy_entry(pcopy, entry) {
      /* Sources may be SSA */
      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
         continue;

      /* Find or allocate the index of this entry's source value. */
      int src_idx = -1;
      for (int i = 0; i < num_vals; ++i) {
         if (nir_srcs_equal(values[i], entry->src))
            src_idx = i;
      }
      if (src_idx < 0) {
         src_idx = num_vals++;
         values[src_idx] = entry->src;
      }

      nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg);

      /* Find or allocate the index of this entry's destination. */
      int dest_idx = -1;
      for (int i = 0; i < num_vals; ++i) {
         if (nir_srcs_equal(values[i], dest_src)) {
            /* Each destination of a parallel copy instruction should be
             * unique.  A destination may get used as a source, so we still
             * have to walk the list.  However, the predecessor should not,
             * at this point, be set yet, so we should have -1 here.
             */
            assert(pred[i] == -1);
            dest_idx = i;
         }
      }
      if (dest_idx < 0) {
         dest_idx = num_vals++;
         values[dest_idx] = dest_src;
      }

      /* A source initially lives in its own slot. */
      loc[src_idx] = src_idx;
      pred[dest_idx] = src_idx;

      to_do[++to_do_idx] = dest_idx;
   }

   /* Currently empty destinations we can go ahead and fill */
   NIR_VLA(int, ready, num_copies * 2);
   int ready_idx = -1;

   /* Mark the ones that are ready for copying.  We know an index is a
    * destination if it has a predecessor and it's ready for copying if
    * it's not marked as containing data.
    */
   for (int i = 0; i < num_vals; i++) {
      if (pred[i] != -1 && loc[i] == -1)
         ready[++ready_idx] = i;
   }

   while (to_do_idx >= 0) {
      /* Drain all the trivial (acyclic) copies first. */
      while (ready_idx >= 0) {
         int b = ready[ready_idx--];
         int a = pred[b];
         emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx);

         /* If any other copies want a, they can find it at b */
         loc[a] = b;

         /* b has been filled, mark it as not needing to be copied */
         pred[b] = -1;

         /* If a needs to be filled, it's ready for copying now */
         if (pred[a] != -1)
            ready[++ready_idx] = a;
      }
      int b = to_do[to_do_idx--];
      if (pred[b] == -1)
         continue;

      /* If we got here, then we don't have any more trivial copies that we
       * can do.  We have to break a cycle, so we create a new temporary
       * register for that purpose.  Normally, if going out of SSA after
       * register allocation, you would want to avoid creating temporary
       * registers.  However, we are going out of SSA before register
       * allocation, so we would rather not create extra register
       * dependencies for the backend to deal with.  If it wants, the
       * backend can coalesce the (possibly multiple) temporaries.
       */
      assert(num_vals < num_copies * 2);
      nir_register *reg = nir_local_reg_create(state->impl);
      reg->name = "copy_temp";
      reg->num_array_elems = 0;
      if (values[b].is_ssa)
         reg->num_components = values[b].ssa->num_components;
      else
         reg->num_components = values[b].reg.reg->num_components;
      values[num_vals].is_ssa = false;
      values[num_vals].reg.reg = reg;

      /* Stash b's current value in the temporary; b is now empty and can
       * be filled on the next pass over the ready stack.
       */
      emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx);
      loc[b] = num_vals;
      ready[++ready_idx] = b;
      num_vals++;
   }

   nir_instr_remove(&pcopy->instr);
}
||
730 | |||
731 | /* Resolves the parallel copies in a block. Each block can have at most |
||
732 | * two: One at the beginning, right after all the phi noces, and one at |
||
733 | * the end (or right before the final jump if it exists). |
||
734 | */ |
||
735 | static bool |
||
736 | resolve_parallel_copies_block(nir_block *block, void *void_state) |
||
737 | { |
||
738 | struct from_ssa_state *state = void_state; |
||
739 | |||
740 | /* At this point, we have removed all of the phi nodes. If a parallel |
||
741 | * copy existed right after the phi nodes in this block, it is now the |
||
742 | * first instruction. |
||
743 | */ |
||
744 | nir_instr *first_instr = nir_block_first_instr(block); |
||
745 | if (first_instr == NULL) |
||
746 | return true; /* Empty, nothing to do. */ |
||
747 | |||
748 | if (first_instr->type == nir_instr_type_parallel_copy) { |
||
749 | nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr); |
||
750 | |||
751 | resolve_parallel_copy(pcopy, state); |
||
752 | } |
||
753 | |||
754 | /* It's possible that the above code already cleaned up the end parallel |
||
755 | * copy. However, doing so removed it form the instructions list so we |
||
756 | * won't find it here. Therefore, it's safe to go ahead and just look |
||
757 | * for one and clean it up if it exists. |
||
758 | */ |
||
759 | nir_parallel_copy_instr *end_pcopy = |
||
760 | get_parallel_copy_at_end_of_block(block); |
||
761 | if (end_pcopy) |
||
762 | resolve_parallel_copy(end_pcopy, state); |
||
763 | |||
764 | return true; |
||
765 | } |
||
766 | |||
/* Converts one function implementation out of SSA form.
 *
 * Pipeline: isolate phis with parallel copies, coalesce phi webs and
 * copies, rewrite SSA defs to registers, then lower the remaining
 * parallel copies to movs.
 */
static void
nir_convert_from_ssa_impl(nir_function_impl *impl)
{
   struct from_ssa_state state;

   /* Long-lived allocations go on the shader's context; scratch data goes
    * on dead_ctx, which is freed at the bottom of this function.
    */
   state.mem_ctx = ralloc_parent(impl);
   state.dead_ctx = ralloc_context(NULL);
   state.impl = impl;
   state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);

   /* End-of-block copies must exist before isolate_phi_nodes_block runs;
    * see the comment on that function.
    */
   nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
   nir_foreach_block(impl, isolate_phi_nodes_block, &state);

   /* Mark metadata as dirty before we ask for liveness analysis */
   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   /* Coalescing below relies on live_index ordering and dominance. */
   nir_metadata_require(impl, nir_metadata_live_variables |
                              nir_metadata_dominance);

   nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
   nir_foreach_block(impl, aggressive_coalesce_block, &state);

   /* Replace SSA defs with registers and drop phis/undefs... */
   nir_foreach_block(impl, resolve_registers_block, &state);

   /* ...then lower each surviving parallel copy to a sequence of movs. */
   nir_foreach_block(impl, resolve_parallel_copies_block, &state);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   /* Clean up dead instructions and the hash tables */
   _mesa_hash_table_destroy(state.merge_node_table, NULL);
   ralloc_free(state.dead_ctx);
}
||
802 | |||
803 | void |
||
804 | nir_convert_from_ssa(nir_shader *shader) |
||
805 | { |
||
806 | nir_foreach_overload(shader, overload) { |
||
807 | if (overload->impl) |
||
808 | nir_convert_from_ssa_impl(overload->impl); |
||
809 | } |
||
810 | }>>>>>>><>=>=>=>> |