/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir.h"

/*
 * Implements Global Code Motion.  A description of GCM can be found in
 * "Global Code Motion; Global Value Numbering" by Cliff Click.
 * Unfortunately, the algorithm presented in the paper is broken in a
 * number of ways.  The algorithm used here differs substantially from the
 * one in the paper but it is, in my opinion, much easier to read and its
 * correctness is easier to verify.
 */
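
/* As an illustrative sketch (not part of the pass itself), GCM hoists
 * loop-invariant code out of loops and sinks computations toward their
 * uses.  In NIR-like pseudocode:
 *
 *    loop {
 *       ssa_1 = fmul ssa_a, ssa_b   // both operands defined before the loop
 *       ssa_2 = fadd ssa_1, ssa_i   // ssa_i changes every iteration
 *       ...
 *    }
 *
 * becomes, roughly:
 *
 *    ssa_1 = fmul ssa_a, ssa_b      // hoisted: legal in a shallower block
 *    loop {
 *       ssa_2 = fadd ssa_1, ssa_i   // stays: depends on a loop-varying value
 *       ...
 *    }
 */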

struct gcm_block_info {
   /* Number of loops this block is inside */
   unsigned loop_depth;

   /* The last instruction inserted into this block.  This is used as we
    * traverse the instructions and insert them back into the program to
    * put them in the right order.
    */
   nir_instr *last_instr;
};

/* Flags used in the instr->pass_flags field for various instruction states */
enum {
   GCM_INSTR_PINNED =          (1 << 0),
   GCM_INSTR_SCHEDULED_EARLY = (1 << 1),
   GCM_INSTR_SCHEDULED_LATE =  (1 << 2),
   GCM_INSTR_PLACED =          (1 << 3),
};

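/* For a movable instruction, pass_flags progresses monotonically through
 * the pass (an illustrative summary, not enforced by any assert here):
 *
 *    0
 *    -> GCM_INSTR_SCHEDULED_EARLY
 *    -> GCM_INSTR_SCHEDULED_EARLY | GCM_INSTR_SCHEDULED_LATE
 *    -> GCM_INSTR_SCHEDULED_EARLY | GCM_INSTR_SCHEDULED_LATE |
 *       GCM_INSTR_PLACED
 *
 * Pinned instructions start at GCM_INSTR_PINNED instead and pick up the
 * other bits only when the recursive walks reach them through their
 * sources or uses.
 */
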
struct gcm_state {
   nir_function_impl *impl;
   nir_instr *instr;

   /* The list of non-pinned instructions.  As we do the late scheduling,
    * we pull non-pinned instructions out of their blocks and place them in
    * this list.  This saves us from having linked-list problems when we go
    * to put instructions back in their blocks.
    */
   struct exec_list instrs;

   struct gcm_block_info *blocks;
};

/* Recursively walks the CFG and builds the block_info structure */
static void
gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
                     unsigned loop_depth)
{
   foreach_list_typed(nir_cf_node, node, node, cf_list) {
      switch (node->type) {
      case nir_cf_node_block: {
         nir_block *block = nir_cf_node_as_block(node);
         state->blocks[block->index].loop_depth = loop_depth;
         break;
      }
      case nir_cf_node_if: {
         nir_if *if_stmt = nir_cf_node_as_if(node);
         gcm_build_block_info(&if_stmt->then_list, state, loop_depth);
         gcm_build_block_info(&if_stmt->else_list, state, loop_depth);
         break;
      }
      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(node);
         gcm_build_block_info(&loop->body, state, loop_depth + 1);
         break;
      }
      default:
         unreachable("Invalid CF node type");
      }
   }
}

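/* For illustration, gcm_build_block_info assigns depths like so:
 *
 *    block_0              // loop_depth 0
 *    loop {
 *       block_1           // loop_depth 1
 *       loop {
 *          block_2        // loop_depth 2
 *       }
 *       block_3           // loop_depth 1
 *    }
 *    block_4              // loop_depth 0
 */
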
/* Walks the instruction list and marks immovable instructions as pinned
 *
 * This function also serves to initialize the instr->pass_flags field.
 * After this is completed, all instructions' pass_flags fields will be set
 * to either GCM_INSTR_PINNED or 0.
 */
static bool
gcm_pin_instructions_block(nir_block *block, void *void_state)
{
   struct gcm_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      switch (instr->type) {
      case nir_instr_type_alu:
         switch (nir_instr_as_alu(instr)->op) {
         case nir_op_fddx:
         case nir_op_fddy:
         case nir_op_fddx_fine:
         case nir_op_fddy_fine:
         case nir_op_fddx_coarse:
         case nir_op_fddy_coarse:
            /* These can only go in uniform control flow; pin them for now */
            instr->pass_flags = GCM_INSTR_PINNED;
            break;

         default:
            instr->pass_flags = 0;
            break;
         }
         break;

      case nir_instr_type_tex:
         switch (nir_instr_as_tex(instr)->op) {
         case nir_texop_tex:
         case nir_texop_txb:
         case nir_texop_lod:
            /* These take implicit derivatives so they need to be pinned */
            instr->pass_flags = GCM_INSTR_PINNED;
            break;

         default:
            instr->pass_flags = 0;
            break;
         }
         break;

      case nir_instr_type_load_const:
         instr->pass_flags = 0;
         break;

      case nir_instr_type_intrinsic: {
         const nir_intrinsic_info *info =
            &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];

         if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
             (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
            instr->pass_flags = 0;
         } else {
            instr->pass_flags = GCM_INSTR_PINNED;
         }
         break;
      }

      case nir_instr_type_jump:
      case nir_instr_type_ssa_undef:
      case nir_instr_type_phi:
         instr->pass_flags = GCM_INSTR_PINNED;
         break;

      default:
         unreachable("Invalid instruction type in GCM");
      }

      if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
         /* If this is an unpinned instruction, go ahead and pull it out of
          * the program and put it on the instrs list.  This has a couple
          * of benefits.  First, it makes the scheduling algorithm more
          * efficient because we can avoid walking over basic blocks and
          * pinned instructions.  Second, it keeps us from causing linked
          * list confusion when we're trying to put everything in its
          * proper place at the end of the pass.
          *
          * Note that we don't use nir_instr_remove here because that also
          * cleans up uses and defs and we want to keep that information.
          */
         exec_node_remove(&instr->node);
         exec_list_push_tail(&state->instrs, &instr->node);
      }
   }

   return true;
}

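/* To make the pinning rules above concrete: nir_texop_tex and
 * nir_texop_txb are pinned because they take implicit derivatives, while
 * an explicit-LOD fetch such as nir_texop_txl hits the default case and
 * stays movable.  Likewise, a load intrinsic flagged both CAN_ELIMINATE
 * and CAN_REORDER is movable, but a store intrinsic (which has neither
 * flag) is pinned where it is.
 */
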
static void
gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state);

/** Updates an instruction's schedule for the given source
 *
 * This function is called iteratively as we walk the sources of an
 * instruction.  It ensures that the given source instruction has been
 * scheduled and then updates this instruction's block if the source
 * instruction is lower down the tree.
 */
static bool
gcm_schedule_early_src(nir_src *src, void *void_state)
{
   struct gcm_state *state = void_state;
   nir_instr *instr = state->instr;

   assert(src->is_ssa);

   gcm_schedule_early_instr(src->ssa->parent_instr, void_state);

   /* While the index isn't a proper dominance depth, it does have the
    * property that if A dominates B then A->index <= B->index.  Since we
    * know that this instruction must have been dominated by all of its
    * sources at some point (even if it's gone through value-numbering),
    * all of the sources must lie on the same branch of the dominance tree.
    * Therefore, we can just compare indices.
    */
   if (instr->block->index < src->ssa->parent_instr->block->index)
      instr->block = src->ssa->parent_instr->block;

   /* We need to restore the state instruction because it may have been
    * changed through the gcm_schedule_early_instr call above.  We may
    * still be iterating through sources, and future calls to
    * gcm_schedule_early_src for the same instruction will still need it.
    */
   state->instr = instr;

   return true;
}

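/* A quick concrete check of the index trick in gcm_schedule_early_src: if
 * the instruction is currently scheduled in the block with index 2 and
 * one of its sources was scheduled into the block with index 5 then,
 * under the same-branch invariant described above, block 2 dominates
 * block 5, so the instruction sinks down to block 5 where the source is
 * guaranteed to be available.
 */
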
/** Schedules an instruction early
 *
 * This function performs a recursive depth-first search starting at the
 * given instruction and proceeding through the sources to schedule
 * instructions as early as they can possibly go in the dominance tree.
 * The instructions are "scheduled" by updating their instr->block field.
 */
static void
gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
{
   if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY)
      return;

   instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY;

   /* Pinned instructions are already scheduled so we don't need to do
    * anything.  Also, bailing here keeps us from ever following the
    * sources of phi nodes which can be back-edges.
    */
   if (instr->pass_flags & GCM_INSTR_PINNED)
      return;

   /* Start with the instruction at the top.  As we iterate over the
    * sources, it will get moved down as needed.
    */
   instr->block = state->impl->start_block;
   state->instr = instr;

   nir_foreach_src(instr, gcm_schedule_early_src, state);
}

static void
gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state);

/** Schedules the instruction associated with the given SSA def late
 *
 * This function works by first walking all of the uses of the given SSA
 * definition, ensuring that they are scheduled, and then computing the LCA
 * (least common ancestor) of its uses.  It then schedules this instruction
 * as close to the LCA as possible while trying to stay out of loops.
 */
static bool
gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
{
   struct gcm_state *state = void_state;

   nir_block *lca = NULL;

   nir_foreach_use(def, use_src) {
      nir_instr *use_instr = use_src->parent_instr;

      gcm_schedule_late_instr(use_instr, state);

      /* Phi instructions are a bit special.  SSA definitions don't have to
       * dominate the sources of the phi nodes that use them; instead, they
       * have to dominate the predecessor block corresponding to the phi
       * source.  We handle this by looking through the sources, finding
       * any that are using this SSA def, and using those blocks instead
       * of the one the phi lives in.
       */
      if (use_instr->type == nir_instr_type_phi) {
         nir_phi_instr *phi = nir_instr_as_phi(use_instr);

         nir_foreach_phi_src(phi, phi_src) {
            if (phi_src->src.ssa == def)
               lca = nir_dominance_lca(lca, phi_src->pred);
         }
      } else {
         lca = nir_dominance_lca(lca, use_instr->block);
      }
   }

   nir_foreach_if_use(def, use_src) {
      nir_if *if_stmt = use_src->parent_if;

      /* For if statements, we consider the block to be the one immediately
       * preceding the if CF node.
       */
      nir_block *pred_block =
         nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));

      lca = nir_dominance_lca(lca, pred_block);
   }

   /* Some instructions may never be used.  We'll just leave them scheduled
    * early and let dead code clean them up.
    */
   if (lca == NULL)
      return true;

   /* We now have the LCA of all of the uses.  If our invariants hold,
    * this is dominated by the block that we chose when scheduling early.
    * We now walk up the dominance tree and pick the lowest block that is
    * as far outside loops as we can get.
    */
   nir_block *best = lca;
   while (lca != def->parent_instr->block) {
      assert(lca);
      if (state->blocks[lca->index].loop_depth <
          state->blocks[best->index].loop_depth)
         best = lca;
      lca = lca->imm_dom;
   }
   def->parent_instr->block = best;

   return true;
}

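/* To illustrate the final walk in gcm_schedule_late_def: suppose the LCA
 * of the uses sits at loop_depth 2, its immediate dominator at loop_depth
 * 1, and the early block (also at loop_depth 1) further up.  "best"
 * starts at the LCA, switches to the first depth-1 block the walk visits,
 * and never switches again because later blocks are not strictly
 * shallower.  The instruction therefore lands just outside the innermost
 * loop while still dominating all of its uses.
 */
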
/** Schedules an instruction late
 *
 * This function performs a depth-first search starting at the given
 * instruction and proceeding through its uses to schedule instructions as
 * late as they can reasonably go in the dominance tree.  The instructions
 * are "scheduled" by updating their instr->block field.
 *
 * The name of this function is actually a bit of a misnomer as it doesn't
 * schedule them "as late as possible" as the paper implies.  Instead, it
 * first finds the latest possible place it can schedule the instruction
 * and then possibly schedules it earlier than that.  The actual location
 * is as far down the tree as we can go while trying to stay out of loops.
 */
static void
gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state)
{
   if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE)
      return;

   instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE;

   /* Pinned instructions are already scheduled so we don't need to do
    * anything.  Also, bailing here keeps us from ever following phi nodes
    * which can be back-edges.
    */
   if (instr->pass_flags & GCM_INSTR_PINNED)
      return;

   nir_foreach_ssa_def(instr, gcm_schedule_late_def, state);
}

static void
gcm_place_instr(nir_instr *instr, struct gcm_state *state);

static bool
gcm_place_instr_def(nir_ssa_def *def, void *state)
{
   nir_foreach_use(def, use_src)
      gcm_place_instr(use_src->parent_instr, state);

   return false;
}

/** Places an instruction back into the program
 *
 * The earlier passes of GCM simply choose blocks for each instruction and
 * otherwise leave them alone.  This pass actually places the instructions
 * into their chosen blocks.
 *
 * To do so, we use a standard post-order depth-first search linearization
 * algorithm.  We walk over the uses of the given instruction and ensure
 * that they are placed and then place this instruction.  Because we are
 * working on multiple blocks at a time, we keep track of the last inserted
 * instruction per-block in the state structure's block_info array.  When
 * we insert an instruction in a block we insert it before the last
 * instruction inserted in that block rather than the last instruction
 * inserted globally.
 */
static void
gcm_place_instr(nir_instr *instr, struct gcm_state *state)
{
   if (instr->pass_flags & GCM_INSTR_PLACED)
      return;

   instr->pass_flags |= GCM_INSTR_PLACED;

   /* Phi nodes are our one source of back-edges.  Since right now we are
    * only doing scheduling within blocks, we don't need to worry about
    * them since they are always at the top.  Just skip them completely.
    */
   if (instr->type == nir_instr_type_phi) {
      assert(instr->pass_flags & GCM_INSTR_PINNED);
      return;
   }

   nir_foreach_ssa_def(instr, gcm_place_instr_def, state);

   if (instr->pass_flags & GCM_INSTR_PINNED) {
      /* Pinned instructions have an implicit dependence on the pinned
       * instructions that come after them in the block.  Since the pinned
       * instructions will naturally "chain" together, we only need to
       * explicitly visit one of them.
       */
      for (nir_instr *after = nir_instr_next(instr);
           after;
           after = nir_instr_next(after)) {
         if (after->pass_flags & GCM_INSTR_PINNED) {
            gcm_place_instr(after, state);
            break;
         }
      }
   }

   struct gcm_block_info *block_info = &state->blocks[instr->block->index];
   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
      exec_node_remove(&instr->node);

      if (block_info->last_instr) {
         exec_node_insert_node_before(&block_info->last_instr->node,
                                      &instr->node);
      } else {
         /* Schedule it at the end of the block */
         nir_instr *jump_instr = nir_block_last_instr(instr->block);
         if (jump_instr && jump_instr->type == nir_instr_type_jump) {
            exec_node_insert_node_before(&jump_instr->node, &instr->node);
         } else {
            exec_list_push_tail(&instr->block->instr_list, &instr->node);
         }
      }
   }

   block_info->last_instr = instr;
}

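/* An example of the back-to-front insertion in gcm_place_instr: if A and
 * B are both movable, belong in the same block, and B uses A, then
 * placing A first recurses to place B.  B, having no unplaced users, goes
 * at the end of the block (before the jump, if any) and becomes
 * last_instr; A is then inserted immediately before B.  Each block is
 * thus filled from the bottom up, which is why we insert before the
 * per-block last_instr rather than at the tail.
 */
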
static void
opt_gcm_impl(nir_function_impl *impl)
{
   struct gcm_state state;

   state.impl = impl;
   state.instr = NULL;
   exec_list_make_empty(&state.instrs);
   state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks);

   nir_metadata_require(impl, nir_metadata_block_index |
                              nir_metadata_dominance);

   gcm_build_block_info(&impl->body, &state, 0);
   nir_foreach_block(impl, gcm_pin_instructions_block, &state);

   foreach_list_typed(nir_instr, instr, node, &state.instrs)
      gcm_schedule_early_instr(instr, &state);

   foreach_list_typed(nir_instr, instr, node, &state.instrs)
      gcm_schedule_late_instr(instr, &state);

   while (!exec_list_is_empty(&state.instrs)) {
      nir_instr *instr = exec_node_data(nir_instr,
                                        state.instrs.tail_pred, node);
      gcm_place_instr(instr, &state);
   }

   ralloc_free(state.blocks);
}

void
nir_opt_gcm(nir_shader *shader)
{
   nir_foreach_overload(shader, overload) {
      if (overload->impl)
         opt_gcm_impl(overload->impl);
   }
}
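
/* Usage sketch (illustrative; the pass ordering and surrounding pipeline
 * are assumptions, not part of this file):
 *
 *    nir_shader *shader = ...;    // produced by the driver's front-end
 *    nir_opt_cse(shader);         // value-number so GCM sees merged defs
 *    nir_opt_gcm(shader);         // move instructions to their best blocks
 *    nir_opt_dce(shader);         // sweep unused, early-scheduled defs
 */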