/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir.h"

/*
 * Implements Global Code Motion.  A description of GCM can be found in
 * "Global Code Motion; Global Value Numbering" by Cliff Click.
 * Unfortunately, the algorithm presented in the paper is broken in a
 * number of ways.  The algorithm used here differs substantially from the
 * one in the paper but it is, in my opinion, much easier to read and
 * verify correctness.
 */
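
/* Roughly, the pass below proceeds in four phases: gcm_pin_instructions_block
 * pins the instructions that cannot move and pulls the movable ones onto a
 * side list; gcm_schedule_early_instr gives each movable instruction the
 * earliest block its sources allow; gcm_schedule_late_instr then sinks it
 * toward the LCA of its uses while trying to stay out of loops; and
 * gcm_place_instr finally re-inserts everything into its chosen block in a
 * valid order.
 */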

struct gcm_block_info {
   /* Number of loops this block is inside */
   unsigned loop_depth;

   /* The last instruction inserted into this block.  This is used as we
    * traverse the instructions and insert them back into the program to
    * put them in the right order.
    */
   nir_instr *last_instr;
};

/* Flags used in the instr->pass_flags field for various instruction states */
enum {
   GCM_INSTR_PINNED =            (1 << 0),
   GCM_INSTR_SCHEDULED_EARLY =   (1 << 1),
   GCM_INSTR_SCHEDULED_LATE =    (1 << 2),
   GCM_INSTR_PLACED =            (1 << 3),
};

struct gcm_state {
   nir_function_impl *impl;
   nir_instr *instr;

   /* The list of non-pinned instructions.  As we pin instructions, we pull
    * the non-pinned ones out of their blocks and place them in this list.
    * This saves us from having linked-list problems when we go to put
    * instructions back in their blocks.
    */
   struct exec_list instrs;

   struct gcm_block_info *blocks;
};

/* Recursively walks the CFG and builds the block_info structure */
static void
gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
                     unsigned loop_depth)
{
   foreach_list_typed(nir_cf_node, node, node, cf_list) {
      switch (node->type) {
      case nir_cf_node_block: {
         nir_block *block = nir_cf_node_as_block(node);
         state->blocks[block->index].loop_depth = loop_depth;
         break;
      }
      case nir_cf_node_if: {
         nir_if *if_stmt = nir_cf_node_as_if(node);
         gcm_build_block_info(&if_stmt->then_list, state, loop_depth);
         gcm_build_block_info(&if_stmt->else_list, state, loop_depth);
         break;
      }
      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(node);
         gcm_build_block_info(&loop->body, state, loop_depth + 1);
         break;
      }
      default:
         unreachable("Invalid CF node type");
      }
   }
}
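
/* As an example, a block nested inside two loops ends up with loop_depth 2.
 * The late-scheduling phase below compares these depths and picks, among the
 * blocks between the latest and earliest legal positions, the lowest one
 * with the smallest loop depth, which is what lets loop-invariant code end
 * up outside of loops.
 */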

/* Walks the instruction list and marks immovable instructions as pinned
 *
 * This function also serves to initialize the instr->pass_flags field.
 * After this is completed, all instructions' pass_flags fields will be set
 * to either GCM_INSTR_PINNED or 0.
 */
static bool
gcm_pin_instructions_block(nir_block *block, void *void_state)
{
   struct gcm_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      switch (instr->type) {
      case nir_instr_type_alu:
         switch (nir_instr_as_alu(instr)->op) {
         case nir_op_fddx:
         case nir_op_fddy:
         case nir_op_fddx_fine:
         case nir_op_fddy_fine:
         case nir_op_fddx_coarse:
         case nir_op_fddy_coarse:
            /* These can only go in uniform control flow; pin them for now */
            instr->pass_flags = GCM_INSTR_PINNED;
            break;

         default:
            instr->pass_flags = 0;
            break;
         }
         break;

      case nir_instr_type_tex:
         switch (nir_instr_as_tex(instr)->op) {
         case nir_texop_tex:
         case nir_texop_txb:
         case nir_texop_lod:
            /* These take implicit derivatives so they need to be pinned */
            instr->pass_flags = GCM_INSTR_PINNED;
            break;

         default:
            instr->pass_flags = 0;
            break;
         }
         break;

      case nir_instr_type_load_const:
         instr->pass_flags = 0;
         break;

      case nir_instr_type_intrinsic: {
         const nir_intrinsic_info *info =
            &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];

         if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
             (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
            instr->pass_flags = 0;
         } else {
            instr->pass_flags = GCM_INSTR_PINNED;
         }
         break;
      }

      case nir_instr_type_jump:
      case nir_instr_type_ssa_undef:
      case nir_instr_type_phi:
         instr->pass_flags = GCM_INSTR_PINNED;
         break;

      default:
         unreachable("Invalid instruction type in GCM");
      }

      if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
         /* If this is an unpinned instruction, go ahead and pull it out of
          * the program and put it on the instrs list.  This has a couple
          * of benefits.  First, it makes the scheduling algorithm more
          * efficient because we can avoid walking over basic blocks and
          * pinned instructions.  Second, it keeps us from causing linked
          * list confusion when we're trying to put everything in its
          * proper place at the end of the pass.
          *
          * Note that we don't use nir_instr_remove here because that also
          * cleans up uses and defs and we want to keep that information.
          */
         exec_node_remove(&instr->node);
         exec_list_push_tail(&state->instrs, &instr->node);
      }
   }

   return true;
}

static void
gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state);

/** Update an instruction's schedule for the given source
 *
 * This function is called iteratively as we walk the sources of an
 * instruction.  It ensures that the given source instruction has been
 * scheduled and then updates this instruction's block if the source
 * instruction is lower down the tree.
 */
204
static bool
205
gcm_schedule_early_src(nir_src *src, void *void_state)
206
{
207
   struct gcm_state *state = void_state;
208
   nir_instr *instr = state->instr;
209
 
210
   assert(src->is_ssa);
211
 
212
   gcm_schedule_early_instr(src->ssa->parent_instr, void_state);
213
 
214
   /* While the index isn't a proper dominance depth, it does have the
215
    * property that if A dominates B then A->index <= B->index.  Since we
216
    * know that this instruction must have been dominated by all of its
217
    * sources at some point (even if it's gone through value-numbering),
218
    * all of the sources must lie on the same branch of the dominance tree.
219
    * Therefore, we can just go ahead and just compare indices.
220
    */
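   /* As an illustration with made-up indices: if the current guess for
    * instr->block has index 2 and this source's instruction lives in the
    * block with index 5, the source sits lower on that branch of the
    * dominance tree, so instr->block is moved down to the source's block.
    */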
   if (instr->block->index < src->ssa->parent_instr->block->index)
      instr->block = src->ssa->parent_instr->block;

   /* We need to restore the state instruction because it may have been
    * changed through the gcm_schedule_early_instr call above.  We may
    * still be iterating through sources, and future calls to
    * gcm_schedule_early_src for the same instruction will still need it.
    */
   state->instr = instr;

   return true;
}

/** Schedules an instruction early
 *
 * This function performs a recursive depth-first search starting at the
 * given instruction and proceeding through the sources to schedule
 * instructions as early as they can possibly go in the dominance tree.
 * The instructions are "scheduled" by updating their instr->block field.
 */
static void
gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
{
   if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY)
      return;

   instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY;

   /* Pinned instructions are already scheduled so we don't need to do
    * anything.  Also, bailing here keeps us from ever following the
    * sources of phi nodes which can be back-edges.
    */
   if (instr->pass_flags & GCM_INSTR_PINNED)
      return;

   /* Start with the instruction at the top.  As we iterate over the
    * sources, it will get moved down as needed.
    */
   instr->block = state->impl->start_block;
   state->instr = instr;

   nir_foreach_src(instr, gcm_schedule_early_src, state);
}

static void
gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state);

/** Schedules the instruction associated with the given SSA def late
 *
 * This function works by first walking all of the uses of the given SSA
 * definition, ensuring that they are scheduled, and then computing the LCA
 * (least common ancestor) of its uses.  It then schedules this instruction
 * as close to the LCA as possible while trying to stay out of loops.
 */
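/* As an illustration: if a def is used in both branches of an if, the LCA
 * of those uses is the block just before the if, so that is the latest
 * legal position.  If that block sits inside a loop while the def's early
 * block is outside of it, the walk up the dominance tree at the end of
 * this function prefers the shallower block and the def is hoisted out of
 * the loop.
 */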
275
static bool
276
gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
277
{
278
   struct gcm_state *state = void_state;
279
 
280
   nir_block *lca = NULL;
281
 
282
   nir_foreach_use(def, use_src) {
283
      nir_instr *use_instr = use_src->parent_instr;
284
 
285
      gcm_schedule_late_instr(use_instr, state);
286
 
287
      /* Phi instructions are a bit special.  SSA definitions don't have to
288
       * dominate the sources of the phi nodes that use them; instead, they
289
       * have to dominate the predecessor block corresponding to the phi
290
       * source.  We handle this by looking through the sources, finding
291
       * any that are usingg this SSA def, and using those blocks instead
292
       * of the one the phi lives in.
293
       */
294
      if (use_instr->type == nir_instr_type_phi) {
295
         nir_phi_instr *phi = nir_instr_as_phi(use_instr);
296
 
297
         nir_foreach_phi_src(phi, phi_src) {
298
            if (phi_src->src.ssa == def)
299
               lca = nir_dominance_lca(lca, phi_src->pred);
300
         }
301
      } else {
302
         lca = nir_dominance_lca(lca, use_instr->block);
303
      }
304
   }
305
 
306
   nir_foreach_if_use(def, use_src) {
307
      nir_if *if_stmt = use_src->parent_if;
308
 
309
      /* For if statements, we consider the block to be the one immediately
310
       * preceding the if CF node.
311
       */
312
      nir_block *pred_block =
313
         nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
314
 
315
      lca = nir_dominance_lca(lca, pred_block);
316
   }
317
 
318
   /* Some instructions may never be used.  We'll just leave them scheduled
319
    * early and let dead code clean them up.
320
    */
321
   if (lca == NULL)
322
      return true;
323
 
324
   /* We know have the LCA of all of the uses.  If our invariants hold,
325
    * this is dominated by the block that we chose when scheduling early.
326
    * We now walk up the dominance tree and pick the lowest block that is
327
    * as far outside loops as we can get.
328
    */
329
   nir_block *best = lca;
330
   while (lca != def->parent_instr->block) {
331
      assert(lca);
332
      if (state->blocks[lca->index].loop_depth <
333
          state->blocks[best->index].loop_depth)
334
         best = lca;
335
      lca = lca->imm_dom;
336
   }
337
   def->parent_instr->block = best;
338
 
339
   return true;
340
}
341
 
/** Schedules an instruction late
 *
 * This function performs a depth-first search starting at the given
 * instruction and proceeding through its uses to schedule instructions as
 * late as they can reasonably go in the dominance tree.  The instructions
 * are "scheduled" by updating their instr->block field.
 *
 * The name of this function is actually a bit of a misnomer as it doesn't
 * schedule them "as late as possible" as the paper implies.  Instead, it
 * first finds the latest possible place it can schedule the instruction and
 * then possibly schedules it earlier than that.  The actual location is as
 * far down the tree as we can go while trying to stay out of loops.
 */
static void
gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state)
{
   if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE)
      return;

   instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE;

   /* Pinned instructions are already scheduled so we don't need to do
    * anything.  Also, bailing here keeps us from ever following phi nodes
    * which can be back-edges.
    */
   if (instr->pass_flags & GCM_INSTR_PINNED)
      return;

   nir_foreach_ssa_def(instr, gcm_schedule_late_def, state);
}

static void
gcm_place_instr(nir_instr *instr, struct gcm_state *state);

static bool
gcm_place_instr_def(nir_ssa_def *def, void *state)
{
   nir_foreach_use(def, use_src)
      gcm_place_instr(use_src->parent_instr, state);

   return false;
}

/** Places an instruction back into the program
 *
 * The earlier passes of GCM simply choose blocks for each instruction and
 * otherwise leave them alone.  This pass actually places the instructions
 * into their chosen blocks.
 *
 * To do so, we use a standard post-order depth-first search linearization
 * algorithm.  We walk over the uses of the given instruction and ensure
 * that they are placed and then place this instruction.  Because we are
 * working on multiple blocks at a time, we keep track of the last inserted
 * instruction per-block in the state structure's block_info array.  When
 * we insert an instruction in a block we insert it before the last
 * instruction inserted in that block rather than the last instruction
 * inserted globally.
 */
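/* Concretely: because an instruction's uses are always placed before the
 * instruction itself, and each insertion goes in front of the block's
 * previously inserted instruction (block_info->last_instr), every block is
 * filled from the bottom up and each def ends up above all of its uses
 * within that block.
 */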
static void
gcm_place_instr(nir_instr *instr, struct gcm_state *state)
{
   if (instr->pass_flags & GCM_INSTR_PLACED)
      return;

   instr->pass_flags |= GCM_INSTR_PLACED;

   /* Phi nodes are our one source of back-edges.  Since right now we are
    * only doing scheduling within blocks, we don't need to worry about
    * them since they are always at the top.  Just skip them completely.
    */
   if (instr->type == nir_instr_type_phi) {
      assert(instr->pass_flags & GCM_INSTR_PINNED);
      return;
   }

   nir_foreach_ssa_def(instr, gcm_place_instr_def, state);

   if (instr->pass_flags & GCM_INSTR_PINNED) {
      /* Pinned instructions have an implicit dependence on the pinned
       * instructions that come after them in the block.  Since the pinned
       * instructions will naturally "chain" together, we only need to
       * explicitly visit one of them.
       */
      for (nir_instr *after = nir_instr_next(instr);
           after;
           after = nir_instr_next(after)) {
         if (after->pass_flags & GCM_INSTR_PINNED) {
            gcm_place_instr(after, state);
            break;
         }
      }
   }

   struct gcm_block_info *block_info = &state->blocks[instr->block->index];
   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
      exec_node_remove(&instr->node);

      if (block_info->last_instr) {
         exec_node_insert_node_before(&block_info->last_instr->node,
                                      &instr->node);
      } else {
         /* Schedule it at the end of the block */
         nir_instr *jump_instr = nir_block_last_instr(instr->block);
         if (jump_instr && jump_instr->type == nir_instr_type_jump) {
            exec_node_insert_node_before(&jump_instr->node, &instr->node);
         } else {
            exec_list_push_tail(&instr->block->instr_list, &instr->node);
         }
      }
   }

   block_info->last_instr = instr;
}

static void
opt_gcm_impl(nir_function_impl *impl)
{
   struct gcm_state state;

   state.impl = impl;
   state.instr = NULL;
   exec_list_make_empty(&state.instrs);
   state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks);

   nir_metadata_require(impl, nir_metadata_block_index |
                              nir_metadata_dominance);

   gcm_build_block_info(&impl->body, &state, 0);
   nir_foreach_block(impl, gcm_pin_instructions_block, &state);

   foreach_list_typed(nir_instr, instr, node, &state.instrs)
      gcm_schedule_early_instr(instr, &state);

   foreach_list_typed(nir_instr, instr, node, &state.instrs)
      gcm_schedule_late_instr(instr, &state);
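
   /* Put everything back.  Instructions are pulled off the tail of the
    * list, and gcm_place_instr places an instruction's uses first, so by
    * the time an instruction goes back into its block everything that has
    * to come after it there has already been inserted and recorded in
    * block_info->last_instr.
    */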
   while (!exec_list_is_empty(&state.instrs)) {
      nir_instr *instr = exec_node_data(nir_instr,
                                        state.instrs.tail_pred, node);
      gcm_place_instr(instr, &state);
   }

   ralloc_free(state.blocks);
}

void
nir_opt_gcm(nir_shader *shader)
{
   nir_foreach_overload(shader, overload) {
      if (overload->impl)
         opt_gcm_impl(overload->impl);
   }
}