Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright © 2013 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
 * DEALINGS IN THE SOFTWARE.
22
 */
23
 
24
/**
25
 * \file opt_cse.cpp
26
 *
27
 * constant subexpression elimination at the GLSL IR level.
28
 *
29
 * Compare to brw_fs_cse.cpp for a more complete CSE implementation.  This one
30
 * is generic and handles texture operations, but it's rather simple currently
31
 * and doesn't support modification of variables in the available expressions
32
 * list, so it can't do variables other than uniforms or shader inputs.
33
 */
34
 
35
#include "ir.h"
36
#include "ir_visitor.h"
37
#include "ir_rvalue_visitor.h"
38
#include "ir_basic_block.h"
39
#include "ir_optimization.h"
40
#include "ir_builder.h"
41
#include "glsl_types.h"
42
 
43
using namespace ir_builder;
44
 
45
static bool debug = false;
46
 
47
namespace {
48
 
49
/**
50
 * This is the record of an available expression for common subexpression
51
 * elimination.
52
 */
53
class ae_entry : public exec_node
54
{
55
public:
56
   ae_entry(ir_instruction *base_ir, ir_rvalue **val)
57
      : val(val), base_ir(base_ir)
58
   {
59
      assert(val);
60
      assert(*val);
61
      assert(base_ir);
62
 
63
      var = NULL;
64
   }
65
 
66
   void init(ir_instruction *base_ir, ir_rvalue **val)
67
   {
68
      this->val = val;
69
      this->base_ir = base_ir;
70
      this->var = NULL;
71
 
72
      assert(val);
73
      assert(*val);
74
      assert(base_ir);
75
   }
76
 
77
   /**
78
    * The pointer to the expression that we might be able to reuse
79
    *
80
    * Note the double pointer -- this is the place in the base_ir expression
81
    * tree that we would rewrite to move the expression out to a new variable
82
    * assignment.
83
    */
84
   ir_rvalue **val;
85
 
86
   /**
87
    * Root instruction in the basic block where the expression appeared.
88
    *
89
    * This is used so that we can insert the new variable declaration into the
90
    * instruction stream (since *val is just somewhere in base_ir's expression
91
    * tree).
92
    */
93
   ir_instruction *base_ir;
94
 
95
   /**
96
    * The variable that the expression has been stored in, if it's been CSEd
97
    * once already.
98
    */
99
   ir_variable *var;
100
};
101
 
102
class cse_visitor : public ir_rvalue_visitor {
103
public:
104
   cse_visitor(exec_list *validate_instructions)
105
      : validate_instructions(validate_instructions)
106
   {
107
      progress = false;
108
      mem_ctx = ralloc_context(NULL);
109
      this->ae = new(mem_ctx) exec_list;
110
   }
111
   ~cse_visitor()
112
   {
113
      ralloc_free(mem_ctx);
114
   }
115
 
116
   virtual ir_visitor_status visit_enter(ir_function_signature *ir);
117
   virtual ir_visitor_status visit_enter(ir_loop *ir);
118
   virtual ir_visitor_status visit_enter(ir_if *ir);
119
   virtual ir_visitor_status visit_enter(ir_call *ir);
120
   virtual void handle_rvalue(ir_rvalue **rvalue);
121
 
122
   bool progress;
123
 
124
private:
125
   void *mem_ctx;
126
 
127
   ir_rvalue *try_cse(ir_rvalue *rvalue);
128
   void add_to_ae(ir_rvalue **rvalue);
129
 
130
   /**
131
    * Move all nodes from the ae list to the free list
132
    */
133
   void empty_ae_list();
134
 
135
   /**
136
    * Get and initialize a new ae_entry
137
    *
138
    * This will either come from the free list or be freshly allocated.
139
    */
140
   ae_entry *get_ae_entry(ir_rvalue **rvalue);
141
 
142
   /** List of ae_entry: The available expressions to reuse */
143
   exec_list *ae;
144
 
145
   /**
146
    * The whole shader, so that we can validate_ir_tree in debug mode.
147
    *
148
    * This proved quite useful when trying to get the tree manipulation
149
    * right.
150
    */
151
   exec_list *validate_instructions;
152
 
153
   /**
154
    * List of available-for-use ae_entry objects.
155
    */
156
   exec_list free_ae_entries;
157
};
158
 
159
/**
160
 * Visitor to walk an expression tree to check that all variables referenced
161
 * are constants.
162
 */
163
class is_cse_candidate_visitor : public ir_hierarchical_visitor
164
{
165
public:
166
 
167
   is_cse_candidate_visitor()
168
      : ok(true)
169
   {
170
   }
171
 
172
   virtual ir_visitor_status visit(ir_dereference_variable *ir);
173
 
174
   bool ok;
175
};
176
 
177
 
178
class contains_rvalue_visitor : public ir_rvalue_visitor
179
{
180
public:
181
 
182
   contains_rvalue_visitor(ir_rvalue *val)
183
      : val(val)
184
   {
185
      found = false;
186
   }
187
 
188
   virtual void handle_rvalue(ir_rvalue **rvalue);
189
 
190
   bool found;
191
 
192
private:
193
   ir_rvalue *val;
194
};
195
 
196
} /* unnamed namespace */
197
 
198
static void
199
dump_ae(exec_list *ae)
200
{
201
   int i = 0;
202
 
203
   printf("CSE: AE contents:\n");
204
   foreach_in_list(ae_entry, entry, ae) {
205
      printf("CSE:   AE %2d (%p): ", i, entry);
206
      (*entry->val)->print();
207
      printf("\n");
208
 
209
      if (entry->var)
210
         printf("CSE:     in var %p:\n", entry->var);
211
 
212
      i++;
213
   }
214
}
215
 
216
ir_visitor_status
217
is_cse_candidate_visitor::visit(ir_dereference_variable *ir)
218
{
219
   /* Currently, since we don't handle kills of the ae based on variables
220
    * getting assigned, we can only handle constant variables.
221
    */
222
   if (ir->var->data.read_only) {
223
      return visit_continue;
224
   } else {
225
      if (debug)
226
         printf("CSE: non-candidate: var %s is not read only\n", ir->var->name);
227
      ok = false;
228
      return visit_stop;
229
   }
230
}
231
 
232
void
233
contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue)
234
{
235
   if (*rvalue == val)
236
      found = true;
237
}
238
 
239
static bool
240
contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle)
241
{
242
   contains_rvalue_visitor v(needle);
243
   haystack->accept(&v);
244
   return v.found;
245
}
246
 
247
static bool
248
is_cse_candidate(ir_rvalue *ir)
249
{
250
   /* Our temporary variable assignment generation isn't ready to handle
251
    * anything bigger than a vector.
252
    */
253
   if (!ir->type->is_vector() && !ir->type->is_scalar()) {
254
      if (debug)
255
         printf("CSE: non-candidate: not a vector/scalar\n");
256
      return false;
257
   }
258
 
259
   /* Only handle expressions and textures currently.  We may want to extend
260
    * to variable-index array dereferences at some point.
261
    */
262
   switch (ir->ir_type) {
263
   case ir_type_expression:
264
   case ir_type_texture:
265
      break;
266
   default:
267
      if (debug)
268
         printf("CSE: non-candidate: not an expression/texture\n");
269
      return false;
270
   }
271
 
272
   is_cse_candidate_visitor v;
273
 
274
   ir->accept(&v);
275
 
276
   return v.ok;
277
}
278
 
279
/**
280
 * Tries to find and return a reference to a previous computation of a given
281
 * expression.
282
 *
283
 * Walk the list of available expressions checking if any of them match the
284
 * rvalue, and if so, move the previous copy of the expression to a temporary
285
 * and return a reference of the temporary.
286
 */
287
ir_rvalue *
288
cse_visitor::try_cse(ir_rvalue *rvalue)
289
{
290
   foreach_in_list(ae_entry, entry, ae) {
291
      if (debug) {
292
         printf("Comparing to AE %p: ", entry);
293
         (*entry->val)->print();
294
         printf("\n");
295
      }
296
 
297
      if (!rvalue->equals(*entry->val))
298
         continue;
299
 
300
      if (debug) {
301
         printf("CSE: Replacing: ");
302
         (*entry->val)->print();
303
         printf("\n");
304
         printf("CSE:      with: ");
305
         rvalue->print();
306
         printf("\n");
307
      }
308
 
309
      if (!entry->var) {
310
         ir_instruction *base_ir = entry->base_ir;
311
 
312
         ir_variable *var = new(rvalue) ir_variable(rvalue->type,
313
                                                    "cse",
314
                                                    ir_var_temporary);
315
 
316
         /* Write the previous expression result into a new variable. */
317
         base_ir->insert_before(var);
318
         ir_assignment *assignment = assign(var, *entry->val);
319
         base_ir->insert_before(assignment);
320
 
321
         /* Replace the expression in the original tree with a deref of the
322
          * variable, but keep tracking the expression for further reuse.
323
          */
324
         *entry->val = new(rvalue) ir_dereference_variable(var);
325
         entry->val = &assignment->rhs;
326
 
327
         entry->var = var;
328
 
329
         /* Update the base_irs in the AE list.  We have to be sure that
330
          * they're correct -- expressions from our base_ir that weren't moved
331
          * need to stay in this base_ir (so that later consumption of them
332
          * puts new variables between our new variable and our base_ir), but
333
          * expressions from our base_ir that we *did* move need base_ir
334
          * updated so that any further elimination from inside gets its new
335
          * assignments put before our new assignment.
336
          */
337
         foreach_in_list(ae_entry, fixup_entry, ae) {
338
            if (contains_rvalue(assignment->rhs, *fixup_entry->val))
339
               fixup_entry->base_ir = assignment;
340
         }
341
 
342
         if (debug)
343
            dump_ae(ae);
344
      }
345
 
346
      /* Replace the expression in our current tree with the variable. */
347
      return new(rvalue) ir_dereference_variable(entry->var);
348
   }
349
 
350
   return NULL;
351
}
352
 
353
void
354
cse_visitor::empty_ae_list()
355
{
356
   free_ae_entries.append_list(ae);
357
}
358
 
359
ae_entry *
360
cse_visitor::get_ae_entry(ir_rvalue **rvalue)
361
{
362
   ae_entry *entry = (ae_entry *) free_ae_entries.pop_head();
363
   if (entry) {
364
      entry->init(base_ir, rvalue);
365
   } else {
366
      entry = new(mem_ctx) ae_entry(base_ir, rvalue);
367
   }
368
 
369
   return entry;
370
}
371
 
372
/** Add the rvalue to the list of available expressions for CSE. */
373
void
374
cse_visitor::add_to_ae(ir_rvalue **rvalue)
375
{
376
   if (debug) {
377
      printf("CSE: Add to AE: ");
378
      (*rvalue)->print();
379
      printf("\n");
380
   }
381
 
382
   ae->push_tail(get_ae_entry(rvalue));
383
 
384
   if (debug)
385
      dump_ae(ae);
386
}
387
 
388
void
389
cse_visitor::handle_rvalue(ir_rvalue **rvalue)
390
{
391
   if (!*rvalue)
392
      return;
393
 
394
   if (debug) {
395
      printf("CSE: handle_rvalue ");
396
      (*rvalue)->print();
397
      printf("\n");
398
   }
399
 
400
   if (!is_cse_candidate(*rvalue))
401
      return;
402
 
403
   ir_rvalue *new_rvalue = try_cse(*rvalue);
404
   if (new_rvalue) {
405
      *rvalue = new_rvalue;
406
      progress = true;
407
 
408
      if (debug)
409
         validate_ir_tree(validate_instructions);
410
   } else {
411
      add_to_ae(rvalue);
412
   }
413
}
414
 
415
ir_visitor_status
416
cse_visitor::visit_enter(ir_if *ir)
417
{
418
   handle_rvalue(&ir->condition);
419
 
420
   empty_ae_list();
421
   visit_list_elements(this, &ir->then_instructions);
422
 
423
   empty_ae_list();
424
   visit_list_elements(this, &ir->else_instructions);
425
 
426
   empty_ae_list();
427
   return visit_continue_with_parent;
428
}
429
 
430
ir_visitor_status
431
cse_visitor::visit_enter(ir_function_signature *ir)
432
{
433
   empty_ae_list();
434
   visit_list_elements(this, &ir->body);
435
 
436
   empty_ae_list();
437
   return visit_continue_with_parent;
438
}
439
 
440
ir_visitor_status
441
cse_visitor::visit_enter(ir_loop *ir)
442
{
443
   empty_ae_list();
444
   visit_list_elements(this, &ir->body_instructions);
445
 
446
   empty_ae_list();
447
   return visit_continue_with_parent;
448
}
449
 
450
ir_visitor_status
451
cse_visitor::visit_enter(ir_call *)
452
{
453
   /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a
454
    * pointer to the (ir_rvalue *) on the stack.  Since we save those pointers
455
    * in the AE list, we can't let handle_rvalue get called.
456
    */
457
   return visit_continue_with_parent;
458
}
459
 
460
/**
461
 * Does a (uniform-value) constant subexpression elimination pass on the code
462
 * present in the instruction stream.
463
 */
464
bool
465
do_cse(exec_list *instructions)
466
{
467
   cse_visitor v(instructions);
468
 
469
   visit_list_elements(&v, instructions);
470
 
471
   return v.progress;
472
}