Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright © 2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
 * DEALINGS IN THE SOFTWARE.
22
 */
23
 
24
/**
25
 * \file opt_copy_propagation_elements.cpp
26
 *
27
 * Replaces usage of recently-copied components of variables with the
28
 * previous copy of the variable.
29
 *
30
 * This pass can be compared with opt_copy_propagation, which operates
31
 * on arbitrary whole-variable copies.  However, in order to handle
32
 * the copy propagation of swizzled variables or writemasked writes,
33
 * we want to track things on a channel-wise basis.  I found that
34
 * trying to mix the swizzled/writemasked support here with the
35
 * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
36
 * so this is separate despite the ACP handling being somewhat
37
 * similar.
38
 *
39
 * This should reduce the number of MOV instructions in the generated
40
 * programs unless copy propagation is also done on the LIR, and may
41
 * help anyway by triggering other optimizations that live in the HIR.
42
 */
43
 
44
#include "ir.h"
45
#include "ir_rvalue_visitor.h"
46
#include "ir_basic_block.h"
47
#include "ir_optimization.h"
48
#include "glsl_types.h"
49
 
50
/* When true, handle_rvalue() prints each rvalue before and after rewriting it. */
static bool debug = false;
51
 
52
namespace {
53
 
54
class acp_entry : public exec_node
55
{
56
public:
57
   acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4])
58
   {
59
      this->lhs = lhs;
60
      this->rhs = rhs;
61
      this->write_mask = write_mask;
62
      memcpy(this->swizzle, swizzle, sizeof(this->swizzle));
63
   }
64
 
65
   acp_entry(acp_entry *a)
66
   {
67
      this->lhs = a->lhs;
68
      this->rhs = a->rhs;
69
      this->write_mask = a->write_mask;
70
      memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle));
71
   }
72
 
73
   ir_variable *lhs;
74
   ir_variable *rhs;
75
   unsigned int write_mask;
76
   int swizzle[4];
77
};
78
 
79
 
80
class kill_entry : public exec_node
81
{
82
public:
83
   kill_entry(ir_variable *var, int write_mask)
84
   {
85
      this->var = var;
86
      this->write_mask = write_mask;
87
   }
88
 
89
   ir_variable *var;
90
   unsigned int write_mask;
91
};
92
 
93
class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
94
public:
95
   ir_copy_propagation_elements_visitor()
96
   {
97
      this->progress = false;
98
      this->killed_all = false;
99
      this->mem_ctx = ralloc_context(NULL);
100
      this->shader_mem_ctx = NULL;
101
      this->acp = new(mem_ctx) exec_list;
102
      this->kills = new(mem_ctx) exec_list;
103
   }
104
   ~ir_copy_propagation_elements_visitor()
105
   {
106
      ralloc_free(mem_ctx);
107
   }
108
 
109
   virtual ir_visitor_status visit_enter(class ir_loop *);
110
   virtual ir_visitor_status visit_enter(class ir_function_signature *);
111
   virtual ir_visitor_status visit_leave(class ir_assignment *);
112
   virtual ir_visitor_status visit_enter(class ir_call *);
113
   virtual ir_visitor_status visit_enter(class ir_if *);
114
   virtual ir_visitor_status visit_leave(class ir_swizzle *);
115
 
116
   void handle_rvalue(ir_rvalue **rvalue);
117
 
118
   void add_copy(ir_assignment *ir);
119
   void kill(kill_entry *k);
120
   void handle_if_block(exec_list *instructions);
121
 
122
   /** List of acp_entry: The available copies to propagate */
123
   exec_list *acp;
124
   /**
125
    * List of kill_entry: The variables whose values were killed in this
126
    * block.
127
    */
128
   exec_list *kills;
129
 
130
   bool progress;
131
 
132
   bool killed_all;
133
 
134
   /* Context for our local data structures. */
135
   void *mem_ctx;
136
   /* Context for allocating new shader nodes. */
137
   void *shader_mem_ctx;
138
};
139
 
140
} /* unnamed namespace */
141
 
142
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
{
   /* A function body is processed as its own block with fresh, empty
    * state.  Instructions at global scope get moved into main() at link
    * time, so nothing from the enclosing scope needs to carry over.
    */
   exec_list *const saved_acp = this->acp;
   exec_list *const saved_kills = this->kills;
   const bool saved_killed_all = this->killed_all;

   this->acp = new(mem_ctx) exec_list;
   this->kills = new(mem_ctx) exec_list;
   this->killed_all = false;

   visit_list_elements(this, &ir->body);

   /* Drop the per-function lists and put the caller's state back. */
   ralloc_free(this->acp);
   ralloc_free(this->kills);

   this->acp = saved_acp;
   this->kills = saved_kills;
   this->killed_all = saved_killed_all;

   /* The body was visited above; don't descend into it again. */
   return visit_continue_with_parent;
}
168
 
169
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
{
   ir_variable *const written = ir->lhs->variable_referenced();

   if (written->type->is_scalar() || written->type->is_vector()) {
      /* A plain variable dereference kills exactly the written channels;
       * any other kind of LHS conservatively kills every channel.
       */
      int killed_channels = ~0;
      if (ir->lhs->as_dereference_variable())
         killed_channels = ir->write_mask;

      kill(new(this->kills) kill_entry(written, killed_channels));
   }

   /* Record the copy only after the kill, so the new entry survives. */
   add_copy(ir);

   return visit_continue;
}
190
 
191
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
{
   /* Intentionally does nothing: the value under a swizzle is dealt with
    * when handle_rvalue() sees the swizzle itself, so it must not be
    * processed a second time here.
    */
   return visit_continue;
}
199
 
200
/**
201
 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
202
 *
203
 * This is where the actual copy propagation occurs.  Note that the
204
 * rewriting of ir_dereference means that the ir_dereference instance
205
 * must not be shared by multiple IR operations!
206
 */
207
void
208
ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
209
{
210
   int swizzle_chan[4];
211
   ir_dereference_variable *deref_var;
212
   ir_variable *source[4] = {NULL, NULL, NULL, NULL};
213
   int source_chan[4] = {0, 0, 0, 0};
214
   int chans;
215
   bool noop_swizzle = true;
216
 
217
   if (!*ir)
218
      return;
219
 
220
   ir_swizzle *swizzle = (*ir)->as_swizzle();
221
   if (swizzle) {
222
      deref_var = swizzle->val->as_dereference_variable();
223
      if (!deref_var)
224
	 return;
225
 
226
      swizzle_chan[0] = swizzle->mask.x;
227
      swizzle_chan[1] = swizzle->mask.y;
228
      swizzle_chan[2] = swizzle->mask.z;
229
      swizzle_chan[3] = swizzle->mask.w;
230
      chans = swizzle->type->vector_elements;
231
   } else {
232
      deref_var = (*ir)->as_dereference_variable();
233
      if (!deref_var)
234
	 return;
235
 
236
      swizzle_chan[0] = 0;
237
      swizzle_chan[1] = 1;
238
      swizzle_chan[2] = 2;
239
      swizzle_chan[3] = 3;
240
      chans = deref_var->type->vector_elements;
241
   }
242
 
243
   if (this->in_assignee)
244
      return;
245
 
246
   ir_variable *var = deref_var->var;
247
 
248
   /* Try to find ACP entries covering swizzle_chan[], hoping they're
249
    * the same source variable.
250
    */
251
   foreach_in_list(acp_entry, entry, this->acp) {
252
      if (var == entry->lhs) {
253
	 for (int c = 0; c < chans; c++) {
254
	    if (entry->write_mask & (1 << swizzle_chan[c])) {
255
	       source[c] = entry->rhs;
256
	       source_chan[c] = entry->swizzle[swizzle_chan[c]];
257
 
258
               if (source_chan[c] != swizzle_chan[c])
259
                  noop_swizzle = false;
260
	    }
261
	 }
262
      }
263
   }
264
 
265
   /* Make sure all channels are copying from the same source variable. */
266
   if (!source[0])
267
      return;
268
   for (int c = 1; c < chans; c++) {
269
      if (source[c] != source[0])
270
	 return;
271
   }
272
 
273
   if (!shader_mem_ctx)
274
      shader_mem_ctx = ralloc_parent(deref_var);
275
 
276
   /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
277
    * of itself, which would just be deleted by opt_noop_swizzle).
278
    */
279
   if (source[0] == var && noop_swizzle)
280
      return;
281
 
282
   if (debug) {
283
      printf("Copy propagation from:\n");
284
      (*ir)->print();
285
   }
286
 
287
   deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
288
   *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
289
					source_chan[0],
290
					source_chan[1],
291
					source_chan[2],
292
					source_chan[3],
293
					chans);
294
   progress = true;
295
 
296
   if (debug) {
297
      printf("to:\n");
298
      (*ir)->print();
299
      printf("\n");
300
   }
301
}
302
 
303
 
304
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
{
   /* Propagate copies into the call's arguments, except for arguments
    * bound to out or inout parameters.
    */
   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_variable *const formal = (ir_variable *) formal_node;
      ir_rvalue *const actual = (ir_rvalue *) actual_node;

      const bool is_out_param =
         formal->data.mode == ir_var_function_out ||
         formal->data.mode == ir_var_function_inout;

      if (!is_out_param)
         actual->accept(this);
   }

   /* Since we're unlinked, we don't (necessarily) know the side effects of
    * this call, so every available copy is discarded.
    */
   acp->make_empty();
   this->killed_all = true;

   return visit_continue_with_parent;
}
326
 
327
void
328
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
329
{
330
   exec_list *orig_acp = this->acp;
331
   exec_list *orig_kills = this->kills;
332
   bool orig_killed_all = this->killed_all;
333
 
334
   this->acp = new(mem_ctx) exec_list;
335
   this->kills = new(mem_ctx) exec_list;
336
   this->killed_all = false;
337
 
338
   /* Populate the initial acp with a copy of the original */
339
   foreach_in_list(acp_entry, a, orig_acp) {
340
      this->acp->push_tail(new(this->acp) acp_entry(a));
341
   }
342
 
343
   visit_list_elements(this, instructions);
344
 
345
   if (this->killed_all) {
346
      orig_acp->make_empty();
347
   }
348
 
349
   exec_list *new_kills = this->kills;
350
   this->kills = orig_kills;
351
   ralloc_free(this->acp);
352
   this->acp = orig_acp;
353
   this->killed_all = this->killed_all || orig_killed_all;
354
 
355
   /* Move the new kills into the parent block's list, removing them
356
    * from the parent's ACP list in the process.
357
    */
358
   foreach_in_list_safe(kill_entry, k, new_kills) {
359
      kill(k);
360
   }
361
 
362
   ralloc_free(new_kills);
363
}
364
 
365
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
{
   /* The condition is visited with the current ACP, before either branch. */
   ir->condition->accept(this);

   /* Then each branch in turn, each from its own duplicate of the ACP. */
   handle_if_block(&ir->then_instructions);
   handle_if_block(&ir->else_instructions);

   /* Both branches were walked by handle_if_block(); skip the children. */
   return visit_continue_with_parent;
}
376
 
377
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
{
   exec_list *const outer_acp = this->acp;
   exec_list *const outer_kills = this->kills;
   const bool outer_killed_all = this->killed_all;

   /* FINISHME: The loop body currently starts with a totally empty ACP.
    * We could walk the body once, then walk it again with the outer ACP
    * cloned minus whatever the first walk killed.
    */
   this->acp = new(mem_ctx) exec_list;
   this->kills = new(mem_ctx) exec_list;
   this->killed_all = false;

   visit_list_elements(this, &ir->body_instructions);

   /* A kill-everything inside the loop empties the outer ACP too. */
   if (this->killed_all)
      outer_acp->make_empty();

   exec_list *const loop_kills = this->kills;

   ralloc_free(this->acp);
   this->acp = outer_acp;
   this->kills = outer_kills;
   this->killed_all = this->killed_all || outer_killed_all;

   /* Apply the kills made in the loop body to the enclosing block. */
   foreach_in_list_safe(kill_entry, k, loop_kills) {
      kill(k);
   }

   ralloc_free(loop_kills);

   /* The body was visited above; don't descend into it again. */
   return visit_continue_with_parent;
}
413
 
414
/* Remove any entries currently in the ACP for this kill. */
415
void
416
ir_copy_propagation_elements_visitor::kill(kill_entry *k)
417
{
418
   foreach_in_list_safe(acp_entry, entry, acp) {
419
      if (entry->lhs == k->var) {
420
	 entry->write_mask = entry->write_mask & ~k->write_mask;
421
	 if (entry->write_mask == 0) {
422
	    entry->remove();
423
	    continue;
424
	 }
425
      }
426
      if (entry->rhs == k->var) {
427
	 entry->remove();
428
      }
429
   }
430
 
431
   /* If we were on a list, remove ourselves before inserting */
432
   if (k->next)
433
      k->remove();
434
 
435
   ralloc_steal(this->kills, k);
436
   this->kills->push_tail(k);
437
}
438
 
439
/**
440
 * Adds directly-copied channels between vector variables to the available
441
 * copy propagation list.
442
 */
443
void
444
ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
445
{
446
   acp_entry *entry;
447
   int orig_swizzle[4] = {0, 1, 2, 3};
448
   int swizzle[4];
449
 
450
   if (ir->condition)
451
      return;
452
 
453
   ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
454
   if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
455
      return;
456
 
457
   ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
458
   if (!rhs) {
459
      ir_swizzle *swiz = ir->rhs->as_swizzle();
460
      if (!swiz)
461
	 return;
462
 
463
      rhs = swiz->val->as_dereference_variable();
464
      if (!rhs)
465
	 return;
466
 
467
      orig_swizzle[0] = swiz->mask.x;
468
      orig_swizzle[1] = swiz->mask.y;
469
      orig_swizzle[2] = swiz->mask.z;
470
      orig_swizzle[3] = swiz->mask.w;
471
   }
472
 
473
   /* Move the swizzle channels out to the positions they match in the
474
    * destination.  We don't want to have to rewrite the swizzle[]
475
    * array every time we clear a bit of the write_mask.
476
    */
477
   int j = 0;
478
   for (int i = 0; i < 4; i++) {
479
      if (ir->write_mask & (1 << i))
480
	 swizzle[i] = orig_swizzle[j++];
481
   }
482
 
483
   int write_mask = ir->write_mask;
484
   if (lhs->var == rhs->var) {
485
      /* If this is a copy from the variable to itself, then we need
486
       * to be sure not to include the updated channels from this
487
       * instruction in the set of new source channels to be
488
       * copy-propagated from.
489
       */
490
      for (int i = 0; i < 4; i++) {
491
	 if (ir->write_mask & (1 << orig_swizzle[i]))
492
	    write_mask &= ~(1 << i);
493
      }
494
   }
495
 
496
   entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask,
497
					swizzle);
498
   this->acp->push_tail(entry);
499
}
500
 
501
bool
502
do_copy_propagation_elements(exec_list *instructions)
503
{
504
   ir_copy_propagation_elements_visitor v;
505
 
506
   visit_list_elements(&v, instructions);
507
 
508
   return v.progress;
509
}