Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright © 2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
 * DEALINGS IN THE SOFTWARE.
22
 */
23
 
24
/**
25
 * \file opt_copy_propagation_elements.cpp
26
 *
27
 * Replaces usage of recently-copied components of variables with the
28
 * previous copy of the variable.
29
 *
30
 * This pass can be compared with opt_copy_propagation, which operates
31
 * on arbitrary whole-variable copies.  However, in order to handle
32
 * the copy propagation of swizzled variables or writemasked writes,
33
 * we want to track things on a channel-wise basis.  I found that
34
 * trying to mix the swizzled/writemasked support here with the
35
 * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
36
 * so this is separate despite the ACP handling being somewhat
37
 * similar.
38
 *
39
 * This should reduce the number of MOV instructions in the generated
40
 * programs unless copy propagation is also done on the LIR, and may
41
 * help anyway by triggering other optimizations that live in the HIR.
42
 */
43
 
44
#include "ir.h"
45
#include "ir_rvalue_visitor.h"
46
#include "ir_basic_block.h"
47
#include "ir_optimization.h"
48
#include "glsl_types.h"
49
 
50
/* When true, handle_rvalue() prints each rvalue before and after it is
 * rewritten.
 */
static bool debug = false;
51
 
52
namespace {
53
 
54
class acp_entry : public exec_node
55
{
56
public:
57
   acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4])
58
   {
59
      this->lhs = lhs;
60
      this->rhs = rhs;
61
      this->write_mask = write_mask;
62
      memcpy(this->swizzle, swizzle, sizeof(this->swizzle));
63
   }
64
 
65
   acp_entry(acp_entry *a)
66
   {
67
      this->lhs = a->lhs;
68
      this->rhs = a->rhs;
69
      this->write_mask = a->write_mask;
70
      memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle));
71
   }
72
 
73
   ir_variable *lhs;
74
   ir_variable *rhs;
75
   unsigned int write_mask;
76
   int swizzle[4];
77
};
78
 
79
 
80
class kill_entry : public exec_node
81
{
82
public:
83
   kill_entry(ir_variable *var, int write_mask)
84
   {
85
      this->var = var;
86
      this->write_mask = write_mask;
87
   }
88
 
89
   ir_variable *var;
90
   unsigned int write_mask;
91
};
92
 
93
class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
94
public:
95
   ir_copy_propagation_elements_visitor()
96
   {
97
      this->progress = false;
98
      this->killed_all = false;
99
      this->mem_ctx = ralloc_context(NULL);
100
      this->shader_mem_ctx = NULL;
101
      this->acp = new(mem_ctx) exec_list;
102
      this->kills = new(mem_ctx) exec_list;
103
   }
104
   ~ir_copy_propagation_elements_visitor()
105
   {
106
      ralloc_free(mem_ctx);
107
   }
108
 
109
   virtual ir_visitor_status visit_enter(class ir_loop *);
110
   virtual ir_visitor_status visit_enter(class ir_function_signature *);
111
   virtual ir_visitor_status visit_leave(class ir_assignment *);
112
   virtual ir_visitor_status visit_enter(class ir_call *);
113
   virtual ir_visitor_status visit_enter(class ir_if *);
114
   virtual ir_visitor_status visit_leave(class ir_swizzle *);
115
 
116
   void handle_rvalue(ir_rvalue **rvalue);
117
 
118
   void add_copy(ir_assignment *ir);
119
   void kill(kill_entry *k);
120
   void handle_if_block(exec_list *instructions);
121
 
122
   /** List of acp_entry: The available copies to propagate */
123
   exec_list *acp;
124
   /**
125
    * List of kill_entry: The variables whose values were killed in this
126
    * block.
127
    */
128
   exec_list *kills;
129
 
130
   bool progress;
131
 
132
   bool killed_all;
133
 
134
   /* Context for our local data structures. */
135
   void *mem_ctx;
136
   /* Context for allocating new shader nodes. */
137
   void *shader_mem_ctx;
138
};
139
 
140
} /* unnamed namespace */
141
 
142
/**
 * Visits a function body with a fresh, empty ACP and kill list, then
 * restores the state that was active before the signature was entered.
 */
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
{
   /* A function signature is handled as an entirely separate block.
    * Instructions at global scope get shuffled into main() at link
    * time, so they do not matter here.
    */
   exec_list *const saved_acp = this->acp;
   exec_list *const saved_kills = this->kills;
   const bool saved_killed_all = this->killed_all;

   this->acp = new(mem_ctx) exec_list;
   this->kills = new(mem_ctx) exec_list;
   this->killed_all = false;

   visit_list_elements(this, &ir->body);

   /* Put the enclosing state back. */
   this->kills = saved_kills;
   this->acp = saved_acp;
   this->killed_all = saved_killed_all;

   /* The body has already been walked above. */
   return visit_continue_with_parent;
}
165
 
166
/**
 * After an assignment has been visited: invalidate copies that involve
 * the written channels, then record the assignment itself as a new copy
 * if add_copy() accepts it.
 */
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
{
   ir_variable *const var = ir->lhs->variable_referenced();

   if (var->type->is_scalar() || var->type->is_vector()) {
      /* A direct variable dereference writes exactly the channels in the
       * assignment's write mask; for any other kind of LHS every channel
       * is treated as written.
       */
      const bool direct_deref = ir->lhs->as_dereference_variable() != NULL;
      const int killed_channels = direct_deref ? ir->write_mask : ~0;

      kill(new(mem_ctx) kill_entry(var, killed_channels));
   }

   add_copy(ir);

   return visit_continue;
}
187
 
188
/**
 * Deliberately does nothing for the operand of a swizzle.
 */
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *ir)
{
   /* The value underneath a swizzle is not processed on its own: it is
    * dealt with when handle_rvalue() sees the swizzle as a whole.
    */
   return visit_continue;
}
196
 
197
/**
198
 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
199
 *
200
 * This is where the actual copy propagation occurs.  Note that the
201
 * rewriting of ir_dereference means that the ir_dereference instance
202
 * must not be shared by multiple IR operations!
203
 */
204
void
205
ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
206
{
207
   int swizzle_chan[4];
208
   ir_dereference_variable *deref_var;
209
   ir_variable *source[4] = {NULL, NULL, NULL, NULL};
210
   int source_chan[4];
211
   int chans;
212
 
213
   if (!*ir)
214
      return;
215
 
216
   ir_swizzle *swizzle = (*ir)->as_swizzle();
217
   if (swizzle) {
218
      deref_var = swizzle->val->as_dereference_variable();
219
      if (!deref_var)
220
	 return;
221
 
222
      swizzle_chan[0] = swizzle->mask.x;
223
      swizzle_chan[1] = swizzle->mask.y;
224
      swizzle_chan[2] = swizzle->mask.z;
225
      swizzle_chan[3] = swizzle->mask.w;
226
      chans = swizzle->type->vector_elements;
227
   } else {
228
      deref_var = (*ir)->as_dereference_variable();
229
      if (!deref_var)
230
	 return;
231
 
232
      swizzle_chan[0] = 0;
233
      swizzle_chan[1] = 1;
234
      swizzle_chan[2] = 2;
235
      swizzle_chan[3] = 3;
236
      chans = deref_var->type->vector_elements;
237
   }
238
 
239
   if (this->in_assignee)
240
      return;
241
 
242
   ir_variable *var = deref_var->var;
243
 
244
   /* Try to find ACP entries covering swizzle_chan[], hoping they're
245
    * the same source variable.
246
    */
247
   foreach_iter(exec_list_iterator, iter, *this->acp) {
248
      acp_entry *entry = (acp_entry *)iter.get();
249
 
250
      if (var == entry->lhs) {
251
	 for (int c = 0; c < chans; c++) {
252
	    if (entry->write_mask & (1 << swizzle_chan[c])) {
253
	       source[c] = entry->rhs;
254
	       source_chan[c] = entry->swizzle[swizzle_chan[c]];
255
	    }
256
	 }
257
      }
258
   }
259
 
260
   /* Make sure all channels are copying from the same source variable. */
261
   if (!source[0])
262
      return;
263
   for (int c = 1; c < chans; c++) {
264
      if (source[c] != source[0])
265
	 return;
266
   }
267
 
268
   if (!shader_mem_ctx)
269
      shader_mem_ctx = ralloc_parent(deref_var);
270
 
271
   if (debug) {
272
      printf("Copy propagation from:\n");
273
      (*ir)->print();
274
   }
275
 
276
   deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
277
   *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
278
					source_chan[0],
279
					source_chan[1],
280
					source_chan[2],
281
					source_chan[3],
282
					chans);
283
 
284
   if (debug) {
285
      printf("to:\n");
286
      (*ir)->print();
287
      printf("\n");
288
   }
289
}
290
 
291
 
292
/**
 * Propagates copies into the input arguments of a call, then discards
 * every available copy.
 */
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
{
   /* Walk the formal and actual parameter lists in lock step.  Copy
    * propagation is done on each actual parameter unless the matching
    * formal parameter is an out or inout parameter.
    */
   exec_list_iterator formal_iter = ir->callee->parameters.iterator();
   foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {
      ir_variable *formal = (ir_variable *)formal_iter.get();
      ir_instruction *actual = (ir_instruction *)iter.get();

      const bool written_by_callee =
         formal->mode == ir_var_function_out ||
         formal->mode == ir_var_function_inout;

      if (!written_by_callee)
         actual->accept(this);

      formal_iter.next();
   }

   /* Since we're unlinked, we don't (necessarily) know the side effects
    * of this call, so every available copy is dropped.
    */
   acp->make_empty();
   this->killed_all = true;

   return visit_continue_with_parent;
}
315
 
316
void
317
ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
318
{
319
   exec_list *orig_acp = this->acp;
320
   exec_list *orig_kills = this->kills;
321
   bool orig_killed_all = this->killed_all;
322
 
323
   this->acp = new(mem_ctx) exec_list;
324
   this->kills = new(mem_ctx) exec_list;
325
   this->killed_all = false;
326
 
327
   /* Populate the initial acp with a copy of the original */
328
   foreach_iter(exec_list_iterator, iter, *orig_acp) {
329
      acp_entry *a = (acp_entry *)iter.get();
330
      this->acp->push_tail(new(this->mem_ctx) acp_entry(a));
331
   }
332
 
333
   visit_list_elements(this, instructions);
334
 
335
   if (this->killed_all) {
336
      orig_acp->make_empty();
337
   }
338
 
339
   exec_list *new_kills = this->kills;
340
   this->kills = orig_kills;
341
   this->acp = orig_acp;
342
   this->killed_all = this->killed_all || orig_killed_all;
343
 
344
   /* Move the new kills into the parent block's list, removing them
345
    * from the parent's ACP list in the process.
346
    */
347
   foreach_list_safe(node, new_kills) {
348
      kill_entry *k = (kill_entry *)node;
349
      kill(k);
350
   }
351
}
352
 
353
/**
 * Visits the condition of an if statement in the current block, then
 * each of its two branches through handle_if_block().
 */
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
{
   ir->condition->accept(this);

   exec_list *const then_branch = &ir->then_instructions;
   exec_list *const else_branch = &ir->else_instructions;

   handle_if_block(then_branch);
   handle_if_block(else_branch);

   /* Both branches were walked by handle_if_block() already. */
   return visit_continue_with_parent;
}
364
 
365
/**
 * Visits a loop body with an empty ACP, then restores the enclosing
 * state and applies the body's kills to the enclosing block.
 */
ir_visitor_status
ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
{
   exec_list *const outer_acp = this->acp;
   exec_list *const outer_kills = this->kills;
   const bool outer_killed_all = this->killed_all;

   /* FINISHME: For now, the initial acp for loops is totally empty.
    * We could go through once, then go through again with the acp
    * cloned minus the killed entries after the first run through.
    */
   this->acp = new(mem_ctx) exec_list;
   this->kills = new(mem_ctx) exec_list;
   this->killed_all = false;

   visit_list_elements(this, &ir->body_instructions);

   /* If the body wiped its ACP, nothing in the enclosing ACP survives. */
   if (this->killed_all)
      outer_acp->make_empty();

   exec_list *const body_kills = this->kills;

   this->kills = outer_kills;
   this->acp = outer_acp;
   this->killed_all = this->killed_all || outer_killed_all;

   /* Apply every kill made inside the body to the enclosing block. */
   foreach_list_safe(node, body_kills) {
      kill((kill_entry *)node);
   }

   /* The loop body was walked above. */
   return visit_continue_with_parent;
}
399
 
400
/* Remove any entries currently in the ACP for this kill. */
401
void
402
ir_copy_propagation_elements_visitor::kill(kill_entry *k)
403
{
404
   foreach_list_safe(node, acp) {
405
      acp_entry *entry = (acp_entry *)node;
406
 
407
      if (entry->lhs == k->var) {
408
	 entry->write_mask = entry->write_mask & ~k->write_mask;
409
	 if (entry->write_mask == 0) {
410
	    entry->remove();
411
	    continue;
412
	 }
413
      }
414
      if (entry->rhs == k->var) {
415
	 entry->remove();
416
      }
417
   }
418
 
419
   /* If we were on a list, remove ourselves before inserting */
420
   if (k->next)
421
      k->remove();
422
 
423
   this->kills->push_tail(k);
424
}
425
 
426
/**
427
 * Adds directly-copied channels between vector variables to the available
428
 * copy propagation list.
429
 */
430
void
431
ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
432
{
433
   acp_entry *entry;
434
   int orig_swizzle[4] = {0, 1, 2, 3};
435
   int swizzle[4];
436
 
437
   if (ir->condition)
438
      return;
439
 
440
   ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
441
   if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
442
      return;
443
 
444
   ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
445
   if (!rhs) {
446
      ir_swizzle *swiz = ir->rhs->as_swizzle();
447
      if (!swiz)
448
	 return;
449
 
450
      rhs = swiz->val->as_dereference_variable();
451
      if (!rhs)
452
	 return;
453
 
454
      orig_swizzle[0] = swiz->mask.x;
455
      orig_swizzle[1] = swiz->mask.y;
456
      orig_swizzle[2] = swiz->mask.z;
457
      orig_swizzle[3] = swiz->mask.w;
458
   }
459
 
460
   /* Move the swizzle channels out to the positions they match in the
461
    * destination.  We don't want to have to rewrite the swizzle[]
462
    * array every time we clear a bit of the write_mask.
463
    */
464
   int j = 0;
465
   for (int i = 0; i < 4; i++) {
466
      if (ir->write_mask & (1 << i))
467
	 swizzle[i] = orig_swizzle[j++];
468
   }
469
 
470
   int write_mask = ir->write_mask;
471
   if (lhs->var == rhs->var) {
472
      /* If this is a copy from the variable to itself, then we need
473
       * to be sure not to include the updated channels from this
474
       * instruction in the set of new source channels to be
475
       * copy-propagated from.
476
       */
477
      for (int i = 0; i < 4; i++) {
478
	 if (ir->write_mask & (1 << orig_swizzle[i]))
479
	    write_mask &= ~(1 << i);
480
      }
481
   }
482
 
483
   entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask,
484
					swizzle);
485
   this->acp->push_tail(entry);
486
}
487
 
488
bool
489
do_copy_propagation_elements(exec_list *instructions)
490
{
491
   ir_copy_propagation_elements_visitor v;
492
 
493
   visit_list_elements(&v, instructions);
494
 
495
   return v.progress;
496
}