Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright © 2010 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
21 | * DEALINGS IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | /** |
||
25 | * \file opt_copy_propagation_elements.cpp |
||
26 | * |
||
27 | * Replaces usage of recently-copied components of variables with the |
||
28 | * previous copy of the variable. |
||
29 | * |
||
30 | * This pass can be compared with opt_copy_propagation, which operates |
||
31 | * on arbitrary whole-variable copies. However, in order to handle |
||
32 | * the copy propagation of swizzled variables or writemasked writes, |
||
33 | * we want to track things on a channel-wise basis. I found that |
||
34 | * trying to mix the swizzled/writemasked support here with the |
||
35 | * whole-variable stuff in opt_copy_propagation.cpp just made a mess, |
||
36 | * so this is separate despite the ACP handling being somewhat |
||
37 | * similar. |
||
38 | * |
||
39 | * This should reduce the number of MOV instructions in the generated |
||
40 | * programs unless copy propagation is also done on the LIR, and may |
||
41 | * help anyway by triggering other optimizations that live in the HIR. |
||
42 | */ |
||
43 | |||
44 | #include "ir.h" |
||
45 | #include "ir_rvalue_visitor.h" |
||
46 | #include "ir_basic_block.h" |
||
47 | #include "ir_optimization.h" |
||
48 | #include "glsl_types.h" |
||
49 | |||
50 | /* When true, handle_rvalue() prints each rvalue before and after rewriting it. */ static bool debug = false; |
||
51 | |||
52 | namespace { |
||
53 | |||
54 | class acp_entry : public exec_node |
||
55 | { |
||
56 | public: |
||
57 | acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4]) |
||
58 | { |
||
59 | this->lhs = lhs; |
||
60 | this->rhs = rhs; |
||
61 | this->write_mask = write_mask; |
||
62 | memcpy(this->swizzle, swizzle, sizeof(this->swizzle)); |
||
63 | } |
||
64 | |||
65 | acp_entry(acp_entry *a) |
||
66 | { |
||
67 | this->lhs = a->lhs; |
||
68 | this->rhs = a->rhs; |
||
69 | this->write_mask = a->write_mask; |
||
70 | memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle)); |
||
71 | } |
||
72 | |||
73 | ir_variable *lhs; |
||
74 | ir_variable *rhs; |
||
75 | unsigned int write_mask; |
||
76 | int swizzle[4]; |
||
77 | }; |
||
78 | |||
79 | |||
80 | class kill_entry : public exec_node |
||
81 | { |
||
82 | public: |
||
83 | kill_entry(ir_variable *var, int write_mask) |
||
84 | { |
||
85 | this->var = var; |
||
86 | this->write_mask = write_mask; |
||
87 | } |
||
88 | |||
89 | ir_variable *var; |
||
90 | unsigned int write_mask; |
||
91 | }; |
||
92 | |||
93 | class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor { |
||
94 | public: |
||
95 | ir_copy_propagation_elements_visitor() |
||
96 | { |
||
97 | this->progress = false; |
||
98 | this->killed_all = false; |
||
99 | this->mem_ctx = ralloc_context(NULL); |
||
100 | this->shader_mem_ctx = NULL; |
||
101 | this->acp = new(mem_ctx) exec_list; |
||
102 | this->kills = new(mem_ctx) exec_list; |
||
103 | } |
||
104 | ~ir_copy_propagation_elements_visitor() |
||
105 | { |
||
106 | ralloc_free(mem_ctx); |
||
107 | } |
||
108 | |||
109 | virtual ir_visitor_status visit_enter(class ir_loop *); |
||
110 | virtual ir_visitor_status visit_enter(class ir_function_signature *); |
||
111 | virtual ir_visitor_status visit_leave(class ir_assignment *); |
||
112 | virtual ir_visitor_status visit_enter(class ir_call *); |
||
113 | virtual ir_visitor_status visit_enter(class ir_if *); |
||
114 | virtual ir_visitor_status visit_leave(class ir_swizzle *); |
||
115 | |||
116 | void handle_rvalue(ir_rvalue **rvalue); |
||
117 | |||
118 | void add_copy(ir_assignment *ir); |
||
119 | void kill(kill_entry *k); |
||
120 | void handle_if_block(exec_list *instructions); |
||
121 | |||
122 | /** List of acp_entry: The available copies to propagate */ |
||
123 | exec_list *acp; |
||
124 | /** |
||
125 | * List of kill_entry: The variables whose values were killed in this |
||
126 | * block. |
||
127 | */ |
||
128 | exec_list *kills; |
||
129 | |||
130 | bool progress; |
||
131 | |||
132 | bool killed_all; |
||
133 | |||
134 | /* Context for our local data structures. */ |
||
135 | void *mem_ctx; |
||
136 | /* Context for allocating new shader nodes. */ |
||
137 | void *shader_mem_ctx; |
||
138 | }; |
||
139 | |||
140 | } /* unnamed namespace */ |
||
141 | |||
142 | ir_visitor_status |
||
143 | ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir) |
||
144 | { |
||
145 | /* Treat entry into a function signature as a completely separate |
||
146 | * block. Any instructions at global scope will be shuffled into |
||
147 | * main() at link time, so they're irrelevant to us. |
||
148 | */ |
||
149 | exec_list *orig_acp = this->acp; |
||
150 | exec_list *orig_kills = this->kills; |
||
151 | bool orig_killed_all = this->killed_all; |
||
152 | |||
153 | this->acp = new(mem_ctx) exec_list; |
||
154 | this->kills = new(mem_ctx) exec_list; |
||
155 | this->killed_all = false; |
||
156 | |||
157 | visit_list_elements(this, &ir->body); |
||
158 | |||
159 | this->kills = orig_kills; |
||
160 | this->acp = orig_acp; |
||
161 | this->killed_all = orig_killed_all; |
||
162 | |||
163 | return visit_continue_with_parent; |
||
164 | } |
||
165 | |||
166 | ir_visitor_status |
||
167 | ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir) |
||
168 | { |
||
169 | ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); |
||
170 | ir_variable *var = ir->lhs->variable_referenced(); |
||
171 | |||
172 | if (var->type->is_scalar() || var->type->is_vector()) { |
||
173 | kill_entry *k; |
||
174 | |||
175 | if (lhs) |
||
176 | k = new(mem_ctx) kill_entry(var, ir->write_mask); |
||
177 | else |
||
178 | k = new(mem_ctx) kill_entry(var, ~0); |
||
179 | |||
180 | kill(k); |
||
181 | } |
||
182 | |||
183 | add_copy(ir); |
||
184 | |||
185 | return visit_continue; |
||
186 | } |
||
187 | |||
188 | ir_visitor_status |
||
189 | ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *ir) |
||
190 | { |
||
191 | /* Don't visit the values of swizzles since they are handled while |
||
192 | * visiting the swizzle itself. |
||
193 | */ |
||
194 | return visit_continue; |
||
195 | } |
||
196 | |||
197 | /** |
||
198 | * Replaces dereferences of ACP RHS variables with ACP LHS variables. |
||
199 | * |
||
200 | * This is where the actual copy propagation occurs. Note that the |
||
201 | * rewriting of ir_dereference means that the ir_dereference instance |
||
202 | * must not be shared by multiple IR operations! |
||
203 | */ |
||
204 | void |
||
205 | ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir) |
||
206 | { |
||
207 | int swizzle_chan[4]; |
||
208 | ir_dereference_variable *deref_var; |
||
209 | ir_variable *source[4] = {NULL, NULL, NULL, NULL}; |
||
210 | int source_chan[4]; |
||
211 | int chans; |
||
212 | |||
213 | if (!*ir) |
||
214 | return; |
||
215 | |||
216 | ir_swizzle *swizzle = (*ir)->as_swizzle(); |
||
217 | if (swizzle) { |
||
218 | deref_var = swizzle->val->as_dereference_variable(); |
||
219 | if (!deref_var) |
||
220 | return; |
||
221 | |||
222 | swizzle_chan[0] = swizzle->mask.x; |
||
223 | swizzle_chan[1] = swizzle->mask.y; |
||
224 | swizzle_chan[2] = swizzle->mask.z; |
||
225 | swizzle_chan[3] = swizzle->mask.w; |
||
226 | chans = swizzle->type->vector_elements; |
||
227 | } else { |
||
228 | deref_var = (*ir)->as_dereference_variable(); |
||
229 | if (!deref_var) |
||
230 | return; |
||
231 | |||
232 | swizzle_chan[0] = 0; |
||
233 | swizzle_chan[1] = 1; |
||
234 | swizzle_chan[2] = 2; |
||
235 | swizzle_chan[3] = 3; |
||
236 | chans = deref_var->type->vector_elements; |
||
237 | } |
||
238 | |||
239 | if (this->in_assignee) |
||
240 | return; |
||
241 | |||
242 | ir_variable *var = deref_var->var; |
||
243 | |||
244 | /* Try to find ACP entries covering swizzle_chan[], hoping they're |
||
245 | * the same source variable. |
||
246 | */ |
||
247 | foreach_iter(exec_list_iterator, iter, *this->acp) { |
||
248 | acp_entry *entry = (acp_entry *)iter.get(); |
||
249 | |||
250 | if (var == entry->lhs) { |
||
251 | for (int c = 0; c < chans; c++) { |
||
252 | if (entry->write_mask & (1 << swizzle_chan[c])) { |
||
253 | source[c] = entry->rhs; |
||
254 | source_chan[c] = entry->swizzle[swizzle_chan[c]]; |
||
255 | } |
||
256 | } |
||
257 | } |
||
258 | } |
||
259 | |||
260 | /* Make sure all channels are copying from the same source variable. */ |
||
261 | if (!source[0]) |
||
262 | return; |
||
263 | for (int c = 1; c < chans; c++) { |
||
264 | if (source[c] != source[0]) |
||
265 | return; |
||
266 | } |
||
267 | |||
268 | if (!shader_mem_ctx) |
||
269 | shader_mem_ctx = ralloc_parent(deref_var); |
||
270 | |||
271 | if (debug) { |
||
272 | printf("Copy propagation from:\n"); |
||
273 | (*ir)->print(); |
||
274 | } |
||
275 | |||
276 | deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]); |
||
277 | *ir = new(shader_mem_ctx) ir_swizzle(deref_var, |
||
278 | source_chan[0], |
||
279 | source_chan[1], |
||
280 | source_chan[2], |
||
281 | source_chan[3], |
||
282 | chans); |
||
283 | |||
284 | if (debug) { |
||
285 | printf("to:\n"); |
||
286 | (*ir)->print(); |
||
287 | printf("\n"); |
||
288 | } |
||
289 | } |
||
290 | |||
291 | |||
292 | ir_visitor_status |
||
293 | ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir) |
||
294 | { |
||
295 | /* Do copy propagation on call parameters, but skip any out params */ |
||
296 | exec_list_iterator sig_param_iter = ir->callee->parameters.iterator(); |
||
297 | foreach_iter(exec_list_iterator, iter, ir->actual_parameters) { |
||
298 | ir_variable *sig_param = (ir_variable *)sig_param_iter.get(); |
||
299 | ir_instruction *ir = (ir_instruction *)iter.get(); |
||
300 | if (sig_param->mode != ir_var_function_out |
||
301 | && sig_param->mode != ir_var_function_inout) { |
||
302 | ir->accept(this); |
||
303 | } |
||
304 | sig_param_iter.next(); |
||
305 | } |
||
306 | |||
307 | /* Since we're unlinked, we don't (necessarily) know the side effects of |
||
308 | * this call. So kill all copies. |
||
309 | */ |
||
310 | acp->make_empty(); |
||
311 | this->killed_all = true; |
||
312 | |||
313 | return visit_continue_with_parent; |
||
314 | } |
||
315 | |||
316 | void |
||
317 | ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) |
||
318 | { |
||
319 | exec_list *orig_acp = this->acp; |
||
320 | exec_list *orig_kills = this->kills; |
||
321 | bool orig_killed_all = this->killed_all; |
||
322 | |||
323 | this->acp = new(mem_ctx) exec_list; |
||
324 | this->kills = new(mem_ctx) exec_list; |
||
325 | this->killed_all = false; |
||
326 | |||
327 | /* Populate the initial acp with a copy of the original */ |
||
328 | foreach_iter(exec_list_iterator, iter, *orig_acp) { |
||
329 | acp_entry *a = (acp_entry *)iter.get(); |
||
330 | this->acp->push_tail(new(this->mem_ctx) acp_entry(a)); |
||
331 | } |
||
332 | |||
333 | visit_list_elements(this, instructions); |
||
334 | |||
335 | if (this->killed_all) { |
||
336 | orig_acp->make_empty(); |
||
337 | } |
||
338 | |||
339 | exec_list *new_kills = this->kills; |
||
340 | this->kills = orig_kills; |
||
341 | this->acp = orig_acp; |
||
342 | this->killed_all = this->killed_all || orig_killed_all; |
||
343 | |||
344 | /* Move the new kills into the parent block's list, removing them |
||
345 | * from the parent's ACP list in the process. |
||
346 | */ |
||
347 | foreach_list_safe(node, new_kills) { |
||
348 | kill_entry *k = (kill_entry *)node; |
||
349 | kill(k); |
||
350 | } |
||
351 | } |
||
352 | |||
353 | ir_visitor_status |
||
354 | ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir) |
||
355 | { |
||
356 | ir->condition->accept(this); |
||
357 | |||
358 | handle_if_block(&ir->then_instructions); |
||
359 | handle_if_block(&ir->else_instructions); |
||
360 | |||
361 | /* handle_if_block() already descended into the children. */ |
||
362 | return visit_continue_with_parent; |
||
363 | } |
||
364 | |||
365 | ir_visitor_status |
||
366 | ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) |
||
367 | { |
||
368 | exec_list *orig_acp = this->acp; |
||
369 | exec_list *orig_kills = this->kills; |
||
370 | bool orig_killed_all = this->killed_all; |
||
371 | |||
372 | /* FINISHME: For now, the initial acp for loops is totally empty. |
||
373 | * We could go through once, then go through again with the acp |
||
374 | * cloned minus the killed entries after the first run through. |
||
375 | */ |
||
376 | this->acp = new(mem_ctx) exec_list; |
||
377 | this->kills = new(mem_ctx) exec_list; |
||
378 | this->killed_all = false; |
||
379 | |||
380 | visit_list_elements(this, &ir->body_instructions); |
||
381 | |||
382 | if (this->killed_all) { |
||
383 | orig_acp->make_empty(); |
||
384 | } |
||
385 | |||
386 | exec_list *new_kills = this->kills; |
||
387 | this->kills = orig_kills; |
||
388 | this->acp = orig_acp; |
||
389 | this->killed_all = this->killed_all || orig_killed_all; |
||
390 | |||
391 | foreach_list_safe(node, new_kills) { |
||
392 | kill_entry *k = (kill_entry *)node; |
||
393 | kill(k); |
||
394 | } |
||
395 | |||
396 | /* already descended into the children. */ |
||
397 | return visit_continue_with_parent; |
||
398 | } |
||
399 | |||
400 | /* Remove any entries currently in the ACP for this kill. */ |
||
401 | void |
||
402 | ir_copy_propagation_elements_visitor::kill(kill_entry *k) |
||
403 | { |
||
404 | foreach_list_safe(node, acp) { |
||
405 | acp_entry *entry = (acp_entry *)node; |
||
406 | |||
407 | if (entry->lhs == k->var) { |
||
408 | entry->write_mask = entry->write_mask & ~k->write_mask; |
||
409 | if (entry->write_mask == 0) { |
||
410 | entry->remove(); |
||
411 | continue; |
||
412 | } |
||
413 | } |
||
414 | if (entry->rhs == k->var) { |
||
415 | entry->remove(); |
||
416 | } |
||
417 | } |
||
418 | |||
419 | /* If we were on a list, remove ourselves before inserting */ |
||
420 | if (k->next) |
||
421 | k->remove(); |
||
422 | |||
423 | this->kills->push_tail(k); |
||
424 | } |
||
425 | |||
426 | /** |
||
427 | * Adds directly-copied channels between vector variables to the available |
||
428 | * copy propagation list. |
||
429 | */ |
||
430 | void |
||
431 | ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir) |
||
432 | { |
||
433 | acp_entry *entry; |
||
434 | int orig_swizzle[4] = {0, 1, 2, 3}; |
||
435 | int swizzle[4]; |
||
436 | |||
437 | if (ir->condition) |
||
438 | return; |
||
439 | |||
440 | ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); |
||
441 | if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector())) |
||
442 | return; |
||
443 | |||
444 | ir_dereference_variable *rhs = ir->rhs->as_dereference_variable(); |
||
445 | if (!rhs) { |
||
446 | ir_swizzle *swiz = ir->rhs->as_swizzle(); |
||
447 | if (!swiz) |
||
448 | return; |
||
449 | |||
450 | rhs = swiz->val->as_dereference_variable(); |
||
451 | if (!rhs) |
||
452 | return; |
||
453 | |||
454 | orig_swizzle[0] = swiz->mask.x; |
||
455 | orig_swizzle[1] = swiz->mask.y; |
||
456 | orig_swizzle[2] = swiz->mask.z; |
||
457 | orig_swizzle[3] = swiz->mask.w; |
||
458 | } |
||
459 | |||
460 | /* Move the swizzle channels out to the positions they match in the |
||
461 | * destination. We don't want to have to rewrite the swizzle[] |
||
462 | * array every time we clear a bit of the write_mask. |
||
463 | */ |
||
464 | int j = 0; |
||
465 | for (int i = 0; i < 4; i++) { |
||
466 | if (ir->write_mask & (1 << i)) |
||
467 | swizzle[i] = orig_swizzle[j++]; |
||
468 | } |
||
469 | |||
470 | int write_mask = ir->write_mask; |
||
471 | if (lhs->var == rhs->var) { |
||
472 | /* If this is a copy from the variable to itself, then we need |
||
473 | * to be sure not to include the updated channels from this |
||
474 | * instruction in the set of new source channels to be |
||
475 | * copy-propagated from. |
||
476 | */ |
||
477 | for (int i = 0; i < 4; i++) { |
||
478 | if (ir->write_mask & (1 << orig_swizzle[i])) |
||
479 | write_mask &= ~(1 << i); |
||
480 | } |
||
481 | } |
||
482 | |||
483 | entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask, |
||
484 | swizzle); |
||
485 | this->acp->push_tail(entry); |
||
486 | } |
||
487 | |||
488 | bool |
||
489 | do_copy_propagation_elements(exec_list *instructions) |
||
490 | { |
||
491 | ir_copy_propagation_elements_visitor v; |
||
492 | |||
493 | visit_list_elements(&v, instructions); |
||
494 | |||
495 | return v.progress; |
||
496 | }><>><>>><>>>><>> |