Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright © 2010 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
21 | * DEALINGS IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | /** |
||
25 | * \file opt_copy_propagation_elements.cpp |
||
26 | * |
||
27 | * Replaces usage of recently-copied components of variables with the |
||
28 | * previous copy of the variable. |
||
29 | * |
||
30 | * This pass can be compared with opt_copy_propagation, which operates |
||
31 | * on arbitrary whole-variable copies. However, in order to handle |
||
32 | * the copy propagation of swizzled variables or writemasked writes, |
||
33 | * we want to track things on a channel-wise basis. I found that |
||
34 | * trying to mix the swizzled/writemasked support here with the |
||
35 | * whole-variable stuff in opt_copy_propagation.cpp just made a mess, |
||
36 | * so this is separate despite the ACP handling being somewhat |
||
37 | * similar. |
||
38 | * |
||
39 | * This should reduce the number of MOV instructions in the generated |
||
40 | * programs unless copy propagation is also done on the LIR, and may |
||
41 | * help anyway by triggering other optimizations that live in the HIR. |
||
42 | */ |
||
43 | |||
44 | #include "ir.h" |
||
45 | #include "ir_rvalue_visitor.h" |
||
46 | #include "ir_basic_block.h" |
||
47 | #include "ir_optimization.h" |
||
48 | #include "glsl_types.h" |
||
49 | |||
50 | static bool debug = false; /* When true, handle_rvalue() prints each rvalue before and after it rewrites it. */ |
||
51 | |||
52 | namespace { |
||
53 | |||
54 | class acp_entry : public exec_node |
||
55 | { |
||
56 | public: |
||
57 | acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4]) |
||
58 | { |
||
59 | this->lhs = lhs; |
||
60 | this->rhs = rhs; |
||
61 | this->write_mask = write_mask; |
||
62 | memcpy(this->swizzle, swizzle, sizeof(this->swizzle)); |
||
63 | } |
||
64 | |||
65 | acp_entry(acp_entry *a) |
||
66 | { |
||
67 | this->lhs = a->lhs; |
||
68 | this->rhs = a->rhs; |
||
69 | this->write_mask = a->write_mask; |
||
70 | memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle)); |
||
71 | } |
||
72 | |||
73 | ir_variable *lhs; |
||
74 | ir_variable *rhs; |
||
75 | unsigned int write_mask; |
||
76 | int swizzle[4]; |
||
77 | }; |
||
78 | |||
79 | |||
80 | class kill_entry : public exec_node |
||
81 | { |
||
82 | public: |
||
83 | kill_entry(ir_variable *var, int write_mask) |
||
84 | { |
||
85 | this->var = var; |
||
86 | this->write_mask = write_mask; |
||
87 | } |
||
88 | |||
89 | ir_variable *var; |
||
90 | unsigned int write_mask; |
||
91 | }; |
||
92 | |||
93 | class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor { |
||
94 | public: |
||
95 | ir_copy_propagation_elements_visitor() |
||
96 | { |
||
97 | this->progress = false; |
||
98 | this->killed_all = false; |
||
99 | this->mem_ctx = ralloc_context(NULL); |
||
100 | this->shader_mem_ctx = NULL; |
||
101 | this->acp = new(mem_ctx) exec_list; |
||
102 | this->kills = new(mem_ctx) exec_list; |
||
103 | } |
||
104 | ~ir_copy_propagation_elements_visitor() |
||
105 | { |
||
106 | ralloc_free(mem_ctx); |
||
107 | } |
||
108 | |||
109 | virtual ir_visitor_status visit_enter(class ir_loop *); |
||
110 | virtual ir_visitor_status visit_enter(class ir_function_signature *); |
||
111 | virtual ir_visitor_status visit_leave(class ir_assignment *); |
||
112 | virtual ir_visitor_status visit_enter(class ir_call *); |
||
113 | virtual ir_visitor_status visit_enter(class ir_if *); |
||
114 | virtual ir_visitor_status visit_leave(class ir_swizzle *); |
||
115 | |||
116 | void handle_rvalue(ir_rvalue **rvalue); |
||
117 | |||
118 | void add_copy(ir_assignment *ir); |
||
119 | void kill(kill_entry *k); |
||
120 | void handle_if_block(exec_list *instructions); |
||
121 | |||
122 | /** List of acp_entry: The available copies to propagate */ |
||
123 | exec_list *acp; |
||
124 | /** |
||
125 | * List of kill_entry: The variables whose values were killed in this |
||
126 | * block. |
||
127 | */ |
||
128 | exec_list *kills; |
||
129 | |||
130 | bool progress; |
||
131 | |||
132 | bool killed_all; |
||
133 | |||
134 | /* Context for our local data structures. */ |
||
135 | void *mem_ctx; |
||
136 | /* Context for allocating new shader nodes. */ |
||
137 | void *shader_mem_ctx; |
||
138 | }; |
||
139 | |||
140 | } /* unnamed namespace */ |
||
141 | |||
142 | ir_visitor_status |
||
143 | ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir) |
||
144 | { |
||
145 | /* Treat entry into a function signature as a completely separate |
||
146 | * block. Any instructions at global scope will be shuffled into |
||
147 | * main() at link time, so they're irrelevant to us. |
||
148 | */ |
||
149 | exec_list *orig_acp = this->acp; |
||
150 | exec_list *orig_kills = this->kills; |
||
151 | bool orig_killed_all = this->killed_all; |
||
152 | |||
153 | this->acp = new(mem_ctx) exec_list; |
||
154 | this->kills = new(mem_ctx) exec_list; |
||
155 | this->killed_all = false; |
||
156 | |||
157 | visit_list_elements(this, &ir->body); |
||
158 | |||
159 | ralloc_free(this->acp); |
||
160 | ralloc_free(this->kills); |
||
161 | |||
162 | this->kills = orig_kills; |
||
163 | this->acp = orig_acp; |
||
164 | this->killed_all = orig_killed_all; |
||
165 | |||
166 | return visit_continue_with_parent; |
||
167 | } |
||
168 | |||
169 | ir_visitor_status |
||
170 | ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir) |
||
171 | { |
||
172 | ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); |
||
173 | ir_variable *var = ir->lhs->variable_referenced(); |
||
174 | |||
175 | if (var->type->is_scalar() || var->type->is_vector()) { |
||
176 | kill_entry *k; |
||
177 | |||
178 | if (lhs) |
||
179 | k = new(this->kills) kill_entry(var, ir->write_mask); |
||
180 | else |
||
181 | k = new(this->kills) kill_entry(var, ~0); |
||
182 | |||
183 | kill(k); |
||
184 | } |
||
185 | |||
186 | add_copy(ir); |
||
187 | |||
188 | return visit_continue; |
||
189 | } |
||
190 | |||
191 | ir_visitor_status |
||
192 | ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *) |
||
193 | { |
||
194 | /* Don't visit the values of swizzles since they are handled while |
||
195 | * visiting the swizzle itself. |
||
196 | */ |
||
197 | return visit_continue; |
||
198 | } |
||
199 | |||
200 | /** |
||
201 | * Replaces dereferences of ACP RHS variables with ACP LHS variables. |
||
202 | * |
||
203 | * This is where the actual copy propagation occurs. Note that the |
||
204 | * rewriting of ir_dereference means that the ir_dereference instance |
||
205 | * must not be shared by multiple IR operations! |
||
206 | */ |
||
207 | void |
||
208 | ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir) |
||
209 | { |
||
210 | int swizzle_chan[4]; |
||
211 | ir_dereference_variable *deref_var; |
||
212 | ir_variable *source[4] = {NULL, NULL, NULL, NULL}; |
||
213 | int source_chan[4] = {0, 0, 0, 0}; |
||
214 | int chans; |
||
215 | bool noop_swizzle = true; |
||
216 | |||
217 | if (!*ir) |
||
218 | return; |
||
219 | |||
220 | ir_swizzle *swizzle = (*ir)->as_swizzle(); |
||
221 | if (swizzle) { |
||
222 | deref_var = swizzle->val->as_dereference_variable(); |
||
223 | if (!deref_var) |
||
224 | return; |
||
225 | |||
226 | swizzle_chan[0] = swizzle->mask.x; |
||
227 | swizzle_chan[1] = swizzle->mask.y; |
||
228 | swizzle_chan[2] = swizzle->mask.z; |
||
229 | swizzle_chan[3] = swizzle->mask.w; |
||
230 | chans = swizzle->type->vector_elements; |
||
231 | } else { |
||
232 | deref_var = (*ir)->as_dereference_variable(); |
||
233 | if (!deref_var) |
||
234 | return; |
||
235 | |||
236 | swizzle_chan[0] = 0; |
||
237 | swizzle_chan[1] = 1; |
||
238 | swizzle_chan[2] = 2; |
||
239 | swizzle_chan[3] = 3; |
||
240 | chans = deref_var->type->vector_elements; |
||
241 | } |
||
242 | |||
243 | if (this->in_assignee) |
||
244 | return; |
||
245 | |||
246 | ir_variable *var = deref_var->var; |
||
247 | |||
248 | /* Try to find ACP entries covering swizzle_chan[], hoping they're |
||
249 | * the same source variable. |
||
250 | */ |
||
251 | foreach_in_list(acp_entry, entry, this->acp) { |
||
252 | if (var == entry->lhs) { |
||
253 | for (int c = 0; c < chans; c++) { |
||
254 | if (entry->write_mask & (1 << swizzle_chan[c])) { |
||
255 | source[c] = entry->rhs; |
||
256 | source_chan[c] = entry->swizzle[swizzle_chan[c]]; |
||
257 | |||
258 | if (source_chan[c] != swizzle_chan[c]) |
||
259 | noop_swizzle = false; |
||
260 | } |
||
261 | } |
||
262 | } |
||
263 | } |
||
264 | |||
265 | /* Make sure all channels are copying from the same source variable. */ |
||
266 | if (!source[0]) |
||
267 | return; |
||
268 | for (int c = 1; c < chans; c++) { |
||
269 | if (source[c] != source[0]) |
||
270 | return; |
||
271 | } |
||
272 | |||
273 | if (!shader_mem_ctx) |
||
274 | shader_mem_ctx = ralloc_parent(deref_var); |
||
275 | |||
276 | /* Don't pointlessly replace the rvalue with itself (or a noop swizzle |
||
277 | * of itself, which would just be deleted by opt_noop_swizzle). |
||
278 | */ |
||
279 | if (source[0] == var && noop_swizzle) |
||
280 | return; |
||
281 | |||
282 | if (debug) { |
||
283 | printf("Copy propagation from:\n"); |
||
284 | (*ir)->print(); |
||
285 | } |
||
286 | |||
287 | deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]); |
||
288 | *ir = new(shader_mem_ctx) ir_swizzle(deref_var, |
||
289 | source_chan[0], |
||
290 | source_chan[1], |
||
291 | source_chan[2], |
||
292 | source_chan[3], |
||
293 | chans); |
||
294 | progress = true; |
||
295 | |||
296 | if (debug) { |
||
297 | printf("to:\n"); |
||
298 | (*ir)->print(); |
||
299 | printf("\n"); |
||
300 | } |
||
301 | } |
||
302 | |||
303 | |||
304 | ir_visitor_status |
||
305 | ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir) |
||
306 | { |
||
307 | /* Do copy propagation on call parameters, but skip any out params */ |
||
308 | foreach_two_lists(formal_node, &ir->callee->parameters, |
||
309 | actual_node, &ir->actual_parameters) { |
||
310 | ir_variable *sig_param = (ir_variable *) formal_node; |
||
311 | ir_rvalue *ir = (ir_rvalue *) actual_node; |
||
312 | if (sig_param->data.mode != ir_var_function_out |
||
313 | && sig_param->data.mode != ir_var_function_inout) { |
||
314 | ir->accept(this); |
||
315 | } |
||
316 | } |
||
317 | |||
318 | /* Since we're unlinked, we don't (necessarily) know the side effects of |
||
319 | * this call. So kill all copies. |
||
320 | */ |
||
321 | acp->make_empty(); |
||
322 | this->killed_all = true; |
||
323 | |||
324 | return visit_continue_with_parent; |
||
325 | } |
||
326 | |||
327 | void |
||
328 | ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) |
||
329 | { |
||
330 | exec_list *orig_acp = this->acp; |
||
331 | exec_list *orig_kills = this->kills; |
||
332 | bool orig_killed_all = this->killed_all; |
||
333 | |||
334 | this->acp = new(mem_ctx) exec_list; |
||
335 | this->kills = new(mem_ctx) exec_list; |
||
336 | this->killed_all = false; |
||
337 | |||
338 | /* Populate the initial acp with a copy of the original */ |
||
339 | foreach_in_list(acp_entry, a, orig_acp) { |
||
340 | this->acp->push_tail(new(this->acp) acp_entry(a)); |
||
341 | } |
||
342 | |||
343 | visit_list_elements(this, instructions); |
||
344 | |||
345 | if (this->killed_all) { |
||
346 | orig_acp->make_empty(); |
||
347 | } |
||
348 | |||
349 | exec_list *new_kills = this->kills; |
||
350 | this->kills = orig_kills; |
||
351 | ralloc_free(this->acp); |
||
352 | this->acp = orig_acp; |
||
353 | this->killed_all = this->killed_all || orig_killed_all; |
||
354 | |||
355 | /* Move the new kills into the parent block's list, removing them |
||
356 | * from the parent's ACP list in the process. |
||
357 | */ |
||
358 | foreach_in_list_safe(kill_entry, k, new_kills) { |
||
359 | kill(k); |
||
360 | } |
||
361 | |||
362 | ralloc_free(new_kills); |
||
363 | } |
||
364 | |||
365 | ir_visitor_status |
||
366 | ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir) |
||
367 | { |
||
368 | ir->condition->accept(this); |
||
369 | |||
370 | handle_if_block(&ir->then_instructions); |
||
371 | handle_if_block(&ir->else_instructions); |
||
372 | |||
373 | /* handle_if_block() already descended into the children. */ |
||
374 | return visit_continue_with_parent; |
||
375 | } |
||
376 | |||
377 | ir_visitor_status |
||
378 | ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) |
||
379 | { |
||
380 | exec_list *orig_acp = this->acp; |
||
381 | exec_list *orig_kills = this->kills; |
||
382 | bool orig_killed_all = this->killed_all; |
||
383 | |||
384 | /* FINISHME: For now, the initial acp for loops is totally empty. |
||
385 | * We could go through once, then go through again with the acp |
||
386 | * cloned minus the killed entries after the first run through. |
||
387 | */ |
||
388 | this->acp = new(mem_ctx) exec_list; |
||
389 | this->kills = new(mem_ctx) exec_list; |
||
390 | this->killed_all = false; |
||
391 | |||
392 | visit_list_elements(this, &ir->body_instructions); |
||
393 | |||
394 | if (this->killed_all) { |
||
395 | orig_acp->make_empty(); |
||
396 | } |
||
397 | |||
398 | exec_list *new_kills = this->kills; |
||
399 | this->kills = orig_kills; |
||
400 | ralloc_free(this->acp); |
||
401 | this->acp = orig_acp; |
||
402 | this->killed_all = this->killed_all || orig_killed_all; |
||
403 | |||
404 | foreach_in_list_safe(kill_entry, k, new_kills) { |
||
405 | kill(k); |
||
406 | } |
||
407 | |||
408 | ralloc_free(new_kills); |
||
409 | |||
410 | /* already descended into the children. */ |
||
411 | return visit_continue_with_parent; |
||
412 | } |
||
413 | |||
414 | /* Remove any entries currently in the ACP for this kill. */ |
||
415 | void |
||
416 | ir_copy_propagation_elements_visitor::kill(kill_entry *k) |
||
417 | { |
||
418 | foreach_in_list_safe(acp_entry, entry, acp) { |
||
419 | if (entry->lhs == k->var) { |
||
420 | entry->write_mask = entry->write_mask & ~k->write_mask; |
||
421 | if (entry->write_mask == 0) { |
||
422 | entry->remove(); |
||
423 | continue; |
||
424 | } |
||
425 | } |
||
426 | if (entry->rhs == k->var) { |
||
427 | entry->remove(); |
||
428 | } |
||
429 | } |
||
430 | |||
431 | /* If we were on a list, remove ourselves before inserting */ |
||
432 | if (k->next) |
||
433 | k->remove(); |
||
434 | |||
435 | ralloc_steal(this->kills, k); |
||
436 | this->kills->push_tail(k); |
||
437 | } |
||
438 | |||
439 | /** |
||
440 | * Adds directly-copied channels between vector variables to the available |
||
441 | * copy propagation list. |
||
442 | */ |
||
443 | void |
||
444 | ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir) |
||
445 | { |
||
446 | acp_entry *entry; |
||
447 | int orig_swizzle[4] = {0, 1, 2, 3}; |
||
448 | int swizzle[4]; |
||
449 | |||
450 | if (ir->condition) |
||
451 | return; |
||
452 | |||
453 | ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); |
||
454 | if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector())) |
||
455 | return; |
||
456 | |||
457 | ir_dereference_variable *rhs = ir->rhs->as_dereference_variable(); |
||
458 | if (!rhs) { |
||
459 | ir_swizzle *swiz = ir->rhs->as_swizzle(); |
||
460 | if (!swiz) |
||
461 | return; |
||
462 | |||
463 | rhs = swiz->val->as_dereference_variable(); |
||
464 | if (!rhs) |
||
465 | return; |
||
466 | |||
467 | orig_swizzle[0] = swiz->mask.x; |
||
468 | orig_swizzle[1] = swiz->mask.y; |
||
469 | orig_swizzle[2] = swiz->mask.z; |
||
470 | orig_swizzle[3] = swiz->mask.w; |
||
471 | } |
||
472 | |||
473 | /* Move the swizzle channels out to the positions they match in the |
||
474 | * destination. We don't want to have to rewrite the swizzle[] |
||
475 | * array every time we clear a bit of the write_mask. |
||
476 | */ |
||
477 | int j = 0; |
||
478 | for (int i = 0; i < 4; i++) { |
||
479 | if (ir->write_mask & (1 << i)) |
||
480 | swizzle[i] = orig_swizzle[j++]; |
||
481 | } |
||
482 | |||
483 | int write_mask = ir->write_mask; |
||
484 | if (lhs->var == rhs->var) { |
||
485 | /* If this is a copy from the variable to itself, then we need |
||
486 | * to be sure not to include the updated channels from this |
||
487 | * instruction in the set of new source channels to be |
||
488 | * copy-propagated from. |
||
489 | */ |
||
490 | for (int i = 0; i < 4; i++) { |
||
491 | if (ir->write_mask & (1 << orig_swizzle[i])) |
||
492 | write_mask &= ~(1 << i); |
||
493 | } |
||
494 | } |
||
495 | |||
496 | entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask, |
||
497 | swizzle); |
||
498 | this->acp->push_tail(entry); |
||
499 | } |
||
500 | |||
501 | bool |
||
502 | do_copy_propagation_elements(exec_list *instructions) |
||
503 | { |
||
504 | ir_copy_propagation_elements_visitor v; |
||
505 | |||
506 | visit_list_elements(&v, instructions); |
||
507 | |||
508 | return v.progress; |
||
509 | }><>><>>><>>>><>> |