Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1901 | serge | 1 | /* |
2 | * Copyright © 2010 Luca Barbieri |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
21 | * DEALINGS IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | /** |
||
25 | * \file lower_variable_index_to_cond_assign.cpp |
||
26 | * |
||
27 | * Turns non-constant indexing into array types to a series of |
||
28 | * conditional moves of each element into a temporary. |
||
29 | * |
||
30 | * Pre-DX10 GPUs often don't have a native way to do this operation, |
||
31 | * and this works around that. |
||
32 | */ |
||
33 | |||
34 | #include "ir.h" |
||
35 | #include "ir_rvalue_visitor.h" |
||
36 | #include "ir_optimization.h" |
||
37 | #include "glsl_types.h" |
||
38 | #include "main/macros.h" |
||
39 | |||
40 | struct assignment_generator |
||
41 | { |
||
42 | ir_instruction* base_ir; |
||
43 | ir_rvalue* array; |
||
44 | bool is_write; |
||
45 | unsigned int write_mask; |
||
46 | ir_variable* var; |
||
47 | |||
48 | assignment_generator() |
||
49 | { |
||
50 | } |
||
51 | |||
52 | void generate(unsigned i, ir_rvalue* condition, exec_list *list) const |
||
53 | { |
||
54 | /* Just clone the rest of the deref chain when trying to get at the |
||
55 | * underlying variable. |
||
56 | */ |
||
57 | void *mem_ctx = ralloc_parent(base_ir); |
||
58 | ir_dereference *element = |
||
59 | new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL), |
||
60 | new(mem_ctx) ir_constant(i)); |
||
61 | ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); |
||
62 | |||
63 | ir_assignment *assignment; |
||
64 | if (is_write) { |
||
65 | assignment = new(mem_ctx) ir_assignment(element, variable, condition, |
||
66 | write_mask); |
||
67 | } else { |
||
68 | assignment = new(mem_ctx) ir_assignment(variable, element, condition); |
||
69 | } |
||
70 | |||
71 | list->push_tail(assignment); |
||
72 | } |
||
73 | }; |
||
74 | |||
75 | struct switch_generator |
||
76 | { |
||
77 | /* make TFunction a template parameter if you need to use other generators */ |
||
78 | typedef assignment_generator TFunction; |
||
79 | const TFunction& generator; |
||
80 | |||
81 | ir_variable* index; |
||
82 | unsigned linear_sequence_max_length; |
||
83 | unsigned condition_components; |
||
84 | |||
85 | void *mem_ctx; |
||
86 | |||
87 | switch_generator(const TFunction& generator, ir_variable *index, |
||
88 | unsigned linear_sequence_max_length, |
||
89 | unsigned condition_components) |
||
90 | : generator(generator), index(index), |
||
91 | linear_sequence_max_length(linear_sequence_max_length), |
||
92 | condition_components(condition_components) |
||
93 | { |
||
94 | this->mem_ctx = ralloc_parent(index); |
||
95 | } |
||
96 | |||
97 | void linear_sequence(unsigned begin, unsigned end, exec_list *list) |
||
98 | { |
||
99 | if (begin == end) |
||
100 | return; |
||
101 | |||
102 | /* If the array access is a read, read the first element of this subregion |
||
103 | * unconditionally. The remaining tests will possibly overwrite this |
||
104 | * value with one of the other array elements. |
||
105 | * |
||
106 | * This optimization cannot be done for writes because it will cause the |
||
107 | * first element of the subregion to be written possibly *in addition* to |
||
108 | * one of the other elements. |
||
109 | */ |
||
110 | unsigned first; |
||
111 | if (!this->generator.is_write) { |
||
112 | this->generator.generate(begin, 0, list); |
||
113 | first = begin + 1; |
||
114 | } else { |
||
115 | first = begin; |
||
116 | } |
||
117 | |||
118 | for (unsigned i = first; i < end; i += 4) { |
||
119 | const unsigned comps = MIN2(condition_components, end - i); |
||
120 | |||
121 | ir_rvalue *broadcast_index = |
||
122 | new(this->mem_ctx) ir_dereference_variable(index); |
||
123 | |||
124 | if (comps) { |
||
125 | const ir_swizzle_mask m = { 0, 0, 0, 0, comps, false }; |
||
126 | broadcast_index = new(this->mem_ctx) ir_swizzle(broadcast_index, m); |
||
127 | } |
||
128 | |||
129 | /* Compare the desired index value with the next block of four indices. |
||
130 | */ |
||
131 | ir_constant_data test_indices_data; |
||
132 | memset(&test_indices_data, 0, sizeof(test_indices_data)); |
||
133 | test_indices_data.i[0] = i; |
||
134 | test_indices_data.i[1] = i + 1; |
||
135 | test_indices_data.i[2] = i + 2; |
||
136 | test_indices_data.i[3] = i + 3; |
||
137 | ir_constant *const test_indices = |
||
138 | new(this->mem_ctx) ir_constant(broadcast_index->type, |
||
139 | &test_indices_data); |
||
140 | |||
141 | ir_rvalue *const condition_val = |
||
142 | new(this->mem_ctx) ir_expression(ir_binop_equal, |
||
143 | &glsl_type::bool_type[comps - 1], |
||
144 | broadcast_index, |
||
145 | test_indices); |
||
146 | |||
147 | ir_variable *const condition = |
||
148 | new(this->mem_ctx) ir_variable(condition_val->type, |
||
149 | "dereference_array_condition", |
||
150 | ir_var_temporary); |
||
151 | list->push_tail(condition); |
||
152 | |||
153 | ir_rvalue *const cond_deref = |
||
154 | new(this->mem_ctx) ir_dereference_variable(condition); |
||
155 | list->push_tail(new(this->mem_ctx) ir_assignment(cond_deref, |
||
156 | condition_val, 0)); |
||
157 | |||
158 | if (comps == 1) { |
||
159 | ir_rvalue *const cond_deref = |
||
160 | new(this->mem_ctx) ir_dereference_variable(condition); |
||
161 | |||
162 | this->generator.generate(i, cond_deref, list); |
||
163 | } else { |
||
164 | for (unsigned j = 0; j < comps; j++) { |
||
165 | ir_rvalue *const cond_deref = |
||
166 | new(this->mem_ctx) ir_dereference_variable(condition); |
||
167 | ir_rvalue *const cond_swiz = |
||
168 | new(this->mem_ctx) ir_swizzle(cond_deref, j, 0, 0, 0, 1); |
||
169 | |||
170 | this->generator.generate(i + j, cond_swiz, list); |
||
171 | } |
||
172 | } |
||
173 | } |
||
174 | } |
||
175 | |||
176 | void bisect(unsigned begin, unsigned end, exec_list *list) |
||
177 | { |
||
178 | unsigned middle = (begin + end) >> 1; |
||
179 | |||
180 | assert(index->type->is_integer()); |
||
181 | |||
182 | ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT) |
||
183 | ? new(this->mem_ctx) ir_constant((unsigned)middle) |
||
184 | : new(this->mem_ctx) ir_constant((int)middle); |
||
185 | |||
186 | |||
187 | ir_dereference_variable *deref = |
||
188 | new(this->mem_ctx) ir_dereference_variable(this->index); |
||
189 | |||
190 | ir_expression *less = |
||
191 | new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type, |
||
192 | deref, middle_c); |
||
193 | |||
194 | ir_if *if_less = new(this->mem_ctx) ir_if(less); |
||
195 | |||
196 | generate(begin, middle, &if_less->then_instructions); |
||
197 | generate(middle, end, &if_less->else_instructions); |
||
198 | |||
199 | list->push_tail(if_less); |
||
200 | } |
||
201 | |||
202 | void generate(unsigned begin, unsigned end, exec_list *list) |
||
203 | { |
||
204 | unsigned length = end - begin; |
||
205 | if (length <= this->linear_sequence_max_length) |
||
206 | return linear_sequence(begin, end, list); |
||
207 | else |
||
208 | return bisect(begin, end, list); |
||
209 | } |
||
210 | }; |
||
211 | |||
212 | /** |
||
213 | * Visitor class for replacing expressions with ir_constant values. |
||
214 | */ |
||
215 | |||
216 | class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor { |
||
217 | public: |
||
218 | variable_index_to_cond_assign_visitor(bool lower_input, |
||
219 | bool lower_output, |
||
220 | bool lower_temp, |
||
221 | bool lower_uniform) |
||
222 | { |
||
223 | this->progress = false; |
||
224 | this->lower_inputs = lower_input; |
||
225 | this->lower_outputs = lower_output; |
||
226 | this->lower_temps = lower_temp; |
||
227 | this->lower_uniforms = lower_uniform; |
||
228 | } |
||
229 | |||
230 | bool progress; |
||
231 | bool lower_inputs; |
||
232 | bool lower_outputs; |
||
233 | bool lower_temps; |
||
234 | bool lower_uniforms; |
||
235 | |||
236 | bool is_array_or_matrix(const ir_instruction *ir) const |
||
237 | { |
||
238 | return (ir->type->is_array() || ir->type->is_matrix()); |
||
239 | } |
||
240 | |||
241 | bool needs_lowering(ir_dereference_array *deref) const |
||
242 | { |
||
243 | if (deref == NULL || deref->array_index->as_constant() |
||
244 | || !is_array_or_matrix(deref->array)) |
||
245 | return false; |
||
246 | |||
247 | if (deref->array->ir_type == ir_type_constant) |
||
248 | return this->lower_temps; |
||
249 | |||
250 | const ir_variable *const var = deref->array->variable_referenced(); |
||
251 | switch (var->mode) { |
||
252 | case ir_var_auto: |
||
253 | case ir_var_temporary: |
||
254 | return this->lower_temps; |
||
255 | case ir_var_uniform: |
||
256 | return this->lower_uniforms; |
||
257 | case ir_var_in: |
||
258 | return (var->location == -1) ? this->lower_temps : this->lower_inputs; |
||
259 | case ir_var_out: |
||
260 | return (var->location == -1) ? this->lower_temps : this->lower_outputs; |
||
261 | case ir_var_inout: |
||
262 | return this->lower_temps; |
||
263 | } |
||
264 | |||
265 | assert(!"Should not get here."); |
||
266 | return false; |
||
267 | } |
||
268 | |||
269 | ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, |
||
270 | ir_assignment* orig_assign) |
||
271 | { |
||
272 | assert(is_array_or_matrix(orig_deref->array)); |
||
273 | |||
274 | const unsigned length = (orig_deref->array->type->is_array()) |
||
275 | ? orig_deref->array->type->length |
||
276 | : orig_deref->array->type->matrix_columns; |
||
277 | |||
278 | void *const mem_ctx = ralloc_parent(base_ir); |
||
279 | |||
280 | /* Temporary storage for either the result of the dereference of |
||
281 | * the array, or the RHS that's being assigned into the |
||
282 | * dereference of the array. |
||
283 | */ |
||
284 | ir_variable *var; |
||
285 | |||
286 | if (orig_assign) { |
||
287 | var = new(mem_ctx) ir_variable(orig_assign->rhs->type, |
||
288 | "dereference_array_value", |
||
289 | ir_var_temporary); |
||
290 | base_ir->insert_before(var); |
||
291 | |||
292 | ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var); |
||
293 | ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, |
||
294 | orig_assign->rhs, |
||
295 | NULL); |
||
296 | |||
297 | base_ir->insert_before(assign); |
||
298 | } else { |
||
299 | var = new(mem_ctx) ir_variable(orig_deref->type, |
||
300 | "dereference_array_value", |
||
301 | ir_var_temporary); |
||
302 | base_ir->insert_before(var); |
||
303 | } |
||
304 | |||
305 | /* Store the index to a temporary to avoid reusing its tree. */ |
||
306 | ir_variable *index = |
||
307 | new(mem_ctx) ir_variable(orig_deref->array_index->type, |
||
308 | "dereference_array_index", ir_var_temporary); |
||
309 | base_ir->insert_before(index); |
||
310 | |||
311 | ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index); |
||
312 | ir_assignment *assign = |
||
313 | new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL); |
||
314 | base_ir->insert_before(assign); |
||
315 | |||
316 | assignment_generator ag; |
||
317 | ag.array = orig_deref->array; |
||
318 | ag.base_ir = base_ir; |
||
319 | ag.var = var; |
||
320 | if (orig_assign) { |
||
321 | ag.is_write = true; |
||
322 | ag.write_mask = orig_assign->write_mask; |
||
323 | } else { |
||
324 | ag.is_write = false; |
||
325 | } |
||
326 | |||
327 | switch_generator sg(ag, index, 4, 4); |
||
328 | |||
329 | exec_list list; |
||
330 | sg.generate(0, length, &list); |
||
331 | base_ir->insert_before(&list); |
||
332 | |||
333 | return var; |
||
334 | } |
||
335 | |||
336 | virtual void handle_rvalue(ir_rvalue **pir) |
||
337 | { |
||
338 | if (!*pir) |
||
339 | return; |
||
340 | |||
341 | ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); |
||
342 | if (needs_lowering(orig_deref)) { |
||
343 | ir_variable* var = convert_dereference_array(orig_deref, 0); |
||
344 | assert(var); |
||
345 | *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var); |
||
346 | this->progress = true; |
||
347 | } |
||
348 | } |
||
349 | |||
350 | ir_visitor_status |
||
351 | visit_leave(ir_assignment *ir) |
||
352 | { |
||
353 | ir_rvalue_visitor::visit_leave(ir); |
||
354 | |||
355 | ir_dereference_array *orig_deref = ir->lhs->as_dereference_array(); |
||
356 | |||
357 | if (needs_lowering(orig_deref)) { |
||
358 | convert_dereference_array(orig_deref, ir); |
||
359 | ir->remove(); |
||
360 | this->progress = true; |
||
361 | } |
||
362 | |||
363 | return visit_continue; |
||
364 | } |
||
365 | }; |
||
366 | |||
367 | bool |
||
368 | lower_variable_index_to_cond_assign(exec_list *instructions, |
||
369 | bool lower_input, |
||
370 | bool lower_output, |
||
371 | bool lower_temp, |
||
372 | bool lower_uniform) |
||
373 | { |
||
374 | variable_index_to_cond_assign_visitor v(lower_input, |
||
375 | lower_output, |
||
376 | lower_temp, |
||
377 | lower_uniform); |
||
378 | |||
379 | visit_list_elements(&v, instructions); |
||
380 | |||
381 | return v.progress; |
||
382 | }=>>> |