Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2013 Vadim Girlin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Vadim Girlin |
||
25 | */ |
||
26 | |||
// Debug switch for the register allocation passes in this file: set to a
// non-zero value to compile in the RA_DUMP() trace output.
#define RA_DEBUG 0

// RA_DUMP(q) runs the statements q when RA_DEBUG is enabled and expands to
// nothing otherwise.
#if !RA_DEBUG
#define RA_DUMP(q)
#else
#define RA_DUMP(q) do { q } while (0)
#endif
||
34 | |||
#include <algorithm>
#include <cstring>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
||
40 | |||
41 | namespace r600_sb { |
||
42 | |||
43 | class regbits { |
||
44 | typedef uint32_t basetype; |
||
45 | static const unsigned bt_bytes = sizeof(basetype); |
||
46 | static const unsigned bt_index_shift = 5; |
||
47 | static const unsigned bt_index_mask = (1u << bt_index_shift) - 1; |
||
48 | static const unsigned bt_bits = bt_bytes << 3; |
||
49 | static const unsigned size = MAX_GPR * 4 / bt_bits; |
||
50 | |||
51 | basetype dta[size]; |
||
52 | |||
53 | unsigned num_temps; |
||
54 | |||
55 | public: |
||
56 | |||
57 | regbits(unsigned num_temps) : dta(), num_temps(num_temps) {} |
||
58 | regbits(unsigned num_temps, unsigned value) : num_temps(num_temps) |
||
59 | { set_all(value); } |
||
60 | |||
61 | regbits(shader &sh, val_set &vs) : num_temps(sh.get_ctx().alu_temp_gprs) |
||
62 | { set_all(1); from_val_set(sh, vs); } |
||
63 | |||
64 | void set_all(unsigned val); |
||
65 | void from_val_set(shader &sh, val_set &vs); |
||
66 | |||
67 | void set(unsigned index); |
||
68 | void clear(unsigned index); |
||
69 | bool get(unsigned index); |
||
70 | |||
71 | void set(unsigned index, unsigned val); |
||
72 | |||
73 | sel_chan find_free_bit(); |
||
74 | sel_chan find_free_chans(unsigned mask); |
||
75 | sel_chan find_free_chan_by_mask(unsigned mask); |
||
76 | sel_chan find_free_array(unsigned size, unsigned mask); |
||
77 | |||
78 | void dump(); |
||
79 | }; |
||
80 | |||
81 | // ======================================= |
||
82 | |||
83 | void regbits::dump() { |
||
84 | for (unsigned i = 0; i < size * bt_bits; ++i) { |
||
85 | |||
86 | if (!(i & 31)) |
||
87 | sblog << "\n"; |
||
88 | |||
89 | if (!(i & 3)) { |
||
90 | sblog.print_w(i / 4, 7); |
||
91 | sblog << " "; |
||
92 | } |
||
93 | |||
94 | sblog << (get(i) ? 1 : 0); |
||
95 | } |
||
96 | } |
||
97 | |||
98 | |||
99 | void regbits::set_all(unsigned v) { |
||
100 | memset(&dta, v ? 0xFF : 0x00, size * bt_bytes); |
||
101 | } |
||
102 | |||
103 | void regbits::from_val_set(shader &sh, val_set& vs) { |
||
104 | val_set &s = vs; |
||
105 | unsigned g; |
||
106 | for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) { |
||
107 | value *v = *I; |
||
108 | if (v->is_any_gpr()) { |
||
109 | g = v->get_final_gpr(); |
||
110 | if (!g) |
||
111 | continue; |
||
112 | } else |
||
113 | continue; |
||
114 | |||
115 | assert(g); |
||
116 | --g; |
||
117 | assert(g < 512); |
||
118 | clear(g); |
||
119 | } |
||
120 | } |
||
121 | |||
122 | void regbits::set(unsigned index) { |
||
123 | unsigned ih = index >> bt_index_shift; |
||
124 | unsigned il = index & bt_index_mask; |
||
125 | dta[ih] |= ((basetype)1u << il); |
||
126 | } |
||
127 | |||
128 | void regbits::clear(unsigned index) { |
||
129 | unsigned ih = index >> bt_index_shift; |
||
130 | unsigned il = index & bt_index_mask; |
||
131 | assert(ih < size); |
||
132 | dta[ih] &= ~((basetype)1u << il); |
||
133 | } |
||
134 | |||
135 | bool regbits::get(unsigned index) { |
||
136 | unsigned ih = index >> bt_index_shift; |
||
137 | unsigned il = index & bt_index_mask; |
||
138 | return dta[ih] & ((basetype)1u << il); |
||
139 | } |
||
140 | |||
141 | void regbits::set(unsigned index, unsigned val) { |
||
142 | unsigned ih = index >> bt_index_shift; |
||
143 | unsigned il = index & bt_index_mask; |
||
144 | basetype bm = 1u << il; |
||
145 | dta[ih] = (dta[ih] & ~bm) | (val << il); |
||
146 | } |
||
147 | |||
148 | // free register for ra means the bit is set |
||
149 | sel_chan regbits::find_free_bit() { |
||
150 | unsigned elt = 0; |
||
151 | unsigned bit = 0; |
||
152 | |||
153 | while (elt < size && !dta[elt]) |
||
154 | ++elt; |
||
155 | |||
156 | if (elt >= size) |
||
157 | return 0; |
||
158 | |||
159 | bit = __builtin_ctz(dta[elt]) + (elt << bt_index_shift); |
||
160 | |||
161 | assert(bit < ((MAX_GPR - num_temps) << 2)); |
||
162 | |||
163 | return bit + 1; |
||
164 | } |
||
165 | |||
166 | // find free gpr component to use as indirectly addressable array |
||
167 | sel_chan regbits::find_free_array(unsigned length, unsigned mask) { |
||
168 | unsigned cc[4] = {}; |
||
169 | |||
170 | // FIXME optimize this. though hopefully we won't have a lot of arrays |
||
171 | for (unsigned a = 0; a < MAX_GPR - num_temps; ++a) { |
||
172 | for(unsigned c = 0; c < MAX_CHAN; ++c) { |
||
173 | if (mask & (1 << c)) { |
||
174 | if (get((a << 2) | c)) { |
||
175 | if (++cc[c] == length) |
||
176 | return sel_chan(a - length + 1, c); |
||
177 | } else { |
||
178 | cc[c] = 0; |
||
179 | } |
||
180 | } |
||
181 | } |
||
182 | } |
||
183 | return 0; |
||
184 | } |
||
185 | |||
186 | sel_chan regbits::find_free_chans(unsigned mask) { |
||
187 | unsigned elt = 0; |
||
188 | unsigned bit = 0; |
||
189 | |||
190 | assert (!(mask & ~0xF)); |
||
191 | basetype cd = dta[elt]; |
||
192 | |||
193 | do { |
||
194 | if (!cd) { |
||
195 | if (++elt < size) { |
||
196 | cd = dta[elt]; |
||
197 | bit = 0; |
||
198 | continue; |
||
199 | } else |
||
200 | return 0; |
||
201 | } |
||
202 | |||
203 | unsigned p = __builtin_ctz(cd) & ~(basetype)3u; |
||
204 | |||
205 | assert (p <= bt_bits - bit); |
||
206 | bit += p; |
||
207 | cd >>= p; |
||
208 | |||
209 | if ((cd & mask) == mask) { |
||
210 | return ((elt << bt_index_shift) | bit) + 1; |
||
211 | } |
||
212 | |||
213 | bit += 4; |
||
214 | cd >>= 4; |
||
215 | |||
216 | } while (1); |
||
217 | |||
218 | return 0; |
||
219 | } |
||
220 | |||
221 | sel_chan regbits::find_free_chan_by_mask(unsigned mask) { |
||
222 | unsigned elt = 0; |
||
223 | unsigned bit = 0; |
||
224 | |||
225 | assert (!(mask & ~0xF)); |
||
226 | basetype cd = dta[elt]; |
||
227 | |||
228 | do { |
||
229 | if (!cd) { |
||
230 | if (++elt < size) { |
||
231 | cd = dta[elt]; |
||
232 | bit = 0; |
||
233 | continue; |
||
234 | } else |
||
235 | return 0; |
||
236 | } |
||
237 | |||
238 | unsigned p = __builtin_ctz(cd) & ~(basetype)3u; |
||
239 | |||
240 | assert (p <= bt_bits - bit); |
||
241 | bit += p; |
||
242 | cd >>= p; |
||
243 | |||
244 | if (cd & mask) { |
||
245 | unsigned nb = __builtin_ctz(cd & mask); |
||
246 | unsigned ofs = ((elt << bt_index_shift) | bit); |
||
247 | return nb + ofs + 1; |
||
248 | } |
||
249 | |||
250 | bit += 4; |
||
251 | cd >>= 4; |
||
252 | |||
253 | } while (1); |
||
254 | |||
255 | return 0; |
||
256 | } |
||
257 | |||
258 | // ================================ |
||
259 | |||
260 | void ra_init::alloc_arrays() { |
||
261 | |||
262 | gpr_array_vec &ga = sh.arrays(); |
||
263 | |||
264 | for(gpr_array_vec::iterator I = ga.begin(), E = ga.end(); I != E; ++I) { |
||
265 | gpr_array *a = *I; |
||
266 | |||
267 | RA_DUMP( |
||
268 | sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n"; |
||
269 | sblog << "\n"; |
||
270 | ); |
||
271 | |||
272 | // skip preallocated arrays (e.g. with preloaded inputs) |
||
273 | if (a->gpr) { |
||
274 | RA_DUMP( sblog << " FIXED at " << a->gpr << "\n"; ); |
||
275 | continue; |
||
276 | } |
||
277 | |||
278 | bool dead = a->is_dead(); |
||
279 | |||
280 | if (dead) { |
||
281 | RA_DUMP( sblog << " DEAD\n"; ); |
||
282 | continue; |
||
283 | } |
||
284 | |||
285 | val_set &s = a->interferences; |
||
286 | |||
287 | |||
288 | for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) { |
||
289 | value *v = *I; |
||
290 | if (v->array == a) |
||
291 | s.remove_val(v); |
||
292 | } |
||
293 | |||
294 | RA_DUMP( |
||
295 | sblog << " interf: "; |
||
296 | dump::dump_set(sh, s); |
||
297 | sblog << "\n"; |
||
298 | ); |
||
299 | |||
300 | regbits rb(sh, s); |
||
301 | |||
302 | sel_chan base = rb.find_free_array(a->array_size, |
||
303 | (1 << a->base_gpr.chan())); |
||
304 | |||
305 | RA_DUMP( sblog << " found base: " << base << "\n"; ); |
||
306 | |||
307 | a->gpr = base; |
||
308 | } |
||
309 | } |
||
310 | |||
311 | |||
312 | int ra_init::run() { |
||
313 | |||
314 | alloc_arrays(); |
||
315 | |||
316 | ra_node(sh.root); |
||
317 | return 0; |
||
318 | } |
||
319 | |||
320 | void ra_init::ra_node(container_node* c) { |
||
321 | |||
322 | for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { |
||
323 | node *n = *I; |
||
324 | if (n->type == NT_OP) { |
||
325 | process_op(n); |
||
326 | } |
||
327 | if (n->is_container() && !n->is_alu_packed()) { |
||
328 | ra_node(static_cast |
||
329 | } |
||
330 | } |
||
331 | } |
||
332 | |||
333 | void ra_init::process_op(node* n) { |
||
334 | |||
335 | bool copy = n->is_copy_mov(); |
||
336 | |||
337 | RA_DUMP( |
||
338 | sblog << "ra_init: process_op : "; |
||
339 | dump::dump_op(n); |
||
340 | sblog << "\n"; |
||
341 | ); |
||
342 | |||
343 | if (n->is_alu_packed()) { |
||
344 | for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { |
||
345 | value *v = *I; |
||
346 | if (v && v->is_sgpr() && v->constraint && |
||
347 | v->constraint->kind == CK_PACKED_BS) { |
||
348 | color_bs_constraint(v->constraint); |
||
349 | break; |
||
350 | } |
||
351 | } |
||
352 | } |
||
353 | |||
354 | if (n->is_fetch_inst() || n->is_cf_inst()) { |
||
355 | for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { |
||
356 | value *v = *I; |
||
357 | if (v && v->is_sgpr()) |
||
358 | color(v); |
||
359 | } |
||
360 | } |
||
361 | |||
362 | for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) { |
||
363 | value *v = *I; |
||
364 | if (!v) |
||
365 | continue; |
||
366 | if (v->is_sgpr()) { |
||
367 | if (!v->gpr) { |
||
368 | if (copy && !v->constraint) { |
||
369 | value *s = *(n->src.begin() + (I - n->dst.begin())); |
||
370 | assert(s); |
||
371 | if (s->is_sgpr()) { |
||
372 | assign_color(v, s->gpr); |
||
373 | } |
||
374 | } else |
||
375 | color(v); |
||
376 | } |
||
377 | } |
||
378 | } |
||
379 | } |
||
380 | |||
381 | void ra_init::color_bs_constraint(ra_constraint* c) { |
||
382 | vvec &vv = c->values; |
||
383 | assert(vv.size() <= 8); |
||
384 | |||
385 | RA_DUMP( |
||
386 | sblog << "color_bs_constraint: "; |
||
387 | dump::dump_vec(vv); |
||
388 | sblog << "\n"; |
||
389 | ); |
||
390 | |||
391 | regbits rb(ctx.alu_temp_gprs); |
||
392 | |||
393 | unsigned chan_count[4] = {}; |
||
394 | unsigned allowed_chans = 0x0F; |
||
395 | |||
396 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
397 | value *v = *I; |
||
398 | sel_chan gpr = v->get_final_gpr(); |
||
399 | |||
400 | if (!v || v->is_dead()) |
||
401 | continue; |
||
402 | |||
403 | val_set interf; |
||
404 | |||
405 | if (v->chunk) |
||
406 | sh.coal.get_chunk_interferences(v->chunk, interf); |
||
407 | else |
||
408 | interf = v->interferences; |
||
409 | |||
410 | RA_DUMP( |
||
411 | sblog << " processing " << *v << " interferences : "; |
||
412 | dump::dump_set(sh, interf); |
||
413 | sblog << "\n"; |
||
414 | ); |
||
415 | |||
416 | if (gpr) { |
||
417 | unsigned chan = gpr.chan(); |
||
418 | if (chan_count[chan] < 3) { |
||
419 | ++chan_count[chan]; |
||
420 | continue; |
||
421 | } else { |
||
422 | v->flags &= ~VLF_FIXED; |
||
423 | allowed_chans &= ~(1 << chan); |
||
424 | assert(allowed_chans); |
||
425 | } |
||
426 | } |
||
427 | |||
428 | v->gpr = 0; |
||
429 | |||
430 | gpr = 1; |
||
431 | rb.set_all(1); |
||
432 | |||
433 | |||
434 | rb.from_val_set(sh, interf); |
||
435 | |||
436 | RA_DUMP( |
||
437 | sblog << " regbits : "; |
||
438 | rb.dump(); |
||
439 | sblog << "\n"; |
||
440 | ); |
||
441 | |||
442 | while (allowed_chans && gpr.sel() < sh.num_nontemp_gpr()) { |
||
443 | |||
444 | while (rb.get(gpr - 1) == 0) |
||
445 | gpr = gpr + 1; |
||
446 | |||
447 | RA_DUMP( |
||
448 | sblog << " trying " << gpr << "\n"; |
||
449 | ); |
||
450 | |||
451 | unsigned chan = gpr.chan(); |
||
452 | if (chan_count[chan] < 3) { |
||
453 | ++chan_count[chan]; |
||
454 | |||
455 | if (v->chunk) { |
||
456 | vvec::iterator F = std::find(v->chunk->values.begin(), |
||
457 | v->chunk->values.end(), |
||
458 | v); |
||
459 | v->chunk->values.erase(F); |
||
460 | v->chunk = NULL; |
||
461 | } |
||
462 | |||
463 | assign_color(v, gpr); |
||
464 | break; |
||
465 | } else { |
||
466 | allowed_chans &= ~(1 << chan); |
||
467 | } |
||
468 | gpr = gpr + 1; |
||
469 | } |
||
470 | |||
471 | if (!gpr) { |
||
472 | sblog << "color_bs_constraint: failed...\n"; |
||
473 | assert(!"coloring failed"); |
||
474 | } |
||
475 | } |
||
476 | } |
||
477 | |||
478 | void ra_init::color(value* v) { |
||
479 | |||
480 | if (v->constraint && v->constraint->kind == CK_PACKED_BS) { |
||
481 | color_bs_constraint(v->constraint); |
||
482 | return; |
||
483 | } |
||
484 | |||
485 | if (v->chunk && v->chunk->is_fixed()) |
||
486 | return; |
||
487 | |||
488 | RA_DUMP( |
||
489 | sblog << "coloring "; |
||
490 | dump::dump_val(v); |
||
491 | sblog << " interferences "; |
||
492 | dump::dump_set(sh, v->interferences); |
||
493 | sblog << "\n"; |
||
494 | ); |
||
495 | |||
496 | if (v->is_reg_pinned()) { |
||
497 | assert(v->is_chan_pinned()); |
||
498 | assign_color(v, v->pin_gpr); |
||
499 | return; |
||
500 | } |
||
501 | |||
502 | regbits rb(sh, v->interferences); |
||
503 | sel_chan c; |
||
504 | |||
505 | if (v->is_chan_pinned()) { |
||
506 | RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << " "; ); |
||
507 | unsigned mask = 1 << v->pin_gpr.chan(); |
||
508 | c = rb.find_free_chans(mask) + v->pin_gpr.chan(); |
||
509 | } else { |
||
510 | unsigned cm = get_preferable_chan_mask(); |
||
511 | RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; ); |
||
512 | c = rb.find_free_chan_by_mask(cm); |
||
513 | } |
||
514 | |||
515 | assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed"); |
||
516 | assign_color(v, c); |
||
517 | } |
||
518 | |||
519 | void ra_init::assign_color(value* v, sel_chan c) { |
||
520 | add_prev_chan(c.chan()); |
||
521 | v->gpr = c; |
||
522 | RA_DUMP( |
||
523 | sblog << "colored "; |
||
524 | dump::dump_val(v); |
||
525 | sblog << " to " << c << "\n"; |
||
526 | ); |
||
527 | } |
||
528 | |||
529 | // =================================================== |
||
530 | |||
531 | int ra_split::run() { |
||
532 | split(sh.root); |
||
533 | return 0; |
||
534 | } |
||
535 | |||
536 | void ra_split::split_phi_src(container_node *loc, container_node *c, |
||
537 | unsigned id, bool loop) { |
||
538 | for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { |
||
539 | node *p = *I; |
||
540 | value* &v = p->src[id], *d = p->dst[0]; |
||
541 | assert(v); |
||
542 | |||
543 | if (!d->is_sgpr() || v->is_undef()) |
||
544 | continue; |
||
545 | |||
546 | value *t = sh.create_temp_value(); |
||
547 | if (loop && id == 0) |
||
548 | loc->insert_before(sh.create_copy_mov(t, v)); |
||
549 | else |
||
550 | loc->push_back(sh.create_copy_mov(t, v)); |
||
551 | v = t; |
||
552 | |||
553 | sh.coal.add_edge(v, d, coalescer::phi_cost); |
||
554 | } |
||
555 | } |
||
556 | |||
557 | void ra_split::split_phi_dst(node* loc, container_node *c, bool loop) { |
||
558 | for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { |
||
559 | node *p = *I; |
||
560 | value* &v = p->dst[0]; |
||
561 | assert(v); |
||
562 | |||
563 | if (!v->is_sgpr()) |
||
564 | continue; |
||
565 | |||
566 | value *t = sh.create_temp_value(); |
||
567 | node *cp = sh.create_copy_mov(v, t); |
||
568 | if (loop) |
||
569 | static_cast |
||
570 | else |
||
571 | loc->insert_after(cp); |
||
572 | v = t; |
||
573 | } |
||
574 | } |
||
575 | |||
576 | |||
577 | void ra_split::init_phi_constraints(container_node *c) { |
||
578 | for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { |
||
579 | node *p = *I; |
||
580 | ra_constraint *cc = sh.coal.create_constraint(CK_PHI); |
||
581 | cc->values.push_back(p->dst[0]); |
||
582 | |||
583 | for (vvec::iterator I = p->src.begin(), E = p->src.end(); I != E; ++I) { |
||
584 | value *v = *I; |
||
585 | if (v->is_sgpr()) |
||
586 | cc->values.push_back(v); |
||
587 | } |
||
588 | |||
589 | cc->update_values(); |
||
590 | } |
||
591 | } |
||
592 | |||
593 | void ra_split::split(container_node* n) { |
||
594 | |||
595 | if (n->type == NT_DEPART) { |
||
596 | depart_node *d = static_cast |
||
597 | if (d->target->phi) |
||
598 | split_phi_src(d, d->target->phi, d->dep_id, false); |
||
599 | } else if (n->type == NT_REPEAT) { |
||
600 | repeat_node *r = static_cast |
||
601 | if (r->target->loop_phi) |
||
602 | split_phi_src(r, r->target->loop_phi, r->rep_id, true); |
||
603 | } else if (n->type == NT_REGION) { |
||
604 | region_node *r = static_cast |
||
605 | if (r->phi) { |
||
606 | split_phi_dst(r, r->phi, false); |
||
607 | } |
||
608 | if (r->loop_phi) { |
||
609 | split_phi_dst(r->get_entry_code_location(), r->loop_phi, |
||
610 | true); |
||
611 | split_phi_src(r, r->loop_phi, 0, true); |
||
612 | } |
||
613 | } |
||
614 | |||
615 | for (node_riterator N, I = n->rbegin(), E = n->rend(); I != E; I = N) { |
||
616 | N = I; |
||
617 | ++N; |
||
618 | node *o = *I; |
||
619 | if (o->type == NT_OP) { |
||
620 | split_op(o); |
||
621 | } else if (o->is_container()) { |
||
622 | split(static_cast |
||
623 | } |
||
624 | } |
||
625 | |||
626 | if (n->type == NT_REGION) { |
||
627 | region_node *r = static_cast |
||
628 | if (r->phi) |
||
629 | init_phi_constraints(r->phi); |
||
630 | if (r->loop_phi) |
||
631 | init_phi_constraints(r->loop_phi); |
||
632 | } |
||
633 | } |
||
634 | |||
635 | void ra_split::split_op(node* n) { |
||
636 | switch(n->subtype) { |
||
637 | case NST_ALU_PACKED_INST: |
||
638 | split_alu_packed(static_cast |
||
639 | break; |
||
640 | case NST_FETCH_INST: |
||
641 | case NST_CF_INST: |
||
642 | split_vector_inst(n); |
||
643 | default: |
||
644 | break; |
||
645 | } |
||
646 | } |
||
647 | |||
648 | void ra_split::split_packed_ins(alu_packed_node *n) { |
||
649 | vvec vv = n->src; |
||
650 | vvec sv, dv; |
||
651 | |||
652 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
653 | |||
654 | value *&v = *I; |
||
655 | |||
656 | if (v && v->is_any_gpr() && !v->is_undef()) { |
||
657 | |||
658 | vvec::iterator F = std::find(sv.begin(), sv.end(), v); |
||
659 | value *t; |
||
660 | |||
661 | if (F != sv.end()) { |
||
662 | t = *(dv.begin() + (F - sv.begin())); |
||
663 | } else { |
||
664 | t = sh.create_temp_value(); |
||
665 | sv.push_back(v); |
||
666 | dv.push_back(t); |
||
667 | } |
||
668 | v = t; |
||
669 | } |
||
670 | } |
||
671 | |||
672 | unsigned cnt = sv.size(); |
||
673 | |||
674 | if (cnt > 0) { |
||
675 | n->src = vv; |
||
676 | for (vvec::iterator SI = sv.begin(), DI = dv.begin(), SE = sv.end(); |
||
677 | SI != SE; ++SI, ++DI) { |
||
678 | n->insert_before(sh.create_copy_mov(*DI, *SI)); |
||
679 | } |
||
680 | |||
681 | ra_constraint *c = sh.coal.create_constraint(CK_PACKED_BS); |
||
682 | c->values = dv; |
||
683 | c->update_values(); |
||
684 | } |
||
685 | } |
||
686 | |||
687 | // TODO handle other packed ops for cayman |
||
688 | void ra_split::split_alu_packed(alu_packed_node* n) { |
||
689 | switch (n->op()) { |
||
690 | case ALU_OP2_DOT4: |
||
691 | case ALU_OP2_CUBE: |
||
692 | split_packed_ins(n); |
||
693 | break; |
||
694 | default: |
||
695 | break; |
||
696 | } |
||
697 | } |
||
698 | |||
699 | void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) { |
||
700 | unsigned ch = 0; |
||
701 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I, ++ch) { |
||
702 | |||
703 | value* &o = *I; |
||
704 | |||
705 | if (o) { |
||
706 | |||
707 | assert(!o->is_dead()); |
||
708 | |||
709 | if (o->is_undef()) |
||
710 | continue; |
||
711 | |||
712 | if (allow_swz && o->is_float_0_or_1()) |
||
713 | continue; |
||
714 | |||
715 | value *t; |
||
716 | vvec::iterator F = |
||
717 | allow_swz ? std::find(v2.begin(), v2.end(), o) : v2.end(); |
||
718 | |||
719 | if (F != v2.end()) { |
||
720 | t = *(v1.begin() + (F - v2.begin())); |
||
721 | } else { |
||
722 | t = sh.create_temp_value(); |
||
723 | |||
724 | if (!allow_swz) { |
||
725 | t->flags |= VLF_PIN_CHAN; |
||
726 | t->pin_gpr = sel_chan(0, ch); |
||
727 | } |
||
728 | |||
729 | v2.push_back(o); |
||
730 | v1.push_back(t); |
||
731 | } |
||
732 | o = t; |
||
733 | } |
||
734 | } |
||
735 | } |
||
736 | |||
737 | void ra_split::split_vector_inst(node* n) { |
||
738 | ra_constraint *c; |
||
739 | |||
740 | bool call_fs = n->is_cf_op(CF_OP_CALL_FS); |
||
741 | bool no_src_swizzle = n->is_cf_inst() && (n->cf_op_flags() & CF_MEM); |
||
742 | |||
743 | no_src_swizzle |= n->is_fetch_op(FETCH_OP_VFETCH) || |
||
744 | n->is_fetch_op(FETCH_OP_SEMFETCH); |
||
745 | |||
746 | if (!n->src.empty() && !call_fs) { |
||
747 | |||
748 | // we may have more than one source vector - |
||
749 | // fetch instructions with FF_USEGRAD have gradient values in |
||
750 | // src vectors 1 (src[4-7] and 2 (src[8-11]) |
||
751 | |||
752 | unsigned nvec = n->src.size() >> 2; |
||
753 | assert(nvec << 2 == n->src.size()); |
||
754 | |||
755 | for (unsigned nv = 0; nv < nvec; ++nv) { |
||
756 | vvec sv, tv, nsrc(4); |
||
757 | unsigned arg_start = nv << 2; |
||
758 | |||
759 | std::copy(n->src.begin() + arg_start, |
||
760 | n->src.begin() + arg_start + 4, |
||
761 | nsrc.begin()); |
||
762 | |||
763 | split_vec(nsrc, tv, sv, !no_src_swizzle); |
||
764 | |||
765 | unsigned cnt = sv.size(); |
||
766 | |||
767 | if (no_src_swizzle || cnt) { |
||
768 | |||
769 | std::copy(nsrc.begin(), nsrc.end(), n->src.begin() + arg_start); |
||
770 | |||
771 | for(unsigned i = 0, s = tv.size(); i < s; ++i) { |
||
772 | n->insert_before(sh.create_copy_mov(tv[i], sv[i])); |
||
773 | } |
||
774 | |||
775 | c = sh.coal.create_constraint(CK_SAME_REG); |
||
776 | c->values = tv; |
||
777 | c->update_values(); |
||
778 | } |
||
779 | } |
||
780 | } |
||
781 | |||
782 | if (!n->dst.empty()) { |
||
783 | vvec sv, tv, ndst = n->dst; |
||
784 | |||
785 | split_vec(ndst, tv, sv, true); |
||
786 | |||
787 | if (sv.size()) { |
||
788 | n->dst = ndst; |
||
789 | |||
790 | node *lp = n; |
||
791 | for(unsigned i = 0, s = tv.size(); i < s; ++i) { |
||
792 | lp->insert_after(sh.create_copy_mov(sv[i], tv[i])); |
||
793 | lp = lp->next; |
||
794 | } |
||
795 | |||
796 | if (call_fs) { |
||
797 | for (unsigned i = 0, cnt = tv.size(); i < cnt; ++i) { |
||
798 | value *v = tv[i]; |
||
799 | value *s = sv[i]; |
||
800 | if (!v) |
||
801 | continue; |
||
802 | |||
803 | v->flags |= VLF_PIN_REG | VLF_PIN_CHAN; |
||
804 | s->flags &= ~(VLF_PIN_REG | VLF_PIN_CHAN); |
||
805 | sel_chan sel; |
||
806 | |||
807 | if (s->is_rel()) { |
||
808 | assert(s->rel->is_const()); |
||
809 | sel = sel_chan(s->select.sel() + |
||
810 | s->rel->get_const_value().u, |
||
811 | s->select.chan()); |
||
812 | } else |
||
813 | sel = s->select; |
||
814 | |||
815 | v->gpr = v->pin_gpr = sel; |
||
816 | v->fix(); |
||
817 | } |
||
818 | } else { |
||
819 | c = sh.coal.create_constraint(CK_SAME_REG); |
||
820 | c->values = tv; |
||
821 | c->update_values(); |
||
822 | } |
||
823 | } |
||
824 | } |
||
825 | } |
||
826 | |||
827 | void ra_init::add_prev_chan(unsigned chan) { |
||
828 | prev_chans = (prev_chans << 4) | (1 << chan); |
||
829 | } |
||
830 | |||
831 | unsigned ra_init::get_preferable_chan_mask() { |
||
832 | unsigned i, used_chans = 0; |
||
833 | unsigned chans = prev_chans; |
||
834 | |||
835 | for (i = 0; i < ra_tune; ++i) { |
||
836 | used_chans |= chans; |
||
837 | chans >>= 4; |
||
838 | } |
||
839 | |||
840 | return (~used_chans) & 0xF; |
||
841 | } |
||
842 | |||
843 | } // namespace r600_sb>><>><>>>>><>>><>><>><>><>><>>><>><>><>><>><>><>><>><>><>><>><>><>>><>><>><>>><>><>><>>><>><>><>><>><>><>=>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>=>>><>=>>><>><>>>><>>><>>><>><>><>><>>><>>><>><>><>>><>><> |