Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2013 Vadim Girlin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Vadim Girlin |
||
25 | */ |
||
26 | |||
27 | #define RA_DEBUG 0 |
||
28 | |||
29 | #if RA_DEBUG |
||
30 | #define RA_DUMP(q) do { q } while (0) |
||
31 | #else |
||
32 | #define RA_DUMP(q) |
||
33 | #endif |
||
34 | |||
35 | #include "sb_shader.h" |
||
36 | #include "sb_pass.h" |
||
37 | |||
38 | namespace r600_sb { |
||
39 | |||
40 | int ra_coalesce::run() { |
||
41 | return sh.coal.run(); |
||
42 | } |
||
43 | |||
44 | void coalescer::add_edge(value* a, value* b, unsigned cost) { |
||
45 | assert(a->is_sgpr() && b->is_sgpr()); |
||
46 | edges.insert(new ra_edge(a,b, cost)); |
||
47 | } |
||
48 | |||
49 | void coalescer::create_chunk(value *v) { |
||
50 | |||
51 | assert(v->is_sgpr()); |
||
52 | |||
53 | ra_chunk *c = new ra_chunk(); |
||
54 | |||
55 | c->values.push_back(v); |
||
56 | |||
57 | if (v->is_chan_pinned()) |
||
58 | c->flags |= RCF_PIN_CHAN; |
||
59 | if (v->is_reg_pinned()) { |
||
60 | c->flags |= RCF_PIN_REG; |
||
61 | } |
||
62 | |||
63 | c->pin = v->pin_gpr; |
||
64 | |||
65 | RA_DUMP( |
||
66 | sblog << "create_chunk: "; |
||
67 | dump_chunk(c); |
||
68 | ); |
||
69 | |||
70 | all_chunks.push_back(c); |
||
71 | v->chunk = c; |
||
72 | |||
73 | } |
||
74 | |||
75 | void coalescer::unify_chunks(ra_edge *e) { |
||
76 | ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk; |
||
77 | |||
78 | RA_DUMP( |
||
79 | sblog << "unify_chunks: "; |
||
80 | dump_chunk(c1); |
||
81 | dump_chunk(c2); |
||
82 | ); |
||
83 | |||
84 | if (c2->is_chan_pinned() && !c1->is_chan_pinned()) { |
||
85 | c1->flags |= RCF_PIN_CHAN; |
||
86 | c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan()); |
||
87 | } |
||
88 | |||
89 | if (c2->is_reg_pinned() && !c1->is_reg_pinned()) { |
||
90 | c1->flags |= RCF_PIN_REG; |
||
91 | c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan()); |
||
92 | } |
||
93 | |||
94 | c1->values.reserve(c1->values.size() + c2->values.size()); |
||
95 | |||
96 | for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E; |
||
97 | ++I) { |
||
98 | (*I)->chunk = c1; |
||
99 | c1->values.push_back(*I); |
||
100 | } |
||
101 | |||
102 | chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2); |
||
103 | assert(F != all_chunks.end()); |
||
104 | |||
105 | all_chunks.erase(F); |
||
106 | |||
107 | c1->cost += c2->cost + e->cost; |
||
108 | delete c2; |
||
109 | } |
||
110 | |||
111 | bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) { |
||
112 | unsigned pin_flags = (c1->flags & c2->flags) & |
||
113 | (RCF_PIN_CHAN | RCF_PIN_REG); |
||
114 | |||
115 | if ((pin_flags & RCF_PIN_CHAN) && |
||
116 | c1->pin.chan() != c2->pin.chan()) |
||
117 | return true; |
||
118 | |||
119 | if ((pin_flags & RCF_PIN_REG) && |
||
120 | c1->pin.sel() != c2->pin.sel()) |
||
121 | return true; |
||
122 | |||
123 | for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E; |
||
124 | ++I) { |
||
125 | value *v1 = *I; |
||
126 | |||
127 | for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E; |
||
128 | ++I) { |
||
129 | value *v2 = *I; |
||
130 | |||
131 | if (!v1->v_equal(v2) && v1->interferences.contains(v2)) |
||
132 | return true; |
||
133 | } |
||
134 | } |
||
135 | return false; |
||
136 | } |
||
137 | |||
138 | void coalescer::build_chunks() { |
||
139 | |||
140 | for (edge_queue::iterator I = edges.begin(), E = edges.end(); |
||
141 | I != E; ++I) { |
||
142 | |||
143 | ra_edge *e = *I; |
||
144 | |||
145 | if (!e->a->chunk) |
||
146 | create_chunk(e->a); |
||
147 | |||
148 | if (!e->b->chunk) |
||
149 | create_chunk(e->b); |
||
150 | |||
151 | ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk; |
||
152 | |||
153 | if (c1 == c2) { |
||
154 | c1->cost += e->cost; |
||
155 | } else if (!chunks_interference(c1, c2)) |
||
156 | unify_chunks(e); |
||
157 | } |
||
158 | } |
||
159 | |||
160 | ra_constraint* coalescer::create_constraint(constraint_kind kind) { |
||
161 | ra_constraint *c = new ra_constraint(kind); |
||
162 | all_constraints.push_back(c); |
||
163 | return c; |
||
164 | } |
||
165 | |||
166 | void coalescer::dump_edges() { |
||
167 | sblog << "######## affinity edges\n"; |
||
168 | |||
169 | for (edge_queue::iterator I = edges.begin(), E = edges.end(); |
||
170 | I != E; ++I) { |
||
171 | ra_edge* e = *I; |
||
172 | sblog << " ra_edge "; |
||
173 | dump::dump_val(e->a); |
||
174 | sblog << " <-> "; |
||
175 | dump::dump_val(e->b); |
||
176 | sblog << " cost = " << e->cost << "\n"; |
||
177 | } |
||
178 | } |
||
179 | |||
180 | void coalescer::dump_chunks() { |
||
181 | sblog << "######## chunks\n"; |
||
182 | |||
183 | for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end(); |
||
184 | I != E; ++I) { |
||
185 | ra_chunk* c = *I; |
||
186 | dump_chunk(c); |
||
187 | } |
||
188 | } |
||
189 | |||
190 | |||
191 | void coalescer::dump_constraint_queue() { |
||
192 | sblog << "######## constraints\n"; |
||
193 | |||
194 | for (constraint_queue::iterator I = constraints.begin(), |
||
195 | E = constraints.end(); I != E; ++I) { |
||
196 | ra_constraint* c = *I; |
||
197 | dump_constraint(c); |
||
198 | } |
||
199 | } |
||
200 | |||
201 | void coalescer::dump_chunk(ra_chunk* c) { |
||
202 | sblog << " ra_chunk cost = " << c->cost << " : "; |
||
203 | dump::dump_vec(c->values); |
||
204 | |||
205 | if (c->flags & RCF_PIN_REG) |
||
206 | sblog << " REG = " << c->pin.sel(); |
||
207 | |||
208 | if (c->flags & RCF_PIN_CHAN) |
||
209 | sblog << " CHAN = " << c->pin.chan(); |
||
210 | |||
211 | sblog << (c->flags & RCF_GLOBAL ? " GLOBAL" : ""); |
||
212 | |||
213 | sblog << "\n"; |
||
214 | } |
||
215 | |||
216 | void coalescer::dump_constraint(ra_constraint* c) { |
||
217 | sblog << " ra_constraint: "; |
||
218 | switch (c->kind) { |
||
219 | case CK_PACKED_BS: sblog << "PACKED_BS"; break; |
||
220 | case CK_PHI: sblog << "PHI"; break; |
||
221 | case CK_SAME_REG: sblog << "SAME_REG"; break; |
||
222 | default: sblog << "UNKNOWN_KIND"; assert(0); break; |
||
223 | } |
||
224 | |||
225 | sblog << " cost = " << c->cost << " : "; |
||
226 | dump::dump_vec(c->values); |
||
227 | |||
228 | sblog << "\n"; |
||
229 | } |
||
230 | |||
231 | void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) { |
||
232 | |||
233 | for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E; |
||
234 | ++I) { |
||
235 | value *v = *I; |
||
236 | s.add_set(v->interferences); |
||
237 | } |
||
238 | s.remove_vec(c->values); |
||
239 | } |
||
240 | |||
241 | void coalescer::build_chunk_queue() { |
||
242 | for (chunk_vec::iterator I = all_chunks.begin(), |
||
243 | E = all_chunks.end(); I != E; ++I) { |
||
244 | ra_chunk *c = *I; |
||
245 | |||
246 | if (!c->is_fixed()) |
||
247 | chunks.insert(c); |
||
248 | } |
||
249 | } |
||
250 | |||
251 | void coalescer::build_constraint_queue() { |
||
252 | for (constraint_vec::iterator I = all_constraints.begin(), |
||
253 | E = all_constraints.end(); I != E; ++I) { |
||
254 | ra_constraint *c = *I; |
||
255 | unsigned cost = 0; |
||
256 | |||
257 | if (c->values.empty() || !c->values.front()->is_sgpr()) |
||
258 | continue; |
||
259 | |||
260 | if (c->kind != CK_SAME_REG) |
||
261 | continue; |
||
262 | |||
263 | for (vvec::iterator I = c->values.begin(), E = c->values.end(); |
||
264 | I != E; ++I) { |
||
265 | value *v = *I; |
||
266 | if (!v->chunk) |
||
267 | create_chunk(v); |
||
268 | else |
||
269 | cost += v->chunk->cost; |
||
270 | } |
||
271 | c->cost = cost; |
||
272 | constraints.insert(c); |
||
273 | } |
||
274 | } |
||
275 | |||
// Assign a color (sel_chan = register + channel position) to every
// non-fixed, multi-value chunk in the queue. For each chunk the occupied
// positions of all interfering values are gathered into a bitset, then a
// free position is searched: pass 0 tries only the chunk's pinned
// register, pass 1 scans all non-temp gprs. Chan-pinned chunks search a
// single channel; others try all 4.
void coalescer::color_chunks() {

	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
			I != E; ++I) {
		ra_chunk *c = *I;
		// skip chunks already colored by constraints and trivial
		// single-value chunks
		if (c->is_fixed() || c->values.size() == 1)
			continue;

		sb_bitset rb;     // occupied positions, indexed by sel_chan value
		val_set interf;   // all values interfering with this chunk

		get_chunk_interferences(c, interf);

		RA_DUMP(
			sblog << "color_chunks: ";
			dump_chunk(c);
			sblog << "\n interferences: ";
			dump::dump_set(sh,interf);
			sblog << "\n";
		);

		init_reg_bitset(rb, interf);

		// reg-pinned chunks get a pass 0 that tries the pinned reg first
		unsigned pass = c->is_reg_pinned() ? 0 : 1;

		// channel search range: single pinned channel, or all 4
		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;

		// 0 == "not found yet" — presumably sel_chan encodings are
		// always nonzero; TODO confirm against sel_chan's definition
		unsigned color = 0;

		while (pass < 2) {

			unsigned rs, re;

			if (pass == 0) {
				// pinned register only
				rs = c->pin.sel();
				re = rs + 1;
			} else {
				// whole non-temp gpr file
				rs = 0;
				re = sh.num_nontemp_gpr();
			}

			for (unsigned reg = rs; reg < re; ++reg) {
				for (unsigned chan = cs; chan < ce; ++chan) {
					unsigned bit = sel_chan(reg, chan);
					// position beyond the bitset is known-free
					if (bit >= rb.size() || !rb.get(bit)) {
						color = bit;
						break;
					}
				}
				if (color)
					break;
			}

			if (color)
				break;

			++pass;
		}

		assert(color);
		color_chunk(c, color);
	}
}
||
340 | |||
341 | void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) { |
||
342 | |||
343 | for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) { |
||
344 | value *v = *I; |
||
345 | |||
346 | if (!v->is_any_gpr()) |
||
347 | continue; |
||
348 | |||
349 | unsigned gpr = v->get_final_gpr(); |
||
350 | if (!gpr) |
||
351 | continue; |
||
352 | |||
353 | if (gpr) { |
||
354 | if (gpr >= bs.size()) |
||
355 | bs.resize(gpr + 64); |
||
356 | bs.set(gpr, 1); |
||
357 | } |
||
358 | } |
||
359 | } |
||
360 | |||
// Assign 'color' to all values of chunk c. A value whose own pinning
// conflicts with the chosen color is detached into a fresh chunk instead
// of being colored. Finally the chunk's pin is set to the color, and
// reg-pinned chunks are fixed.
void coalescer::color_chunk(ra_chunk *c, sel_chan color) {

	// iterate over a copy: detach_value() erases from c->values
	vvec vv = c->values;

	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
			++I) {
		value *v = *I;

		// value pinned to a different register — split it off
		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
			detach_value(v);
			continue;
		}

		// value pinned to a different channel — split it off
		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
			detach_value(v);
			continue;
		}

		v->gpr = color;

		// values under a PHI constraint are fixed with this assignment
		if (v->constraint && v->constraint->kind == CK_PHI)
			v->fix();


		RA_DUMP(
			sblog << " assigned " << color << " to ";
			dump::dump_val(v);
			sblog << "\n";
		);
	}

	c->pin = color;

	if (c->is_reg_pinned()) {
		c->fix();
	}
}
||
398 | |||
399 | coalescer::~coalescer() { |
||
400 | |||
401 | // FIXME use pool allocator ?? |
||
402 | |||
403 | for (constraint_vec::iterator I = all_constraints.begin(), |
||
404 | E = all_constraints.end(); I != E; ++I) { |
||
405 | delete (*I); |
||
406 | } |
||
407 | |||
408 | for (chunk_vec::iterator I = all_chunks.begin(), |
||
409 | E = all_chunks.end(); I != E; ++I) { |
||
410 | delete (*I); |
||
411 | } |
||
412 | |||
413 | for (edge_queue::iterator I = edges.begin(), E = edges.end(); |
||
414 | I != E; ++I) { |
||
415 | delete (*I); |
||
416 | } |
||
417 | } |
||
418 | |||
419 | int coalescer::run() { |
||
420 | int r; |
||
421 | |||
422 | RA_DUMP( dump_edges(); ); |
||
423 | |||
424 | build_chunks(); |
||
425 | RA_DUMP( dump_chunks(); ); |
||
426 | |||
427 | build_constraint_queue(); |
||
428 | RA_DUMP( dump_constraint_queue(); ); |
||
429 | |||
430 | if ((r = color_constraints())) |
||
431 | return r; |
||
432 | |||
433 | build_chunk_queue(); |
||
434 | color_chunks(); |
||
435 | |||
436 | return 0; |
||
437 | } |
||
438 | |||
// Intentionally empty: no explicit coloring is done for PHI constraints
// here — values carrying a CK_PHI constraint are fixed as they are
// colored in color_chunk(). NOTE(review): looks like a deliberate
// placeholder; confirm nothing further is required for PHI groups.
void coalescer::color_phi_constraint(ra_constraint* c) {
}
||
441 | |||
// Remove v from its current chunk and give it a fresh single-value
// chunk. Returns the new chunk (also reachable as v->chunk afterwards).
ra_chunk* coalescer::detach_value(value *v) {

	// erase v from its old chunk's value list
	vvec::iterator F = std::find(v->chunk->values.begin(),
			v->chunk->values.end(), v);

	assert(F != v->chunk->values.end());
	v->chunk->values.erase(F);
	// note: create_chunk() rebinds v->chunk to the new chunk
	create_chunk(v);

	// a reg-pinned value keeps its register, so the new chunk is fixed
	if (v->is_reg_pinned()) {
		v->chunk->fix();
	}

	RA_DUMP(
		sblog << " detached : ";
		dump_chunk(v->chunk);
	);

	return v->chunk;

}
||
463 | |||
// Color a CK_SAME_REG constraint: all constrained values (at most 4)
// must land in the same register, each in a distinct channel. Searches
// channel permutations and register numbers for the lowest-numbered
// conflict-free assignment. Pass 0 restricts the search to a pinned
// register if any chunk is reg-pinned; pass 1 scans all non-temp gprs.
// Returns 0 on success, -1 if no register can hold the group.
int coalescer::color_reg_constraint(ra_constraint *c) {
	unsigned k, cnt = c->values.size();
	vvec & cv = c->values;

	ra_chunk *ch[4];                 // chunk of each constrained value
	unsigned swz[4] = {0, 1, 2, 3};  // candidate channel assignment
	val_set interf[4];               // interferences per chunk
	sb_bitset rb[4];                 // occupied positions per chunk

	bool reg_pinned = false;
	unsigned pin_reg = ~0;

	unsigned chan_mask = 0;          // channels already claimed by pins

	k = 0;
	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
		value *v = *I;

		if (!v->chunk)
			create_chunk(v);

		ch[k] = v->chunk;

		if (v->chunk->is_chan_pinned()) {
			unsigned chan = 1 << v->chunk->pin.chan();

			if (chan & chan_mask) { // channel already in use
				// two values pinned to the same channel can't share a
				// register — detach this one into an unpinned chunk
				ch[k] = detach_value(v);
				assert(!ch[k]->is_chan_pinned());
			} else {
				chan_mask |= chan;
			}
		}

		// first reg-pinned chunk decides the pass-0 register
		if (v->chunk->is_reg_pinned()) {
			if (!reg_pinned) {
				reg_pinned = true;
				pin_reg = v->chunk->pin.sel();
			}
		}

		get_chunk_interferences(ch[k], interf[k]);
		init_reg_bitset(rb[k], interf[k]);
	}

	unsigned start_reg, end_reg;

	start_reg = 0;
	end_reg = sh.num_nontemp_gpr();

	unsigned min_reg = end_reg;   // best (lowest) feasible register so far
	unsigned min_swz[4];          // swizzle that achieved min_reg
	unsigned i, pass = reg_pinned ? 0 : 1;

	bool done = false;

	while (pass < 2) {

		unsigned rs, re;

		if (pass == 0) {
			// pinned register only
			re = pin_reg + 1;
			rs = pin_reg;
		} else {
			re = end_reg;
			rs = start_reg;
		}

		min_reg = re;

		// cycle on swizzle combinations
		do {
			// skip permutations that violate a chunk's channel pin
			for (i = 0; i < cnt; ++i) {
				if (ch[i]->flags & RCF_PIN_CHAN)
					if (ch[i]->pin.chan() != swz[i])
						break;
			}
			if (i != cnt)
				continue;

			// looking for minimal reg number such that the constrained chunks
			// may be colored with the current swizzle combination
			// (min_reg narrows the scan as better candidates are found)
			for (unsigned reg = rs; reg < min_reg; ++reg) {
				for (i = 0; i < cnt; ++i) {
					unsigned bit = sel_chan(reg, swz[i]);
					if (bit < rb[i].size() && rb[i].get(bit))
						break;
				}
				if (i == cnt) {
					done = true;
					min_reg = reg;
					std::copy(swz, swz + 4, min_swz);
					break;
				}
			}

			// on the pinned-register pass, the first fit is final
			if (pass == 0 && done)
				break;

		} while (std::next_permutation(swz, swz + 4));

		if (!done && pass) {
			sblog << "sb: ra_coalesce - out of registers\n";
			return -1;
		}

		if (pass == 0 && done)
			break;

		++pass;
	};

	assert(done);

	RA_DUMP(
		sblog << "min reg = " << min_reg << " min_swz = "
				<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
	);

	// apply the chosen register + per-value channel to each chunk
	for (i = 0; i < cnt; ++i) {
		sel_chan color(min_reg, min_swz[i]);
		ra_chunk *cc = ch[i];

		if (cc->is_fixed()) {
			// already-fixed chunk colored differently: detach the value
			// so only it gets the constraint's color
			if (cc->pin != color)
				cc = detach_value(cv[i]);
			else
				continue;
		}

		color_chunk(cc, color);
		cc->fix();
		cc->set_prealloc();
	}

	return 0;
}
||
601 | |||
602 | int coalescer::color_constraints() { |
||
603 | int r; |
||
604 | |||
605 | for (constraint_queue::iterator I = constraints.begin(), |
||
606 | E = constraints.end(); I != E; ++I) { |
||
607 | |||
608 | ra_constraint *c = *I; |
||
609 | |||
610 | RA_DUMP( |
||
611 | sblog << "color_constraints: "; |
||
612 | dump_constraint(c); |
||
613 | ); |
||
614 | |||
615 | if (c->kind == CK_SAME_REG) { |
||
616 | if ((r = color_reg_constraint(c))) |
||
617 | return r; |
||
618 | } else if (c->kind == CK_PHI) |
||
619 | color_phi_constraint(c); |
||
620 | } |
||
621 | return 0; |
||
622 | } |
||
623 | |||
624 | } // namespace r600_sb><>>><>><>><>><>><>><>><>><>><>>>>>>><>><>><>><>><>><>>>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>->><>><>><>><>><> |