Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright 2013 Vadim Girlin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Vadim Girlin |
||
25 | */ |
||
26 | |||
27 | #define GCM_DEBUG 0 |
||
28 | |||
29 | #if GCM_DEBUG |
||
30 | #define GCM_DUMP(a) do { a } while(0); |
||
31 | #else |
||
32 | #define GCM_DUMP(a) |
||
33 | #endif |
||
34 | |||
#include <map>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
||
40 | |||
41 | namespace r600_sb { |
||
42 | |||
43 | int gcm::run() { |
||
44 | |||
45 | GCM_DUMP( sblog << "==== GCM ==== \n"; sh.dump_ir(); ); |
||
46 | |||
47 | collect_instructions(sh.root, true); |
||
48 | |||
49 | init_def_count(uses, pending); |
||
50 | |||
51 | for (node_iterator N, I = pending.begin(), E = pending.end(); |
||
52 | I != E; I = N) { |
||
53 | N = I; |
||
54 | ++N; |
||
55 | node *o = *I; |
||
56 | |||
57 | GCM_DUMP( |
||
58 | sblog << "pending : "; |
||
59 | dump::dump_op(o); |
||
60 | sblog << "\n"; |
||
61 | ); |
||
62 | |||
63 | if (td_is_ready(o)) { |
||
64 | |||
65 | GCM_DUMP( |
||
66 | sblog << " ready: "; |
||
67 | dump::dump_op(o); |
||
68 | sblog << "\n"; |
||
69 | ); |
||
70 | pending.remove_node(o); |
||
71 | ready.push_back(o); |
||
72 | } else { |
||
73 | } |
||
74 | } |
||
75 | |||
76 | sched_early(sh.root); |
||
77 | |||
78 | if (!pending.empty()) { |
||
79 | sblog << "##### gcm_sched_early_pass: unscheduled ops:\n"; |
||
80 | dump::dump_op(pending.front()); |
||
81 | } |
||
82 | |||
83 | assert(pending.empty()); |
||
84 | |||
85 | GCM_DUMP( sh.dump_ir(); ); |
||
86 | |||
87 | GCM_DUMP( sblog << "\n\n ############## gcm late\n\n"; ); |
||
88 | |||
89 | collect_instructions(sh.root, false); |
||
90 | |||
91 | init_use_count(uses, pending); |
||
92 | |||
93 | sched_late(sh.root); |
||
94 | if (!pending.empty()) { |
||
95 | sblog << "##### gcm_sched_late_pass: unscheduled ops:\n"; |
||
96 | dump::dump_op(pending.front()); |
||
97 | } |
||
98 | |||
99 | assert(ucs_level == 0); |
||
100 | assert(pending.empty()); |
||
101 | |||
102 | return 0; |
||
103 | } |
||
104 | |||
105 | |||
106 | void gcm::collect_instructions(container_node *c, bool early_pass) { |
||
107 | if (c->is_bb()) { |
||
108 | |||
109 | if (early_pass) { |
||
110 | for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { |
||
111 | node *n = *I; |
||
112 | if (n->flags & NF_DONT_MOVE) { |
||
113 | op_info &o = op_map[n]; |
||
114 | o.top_bb = o.bottom_bb = static_cast |
||
115 | } |
||
116 | } |
||
117 | } |
||
118 | |||
119 | pending.append_from(c); |
||
120 | return; |
||
121 | } |
||
122 | |||
123 | for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { |
||
124 | if (I->is_container()) { |
||
125 | collect_instructions(static_cast |
||
126 | } |
||
127 | } |
||
128 | } |
||
129 | |||
130 | void gcm::sched_early(container_node *n) { |
||
131 | |||
132 | region_node *r = |
||
133 | (n->type == NT_REGION) ? static_cast |
||
134 | |||
135 | if (r && r->loop_phi) { |
||
136 | sched_early(r->loop_phi); |
||
137 | } |
||
138 | |||
139 | for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) { |
||
140 | if (I->type == NT_OP) { |
||
141 | node *op = *I; |
||
142 | if (op->subtype == NST_PHI) { |
||
143 | td_release_uses(op->dst); |
||
144 | } |
||
145 | } else if (I->is_container()) { |
||
146 | if (I->subtype == NST_BB) { |
||
147 | bb_node* bb = static_cast |
||
148 | td_sched_bb(bb); |
||
149 | } else { |
||
150 | sched_early(static_cast |
||
151 | } |
||
152 | } |
||
153 | } |
||
154 | |||
155 | if (r && r->phi) { |
||
156 | sched_early(r->phi); |
||
157 | } |
||
158 | } |
||
159 | |||
160 | void gcm::td_schedule(bb_node *bb, node *n) { |
||
161 | GCM_DUMP( |
||
162 | sblog << "scheduling : "; |
||
163 | dump::dump_op(n); |
||
164 | sblog << "\n"; |
||
165 | ); |
||
166 | td_release_uses(n->dst); |
||
167 | |||
168 | bb->push_back(n); |
||
169 | |||
170 | op_map[n].top_bb = bb; |
||
171 | |||
172 | } |
||
173 | |||
174 | void gcm::td_sched_bb(bb_node* bb) { |
||
175 | GCM_DUMP( |
||
176 | sblog << "td scheduling BB_" << bb->id << "\n"; |
||
177 | ); |
||
178 | |||
179 | while (!ready.empty()) { |
||
180 | for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E; |
||
181 | I = N) { |
||
182 | N = I; ++N; |
||
183 | td_schedule(bb, *I); |
||
184 | ready.erase(I); |
||
185 | } |
||
186 | } |
||
187 | } |
||
188 | |||
189 | bool gcm::td_is_ready(node* n) { |
||
190 | return uses[n] == 0; |
||
191 | } |
||
192 | |||
193 | void gcm::td_release_val(value *v) { |
||
194 | |||
195 | GCM_DUMP( |
||
196 | sblog << "td checking uses: "; |
||
197 | dump::dump_val(v); |
||
198 | sblog << "\n"; |
||
199 | ); |
||
200 | |||
201 | use_info *u = v->uses; |
||
202 | while (u) { |
||
203 | if (u->op->parent != &pending) { |
||
204 | u = u->next; |
||
205 | continue; |
||
206 | } |
||
207 | |||
208 | GCM_DUMP( |
||
209 | sblog << "td used in "; |
||
210 | dump::dump_op(u->op); |
||
211 | sblog << "\n"; |
||
212 | ); |
||
213 | |||
214 | if (--uses[u->op] == 0) { |
||
215 | GCM_DUMP( |
||
216 | sblog << "td released : "; |
||
217 | dump::dump_op(u->op); |
||
218 | sblog << "\n"; |
||
219 | ); |
||
220 | |||
221 | pending.remove_node(u->op); |
||
222 | ready.push_back(u->op); |
||
223 | } |
||
224 | u = u->next; |
||
225 | } |
||
226 | |||
227 | } |
||
228 | |||
229 | void gcm::td_release_uses(vvec& v) { |
||
230 | for (vvec::iterator I = v.begin(), E = v.end(); I != E; ++I) { |
||
231 | value *v = *I; |
||
232 | if (!v) |
||
233 | continue; |
||
234 | |||
235 | if (v->is_rel()) |
||
236 | td_release_uses(v->mdef); |
||
237 | else |
||
238 | td_release_val(v); |
||
239 | } |
||
240 | } |
||
241 | |||
242 | void gcm::sched_late(container_node *n) { |
||
243 | |||
244 | bool stack_pushed = false; |
||
245 | |||
246 | if (n->is_depart()) { |
||
247 | depart_node *d = static_cast |
||
248 | push_uc_stack(); |
||
249 | stack_pushed = true; |
||
250 | bu_release_phi_defs(d->target->phi, d->dep_id); |
||
251 | } else if (n->is_repeat()) { |
||
252 | repeat_node *r = static_cast |
||
253 | assert(r->target->loop_phi); |
||
254 | push_uc_stack(); |
||
255 | stack_pushed = true; |
||
256 | bu_release_phi_defs(r->target->loop_phi, r->rep_id); |
||
257 | } |
||
258 | |||
259 | for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) { |
||
260 | if (I->is_container()) { |
||
261 | if (I->subtype == NST_BB) { |
||
262 | bb_node* bb = static_cast |
||
263 | bu_sched_bb(bb); |
||
264 | } else { |
||
265 | sched_late(static_cast |
||
266 | } |
||
267 | } |
||
268 | } |
||
269 | |||
270 | if (n->type == NT_IF) { |
||
271 | if_node *f = static_cast |
||
272 | if (f->cond) |
||
273 | pending_defs.push_back(f->cond); |
||
274 | } else if (n->type == NT_REGION) { |
||
275 | region_node *r = static_cast |
||
276 | if (r->loop_phi) |
||
277 | bu_release_phi_defs(r->loop_phi, 0); |
||
278 | } |
||
279 | |||
280 | if (stack_pushed) |
||
281 | pop_uc_stack(); |
||
282 | |||
283 | } |
||
284 | |||
285 | void gcm::bu_sched_bb(bb_node* bb) { |
||
286 | GCM_DUMP( |
||
287 | sblog << "bu scheduling BB_" << bb->id << "\n"; |
||
288 | ); |
||
289 | |||
290 | bu_bb = bb; |
||
291 | |||
292 | if (!pending_nodes.empty()) { |
||
293 | GCM_DUMP( |
||
294 | sblog << "pending nodes:\n"; |
||
295 | ); |
||
296 | |||
297 | // TODO consider sorting the exports by array_base, |
||
298 | // possibly it can improve performance |
||
299 | |||
300 | for (node_list::iterator I = pending_nodes.begin(), |
||
301 | E = pending_nodes.end(); I != E; ++I) { |
||
302 | bu_release_op(*I); |
||
303 | } |
||
304 | pending_nodes.clear(); |
||
305 | GCM_DUMP( |
||
306 | sblog << "pending nodes processed...\n"; |
||
307 | ); |
||
308 | } |
||
309 | |||
310 | |||
311 | if (!pending_defs.empty()) { |
||
312 | for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end(); |
||
313 | I != E; ++I) { |
||
314 | bu_release_val(*I); |
||
315 | } |
||
316 | pending_defs.clear(); |
||
317 | } |
||
318 | |||
319 | for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end(); |
||
320 | I != E; I = N) { |
||
321 | N = I; |
||
322 | ++N; |
||
323 | node *n = *I; |
||
324 | if (op_map[n].bottom_bb == bb) { |
||
325 | add_ready(*I); |
||
326 | ready_above.erase(I); |
||
327 | } |
||
328 | } |
||
329 | |||
330 | unsigned cnt_ready[SQ_NUM]; |
||
331 | |||
332 | container_node *clause = NULL; |
||
333 | unsigned last_inst_type = ~0; |
||
334 | unsigned last_count = 0; |
||
335 | |||
336 | bool s = true; |
||
337 | while (s) { |
||
338 | node *n; |
||
339 | |||
340 | s = false; |
||
341 | |||
342 | unsigned ready_mask = 0; |
||
343 | |||
344 | for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { |
||
345 | if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty()) |
||
346 | ready_mask |= (1 << sq); |
||
347 | } |
||
348 | |||
349 | if (!ready_mask) { |
||
350 | for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { |
||
351 | if (!bu_ready_early[sq].empty()) { |
||
352 | node *n = bu_ready_early[sq].front(); |
||
353 | bu_ready_early[sq].pop_front(); |
||
354 | bu_ready[sq].push_back(n); |
||
355 | break; |
||
356 | } |
||
357 | } |
||
358 | } |
||
359 | |||
360 | for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) { |
||
361 | |||
362 | if (sq == SQ_CF && pending_exec_mask_update) { |
||
363 | pending_exec_mask_update = false; |
||
364 | sq = SQ_ALU; |
||
365 | --sq; |
||
366 | continue; |
||
367 | } |
||
368 | |||
369 | if (!bu_ready_next[sq].empty()) |
||
370 | bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]); |
||
371 | |||
372 | cnt_ready[sq] = bu_ready[sq].size(); |
||
373 | |||
374 | if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold && |
||
375 | cnt_ready[sq] < ctx.max_fetch/2 && |
||
376 | !bu_ready_next[SQ_ALU].empty()) { |
||
377 | sq = SQ_ALU; |
||
378 | --sq; |
||
379 | continue; |
||
380 | } |
||
381 | |||
382 | while (!bu_ready[sq].empty()) { |
||
383 | |||
384 | if (last_inst_type != sq) { |
||
385 | clause = NULL; |
||
386 | last_count = 0; |
||
387 | last_inst_type = sq; |
||
388 | } |
||
389 | |||
390 | // simple heuristic to limit register pressure, |
||
391 | if (sq == SQ_ALU && live_count > rp_threshold && |
||
392 | (!bu_ready[SQ_TEX].empty() || |
||
393 | !bu_ready[SQ_VTX].empty() || |
||
394 | !bu_ready_next[SQ_TEX].empty() || |
||
395 | !bu_ready_next[SQ_VTX].empty())) { |
||
396 | GCM_DUMP( sblog << "switching to fetch (regpressure)\n"; ); |
||
397 | break; |
||
398 | } |
||
399 | |||
400 | n = bu_ready[sq].front(); |
||
401 | |||
402 | // real count (e.g. SAMPLE_G will be expanded to 3 instructions, |
||
403 | // 2 SET_GRAD_ + 1 SAMPLE_G |
||
404 | unsigned ncnt = 1; |
||
405 | if (n->is_fetch_inst() && n->src.size() == 12) { |
||
406 | ncnt = 3; |
||
407 | } |
||
408 | |||
409 | if ((sq == SQ_TEX || sq == SQ_VTX) && |
||
410 | ((last_count >= ctx.max_fetch/2 && |
||
411 | check_alu_ready_count(24)) || |
||
412 | last_count + ncnt > ctx.max_fetch)) |
||
413 | break; |
||
414 | else if (sq == SQ_CF && last_count > 4 && |
||
415 | check_alu_ready_count(24)) |
||
416 | break; |
||
417 | |||
418 | bu_ready[sq].pop_front(); |
||
419 | |||
420 | if (sq != SQ_CF) { |
||
421 | if (!clause) { |
||
422 | clause = sh.create_clause(sq == SQ_ALU ? |
||
423 | NST_ALU_CLAUSE : |
||
424 | sq == SQ_TEX ? NST_TEX_CLAUSE : |
||
425 | NST_VTX_CLAUSE); |
||
426 | bb->push_front(clause); |
||
427 | } |
||
428 | } else { |
||
429 | clause = bb; |
||
430 | } |
||
431 | |||
432 | bu_schedule(clause, n); |
||
433 | s = true; |
||
434 | last_count += ncnt; |
||
435 | } |
||
436 | } |
||
437 | } |
||
438 | |||
439 | bu_bb = NULL; |
||
440 | |||
441 | GCM_DUMP( |
||
442 | sblog << "bu finished scheduling BB_" << bb->id << "\n"; |
||
443 | ); |
||
444 | } |
||
445 | |||
446 | void gcm::bu_release_defs(vvec& v, bool src) { |
||
447 | for (vvec::reverse_iterator I = v.rbegin(), E = v.rend(); I != E; ++I) { |
||
448 | value *v = *I; |
||
449 | if (!v || v->is_readonly()) |
||
450 | continue; |
||
451 | |||
452 | if (v->is_rel()) { |
||
453 | if (!v->rel->is_readonly()) |
||
454 | bu_release_val(v->rel); |
||
455 | bu_release_defs(v->muse, true); |
||
456 | } else if (src) |
||
457 | bu_release_val(v); |
||
458 | else { |
||
459 | if (live.remove_val(v)) { |
||
460 | --live_count; |
||
461 | } |
||
462 | } |
||
463 | } |
||
464 | } |
||
465 | |||
466 | void gcm::push_uc_stack() { |
||
467 | GCM_DUMP( |
||
468 | sblog << "pushing use count stack prev_level " << ucs_level |
||
469 | << " new level " << (ucs_level + 1) << "\n"; |
||
470 | ); |
||
471 | ++ucs_level; |
||
472 | if (ucs_level == nuc_stk.size()) { |
||
473 | nuc_stk.resize(ucs_level + 1); |
||
474 | } |
||
475 | else { |
||
476 | nuc_stk[ucs_level].clear(); |
||
477 | } |
||
478 | } |
||
479 | |||
480 | bool gcm::bu_is_ready(node* n) { |
||
481 | nuc_map &cm = nuc_stk[ucs_level]; |
||
482 | nuc_map::iterator F = cm.find(n); |
||
483 | unsigned uc = (F == cm.end() ? 0 : F->second); |
||
484 | return uc == uses[n]; |
||
485 | } |
||
486 | |||
487 | void gcm::bu_schedule(container_node* c, node* n) { |
||
488 | GCM_DUMP( |
||
489 | sblog << "bu scheduling : "; |
||
490 | dump::dump_op(n); |
||
491 | sblog << "\n"; |
||
492 | ); |
||
493 | |||
494 | assert(op_map[n].bottom_bb == bu_bb); |
||
495 | |||
496 | bu_release_defs(n->src, true); |
||
497 | bu_release_defs(n->dst, false); |
||
498 | |||
499 | c->push_front(n); |
||
500 | } |
||
501 | |||
502 | void gcm::dump_uc_stack() { |
||
503 | sblog << "##### uc_stk start ####\n"; |
||
504 | for (unsigned l = 0; l <= ucs_level; ++l) { |
||
505 | nuc_map &m = nuc_stk[l]; |
||
506 | |||
507 | sblog << "nuc_stk[" << l << "] : @" << &m << "\n"; |
||
508 | |||
509 | for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) { |
||
510 | sblog << " uc " << I->second << " for "; |
||
511 | dump::dump_op(I->first); |
||
512 | sblog << "\n"; |
||
513 | } |
||
514 | } |
||
515 | sblog << "##### uc_stk end ####\n"; |
||
516 | } |
||
517 | |||
518 | void gcm::pop_uc_stack() { |
||
519 | nuc_map &pm = nuc_stk[ucs_level]; |
||
520 | --ucs_level; |
||
521 | nuc_map &cm = nuc_stk[ucs_level]; |
||
522 | |||
523 | GCM_DUMP( |
||
524 | sblog << "merging use stack from level " << (ucs_level+1) |
||
525 | << " to " << ucs_level << "\n"; |
||
526 | ); |
||
527 | |||
528 | for (nuc_map::iterator N, I = pm.begin(), E = pm.end(); I != E; ++I) { |
||
529 | node *n = I->first; |
||
530 | |||
531 | GCM_DUMP( |
||
532 | sblog << " " << cm[n] << " += " << I->second << " for "; |
||
533 | dump::dump_op(n); |
||
534 | sblog << "\n"; |
||
535 | ); |
||
536 | |||
537 | unsigned uc = cm[n] += I->second; |
||
538 | |||
539 | if (n->parent == &pending && uc == uses[n]) { |
||
540 | cm.erase(n); |
||
541 | pending_nodes.push_back(n); |
||
542 | GCM_DUMP( |
||
543 | sblog << "pushed pending_node due to stack pop "; |
||
544 | dump::dump_op(n); |
||
545 | sblog << "\n"; |
||
546 | ); |
||
547 | } |
||
548 | } |
||
549 | } |
||
550 | |||
551 | void gcm::bu_find_best_bb(node *n, op_info &oi) { |
||
552 | |||
553 | GCM_DUMP( |
||
554 | sblog << " find best bb : "; |
||
555 | dump::dump_op(n); |
||
556 | sblog << "\n"; |
||
557 | ); |
||
558 | |||
559 | if (oi.bottom_bb) |
||
560 | return; |
||
561 | |||
562 | // don't hoist generated copies |
||
563 | if (n->flags & NF_DONT_HOIST) { |
||
564 | oi.bottom_bb = bu_bb; |
||
565 | return; |
||
566 | } |
||
567 | |||
568 | bb_node* best_bb = bu_bb; |
||
569 | bb_node* top_bb = oi.top_bb; |
||
570 | assert(oi.top_bb && !oi.bottom_bb); |
||
571 | |||
572 | node *c = best_bb; |
||
573 | |||
574 | // FIXME top_bb may be located inside the loop so we'll never enter it |
||
575 | // in the loop below, and the instruction will be incorrectly placed at the |
||
576 | // beginning of the shader. |
||
577 | // For now just check if top_bb's loop_level is higher than of |
||
578 | // current bb and abort the search for better bb in such case, |
||
579 | // but this problem may require more complete (and more expensive) fix |
||
580 | if (top_bb->loop_level <= best_bb->loop_level) { |
||
581 | while (c && c != top_bb) { |
||
582 | |||
583 | if (c->prev) { |
||
584 | c = c->prev; |
||
585 | } else { |
||
586 | c = c->parent; |
||
587 | if (!c) |
||
588 | break; |
||
589 | continue; |
||
590 | } |
||
591 | |||
592 | if (c->subtype == NST_BB) { |
||
593 | bb_node *bb = static_cast |
||
594 | if (bb->loop_level < best_bb->loop_level) |
||
595 | best_bb = bb; |
||
596 | } |
||
597 | } |
||
598 | } |
||
599 | |||
600 | oi.bottom_bb = best_bb; |
||
601 | } |
||
602 | |||
603 | void gcm::add_ready(node *n) { |
||
604 | sched_queue_id sq = sh.get_queue_id(n); |
||
605 | if (n->flags & NF_SCHEDULE_EARLY) |
||
606 | bu_ready_early[sq].push_back(n); |
||
607 | else if (sq == SQ_ALU && n->is_copy_mov()) |
||
608 | bu_ready[sq].push_front(n); |
||
609 | else if (n->is_alu_inst()) { |
||
610 | alu_node *a = static_cast |
||
611 | if (a->bc.op_ptr->flags & AF_PRED && a->dst[2]) { |
||
612 | // PRED_SET instruction that updates exec mask |
||
613 | pending_exec_mask_update = true; |
||
614 | } |
||
615 | bu_ready_next[sq].push_back(n); |
||
616 | } else |
||
617 | bu_ready_next[sq].push_back(n); |
||
618 | } |
||
619 | |||
620 | void gcm::bu_release_op(node * n) { |
||
621 | op_info &oi = op_map[n]; |
||
622 | |||
623 | GCM_DUMP( |
||
624 | sblog << " bu release op "; |
||
625 | dump::dump_op(n); |
||
626 | ); |
||
627 | |||
628 | nuc_stk[ucs_level].erase(n); |
||
629 | pending.remove_node(n); |
||
630 | |||
631 | bu_find_best_bb(n, oi); |
||
632 | |||
633 | if (oi.bottom_bb == bu_bb) { |
||
634 | GCM_DUMP( sblog << " ready\n";); |
||
635 | add_ready(n); |
||
636 | } else { |
||
637 | GCM_DUMP( sblog << " ready_above\n";); |
||
638 | ready_above.push_back(n); |
||
639 | } |
||
640 | } |
||
641 | |||
642 | void gcm::bu_release_phi_defs(container_node* p, unsigned op) |
||
643 | { |
||
644 | for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) { |
||
645 | node *o = *I; |
||
646 | value *v = o->src[op]; |
||
647 | if (v && !v->is_readonly()) |
||
648 | pending_defs.push_back(o->src[op]); |
||
649 | |||
650 | } |
||
651 | } |
||
652 | |||
653 | unsigned gcm::get_uc_vec(vvec &vv) { |
||
654 | unsigned c = 0; |
||
655 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
656 | value *v = *I; |
||
657 | if (!v) |
||
658 | continue; |
||
659 | |||
660 | if (v->is_rel()) |
||
661 | c += get_uc_vec(v->mdef); |
||
662 | else |
||
663 | c += v->use_count(); |
||
664 | } |
||
665 | return c; |
||
666 | } |
||
667 | |||
668 | void gcm::init_use_count(nuc_map& m, container_node &s) { |
||
669 | m.clear(); |
||
670 | for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) { |
||
671 | node *n = *I; |
||
672 | unsigned uc = get_uc_vec(n->dst); |
||
673 | GCM_DUMP( |
||
674 | sblog << "uc " << uc << " "; |
||
675 | dump::dump_op(n); |
||
676 | sblog << "\n"; |
||
677 | ); |
||
678 | if (!uc) { |
||
679 | pending_nodes.push_back(n); |
||
680 | GCM_DUMP( |
||
681 | sblog << "pushed pending_node in init "; |
||
682 | dump::dump_op(n); |
||
683 | sblog << "\n"; |
||
684 | ); |
||
685 | |||
686 | } else |
||
687 | m[n] = uc; |
||
688 | } |
||
689 | } |
||
690 | |||
691 | void gcm::bu_release_val(value* v) { |
||
692 | node *n = v->any_def(); |
||
693 | |||
694 | if (n && n->parent == &pending) { |
||
695 | nuc_map &m = nuc_stk[ucs_level]; |
||
696 | unsigned uc = ++m[n]; |
||
697 | unsigned uc2 = uses[n]; |
||
698 | |||
699 | if (live.add_val(v)) { |
||
700 | ++live_count; |
||
701 | GCM_DUMP ( sblog << "live_count: " << live_count << "\n"; ); |
||
702 | } |
||
703 | |||
704 | GCM_DUMP( |
||
705 | sblog << "release val "; |
||
706 | dump::dump_val(v); |
||
707 | sblog << " for node "; |
||
708 | dump::dump_op(n); |
||
709 | sblog << " new uc=" << uc << ", total " << uc2 << "\n"; |
||
710 | ); |
||
711 | |||
712 | if (uc == uc2) |
||
713 | bu_release_op(n); |
||
714 | } |
||
715 | |||
716 | } |
||
717 | |||
718 | void gcm::init_def_count(nuc_map& m, container_node& s) { |
||
719 | m.clear(); |
||
720 | for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) { |
||
721 | node *n = *I; |
||
722 | unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false); |
||
723 | m[n] = dc; |
||
724 | |||
725 | GCM_DUMP( |
||
726 | sblog << "dc " << dc << " "; |
||
727 | dump::dump_op(n); |
||
728 | sblog << "\n"; |
||
729 | ); |
||
730 | } |
||
731 | } |
||
732 | |||
733 | unsigned gcm::get_dc_vec(vvec& vv, bool src) { |
||
734 | unsigned c = 0; |
||
735 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
736 | value *v = *I; |
||
737 | if (!v || v->is_readonly()) |
||
738 | continue; |
||
739 | |||
740 | if (v->is_rel()) { |
||
741 | c += v->rel->def != NULL; |
||
742 | c += get_dc_vec(v->muse, true); |
||
743 | } |
||
744 | else if (src) { |
||
745 | c += v->def != NULL; |
||
746 | c += v->adef != NULL; |
||
747 | } |
||
748 | } |
||
749 | return c; |
||
750 | } |
||
751 | |||
752 | unsigned gcm::real_alu_count(sched_queue& q, unsigned max) { |
||
753 | sq_iterator I(q.begin()), E(q.end()); |
||
754 | unsigned c = 0; |
||
755 | |||
756 | while (I != E && c < max) { |
||
757 | node *n = *I; |
||
758 | if (n->is_alu_inst()) { |
||
759 | if (!n->is_copy_mov() || !n->src[0]->is_any_gpr()) |
||
760 | ++c; |
||
761 | } else if (n->is_alu_packed()) { |
||
762 | c += static_cast |
||
763 | } |
||
764 | ++I; |
||
765 | } |
||
766 | |||
767 | return c; |
||
768 | } |
||
769 | |||
770 | bool gcm::check_alu_ready_count(unsigned threshold) { |
||
771 | unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold); |
||
772 | if (r >= threshold) |
||
773 | return true; |
||
774 | r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r); |
||
775 | return r >= threshold; |
||
776 | } |
||
777 | |||
778 | } // namespace r600_sb>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>=>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>=>><>><>><>><>><>><>><>><>><>><>><>><>>=>>>><>>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><> |