Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2013 Vadim Girlin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Vadim Girlin |
||
25 | */ |
||
26 | |||
27 | #define PSC_DEBUG 0 |
||
28 | |||
29 | #if PSC_DEBUG |
||
30 | #define PSC_DUMP(a) do { a } while (0) |
||
31 | #else |
||
32 | #define PSC_DUMP(a) |
||
33 | #endif |
||
34 | |||
35 | #include "sb_bc.h" |
||
36 | #include "sb_shader.h" |
||
37 | #include "sb_pass.h" |
||
38 | #include "sb_sched.h" |
||
39 | |||
40 | namespace r600_sb { |
||
41 | |||
42 | rp_kcache_tracker::rp_kcache_tracker(shader &sh) : rp(), uc(), |
||
43 | // FIXME: for now we'll use "two const pairs" limit for r600, same as |
||
44 | // for other chips, otherwise additional check in alu_group_tracker is |
||
45 | // required to make sure that all 4 consts in the group fit into 2 |
||
46 | // kcache sets |
||
47 | sel_count(2) {} |
||
48 | |||
49 | bool rp_kcache_tracker::try_reserve(sel_chan r) { |
||
50 | unsigned sel = kc_sel(r); |
||
51 | |||
52 | for (unsigned i = 0; i < sel_count; ++i) { |
||
53 | if (rp[i] == 0) { |
||
54 | rp[i] = sel; |
||
55 | ++uc[i]; |
||
56 | return true; |
||
57 | } |
||
58 | if (rp[i] == sel) { |
||
59 | ++uc[i]; |
||
60 | return true; |
||
61 | } |
||
62 | } |
||
63 | return false; |
||
64 | } |
||
65 | |||
66 | bool rp_kcache_tracker::try_reserve(node* n) { |
||
67 | bool need_unreserve = false; |
||
68 | vvec::iterator I(n->src.begin()), E(n->src.end()); |
||
69 | |||
70 | for (; I != E; ++I) { |
||
71 | value *v = *I; |
||
72 | if (v->is_kcache()) { |
||
73 | if (!try_reserve(v->select)) |
||
74 | break; |
||
75 | else |
||
76 | need_unreserve = true; |
||
77 | } |
||
78 | } |
||
79 | if (I == E) |
||
80 | return true; |
||
81 | |||
82 | if (need_unreserve && I != n->src.begin()) { |
||
83 | do { |
||
84 | --I; |
||
85 | value *v =*I; |
||
86 | if (v->is_kcache()) |
||
87 | unreserve(v->select); |
||
88 | } while (I != n->src.begin()); |
||
89 | } |
||
90 | return false; |
||
91 | } |
||
92 | |||
93 | inline |
||
94 | void rp_kcache_tracker::unreserve(node* n) { |
||
95 | vvec::iterator I(n->src.begin()), E(n->src.end()); |
||
96 | for (; I != E; ++I) { |
||
97 | value *v = *I; |
||
98 | if (v->is_kcache()) |
||
99 | unreserve(v->select); |
||
100 | } |
||
101 | } |
||
102 | |||
103 | void rp_kcache_tracker::unreserve(sel_chan r) { |
||
104 | unsigned sel = kc_sel(r); |
||
105 | |||
106 | for (unsigned i = 0; i < sel_count; ++i) |
||
107 | if (rp[i] == sel) { |
||
108 | if (--uc[i] == 0) |
||
109 | rp[i] = 0; |
||
110 | return; |
||
111 | } |
||
112 | assert(0); |
||
113 | return; |
||
114 | } |
||
115 | |||
116 | bool literal_tracker::try_reserve(alu_node* n) { |
||
117 | bool need_unreserve = false; |
||
118 | |||
119 | vvec::iterator I(n->src.begin()), E(n->src.end()); |
||
120 | |||
121 | for (; I != E; ++I) { |
||
122 | value *v = *I; |
||
123 | if (v->is_literal()) { |
||
124 | if (!try_reserve(v->literal_value)) |
||
125 | break; |
||
126 | else |
||
127 | need_unreserve = true; |
||
128 | } |
||
129 | } |
||
130 | if (I == E) |
||
131 | return true; |
||
132 | |||
133 | if (need_unreserve && I != n->src.begin()) { |
||
134 | do { |
||
135 | --I; |
||
136 | value *v =*I; |
||
137 | if (v->is_literal()) |
||
138 | unreserve(v->literal_value); |
||
139 | } while (I != n->src.begin()); |
||
140 | } |
||
141 | return false; |
||
142 | } |
||
143 | |||
144 | void literal_tracker::unreserve(alu_node* n) { |
||
145 | unsigned nsrc = n->bc.op_ptr->src_count, i; |
||
146 | |||
147 | for (i = 0; i < nsrc; ++i) { |
||
148 | value *v = n->src[i]; |
||
149 | if (v->is_literal()) |
||
150 | unreserve(v->literal_value); |
||
151 | } |
||
152 | } |
||
153 | |||
154 | bool literal_tracker::try_reserve(literal l) { |
||
155 | |||
156 | PSC_DUMP( sblog << "literal reserve " << l.u << " " << l.f << "\n"; ); |
||
157 | |||
158 | for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) { |
||
159 | if (lt[i] == 0) { |
||
160 | lt[i] = l; |
||
161 | ++uc[i]; |
||
162 | PSC_DUMP( sblog << " reserved new uc = " << uc[i] << "\n"; ); |
||
163 | return true; |
||
164 | } else if (lt[i] == l) { |
||
165 | ++uc[i]; |
||
166 | PSC_DUMP( sblog << " reserved uc = " << uc[i] << "\n"; ); |
||
167 | return true; |
||
168 | } |
||
169 | } |
||
170 | PSC_DUMP( sblog << " failed to reserve literal\n"; ); |
||
171 | return false; |
||
172 | } |
||
173 | |||
174 | void literal_tracker::unreserve(literal l) { |
||
175 | |||
176 | PSC_DUMP( sblog << "literal unreserve " << l.u << " " << l.f << "\n"; ); |
||
177 | |||
178 | for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) { |
||
179 | if (lt[i] == l) { |
||
180 | if (--uc[i] == 0) |
||
181 | lt[i] = 0; |
||
182 | return; |
||
183 | } |
||
184 | } |
||
185 | assert(0); |
||
186 | return; |
||
187 | } |
||
188 | |||
189 | static inline unsigned bs_cycle_vector(unsigned bs, unsigned src) { |
||
190 | static const unsigned swz[VEC_NUM][3] = { |
||
191 | {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0} |
||
192 | }; |
||
193 | assert(bs < VEC_NUM && src < 3); |
||
194 | return swz[bs][src]; |
||
195 | } |
||
196 | |||
197 | static inline unsigned bs_cycle_scalar(unsigned bs, unsigned src) { |
||
198 | static const unsigned swz[SCL_NUM][3] = { |
||
199 | {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1} |
||
200 | }; |
||
201 | |||
202 | if (bs >= SCL_NUM || src >= 3) { |
||
203 | // this prevents gcc warning "array subscript is above array bounds" |
||
204 | // AFAICS we should never hit this path |
||
205 | abort(); |
||
206 | } |
||
207 | return swz[bs][src]; |
||
208 | } |
||
209 | |||
210 | static inline unsigned bs_cycle(bool trans, unsigned bs, unsigned src) { |
||
211 | return trans ? bs_cycle_scalar(bs, src) : bs_cycle_vector(bs, src); |
||
212 | } |
||
213 | |||
214 | inline |
||
215 | bool rp_gpr_tracker::try_reserve(unsigned cycle, unsigned sel, unsigned chan) { |
||
216 | ++sel; |
||
217 | if (rp[cycle][chan] == 0) { |
||
218 | rp[cycle][chan] = sel; |
||
219 | ++uc[cycle][chan]; |
||
220 | return true; |
||
221 | } else if (rp[cycle][chan] == sel) { |
||
222 | ++uc[cycle][chan]; |
||
223 | return true; |
||
224 | } |
||
225 | return false; |
||
226 | } |
||
227 | |||
228 | inline |
||
229 | void rp_gpr_tracker::unreserve(alu_node* n) { |
||
230 | unsigned nsrc = n->bc.op_ptr->src_count, i; |
||
231 | unsigned trans = n->bc.slot == SLOT_TRANS; |
||
232 | unsigned bs = n->bc.bank_swizzle; |
||
233 | unsigned opt = !trans |
||
234 | && n->bc.src[0].sel == n->bc.src[1].sel |
||
235 | && n->bc.src[0].chan == n->bc.src[1].chan; |
||
236 | |||
237 | for (i = 0; i < nsrc; ++i) { |
||
238 | value *v = n->src[i]; |
||
239 | if (v->is_readonly()) |
||
240 | continue; |
||
241 | if (i == 1 && opt) |
||
242 | continue; |
||
243 | unsigned cycle = bs_cycle(trans, bs, i); |
||
244 | unreserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan); |
||
245 | } |
||
246 | } |
||
247 | |||
248 | inline |
||
249 | void rp_gpr_tracker::unreserve(unsigned cycle, unsigned sel, unsigned chan) { |
||
250 | ++sel; |
||
251 | assert(rp[cycle][chan] == sel && uc[cycle][chan]); |
||
252 | if (--uc[cycle][chan] == 0) |
||
253 | rp[cycle][chan] = 0; |
||
254 | } |
||
255 | |||
256 | inline |
||
257 | bool rp_gpr_tracker::try_reserve(alu_node* n) { |
||
258 | unsigned nsrc = n->bc.op_ptr->src_count, i; |
||
259 | unsigned trans = n->bc.slot == SLOT_TRANS; |
||
260 | unsigned bs = n->bc.bank_swizzle; |
||
261 | unsigned opt = !trans && nsrc >= 2 && |
||
262 | n->src[0] == n->src[1]; |
||
263 | |||
264 | bool need_unreserve = false; |
||
265 | unsigned const_count = 0, min_gpr_cycle = 3; |
||
266 | |||
267 | for (i = 0; i < nsrc; ++i) { |
||
268 | value *v = n->src[i]; |
||
269 | if (v->is_readonly()) { |
||
270 | const_count++; |
||
271 | if (trans && const_count == 3) |
||
272 | break; |
||
273 | } else { |
||
274 | if (i == 1 && opt) |
||
275 | continue; |
||
276 | |||
277 | unsigned cycle = bs_cycle(trans, bs, i); |
||
278 | |||
279 | if (trans && cycle < min_gpr_cycle) |
||
280 | min_gpr_cycle = cycle; |
||
281 | |||
282 | if (const_count && cycle < const_count && trans) |
||
283 | break; |
||
284 | |||
285 | if (!try_reserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan)) |
||
286 | break; |
||
287 | else |
||
288 | need_unreserve = true; |
||
289 | } |
||
290 | } |
||
291 | |||
292 | if ((i == nsrc) && (min_gpr_cycle + 1 > const_count)) |
||
293 | return true; |
||
294 | |||
295 | if (need_unreserve && i--) { |
||
296 | do { |
||
297 | value *v = n->src[i]; |
||
298 | if (!v->is_readonly()) { |
||
299 | if (i == 1 && opt) |
||
300 | continue; |
||
301 | unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel, |
||
302 | n->bc.src[i].chan); |
||
303 | } |
||
304 | } while (i--); |
||
305 | } |
||
306 | return false; |
||
307 | } |
||
308 | |||
309 | alu_group_tracker::alu_group_tracker(shader &sh) |
||
310 | : sh(sh), kc(sh), |
||
311 | gpr(), lt(), slots(), |
||
312 | max_slots(sh.get_ctx().is_cayman() ? 4 : 5), |
||
313 | has_mova(), uses_ar(), has_predset(), has_kill(), |
||
314 | updates_exec_mask(), chan_count(), interp_param(), next_id() { |
||
315 | |||
316 | available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F; |
||
317 | } |
||
318 | |||
319 | inline |
||
320 | sel_chan alu_group_tracker::get_value_id(value* v) { |
||
321 | unsigned &id = vmap[v]; |
||
322 | if (!id) |
||
323 | id = ++next_id; |
||
324 | return sel_chan(id, v->get_final_chan()); |
||
325 | } |
||
326 | |||
327 | inline |
||
328 | void alu_group_tracker::assign_slot(unsigned slot, alu_node* n) { |
||
329 | update_flags(n); |
||
330 | slots[slot] = n; |
||
331 | available_slots &= ~(1 << slot); |
||
332 | |||
333 | unsigned param = n->interp_param(); |
||
334 | |||
335 | if (param) { |
||
336 | assert(!interp_param || interp_param == param); |
||
337 | interp_param = param; |
||
338 | } |
||
339 | } |
||
340 | |||
341 | |||
342 | void alu_group_tracker::discard_all_slots(container_node &removed_nodes) { |
||
343 | PSC_DUMP( sblog << "agt::discard_all_slots\n"; ); |
||
344 | discard_slots(~available_slots & ((1 << max_slots) - 1), removed_nodes); |
||
345 | } |
||
346 | |||
347 | void alu_group_tracker::discard_slots(unsigned slot_mask, |
||
348 | container_node &removed_nodes) { |
||
349 | |||
350 | PSC_DUMP( |
||
351 | sblog << "discard_slots : packed_ops : " |
||
352 | << (unsigned)packed_ops.size() << "\n"; |
||
353 | ); |
||
354 | |||
355 | for (node_vec::iterator N, I = packed_ops.begin(); |
||
356 | I != packed_ops.end(); I = N) { |
||
357 | N = I; ++N; |
||
358 | |||
359 | alu_packed_node *n = static_cast |
||
360 | unsigned pslots = n->get_slot_mask(); |
||
361 | |||
362 | PSC_DUMP( |
||
363 | sblog << "discard_slots : packed slot_mask : " << pslots << "\n"; |
||
364 | ); |
||
365 | |||
366 | if (pslots & slot_mask) { |
||
367 | |||
368 | PSC_DUMP( |
||
369 | sblog << "discard_slots : discarding packed...\n"; |
||
370 | ); |
||
371 | |||
372 | removed_nodes.push_back(n); |
||
373 | slot_mask &= ~pslots; |
||
374 | N = packed_ops.erase(I); |
||
375 | available_slots |= pslots; |
||
376 | for (unsigned k = 0; k < max_slots; ++k) { |
||
377 | if (pslots & (1 << k)) |
||
378 | slots[k] = NULL; |
||
379 | } |
||
380 | } |
||
381 | } |
||
382 | |||
383 | for (unsigned slot = 0; slot < max_slots; ++slot) { |
||
384 | unsigned slot_bit = 1 << slot; |
||
385 | |||
386 | if (slot_mask & slot_bit) { |
||
387 | assert(!(available_slots & slot_bit)); |
||
388 | assert(slots[slot]); |
||
389 | |||
390 | assert(!(slots[slot]->bc.slot_flags & AF_4SLOT)); |
||
391 | |||
392 | PSC_DUMP( |
||
393 | sblog << "discarding slot " << slot << " : "; |
||
394 | dump::dump_op(slots[slot]); |
||
395 | sblog << "\n"; |
||
396 | ); |
||
397 | |||
398 | removed_nodes.push_back(slots[slot]); |
||
399 | slots[slot] = NULL; |
||
400 | available_slots |= slot_bit; |
||
401 | } |
||
402 | } |
||
403 | |||
404 | alu_node *t = slots[4]; |
||
405 | if (t && (t->bc.slot_flags & AF_V)) { |
||
406 | unsigned chan = t->bc.dst_chan; |
||
407 | if (!slots[chan]) { |
||
408 | PSC_DUMP( |
||
409 | sblog << "moving "; |
||
410 | dump::dump_op(t); |
||
411 | sblog << " from trans slot to free slot " << chan << "\n"; |
||
412 | ); |
||
413 | |||
414 | slots[chan] = t; |
||
415 | slots[4] = NULL; |
||
416 | t->bc.slot = chan; |
||
417 | } |
||
418 | } |
||
419 | |||
420 | reinit(); |
||
421 | } |
||
422 | |||
423 | alu_group_node* alu_group_tracker::emit() { |
||
424 | |||
425 | alu_group_node *g = sh.create_alu_group(); |
||
426 | |||
427 | lt.init_group_literals(g); |
||
428 | |||
429 | for (unsigned i = 0; i < max_slots; ++i) { |
||
430 | alu_node *n = slots[i]; |
||
431 | if (n) { |
||
432 | g->push_back(n); |
||
433 | } |
||
434 | } |
||
435 | return g; |
||
436 | } |
||
437 | |||
438 | bool alu_group_tracker::try_reserve(alu_node* n) { |
||
439 | unsigned nsrc = n->bc.op_ptr->src_count; |
||
440 | unsigned slot = n->bc.slot; |
||
441 | bool trans = slot == 4; |
||
442 | |||
443 | if (slots[slot]) |
||
444 | return false; |
||
445 | |||
446 | unsigned flags = n->bc.op_ptr->flags; |
||
447 | |||
448 | unsigned param = n->interp_param(); |
||
449 | |||
450 | if (param && interp_param && interp_param != param) |
||
451 | return false; |
||
452 | |||
453 | if ((flags & AF_KILL) && has_predset) |
||
454 | return false; |
||
455 | if ((flags & AF_ANY_PRED) && (has_kill || has_predset)) |
||
456 | return false; |
||
457 | if ((flags & AF_MOVA) && (has_mova || uses_ar)) |
||
458 | return false; |
||
459 | |||
460 | if (n->uses_ar() && has_mova) |
||
461 | return false; |
||
462 | |||
463 | for (unsigned i = 0; i < nsrc; ++i) { |
||
464 | |||
465 | unsigned last_id = next_id; |
||
466 | |||
467 | value *v = n->src[i]; |
||
468 | if (!v->is_any_gpr() && !v->is_rel()) |
||
469 | continue; |
||
470 | sel_chan vid = get_value_id(n->src[i]); |
||
471 | |||
472 | if (vid > last_id && chan_count[vid.chan()] == 3) { |
||
473 | return false; |
||
474 | } |
||
475 | |||
476 | n->bc.src[i].sel = vid.sel(); |
||
477 | n->bc.src[i].chan = vid.chan(); |
||
478 | } |
||
479 | |||
480 | if (!lt.try_reserve(n)) |
||
481 | return false; |
||
482 | |||
483 | if (!kc.try_reserve(n)) { |
||
484 | lt.unreserve(n); |
||
485 | return false; |
||
486 | } |
||
487 | |||
488 | unsigned fbs = n->forced_bank_swizzle(); |
||
489 | |||
490 | n->bc.bank_swizzle = 0; |
||
491 | |||
492 | if (!trans & fbs) |
||
493 | n->bc.bank_swizzle = VEC_210; |
||
494 | |||
495 | if (gpr.try_reserve(n)) { |
||
496 | assign_slot(slot, n); |
||
497 | return true; |
||
498 | } |
||
499 | |||
500 | if (!fbs) { |
||
501 | unsigned swz_num = trans ? SCL_NUM : VEC_NUM; |
||
502 | for (unsigned bs = 0; bs < swz_num; ++bs) { |
||
503 | n->bc.bank_swizzle = bs; |
||
504 | if (gpr.try_reserve(n)) { |
||
505 | assign_slot(slot, n); |
||
506 | return true; |
||
507 | } |
||
508 | } |
||
509 | } |
||
510 | |||
511 | gpr.reset(); |
||
512 | |||
513 | slots[slot] = n; |
||
514 | unsigned forced_swz_slots = 0; |
||
515 | int first_slot = ~0, first_nf = ~0, last_slot = ~0; |
||
516 | unsigned save_bs[5]; |
||
517 | |||
518 | for (unsigned i = 0; i < max_slots; ++i) { |
||
519 | alu_node *a = slots[i]; |
||
520 | if (a) { |
||
521 | if (first_slot == ~0) |
||
522 | first_slot = i; |
||
523 | last_slot = i; |
||
524 | save_bs[i] = a->bc.bank_swizzle; |
||
525 | if (a->forced_bank_swizzle()) { |
||
526 | assert(i != SLOT_TRANS); |
||
527 | forced_swz_slots |= (1 << i); |
||
528 | a->bc.bank_swizzle = VEC_210; |
||
529 | if (!gpr.try_reserve(a)) |
||
530 | assert("!internal reservation error"); |
||
531 | } else { |
||
532 | if (first_nf == ~0) |
||
533 | first_nf = i; |
||
534 | |||
535 | a->bc.bank_swizzle = 0; |
||
536 | } |
||
537 | } |
||
538 | } |
||
539 | |||
540 | if (first_nf == ~0) { |
||
541 | assign_slot(slot, n); |
||
542 | return true; |
||
543 | } |
||
544 | |||
545 | assert(first_slot != ~0 && last_slot != ~0); |
||
546 | |||
547 | // silence "array subscript is above array bounds" with gcc 4.8 |
||
548 | if (last_slot >= 5) |
||
549 | abort(); |
||
550 | |||
551 | int i = first_nf; |
||
552 | alu_node *a = slots[i]; |
||
553 | bool backtrack = false; |
||
554 | |||
555 | while (1) { |
||
556 | |||
557 | PSC_DUMP( |
||
558 | sblog << " bs: trying s" << i << " bs:" << a->bc.bank_swizzle |
||
559 | << " bt:" << backtrack << "\n"; |
||
560 | ); |
||
561 | |||
562 | if (!backtrack && gpr.try_reserve(a)) { |
||
563 | PSC_DUMP( |
||
564 | sblog << " bs: reserved s" << i << " bs:" << a->bc.bank_swizzle |
||
565 | << "\n"; |
||
566 | ); |
||
567 | |||
568 | while ((++i <= last_slot) && !slots[i]); |
||
569 | if (i <= last_slot) |
||
570 | a = slots[i]; |
||
571 | else |
||
572 | break; |
||
573 | } else { |
||
574 | bool itrans = i == SLOT_TRANS; |
||
575 | unsigned max_swz = itrans ? SCL_221 : VEC_210; |
||
576 | |||
577 | if (a->bc.bank_swizzle < max_swz) { |
||
578 | ++a->bc.bank_swizzle; |
||
579 | |||
580 | PSC_DUMP( |
||
581 | sblog << " bs: inc s" << i << " bs:" << a->bc.bank_swizzle |
||
582 | << "\n"; |
||
583 | ); |
||
584 | |||
585 | } else { |
||
586 | |||
587 | a->bc.bank_swizzle = 0; |
||
588 | while ((--i >= first_nf) && !slots[i]); |
||
589 | if (i < first_nf) |
||
590 | break; |
||
591 | a = slots[i]; |
||
592 | PSC_DUMP( |
||
593 | sblog << " bs: unreserve s" << i << " bs:" << a->bc.bank_swizzle |
||
594 | << "\n"; |
||
595 | ); |
||
596 | gpr.unreserve(a); |
||
597 | backtrack = true; |
||
598 | |||
599 | continue; |
||
600 | } |
||
601 | } |
||
602 | backtrack = false; |
||
603 | } |
||
604 | |||
605 | if (i == last_slot + 1) { |
||
606 | assign_slot(slot, n); |
||
607 | return true; |
||
608 | } |
||
609 | |||
610 | // reservation failed, restore previous state |
||
611 | slots[slot] = NULL; |
||
612 | gpr.reset(); |
||
613 | for (unsigned i = 0; i < max_slots; ++i) { |
||
614 | alu_node *a = slots[i]; |
||
615 | if (a) { |
||
616 | a->bc.bank_swizzle = save_bs[i]; |
||
617 | bool b = gpr.try_reserve(a); |
||
618 | assert(b); |
||
619 | } |
||
620 | } |
||
621 | |||
622 | kc.unreserve(n); |
||
623 | lt.unreserve(n); |
||
624 | return false; |
||
625 | } |
||
626 | |||
627 | bool alu_group_tracker::try_reserve(alu_packed_node* p) { |
||
628 | bool need_unreserve = false; |
||
629 | node_iterator I(p->begin()), E(p->end()); |
||
630 | |||
631 | for (; I != E; ++I) { |
||
632 | alu_node *n = static_cast |
||
633 | if (!try_reserve(n)) |
||
634 | break; |
||
635 | else |
||
636 | need_unreserve = true; |
||
637 | } |
||
638 | |||
639 | if (I == E) { |
||
640 | packed_ops.push_back(p); |
||
641 | return true; |
||
642 | } |
||
643 | |||
644 | if (need_unreserve) { |
||
645 | while (--I != E) { |
||
646 | alu_node *n = static_cast |
||
647 | slots[n->bc.slot] = NULL; |
||
648 | } |
||
649 | reinit(); |
||
650 | } |
||
651 | return false; |
||
652 | } |
||
653 | |||
654 | void alu_group_tracker::reinit() { |
||
655 | alu_node * s[5]; |
||
656 | memcpy(s, slots, sizeof(slots)); |
||
657 | |||
658 | reset(true); |
||
659 | |||
660 | for (int i = max_slots - 1; i >= 0; --i) { |
||
661 | if (s[i] && !try_reserve(s[i])) { |
||
662 | sblog << "alu_group_tracker: reinit error on slot " << i << "\n"; |
||
663 | for (unsigned i = 0; i < max_slots; ++i) { |
||
664 | sblog << " slot " << i << " : "; |
||
665 | if (s[i]) |
||
666 | dump::dump_op(s[i]); |
||
667 | |||
668 | sblog << "\n"; |
||
669 | } |
||
670 | assert(!"alu_group_tracker: reinit error"); |
||
671 | } |
||
672 | } |
||
673 | } |
||
674 | |||
675 | void alu_group_tracker::reset(bool keep_packed) { |
||
676 | kc.reset(); |
||
677 | gpr.reset(); |
||
678 | lt.reset(); |
||
679 | memset(slots, 0, sizeof(slots)); |
||
680 | vmap.clear(); |
||
681 | next_id = 0; |
||
682 | has_mova = false; |
||
683 | uses_ar = false; |
||
684 | has_predset = false; |
||
685 | has_kill = false; |
||
686 | updates_exec_mask = false; |
||
687 | available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F; |
||
688 | interp_param = 0; |
||
689 | |||
690 | chan_count[0] = 0; |
||
691 | chan_count[1] = 0; |
||
692 | chan_count[2] = 0; |
||
693 | chan_count[3] = 0; |
||
694 | |||
695 | if (!keep_packed) |
||
696 | packed_ops.clear(); |
||
697 | } |
||
698 | |||
699 | void alu_group_tracker::update_flags(alu_node* n) { |
||
700 | unsigned flags = n->bc.op_ptr->flags; |
||
701 | has_kill |= (flags & AF_KILL); |
||
702 | has_mova |= (flags & AF_MOVA); |
||
703 | has_predset |= (flags & AF_ANY_PRED); |
||
704 | uses_ar |= n->uses_ar(); |
||
705 | |||
706 | if (flags & AF_ANY_PRED) { |
||
707 | if (n->dst[2] != NULL) |
||
708 | updates_exec_mask = true; |
||
709 | } |
||
710 | } |
||
711 | |||
712 | int post_scheduler::run() { |
||
713 | run_on(sh.root); |
||
714 | return 0; |
||
715 | } |
||
716 | |||
717 | void post_scheduler::run_on(container_node* n) { |
||
718 | |||
719 | for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) { |
||
720 | if (I->is_container()) { |
||
721 | if (I->subtype == NST_BB) { |
||
722 | bb_node* bb = static_cast |
||
723 | schedule_bb(bb); |
||
724 | } else { |
||
725 | run_on(static_cast |
||
726 | } |
||
727 | } |
||
728 | } |
||
729 | } |
||
730 | |||
731 | void post_scheduler::init_uc_val(container_node *c, value *v) { |
||
732 | node *d = v->any_def(); |
||
733 | if (d && d->parent == c) |
||
734 | ++ucm[d]; |
||
735 | } |
||
736 | |||
737 | void post_scheduler::init_uc_vec(container_node *c, vvec &vv, bool src) { |
||
738 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
739 | value *v = *I; |
||
740 | if (!v || v->is_readonly()) |
||
741 | continue; |
||
742 | |||
743 | if (v->is_rel()) { |
||
744 | init_uc_val(c, v->rel); |
||
745 | init_uc_vec(c, v->muse, true); |
||
746 | } if (src) { |
||
747 | init_uc_val(c, v); |
||
748 | } |
||
749 | } |
||
750 | } |
||
751 | |||
752 | unsigned post_scheduler::init_ucm(container_node *c, node *n) { |
||
753 | init_uc_vec(c, n->src, true); |
||
754 | init_uc_vec(c, n->dst, false); |
||
755 | |||
756 | uc_map::iterator F = ucm.find(n); |
||
757 | return F == ucm.end() ? 0 : F->second; |
||
758 | } |
||
759 | |||
760 | void post_scheduler::schedule_bb(bb_node* bb) { |
||
761 | PSC_DUMP( |
||
762 | sblog << "scheduling BB " << bb->id << "\n"; |
||
763 | if (!pending.empty()) |
||
764 | dump::dump_op_list(&pending); |
||
765 | ); |
||
766 | |||
767 | assert(pending.empty()); |
||
768 | assert(bb_pending.empty()); |
||
769 | assert(ready.empty()); |
||
770 | |||
771 | bb_pending.append_from(bb); |
||
772 | cur_bb = bb; |
||
773 | |||
774 | node *n; |
||
775 | |||
776 | while ((n = bb_pending.back())) { |
||
777 | |||
778 | PSC_DUMP( |
||
779 | sblog << "post_sched_bb "; |
||
780 | dump::dump_op(n); |
||
781 | sblog << "\n"; |
||
782 | ); |
||
783 | |||
784 | if (n->subtype == NST_ALU_CLAUSE) { |
||
785 | n->remove(); |
||
786 | process_alu(static_cast |
||
787 | continue; |
||
788 | } |
||
789 | |||
790 | n->remove(); |
||
791 | bb->push_front(n); |
||
792 | } |
||
793 | |||
794 | this->cur_bb = NULL; |
||
795 | } |
||
796 | |||
797 | void post_scheduler::init_regmap() { |
||
798 | |||
799 | regmap.clear(); |
||
800 | |||
801 | PSC_DUMP( |
||
802 | sblog << "init_regmap: live: "; |
||
803 | dump::dump_set(sh, live); |
||
804 | sblog << "\n"; |
||
805 | ); |
||
806 | |||
807 | for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) { |
||
808 | value *v = *I; |
||
809 | assert(v); |
||
810 | if (!v->is_sgpr() || !v->is_prealloc()) |
||
811 | continue; |
||
812 | |||
813 | sel_chan r = v->gpr; |
||
814 | |||
815 | PSC_DUMP( |
||
816 | sblog << "init_regmap: " << r << " <= "; |
||
817 | dump::dump_val(v); |
||
818 | sblog << "\n"; |
||
819 | ); |
||
820 | |||
821 | assert(r); |
||
822 | regmap[r] = v; |
||
823 | } |
||
824 | } |
||
825 | |||
826 | void post_scheduler::process_alu(container_node *c) { |
||
827 | |||
828 | ucm.clear(); |
||
829 | alu.reset(); |
||
830 | |||
831 | live = c->live_after; |
||
832 | |||
833 | init_globals(c->live_after, true); |
||
834 | init_globals(c->live_before, true); |
||
835 | |||
836 | init_regmap(); |
||
837 | |||
838 | update_local_interferences(); |
||
839 | |||
840 | for (node_riterator N, I = c->rbegin(), E = c->rend(); I != E; I = N) { |
||
841 | N = I; |
||
842 | ++N; |
||
843 | |||
844 | node *n = *I; |
||
845 | unsigned uc = init_ucm(c, n); |
||
846 | |||
847 | PSC_DUMP( |
||
848 | sblog << "process_alu uc=" << uc << " "; |
||
849 | dump::dump_op(n); |
||
850 | sblog << " "; |
||
851 | ); |
||
852 | |||
853 | if (uc) { |
||
854 | n->remove(); |
||
855 | pending.push_back(n); |
||
856 | PSC_DUMP( sblog << "pending\n"; ); |
||
857 | } else { |
||
858 | release_op(n); |
||
859 | } |
||
860 | } |
||
861 | |||
862 | schedule_alu(c); |
||
863 | } |
||
864 | |||
865 | void post_scheduler::update_local_interferences() { |
||
866 | |||
867 | PSC_DUMP( |
||
868 | sblog << "update_local_interferences : "; |
||
869 | dump::dump_set(sh, live); |
||
870 | sblog << "\n"; |
||
871 | ); |
||
872 | |||
873 | |||
874 | for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) { |
||
875 | value *v = *I; |
||
876 | if (v->is_prealloc()) |
||
877 | continue; |
||
878 | |||
879 | v->interferences.add_set(live); |
||
880 | } |
||
881 | } |
||
882 | |||
883 | void post_scheduler::update_live_src_vec(vvec &vv, val_set *born, bool src) { |
||
884 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
885 | value *v = *I; |
||
886 | |||
887 | if (!v) |
||
888 | continue; |
||
889 | |||
890 | if (src && v->is_any_gpr()) { |
||
891 | if (live.add_val(v)) { |
||
892 | if (!v->is_prealloc()) { |
||
893 | if (!cleared_interf.contains(v)) { |
||
894 | PSC_DUMP( |
||
895 | sblog << "clearing interferences for " << *v << "\n"; |
||
896 | ); |
||
897 | v->interferences.clear(); |
||
898 | cleared_interf.add_val(v); |
||
899 | } |
||
900 | } |
||
901 | if (born) |
||
902 | born->add_val(v); |
||
903 | } |
||
904 | } else if (v->is_rel()) { |
||
905 | if (!v->rel->is_any_gpr()) |
||
906 | live.add_val(v->rel); |
||
907 | update_live_src_vec(v->muse, born, true); |
||
908 | } |
||
909 | } |
||
910 | } |
||
911 | |||
912 | void post_scheduler::update_live_dst_vec(vvec &vv) { |
||
913 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
914 | value *v = *I; |
||
915 | if (!v) |
||
916 | continue; |
||
917 | |||
918 | if (v->is_rel()) { |
||
919 | update_live_dst_vec(v->mdef); |
||
920 | } else if (v->is_any_gpr()) { |
||
921 | if (!live.remove_val(v)) { |
||
922 | PSC_DUMP( |
||
923 | sblog << "failed to remove "; |
||
924 | dump::dump_val(v); |
||
925 | sblog << " from live : "; |
||
926 | dump::dump_set(sh, live); |
||
927 | sblog << "\n"; |
||
928 | ); |
||
929 | } |
||
930 | } |
||
931 | } |
||
932 | } |
||
933 | |||
934 | void post_scheduler::update_live(node *n, val_set *born) { |
||
935 | update_live_dst_vec(n->dst); |
||
936 | update_live_src_vec(n->src, born, true); |
||
937 | update_live_src_vec(n->dst, born, false); |
||
938 | } |
||
939 | |||
940 | void post_scheduler::process_group() { |
||
941 | alu_group_tracker &rt = alu.grp(); |
||
942 | |||
943 | val_set vals_born; |
||
944 | |||
945 | recolor_locals(); |
||
946 | |||
947 | PSC_DUMP( |
||
948 | sblog << "process_group: live_before : "; |
||
949 | dump::dump_set(sh, live); |
||
950 | sblog << "\n"; |
||
951 | ); |
||
952 | |||
953 | for (unsigned s = 0; s < ctx.num_slots; ++s) { |
||
954 | alu_node *n = rt.slot(s); |
||
955 | if (!n) |
||
956 | continue; |
||
957 | |||
958 | update_live(n, &vals_born); |
||
959 | } |
||
960 | |||
961 | PSC_DUMP( |
||
962 | sblog << "process_group: live_after : "; |
||
963 | dump::dump_set(sh, live); |
||
964 | sblog << "\n"; |
||
965 | ); |
||
966 | |||
967 | update_local_interferences(); |
||
968 | |||
969 | for (unsigned i = 0; i < 5; ++i) { |
||
970 | node *n = rt.slot(i); |
||
971 | if (n && !n->is_mova()) { |
||
972 | release_src_values(n); |
||
973 | } |
||
974 | } |
||
975 | } |
||
976 | |||
977 | void post_scheduler::init_globals(val_set &s, bool prealloc) { |
||
978 | |||
979 | PSC_DUMP( |
||
980 | sblog << "init_globals: "; |
||
981 | dump::dump_set(sh, s); |
||
982 | sblog << "\n"; |
||
983 | ); |
||
984 | |||
985 | for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) { |
||
986 | value *v = *I; |
||
987 | if (v->is_sgpr() && !v->is_global()) { |
||
988 | v->set_global(); |
||
989 | |||
990 | if (prealloc && v->is_fixed()) { |
||
991 | v->set_prealloc(); |
||
992 | } |
||
993 | } |
||
994 | } |
||
995 | } |
||
996 | |||
997 | void post_scheduler::emit_clause() { |
||
998 | |||
999 | if (alu.current_ar) { |
||
1000 | emit_load_ar(); |
||
1001 | process_group(); |
||
1002 | alu.emit_group(); |
||
1003 | } |
||
1004 | |||
1005 | alu.emit_clause(cur_bb); |
||
1006 | } |
||
1007 | |||
1008 | void post_scheduler::schedule_alu(container_node *c) { |
||
1009 | |||
1010 | assert(!ready.empty() || !ready_copies.empty()); |
||
1011 | |||
1012 | while (1) { |
||
1013 | |||
1014 | prev_regmap = regmap; |
||
1015 | |||
1016 | if (!prepare_alu_group()) { |
||
1017 | if (alu.current_ar) { |
||
1018 | emit_load_ar(); |
||
1019 | continue; |
||
1020 | } else |
||
1021 | break; |
||
1022 | } |
||
1023 | |||
1024 | if (!alu.check_clause_limits()) { |
||
1025 | regmap = prev_regmap; |
||
1026 | emit_clause(); |
||
1027 | init_globals(live, false); |
||
1028 | continue; |
||
1029 | } |
||
1030 | |||
1031 | process_group(); |
||
1032 | alu.emit_group(); |
||
1033 | }; |
||
1034 | |||
1035 | if (!alu.is_empty()) { |
||
1036 | emit_clause(); |
||
1037 | } |
||
1038 | |||
1039 | if (!ready.empty()) { |
||
1040 | sblog << "##post_scheduler: unscheduled ready instructions :"; |
||
1041 | dump::dump_op_list(&ready); |
||
1042 | assert(!"unscheduled ready instructions"); |
||
1043 | } |
||
1044 | |||
1045 | if (!pending.empty()) { |
||
1046 | sblog << "##post_scheduler: unscheduled pending instructions :"; |
||
1047 | dump::dump_op_list(&pending); |
||
1048 | assert(!"unscheduled pending instructions"); |
||
1049 | } |
||
1050 | } |
||
1051 | |||
1052 | void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) { |
||
1053 | unsigned chan = v->gpr.chan(); |
||
1054 | |||
1055 | for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); |
||
1056 | I != E; ++I) { |
||
1057 | value *vi = *I; |
||
1058 | sel_chan gpr = vi->get_final_gpr(); |
||
1059 | |||
1060 | if (vi->is_any_gpr() && gpr && vi != v && |
||
1061 | (!v->chunk || v->chunk != vi->chunk) && |
||
1062 | vi->is_fixed() && gpr.chan() == chan) { |
||
1063 | |||
1064 | unsigned r = gpr.sel(); |
||
1065 | |||
1066 | PSC_DUMP( |
||
1067 | sblog << "\tadd_interferences: " << *vi << "\n"; |
||
1068 | ); |
||
1069 | |||
1070 | if (rb.size() <= r) |
||
1071 | rb.resize(r + 32); |
||
1072 | rb.set(r); |
||
1073 | } |
||
1074 | } |
||
1075 | } |
||
1076 | |||
1077 | void post_scheduler::set_color_local_val(value *v, sel_chan color) { |
||
1078 | v->gpr = color; |
||
1079 | |||
1080 | PSC_DUMP( |
||
1081 | sblog << " recolored: "; |
||
1082 | dump::dump_val(v); |
||
1083 | sblog << "\n"; |
||
1084 | ); |
||
1085 | } |
||
1086 | |||
1087 | void post_scheduler::set_color_local(value *v, sel_chan color) { |
||
1088 | if (v->chunk) { |
||
1089 | vvec &vv = v->chunk->values; |
||
1090 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
1091 | value *v2 =*I; |
||
1092 | set_color_local_val(v2, color); |
||
1093 | } |
||
1094 | v->chunk->fix(); |
||
1095 | } else { |
||
1096 | set_color_local_val(v, color); |
||
1097 | v->fix(); |
||
1098 | } |
||
1099 | } |
||
1100 | |||
1101 | bool post_scheduler::recolor_local(value *v) { |
||
1102 | |||
1103 | sb_bitset rb; |
||
1104 | |||
1105 | assert(v->is_sgpr()); |
||
1106 | assert(!v->is_prealloc()); |
||
1107 | assert(v->gpr); |
||
1108 | |||
1109 | unsigned chan = v->gpr.chan(); |
||
1110 | |||
1111 | PSC_DUMP( |
||
1112 | sblog << "recolor_local: "; |
||
1113 | dump::dump_val(v); |
||
1114 | sblog << " interferences: "; |
||
1115 | dump::dump_set(sh, v->interferences); |
||
1116 | sblog << "\n"; |
||
1117 | if (v->chunk) { |
||
1118 | sblog << " in chunk: "; |
||
1119 | coalescer::dump_chunk(v->chunk); |
||
1120 | sblog << "\n"; |
||
1121 | } |
||
1122 | ); |
||
1123 | |||
1124 | if (v->chunk) { |
||
1125 | for (vvec::iterator I = v->chunk->values.begin(), |
||
1126 | E = v->chunk->values.end(); I != E; ++I) { |
||
1127 | value *v2 = *I; |
||
1128 | |||
1129 | PSC_DUMP( sblog << " add_interferences for " << *v2 << " :\n"; ); |
||
1130 | |||
1131 | add_interferences(v, rb, v2->interferences); |
||
1132 | } |
||
1133 | } else { |
||
1134 | add_interferences(v, rb, v->interferences); |
||
1135 | } |
||
1136 | |||
1137 | PSC_DUMP( |
||
1138 | unsigned sz = rb.size(); |
||
1139 | sblog << "registers bits: " << sz; |
||
1140 | for (unsigned r = 0; r < sz; ++r) { |
||
1141 | if ((r & 7) == 0) |
||
1142 | sblog << "\n " << r << " "; |
||
1143 | sblog << (rb.get(r) ? 1 : 0); |
||
1144 | } |
||
1145 | ); |
||
1146 | |||
1147 | bool no_temp_gprs = v->is_global(); |
||
1148 | unsigned rs, re, pass = no_temp_gprs ? 1 : 0; |
||
1149 | |||
1150 | while (pass < 2) { |
||
1151 | |||
1152 | if (pass == 0) { |
||
1153 | rs = sh.first_temp_gpr(); |
||
1154 | re = MAX_GPR; |
||
1155 | } else { |
||
1156 | rs = 0; |
||
1157 | re = sh.num_nontemp_gpr(); |
||
1158 | } |
||
1159 | |||
1160 | for (unsigned reg = rs; reg < re; ++reg) { |
||
1161 | if (reg >= rb.size() || !rb.get(reg)) { |
||
1162 | // color found |
||
1163 | set_color_local(v, sel_chan(reg, chan)); |
||
1164 | return true; |
||
1165 | } |
||
1166 | } |
||
1167 | ++pass; |
||
1168 | } |
||
1169 | |||
1170 | assert(!"recolor_local failed"); |
||
1171 | return true; |
||
1172 | } |
||
1173 | |||
1174 | void post_scheduler::emit_load_ar() { |
||
1175 | |||
1176 | regmap = prev_regmap; |
||
1177 | alu.discard_current_group(); |
||
1178 | |||
1179 | alu_group_tracker &rt = alu.grp(); |
||
1180 | alu_node *a = alu.create_ar_load(); |
||
1181 | |||
1182 | if (!rt.try_reserve(a)) { |
||
1183 | sblog << "can't emit AR load : "; |
||
1184 | dump::dump_op(a); |
||
1185 | sblog << "\n"; |
||
1186 | } |
||
1187 | |||
1188 | alu.current_ar = 0; |
||
1189 | } |
||
1190 | |||
1191 | bool post_scheduler::unmap_dst_val(value *d) { |
||
1192 | |||
1193 | if (d == alu.current_ar) { |
||
1194 | emit_load_ar(); |
||
1195 | return false; |
||
1196 | } |
||
1197 | |||
1198 | if (d->is_prealloc()) { |
||
1199 | sel_chan gpr = d->get_final_gpr(); |
||
1200 | rv_map::iterator F = regmap.find(gpr); |
||
1201 | value *c = NULL; |
||
1202 | if (F != regmap.end()) |
||
1203 | c = F->second; |
||
1204 | |||
1205 | if (c && c!=d && (!c->chunk || c->chunk != d->chunk)) { |
||
1206 | PSC_DUMP( |
||
1207 | sblog << "dst value conflict : "; |
||
1208 | dump::dump_val(d); |
||
1209 | sblog << " regmap contains "; |
||
1210 | dump::dump_val(c); |
||
1211 | sblog << "\n"; |
||
1212 | ); |
||
1213 | assert(!"scheduler error"); |
||
1214 | return false; |
||
1215 | } else if (c) { |
||
1216 | regmap.erase(F); |
||
1217 | } |
||
1218 | } |
||
1219 | return true; |
||
1220 | } |
||
1221 | |||
1222 | bool post_scheduler::unmap_dst(alu_node *n) { |
||
1223 | value *d = n->dst.empty() ? NULL : n->dst[0]; |
||
1224 | |||
1225 | if (!d) |
||
1226 | return true; |
||
1227 | |||
1228 | if (!d->is_rel()) { |
||
1229 | if (d && d->is_any_reg()) { |
||
1230 | |||
1231 | if (d->is_AR()) { |
||
1232 | if (alu.current_ar != d) { |
||
1233 | sblog << "loading wrong ar value\n"; |
||
1234 | assert(0); |
||
1235 | } else { |
||
1236 | alu.current_ar = NULL; |
||
1237 | } |
||
1238 | |||
1239 | } else if (d->is_any_gpr()) { |
||
1240 | if (!unmap_dst_val(d)) |
||
1241 | return false; |
||
1242 | } |
||
1243 | } |
||
1244 | } else { |
||
1245 | for (vvec::iterator I = d->mdef.begin(), E = d->mdef.end(); |
||
1246 | I != E; ++I) { |
||
1247 | d = *I; |
||
1248 | if (!d) |
||
1249 | continue; |
||
1250 | |||
1251 | assert(d->is_any_gpr()); |
||
1252 | |||
1253 | if (!unmap_dst_val(d)) |
||
1254 | return false; |
||
1255 | } |
||
1256 | } |
||
1257 | return true; |
||
1258 | } |
||
1259 | |||
1260 | bool post_scheduler::map_src_val(value *v) { |
||
1261 | |||
1262 | if (!v->is_prealloc()) |
||
1263 | return true; |
||
1264 | |||
1265 | sel_chan gpr = v->get_final_gpr(); |
||
1266 | rv_map::iterator F = regmap.find(gpr); |
||
1267 | value *c = NULL; |
||
1268 | if (F != regmap.end()) { |
||
1269 | c = F->second; |
||
1270 | if (!v->v_equal(c)) { |
||
1271 | PSC_DUMP( |
||
1272 | sblog << "can't map src value "; |
||
1273 | dump::dump_val(v); |
||
1274 | sblog << ", regmap contains "; |
||
1275 | dump::dump_val(c); |
||
1276 | sblog << "\n"; |
||
1277 | ); |
||
1278 | return false; |
||
1279 | } |
||
1280 | } else { |
||
1281 | regmap.insert(std::make_pair(gpr, v)); |
||
1282 | } |
||
1283 | return true; |
||
1284 | } |
||
1285 | |||
1286 | bool post_scheduler::map_src_vec(vvec &vv, bool src) { |
||
1287 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
1288 | value *v = *I; |
||
1289 | if (!v) |
||
1290 | continue; |
||
1291 | |||
1292 | if ((!v->is_any_gpr() || !v->is_fixed()) && !v->is_rel()) |
||
1293 | continue; |
||
1294 | |||
1295 | if (v->is_rel()) { |
||
1296 | value *rel = v->rel; |
||
1297 | assert(rel); |
||
1298 | |||
1299 | if (!rel->is_const()) { |
||
1300 | if (!map_src_vec(v->muse, true)) |
||
1301 | return false; |
||
1302 | |||
1303 | if (rel != alu.current_ar) { |
||
1304 | if (alu.current_ar) { |
||
1305 | PSC_DUMP( |
||
1306 | sblog << " current_AR is " << *alu.current_ar |
||
1307 | << " trying to use " << *rel << "\n"; |
||
1308 | ); |
||
1309 | return false; |
||
1310 | } |
||
1311 | |||
1312 | alu.current_ar = rel; |
||
1313 | |||
1314 | PSC_DUMP( |
||
1315 | sblog << " new current_AR assigned: " << *alu.current_ar |
||
1316 | << "\n"; |
||
1317 | ); |
||
1318 | } |
||
1319 | } |
||
1320 | |||
1321 | } else if (src) { |
||
1322 | if (!map_src_val(v)) { |
||
1323 | return false; |
||
1324 | } |
||
1325 | } |
||
1326 | } |
||
1327 | return true; |
||
1328 | } |
||
1329 | |||
1330 | bool post_scheduler::map_src(alu_node *n) { |
||
1331 | if (!map_src_vec(n->dst, false)) |
||
1332 | return false; |
||
1333 | |||
1334 | if (!map_src_vec(n->src, true)) |
||
1335 | return false; |
||
1336 | |||
1337 | return true; |
||
1338 | } |
||
1339 | |||
1340 | void post_scheduler::dump_regmap() { |
||
1341 | |||
1342 | sblog << "# REGMAP :\n"; |
||
1343 | |||
1344 | for(rv_map::iterator I = regmap.begin(), E = regmap.end(); I != E; ++I) { |
||
1345 | sblog << " # " << I->first << " => " << *(I->second) << "\n"; |
||
1346 | } |
||
1347 | |||
1348 | if (alu.current_ar) |
||
1349 | sblog << " current_AR: " << *alu.current_ar << "\n"; |
||
1350 | if (alu.current_pr) |
||
1351 | sblog << " current_PR: " << *alu.current_pr << "\n"; |
||
1352 | } |
||
1353 | |||
1354 | void post_scheduler::recolor_locals() { |
||
1355 | alu_group_tracker &rt = alu.grp(); |
||
1356 | |||
1357 | for (unsigned s = 0; s < ctx.num_slots; ++s) { |
||
1358 | alu_node *n = rt.slot(s); |
||
1359 | if (n) { |
||
1360 | value *d = n->dst[0]; |
||
1361 | if (d && d->is_sgpr() && !d->is_prealloc()) { |
||
1362 | recolor_local(d); |
||
1363 | } |
||
1364 | } |
||
1365 | } |
||
1366 | } |
||
1367 | |||
1368 | // returns true if there are interferences |
||
1369 | bool post_scheduler::check_interferences() { |
||
1370 | |||
1371 | alu_group_tracker &rt = alu.grp(); |
||
1372 | |||
1373 | unsigned interf_slots; |
||
1374 | |||
1375 | bool discarded = false; |
||
1376 | |||
1377 | PSC_DUMP( |
||
1378 | sblog << "check_interferences: before: \n"; |
||
1379 | dump_regmap(); |
||
1380 | ); |
||
1381 | |||
1382 | do { |
||
1383 | |||
1384 | interf_slots = 0; |
||
1385 | |||
1386 | for (unsigned s = 0; s < ctx.num_slots; ++s) { |
||
1387 | alu_node *n = rt.slot(s); |
||
1388 | if (n) { |
||
1389 | if (!unmap_dst(n)) { |
||
1390 | return true; |
||
1391 | } |
||
1392 | } |
||
1393 | } |
||
1394 | |||
1395 | for (unsigned s = 0; s < ctx.num_slots; ++s) { |
||
1396 | alu_node *n = rt.slot(s); |
||
1397 | if (n) { |
||
1398 | if (!map_src(n)) { |
||
1399 | interf_slots |= (1 << s); |
||
1400 | } |
||
1401 | } |
||
1402 | } |
||
1403 | |||
1404 | PSC_DUMP( |
||
1405 | for (unsigned i = 0; i < 5; ++i) { |
||
1406 | if (interf_slots & (1 << i)) { |
||
1407 | sblog << "!!!!!! interf slot: " << i << " : "; |
||
1408 | dump::dump_op(rt.slot(i)); |
||
1409 | sblog << "\n"; |
||
1410 | } |
||
1411 | } |
||
1412 | ); |
||
1413 | |||
1414 | if (!interf_slots) |
||
1415 | break; |
||
1416 | |||
1417 | PSC_DUMP( sblog << "ci: discarding slots " << interf_slots << "\n"; ); |
||
1418 | |||
1419 | rt.discard_slots(interf_slots, alu.conflict_nodes); |
||
1420 | regmap = prev_regmap; |
||
1421 | discarded = true; |
||
1422 | |||
1423 | } while(1); |
||
1424 | |||
1425 | PSC_DUMP( |
||
1426 | sblog << "check_interferences: after: \n"; |
||
1427 | dump_regmap(); |
||
1428 | ); |
||
1429 | |||
1430 | return discarded; |
||
1431 | } |
||
1432 | |||
1433 | // add instruction(s) (alu_node or contents of alu_packed_node) to current group |
||
1434 | // returns the number of added instructions on success |
||
1435 | unsigned post_scheduler::try_add_instruction(node *n) { |
||
1436 | |||
1437 | alu_group_tracker &rt = alu.grp(); |
||
1438 | |||
1439 | unsigned avail_slots = rt.avail_slots(); |
||
1440 | |||
1441 | if (n->is_alu_packed()) { |
||
1442 | alu_packed_node *p = static_cast |
||
1443 | unsigned slots = p->get_slot_mask(); |
||
1444 | unsigned cnt = __builtin_popcount(slots); |
||
1445 | |||
1446 | if ((slots & avail_slots) != slots) { |
||
1447 | PSC_DUMP( sblog << " no slots \n"; ); |
||
1448 | return 0; |
||
1449 | } |
||
1450 | |||
1451 | p->update_packed_items(ctx); |
||
1452 | |||
1453 | if (!rt.try_reserve(p)) { |
||
1454 | PSC_DUMP( sblog << " reservation failed \n"; ); |
||
1455 | return 0; |
||
1456 | } |
||
1457 | |||
1458 | p->remove(); |
||
1459 | return cnt; |
||
1460 | |||
1461 | } else { |
||
1462 | alu_node *a = static_cast |
||
1463 | value *d = a->dst.empty() ? NULL : a->dst[0]; |
||
1464 | |||
1465 | if (d && d->is_special_reg()) { |
||
1466 | assert(a->bc.op_ptr->flags & AF_MOVA); |
||
1467 | d = NULL; |
||
1468 | } |
||
1469 | |||
1470 | unsigned allowed_slots = ctx.alu_slots_mask(a->bc.op_ptr); |
||
1471 | unsigned slot; |
||
1472 | |||
1473 | allowed_slots &= avail_slots; |
||
1474 | |||
1475 | if (!allowed_slots) |
||
1476 | return 0; |
||
1477 | |||
1478 | if (d) { |
||
1479 | slot = d->get_final_chan(); |
||
1480 | a->bc.dst_chan = slot; |
||
1481 | allowed_slots &= (1 << slot) | 0x10; |
||
1482 | } else { |
||
1483 | if (a->bc.op_ptr->flags & AF_MOVA) { |
||
1484 | if (a->bc.slot_flags & AF_V) |
||
1485 | allowed_slots &= (1 << SLOT_X); |
||
1486 | else |
||
1487 | allowed_slots &= (1 << SLOT_TRANS); |
||
1488 | } |
||
1489 | } |
||
1490 | |||
1491 | // FIXME workaround for some problems with MULADD in trans slot on r700, |
||
1492 | // (is it really needed on r600?) |
||
1493 | if ((a->bc.op == ALU_OP3_MULADD || a->bc.op == ALU_OP3_MULADD_IEEE) && |
||
1494 | !ctx.is_egcm()) { |
||
1495 | allowed_slots &= 0x0F; |
||
1496 | } |
||
1497 | |||
1498 | if (!allowed_slots) { |
||
1499 | PSC_DUMP( sblog << " no suitable slots\n"; ); |
||
1500 | return 0; |
||
1501 | } |
||
1502 | |||
1503 | slot = __builtin_ctz(allowed_slots); |
||
1504 | a->bc.slot = slot; |
||
1505 | |||
1506 | PSC_DUMP( sblog << "slot: " << slot << "\n"; ); |
||
1507 | |||
1508 | if (!rt.try_reserve(a)) { |
||
1509 | PSC_DUMP( sblog << " reservation failed\n"; ); |
||
1510 | return 0; |
||
1511 | } |
||
1512 | |||
1513 | a->remove(); |
||
1514 | return 1; |
||
1515 | } |
||
1516 | } |
||
1517 | |||
1518 | bool post_scheduler::check_copy(node *n) { |
||
1519 | if (!n->is_copy_mov()) |
||
1520 | return false; |
||
1521 | |||
1522 | value *s = n->src[0]; |
||
1523 | value *d = n->dst[0]; |
||
1524 | |||
1525 | if (!s->is_sgpr() || !d->is_sgpr()) |
||
1526 | return false; |
||
1527 | |||
1528 | if (!s->is_prealloc()) { |
||
1529 | recolor_local(s); |
||
1530 | } |
||
1531 | |||
1532 | if (s->gpr == d->gpr) { |
||
1533 | |||
1534 | PSC_DUMP( |
||
1535 | sblog << "check_copy: "; |
||
1536 | dump::dump_op(n); |
||
1537 | sblog << "\n"; |
||
1538 | ); |
||
1539 | |||
1540 | rv_map::iterator F = regmap.find(d->gpr); |
||
1541 | bool gpr_free = (F == regmap.end()); |
||
1542 | |||
1543 | if (d->is_prealloc()) { |
||
1544 | if (gpr_free) { |
||
1545 | PSC_DUMP( sblog << " copy not ready...\n";); |
||
1546 | return true; |
||
1547 | } |
||
1548 | |||
1549 | value *rv = F->second; |
||
1550 | if (rv != d && (!rv->chunk || rv->chunk != d->chunk)) { |
||
1551 | PSC_DUMP( sblog << " copy not ready(2)...\n";); |
||
1552 | return true; |
||
1553 | } |
||
1554 | |||
1555 | unmap_dst(static_cast |
||
1556 | } |
||
1557 | |||
1558 | if (s->is_prealloc() && !map_src_val(s)) |
||
1559 | return true; |
||
1560 | |||
1561 | update_live(n, NULL); |
||
1562 | |||
1563 | release_src_values(n); |
||
1564 | n->remove(); |
||
1565 | PSC_DUMP( sblog << " copy coalesced...\n";); |
||
1566 | return true; |
||
1567 | } |
||
1568 | return false; |
||
1569 | } |
||
1570 | |||
1571 | void post_scheduler::dump_group(alu_group_tracker &rt) { |
||
1572 | for (unsigned i = 0; i < 5; ++i) { |
||
1573 | node *n = rt.slot(i); |
||
1574 | if (n) { |
||
1575 | sblog << "slot " << i << " : "; |
||
1576 | dump::dump_op(n); |
||
1577 | sblog << "\n"; |
||
1578 | } |
||
1579 | } |
||
1580 | } |
||
1581 | |||
1582 | void post_scheduler::process_ready_copies() { |
||
1583 | |||
1584 | node *last; |
||
1585 | |||
1586 | do { |
||
1587 | last = ready_copies.back(); |
||
1588 | |||
1589 | for (node_iterator N, I = ready_copies.begin(), E = ready_copies.end(); |
||
1590 | I != E; I = N) { |
||
1591 | N = I; ++N; |
||
1592 | |||
1593 | node *n = *I; |
||
1594 | |||
1595 | if (!check_copy(n)) { |
||
1596 | n->remove(); |
||
1597 | ready.push_back(n); |
||
1598 | } |
||
1599 | } |
||
1600 | } while (last != ready_copies.back()); |
||
1601 | |||
1602 | update_local_interferences(); |
||
1603 | } |
||
1604 | |||
1605 | |||
1606 | bool post_scheduler::prepare_alu_group() { |
||
1607 | |||
1608 | alu_group_tracker &rt = alu.grp(); |
||
1609 | |||
1610 | unsigned i1 = 0; |
||
1611 | |||
1612 | PSC_DUMP( |
||
1613 | sblog << "prepare_alu_group: starting...\n"; |
||
1614 | dump_group(rt); |
||
1615 | ); |
||
1616 | |||
1617 | ready.append_from(&alu.conflict_nodes); |
||
1618 | |||
1619 | // FIXME rework this loop |
||
1620 | |||
1621 | do { |
||
1622 | |||
1623 | process_ready_copies(); |
||
1624 | |||
1625 | ++i1; |
||
1626 | |||
1627 | for (node_iterator N, I = ready.begin(), E = ready.end(); I != E; |
||
1628 | I = N) { |
||
1629 | N = I; ++N; |
||
1630 | node *n = *I; |
||
1631 | |||
1632 | PSC_DUMP( |
||
1633 | sblog << "p_a_g: "; |
||
1634 | dump::dump_op(n); |
||
1635 | sblog << "\n"; |
||
1636 | ); |
||
1637 | |||
1638 | |||
1639 | unsigned cnt = try_add_instruction(n); |
||
1640 | |||
1641 | if (!cnt) |
||
1642 | continue; |
||
1643 | |||
1644 | PSC_DUMP( |
||
1645 | sblog << "current group:\n"; |
||
1646 | dump_group(rt); |
||
1647 | ); |
||
1648 | |||
1649 | if (rt.inst_count() == ctx.num_slots) { |
||
1650 | PSC_DUMP( sblog << " all slots used\n"; ); |
||
1651 | break; |
||
1652 | } |
||
1653 | } |
||
1654 | |||
1655 | if (!check_interferences()) |
||
1656 | break; |
||
1657 | |||
1658 | // don't try to add more instructions to the group with mova if this |
||
1659 | // can lead to breaking clause slot count limit - we don't want mova to |
||
1660 | // end up in the end of the new clause instead of beginning of the |
||
1661 | // current clause. |
||
1662 | if (rt.has_ar_load() && alu.total_slots() > 121) |
||
1663 | break; |
||
1664 | |||
1665 | if (rt.inst_count() && i1 > 50) |
||
1666 | break; |
||
1667 | |||
1668 | regmap = prev_regmap; |
||
1669 | |||
1670 | } while (1); |
||
1671 | |||
1672 | PSC_DUMP( |
||
1673 | sblog << " prepare_alu_group done, " << rt.inst_count() |
||
1674 | << " slot(s) \n"; |
||
1675 | |||
1676 | sblog << "$$$$$$$$PAG i1=" << i1 |
||
1677 | << " ready " << ready.count() |
||
1678 | << " pending " << pending.count() |
||
1679 | << " conflicting " << alu.conflict_nodes.count() |
||
1680 | <<"\n"; |
||
1681 | |||
1682 | ); |
||
1683 | |||
1684 | return rt.inst_count(); |
||
1685 | } |
||
1686 | |||
1687 | void post_scheduler::release_src_values(node* n) { |
||
1688 | release_src_vec(n->src, true); |
||
1689 | release_src_vec(n->dst, false); |
||
1690 | } |
||
1691 | |||
1692 | void post_scheduler::release_op(node *n) { |
||
1693 | PSC_DUMP( |
||
1694 | sblog << "release_op "; |
||
1695 | dump::dump_op(n); |
||
1696 | sblog << "\n"; |
||
1697 | ); |
||
1698 | |||
1699 | n->remove(); |
||
1700 | |||
1701 | if (n->is_copy_mov()) { |
||
1702 | ready_copies.push_back(n); |
||
1703 | } else if (n->is_mova() || n->is_pred_set()) { |
||
1704 | ready.push_front(n); |
||
1705 | } else { |
||
1706 | ready.push_back(n); |
||
1707 | } |
||
1708 | } |
||
1709 | |||
1710 | void post_scheduler::release_src_val(value *v) { |
||
1711 | node *d = v->any_def(); |
||
1712 | if (d) { |
||
1713 | if (!--ucm[d]) |
||
1714 | release_op(d); |
||
1715 | } |
||
1716 | } |
||
1717 | |||
1718 | void post_scheduler::release_src_vec(vvec& vv, bool src) { |
||
1719 | |||
1720 | for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) { |
||
1721 | value *v = *I; |
||
1722 | if (!v || v->is_readonly()) |
||
1723 | continue; |
||
1724 | |||
1725 | if (v->is_rel()) { |
||
1726 | release_src_val(v->rel); |
||
1727 | release_src_vec(v->muse, true); |
||
1728 | |||
1729 | } else if (src) { |
||
1730 | release_src_val(v); |
||
1731 | } |
||
1732 | } |
||
1733 | } |
||
1734 | |||
1735 | void literal_tracker::reset() { |
||
1736 | memset(lt, 0, sizeof(lt)); |
||
1737 | memset(uc, 0, sizeof(uc)); |
||
1738 | } |
||
1739 | |||
1740 | void rp_gpr_tracker::reset() { |
||
1741 | memset(rp, 0, sizeof(rp)); |
||
1742 | memset(uc, 0, sizeof(uc)); |
||
1743 | } |
||
1744 | |||
1745 | void rp_kcache_tracker::reset() { |
||
1746 | memset(rp, 0, sizeof(rp)); |
||
1747 | memset(uc, 0, sizeof(uc)); |
||
1748 | } |
||
1749 | |||
1750 | void alu_kcache_tracker::reset() { |
||
1751 | memset(kc, 0, sizeof(kc)); |
||
1752 | lines.clear(); |
||
1753 | } |
||
1754 | |||
1755 | void alu_clause_tracker::reset() { |
||
1756 | group = 0; |
||
1757 | slot_count = 0; |
||
1758 | grp0.reset(); |
||
1759 | grp1.reset(); |
||
1760 | } |
||
1761 | |||
1762 | alu_clause_tracker::alu_clause_tracker(shader &sh) |
||
1763 | : sh(sh), kt(sh.get_ctx().hw_class), slot_count(), |
||
1764 | grp0(sh), grp1(sh), |
||
1765 | group(), clause(), |
||
1766 | push_exec_mask(), |
||
1767 | current_ar(), current_pr() {} |
||
1768 | |||
1769 | void alu_clause_tracker::emit_group() { |
||
1770 | |||
1771 | assert(grp().inst_count()); |
||
1772 | |||
1773 | alu_group_node *g = grp().emit(); |
||
1774 | |||
1775 | if (grp().has_update_exec_mask()) { |
||
1776 | assert(!push_exec_mask); |
||
1777 | push_exec_mask = true; |
||
1778 | } |
||
1779 | |||
1780 | assert(g); |
||
1781 | |||
1782 | if (!clause) { |
||
1783 | clause = sh.create_clause(NST_ALU_CLAUSE); |
||
1784 | } |
||
1785 | |||
1786 | clause->push_front(g); |
||
1787 | |||
1788 | slot_count += grp().slot_count(); |
||
1789 | |||
1790 | new_group(); |
||
1791 | |||
1792 | PSC_DUMP( sblog << " #### group emitted\n"; ); |
||
1793 | } |
||
1794 | |||
1795 | void alu_clause_tracker::emit_clause(container_node *c) { |
||
1796 | assert(clause); |
||
1797 | |||
1798 | kt.init_clause(clause->bc); |
||
1799 | |||
1800 | assert(!current_ar); |
||
1801 | assert(!current_pr); |
||
1802 | |||
1803 | if (push_exec_mask) |
||
1804 | clause->bc.set_op(CF_OP_ALU_PUSH_BEFORE); |
||
1805 | |||
1806 | c->push_front(clause); |
||
1807 | |||
1808 | clause = NULL; |
||
1809 | push_exec_mask = false; |
||
1810 | slot_count = 0; |
||
1811 | kt.reset(); |
||
1812 | |||
1813 | PSC_DUMP( sblog << "######### ALU clause emitted\n"; ); |
||
1814 | } |
||
1815 | |||
1816 | bool alu_clause_tracker::check_clause_limits() { |
||
1817 | |||
1818 | alu_group_tracker > = grp(); |
||
1819 | |||
1820 | unsigned slots = gt.slot_count(); |
||
1821 | |||
1822 | // reserving slots to load AR and PR values |
||
1823 | unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0); |
||
1824 | |||
1825 | if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots) |
||
1826 | return false; |
||
1827 | |||
1828 | if (!kt.try_reserve(gt)) |
||
1829 | return false; |
||
1830 | |||
1831 | return true; |
||
1832 | } |
||
1833 | |||
1834 | void alu_clause_tracker::new_group() { |
||
1835 | group = !group; |
||
1836 | grp().reset(); |
||
1837 | } |
||
1838 | |||
1839 | bool alu_clause_tracker::is_empty() { |
||
1840 | return clause == NULL; |
||
1841 | } |
||
1842 | |||
1843 | void literal_tracker::init_group_literals(alu_group_node* g) { |
||
1844 | |||
1845 | g->literals.clear(); |
||
1846 | for (unsigned i = 0; i < 4; ++i) { |
||
1847 | if (!lt[i]) |
||
1848 | break; |
||
1849 | |||
1850 | g->literals.push_back(lt[i]); |
||
1851 | |||
1852 | PSC_DUMP( |
||
1853 | sblog << "literal emitted: " << lt[i].f; |
||
1854 | sblog.print_zw_hex(lt[i].u, 8); |
||
1855 | sblog << " " << lt[i].i << "\n"; |
||
1856 | ); |
||
1857 | } |
||
1858 | } |
||
1859 | |||
1860 | bool alu_kcache_tracker::try_reserve(alu_group_tracker& gt) { |
||
1861 | rp_kcache_tracker &kt = gt.kcache(); |
||
1862 | |||
1863 | if (!kt.num_sels()) |
||
1864 | return true; |
||
1865 | |||
1866 | sb_set |
||
1867 | |||
1868 | unsigned nl = kt.get_lines(group_lines); |
||
1869 | assert(nl); |
||
1870 | |||
1871 | sb_set |
||
1872 | lines.add_set(group_lines); |
||
1873 | |||
1874 | if (clause_lines.size() == lines.size()) |
||
1875 | return true; |
||
1876 | |||
1877 | if (update_kc()) |
||
1878 | return true; |
||
1879 | |||
1880 | lines = clause_lines; |
||
1881 | |||
1882 | return false; |
||
1883 | } |
||
1884 | |||
1885 | unsigned rp_kcache_tracker::get_lines(kc_lines& lines) { |
||
1886 | unsigned cnt = 0; |
||
1887 | |||
1888 | for (unsigned i = 0; i < sel_count; ++i) { |
||
1889 | unsigned line = rp[i]; |
||
1890 | |||
1891 | if (!line) |
||
1892 | return cnt; |
||
1893 | |||
1894 | --line; |
||
1895 | line = (sel_count == 2) ? line >> 5 : line >> 6; |
||
1896 | |||
1897 | if (lines.insert(line).second) |
||
1898 | ++cnt; |
||
1899 | } |
||
1900 | return cnt; |
||
1901 | } |
||
1902 | |||
1903 | bool alu_kcache_tracker::update_kc() { |
||
1904 | unsigned c = 0; |
||
1905 | |||
1906 | bc_kcache old_kc[4]; |
||
1907 | memcpy(old_kc, kc, sizeof(kc)); |
||
1908 | |||
1909 | for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) { |
||
1910 | unsigned line = *I; |
||
1911 | unsigned bank = line >> 8; |
||
1912 | |||
1913 | line &= 0xFF; |
||
1914 | |||
1915 | if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line)) |
||
1916 | ++kc[c-1].mode; |
||
1917 | else { |
||
1918 | if (c == max_kcs) { |
||
1919 | memcpy(kc, old_kc, sizeof(kc)); |
||
1920 | return false; |
||
1921 | } |
||
1922 | |||
1923 | kc[c].mode = KC_LOCK_1; |
||
1924 | |||
1925 | kc[c].bank = bank; |
||
1926 | kc[c].addr = line; |
||
1927 | ++c; |
||
1928 | } |
||
1929 | } |
||
1930 | return true; |
||
1931 | } |
||
1932 | |||
1933 | alu_node* alu_clause_tracker::create_ar_load() { |
||
1934 | alu_node *a = sh.create_alu(); |
||
1935 | |||
1936 | // FIXME use MOVA_GPR on R6xx |
||
1937 | |||
1938 | if (sh.get_ctx().uses_mova_gpr) { |
||
1939 | a->bc.set_op(ALU_OP1_MOVA_GPR_INT); |
||
1940 | a->bc.slot = SLOT_TRANS; |
||
1941 | } else { |
||
1942 | a->bc.set_op(ALU_OP1_MOVA_INT); |
||
1943 | a->bc.slot = SLOT_X; |
||
1944 | } |
||
1945 | |||
1946 | a->dst.resize(1); |
||
1947 | a->src.push_back(current_ar); |
||
1948 | |||
1949 | PSC_DUMP( |
||
1950 | sblog << "created AR load: "; |
||
1951 | dump::dump_op(a); |
||
1952 | sblog << "\n"; |
||
1953 | ); |
||
1954 | |||
1955 | return a; |
||
1956 | } |
||
1957 | |||
1958 | void alu_clause_tracker::discard_current_group() { |
||
1959 | PSC_DUMP( sblog << "act::discard_current_group\n"; ); |
||
1960 | grp().discard_all_slots(conflict_nodes); |
||
1961 | } |
||
1962 | |||
1963 | void rp_gpr_tracker::dump() { |
||
1964 | sblog << "=== gpr_tracker dump:\n"; |
||
1965 | for (int c = 0; c < 3; ++c) { |
||
1966 | sblog << "cycle " << c << " "; |
||
1967 | for (int h = 0; h < 4; ++h) { |
||
1968 | sblog << rp[c][h] << ":" << uc[c][h] << " "; |
||
1969 | } |
||
1970 | sblog << "\n"; |
||
1971 | } |
||
1972 | } |
||
1973 | |||
1974 | } // namespace r600_sb><>><>><>><>><>>><>><>><>>><>><>><>><>>><>><>><>><>><>>><>><>><>><>"\n"; |