/* Imported from KolibriOS SVN mirror (rev 4358); scrape artifacts removed. */
/*
 * Copyright 2013 Vadim Girlin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */
// Post-scheduler debug switch: set to 1 to enable verbose PSC_DUMP tracing.
#define PSC_DEBUG 0

#if PSC_DEBUG
// Executes the statements 'a' verbatim; the do/while(0) wrapper makes the
// macro behave as a single statement in if/else bodies.
#define PSC_DUMP(a) do { a } while (0)
#else
// Debugging disabled: dump statements compile away to nothing.
#define PSC_DUMP(a)
#endif
 
#include <cstring>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "sb_sched.h"
39
 
40
namespace r600_sb {
41
 
42
rp_kcache_tracker::rp_kcache_tracker(shader &sh) : rp(), uc(),
43
		// FIXME: for now we'll use "two const pairs" limit for r600, same as
44
		// for other chips, otherwise additional check in alu_group_tracker is
45
		// required to make sure that all 4 consts in the group fit into 2
46
		// kcache sets
47
		sel_count(2) {}
48
 
49
bool rp_kcache_tracker::try_reserve(sel_chan r) {
50
	unsigned sel = kc_sel(r);
51
 
52
	for (unsigned i = 0; i < sel_count; ++i) {
53
		if (rp[i] == 0) {
54
			rp[i] = sel;
55
			++uc[i];
56
			return true;
57
		}
58
		if (rp[i] == sel) {
59
			++uc[i];
60
			return true;
61
		}
62
	}
63
	return false;
64
}
65
 
66
bool rp_kcache_tracker::try_reserve(node* n) {
67
	bool need_unreserve = false;
68
	vvec::iterator I(n->src.begin()), E(n->src.end());
69
 
70
	for (; I != E; ++I) {
71
		value *v = *I;
72
		if (v->is_kcache()) {
73
			if (!try_reserve(v->select))
74
				break;
75
			else
76
				need_unreserve = true;
77
		}
78
	}
79
	if (I == E)
80
		return true;
81
 
82
	if (need_unreserve && I != n->src.begin()) {
83
		do {
84
			--I;
85
			value *v =*I;
86
			if (v->is_kcache())
87
				unreserve(v->select);
88
		} while (I != n->src.begin());
89
	}
90
	return false;
91
}
92
 
93
inline
94
void rp_kcache_tracker::unreserve(node* n) {
95
	vvec::iterator I(n->src.begin()), E(n->src.end());
96
	for (; I != E; ++I) {
97
		value *v = *I;
98
		if (v->is_kcache())
99
			unreserve(v->select);
100
	}
101
}
102
 
103
void rp_kcache_tracker::unreserve(sel_chan r) {
104
	unsigned sel = kc_sel(r);
105
 
106
	for (unsigned i = 0; i < sel_count; ++i)
107
		if (rp[i] == sel) {
108
			if (--uc[i] == 0)
109
				rp[i] = 0;
110
			return;
111
		}
112
	assert(0);
113
	return;
114
}
115
 
116
bool literal_tracker::try_reserve(alu_node* n) {
117
	bool need_unreserve = false;
118
 
119
	vvec::iterator I(n->src.begin()), E(n->src.end());
120
 
121
	for (; I != E; ++I) {
122
		value *v = *I;
123
		if (v->is_literal()) {
124
			if (!try_reserve(v->literal_value))
125
				break;
126
			else
127
				need_unreserve = true;
128
		}
129
	}
130
	if (I == E)
131
		return true;
132
 
133
	if (need_unreserve && I != n->src.begin()) {
134
		do {
135
			--I;
136
			value *v =*I;
137
			if (v->is_literal())
138
				unreserve(v->literal_value);
139
		} while (I != n->src.begin());
140
	}
141
	return false;
142
}
143
 
144
void literal_tracker::unreserve(alu_node* n) {
145
	unsigned nsrc = n->bc.op_ptr->src_count, i;
146
 
147
	for (i = 0; i < nsrc; ++i) {
148
		value *v = n->src[i];
149
		if (v->is_literal())
150
			unreserve(v->literal_value);
151
	}
152
}
153
 
154
bool literal_tracker::try_reserve(literal l) {
155
 
156
	PSC_DUMP( sblog << "literal reserve " << l.u << "  " << l.f << "\n"; );
157
 
158
	for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
159
		if (lt[i] == 0) {
160
			lt[i] = l;
161
			++uc[i];
162
			PSC_DUMP( sblog << "  reserved new uc = " << uc[i] << "\n"; );
163
			return true;
164
		} else if (lt[i] == l) {
165
			++uc[i];
166
			PSC_DUMP( sblog << "  reserved uc = " << uc[i] << "\n"; );
167
			return true;
168
		}
169
	}
170
	PSC_DUMP( sblog << "  failed to reserve literal\n"; );
171
	return false;
172
}
173
 
174
void literal_tracker::unreserve(literal l) {
175
 
176
	PSC_DUMP( sblog << "literal unreserve " << l.u << "  " << l.f << "\n"; );
177
 
178
	for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
179
		if (lt[i] == l) {
180
			if (--uc[i] == 0)
181
				lt[i] = 0;
182
			return;
183
		}
184
	}
185
	assert(0);
186
	return;
187
}
188
 
189
static inline unsigned bs_cycle_vector(unsigned bs, unsigned src) {
190
	static const unsigned swz[VEC_NUM][3] = {
191
		{0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
192
	};
193
	assert(bs < VEC_NUM && src < 3);
194
	return swz[bs][src];
195
}
196
 
197
static inline unsigned bs_cycle_scalar(unsigned bs, unsigned src) {
198
	static const unsigned swz[SCL_NUM][3] = {
199
		{2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
200
	};
201
 
202
	if (bs >= SCL_NUM || src >= 3) {
203
		// this prevents gcc warning "array subscript is above array bounds"
204
		// AFAICS we should never hit this path
205
		abort();
206
	}
207
	return swz[bs][src];
208
}
209
 
210
static inline unsigned bs_cycle(bool trans, unsigned bs, unsigned src) {
211
	return trans ? bs_cycle_scalar(bs, src) : bs_cycle_vector(bs, src);
212
}
213
 
214
inline
215
bool rp_gpr_tracker::try_reserve(unsigned cycle, unsigned sel, unsigned chan) {
216
	++sel;
217
	if (rp[cycle][chan] == 0) {
218
		rp[cycle][chan] = sel;
219
		++uc[cycle][chan];
220
		return true;
221
	} else if (rp[cycle][chan] == sel) {
222
		++uc[cycle][chan];
223
		return true;
224
	}
225
	return false;
226
}
227
 
228
inline
229
void rp_gpr_tracker::unreserve(alu_node* n) {
230
	unsigned nsrc = n->bc.op_ptr->src_count, i;
231
	unsigned trans = n->bc.slot == SLOT_TRANS;
232
	unsigned bs = n->bc.bank_swizzle;
233
	unsigned opt = !trans
234
			&& n->bc.src[0].sel == n->bc.src[1].sel
235
			&& n->bc.src[0].chan == n->bc.src[1].chan;
236
 
237
	for (i = 0; i < nsrc; ++i) {
238
		value *v = n->src[i];
239
		if (v->is_readonly())
240
			continue;
241
		if (i == 1 && opt)
242
			continue;
243
		unsigned cycle = bs_cycle(trans, bs, i);
244
		unreserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan);
245
	}
246
}
247
 
248
inline
249
void rp_gpr_tracker::unreserve(unsigned cycle, unsigned sel, unsigned chan) {
250
	++sel;
251
	assert(rp[cycle][chan] == sel && uc[cycle][chan]);
252
	if (--uc[cycle][chan] == 0)
253
		rp[cycle][chan] = 0;
254
}
255
 
256
inline
257
bool rp_gpr_tracker::try_reserve(alu_node* n) {
258
	unsigned nsrc = n->bc.op_ptr->src_count, i;
259
	unsigned trans = n->bc.slot == SLOT_TRANS;
260
	unsigned bs = n->bc.bank_swizzle;
261
	unsigned opt = !trans && nsrc >= 2 &&
262
			n->src[0] == n->src[1];
263
 
264
	bool need_unreserve = false;
265
	unsigned const_count = 0, min_gpr_cycle = 3;
266
 
267
	for (i = 0; i < nsrc; ++i) {
268
		value *v = n->src[i];
269
		if (v->is_readonly()) {
270
			const_count++;
271
			if (trans && const_count == 3)
272
				break;
273
		} else {
274
			if (i == 1 && opt)
275
				continue;
276
 
277
			unsigned cycle = bs_cycle(trans, bs, i);
278
 
279
			if (trans && cycle < min_gpr_cycle)
280
				min_gpr_cycle = cycle;
281
 
282
			if (const_count && cycle < const_count && trans)
283
				break;
284
 
285
			if (!try_reserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan))
286
				break;
287
			else
288
				need_unreserve = true;
289
		}
290
	}
291
 
292
	if ((i == nsrc) && (min_gpr_cycle + 1 > const_count))
293
		return true;
294
 
295
	if (need_unreserve && i--) {
296
		do {
297
			value *v = n->src[i];
298
			if (!v->is_readonly()) {
299
			if (i == 1 && opt)
300
				continue;
301
			unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel,
302
			          n->bc.src[i].chan);
303
			}
304
		} while (i--);
305
	}
306
	return false;
307
}
308
 
309
alu_group_tracker::alu_group_tracker(shader &sh)
310
	: sh(sh), kc(sh),
311
	  gpr(), lt(), slots(),
312
	  max_slots(sh.get_ctx().is_cayman() ? 4 : 5),
313
	  has_mova(), uses_ar(), has_predset(), has_kill(),
314
	  updates_exec_mask(), chan_count(), interp_param(), next_id() {
315
 
316
	available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
317
}
318
 
319
inline
320
sel_chan alu_group_tracker::get_value_id(value* v) {
321
	unsigned &id = vmap[v];
322
	if (!id)
323
		id = ++next_id;
324
	return sel_chan(id, v->get_final_chan());
325
}
326
 
327
inline
328
void alu_group_tracker::assign_slot(unsigned slot, alu_node* n) {
329
	update_flags(n);
330
	slots[slot] = n;
331
	available_slots &= ~(1 << slot);
332
 
333
	unsigned param = n->interp_param();
334
 
335
	if (param) {
336
		assert(!interp_param || interp_param == param);
337
		interp_param = param;
338
	}
339
}
340
 
341
 
342
void alu_group_tracker::discard_all_slots(container_node &removed_nodes) {
343
	PSC_DUMP( sblog << "agt::discard_all_slots\n"; );
344
	discard_slots(~available_slots & ((1 << max_slots) - 1), removed_nodes);
345
}
346
 
347
void alu_group_tracker::discard_slots(unsigned slot_mask,
348
                                    container_node &removed_nodes) {
349
 
350
	PSC_DUMP(
351
		sblog << "discard_slots : packed_ops : "
352
			<< (unsigned)packed_ops.size() << "\n";
353
	);
354
 
355
	for (node_vec::iterator N, I = packed_ops.begin();
356
			I != packed_ops.end(); I = N) {
357
		N = I; ++N;
358
 
359
		alu_packed_node *n = static_cast(*I);
360
		unsigned pslots = n->get_slot_mask();
361
 
362
		PSC_DUMP(
363
			sblog << "discard_slots : packed slot_mask : " << pslots << "\n";
364
		);
365
 
366
		if (pslots & slot_mask) {
367
 
368
			PSC_DUMP(
369
				sblog << "discard_slots : discarding packed...\n";
370
			);
371
 
372
			removed_nodes.push_back(n);
373
			slot_mask &= ~pslots;
374
			N = packed_ops.erase(I);
375
			available_slots |= pslots;
376
			for (unsigned k = 0; k < max_slots; ++k) {
377
				if (pslots & (1 << k))
378
					slots[k] = NULL;
379
			}
380
		}
381
	}
382
 
383
	for (unsigned slot = 0; slot < max_slots; ++slot) {
384
		unsigned slot_bit = 1 << slot;
385
 
386
		if (slot_mask & slot_bit) {
387
			assert(!(available_slots & slot_bit));
388
			assert(slots[slot]);
389
 
390
			assert(!(slots[slot]->bc.slot_flags & AF_4SLOT));
391
 
392
			PSC_DUMP(
393
				sblog << "discarding slot " << slot << " : ";
394
				dump::dump_op(slots[slot]);
395
				sblog << "\n";
396
			);
397
 
398
			removed_nodes.push_back(slots[slot]);
399
			slots[slot] = NULL;
400
			available_slots |= slot_bit;
401
		}
402
	}
403
 
404
	alu_node *t = slots[4];
405
	if (t && (t->bc.slot_flags & AF_V)) {
406
		unsigned chan = t->bc.dst_chan;
407
		if (!slots[chan]) {
408
			PSC_DUMP(
409
				sblog << "moving ";
410
				dump::dump_op(t);
411
				sblog << " from trans slot to free slot " << chan << "\n";
412
			);
413
 
414
			slots[chan] = t;
415
			slots[4] = NULL;
416
			t->bc.slot = chan;
417
		}
418
	}
419
 
420
	reinit();
421
}
422
 
423
alu_group_node* alu_group_tracker::emit() {
424
 
425
	alu_group_node *g = sh.create_alu_group();
426
 
427
	lt.init_group_literals(g);
428
 
429
	for (unsigned i = 0; i < max_slots; ++i) {
430
		alu_node *n = slots[i];
431
		if (n) {
432
			g->push_back(n);
433
		}
434
	}
435
	return g;
436
}
437
 
438
bool alu_group_tracker::try_reserve(alu_node* n) {
439
	unsigned nsrc = n->bc.op_ptr->src_count;
440
	unsigned slot = n->bc.slot;
441
	bool trans = slot == 4;
442
 
443
	if (slots[slot])
444
		return false;
445
 
446
	unsigned flags = n->bc.op_ptr->flags;
447
 
448
	unsigned param = n->interp_param();
449
 
450
	if (param && interp_param && interp_param != param)
451
		return false;
452
 
453
	if ((flags & AF_KILL) && has_predset)
454
		return false;
455
	if ((flags & AF_ANY_PRED) && (has_kill || has_predset))
456
		return false;
457
	if ((flags & AF_MOVA) && (has_mova || uses_ar))
458
		return false;
459
 
460
	if (n->uses_ar() && has_mova)
461
		return false;
462
 
463
	for (unsigned i = 0; i < nsrc; ++i) {
464
 
465
		unsigned last_id = next_id;
466
 
467
		value *v = n->src[i];
468
		if (!v->is_any_gpr() && !v->is_rel())
469
			continue;
470
		sel_chan vid = get_value_id(n->src[i]);
471
 
472
		if (vid > last_id && chan_count[vid.chan()] == 3) {
473
			return false;
474
		}
475
 
476
		n->bc.src[i].sel = vid.sel();
477
		n->bc.src[i].chan = vid.chan();
478
	}
479
 
480
	if (!lt.try_reserve(n))
481
		return false;
482
 
483
	if (!kc.try_reserve(n)) {
484
		lt.unreserve(n);
485
		return false;
486
	}
487
 
488
	unsigned fbs = n->forced_bank_swizzle();
489
 
490
	n->bc.bank_swizzle = 0;
491
 
492
	if (!trans & fbs)
493
		n->bc.bank_swizzle = VEC_210;
494
 
495
	if (gpr.try_reserve(n)) {
496
		assign_slot(slot, n);
497
		return true;
498
	}
499
 
500
	if (!fbs) {
501
		unsigned swz_num = trans ? SCL_NUM : VEC_NUM;
502
		for (unsigned bs = 0; bs < swz_num; ++bs) {
503
			n->bc.bank_swizzle = bs;
504
			if (gpr.try_reserve(n)) {
505
				assign_slot(slot, n);
506
				return true;
507
			}
508
		}
509
	}
510
 
511
	gpr.reset();
512
 
513
	slots[slot] = n;
514
	unsigned forced_swz_slots = 0;
515
	int first_slot = ~0, first_nf = ~0, last_slot = ~0;
516
	unsigned save_bs[5];
517
 
518
	for (unsigned i = 0; i < max_slots; ++i) {
519
		alu_node *a = slots[i];
520
		if (a) {
521
			if (first_slot == ~0)
522
				first_slot = i;
523
			last_slot = i;
524
			save_bs[i] = a->bc.bank_swizzle;
525
			if (a->forced_bank_swizzle()) {
526
				assert(i != SLOT_TRANS);
527
				forced_swz_slots |= (1 << i);
528
				a->bc.bank_swizzle = VEC_210;
529
				if (!gpr.try_reserve(a))
530
					assert("!internal reservation error");
531
			} else {
532
				if (first_nf == ~0)
533
					first_nf = i;
534
 
535
				a->bc.bank_swizzle = 0;
536
			}
537
		}
538
	}
539
 
540
	if (first_nf == ~0) {
541
		assign_slot(slot, n);
542
		return true;
543
	}
544
 
545
	assert(first_slot != ~0 && last_slot != ~0);
546
 
547
	// silence "array subscript is above array bounds" with gcc 4.8
548
	if (last_slot >= 5)
549
		abort();
550
 
551
	int i = first_nf;
552
	alu_node *a = slots[i];
553
	bool backtrack = false;
554
 
555
	while (1) {
556
 
557
		PSC_DUMP(
558
			sblog << " bs: trying s" << i << " bs:" << a->bc.bank_swizzle
559
				<< " bt:" << backtrack << "\n";
560
		);
561
 
562
		if (!backtrack && gpr.try_reserve(a)) {
563
			PSC_DUMP(
564
				sblog << " bs: reserved s" << i << " bs:" << a->bc.bank_swizzle
565
					<< "\n";
566
			);
567
 
568
			while ((++i <= last_slot) && !slots[i]);
569
			if (i <= last_slot)
570
				a = slots[i];
571
			else
572
				break;
573
		} else {
574
			bool itrans = i == SLOT_TRANS;
575
			unsigned max_swz = itrans ? SCL_221 : VEC_210;
576
 
577
			if (a->bc.bank_swizzle < max_swz) {
578
				++a->bc.bank_swizzle;
579
 
580
				PSC_DUMP(
581
					sblog << " bs: inc s" << i << " bs:" << a->bc.bank_swizzle
582
						<< "\n";
583
				);
584
 
585
			} else {
586
 
587
				a->bc.bank_swizzle = 0;
588
				while ((--i >= first_nf) && !slots[i]);
589
				if (i < first_nf)
590
					break;
591
				a = slots[i];
592
				PSC_DUMP(
593
					sblog << " bs: unreserve s" << i << " bs:" << a->bc.bank_swizzle
594
						<< "\n";
595
				);
596
				gpr.unreserve(a);
597
				backtrack = true;
598
 
599
				continue;
600
			}
601
		}
602
		backtrack = false;
603
	}
604
 
605
	if (i == last_slot + 1) {
606
		assign_slot(slot, n);
607
		return true;
608
	}
609
 
610
	// reservation failed, restore previous state
611
	slots[slot] = NULL;
612
	gpr.reset();
613
	for (unsigned i = 0; i < max_slots; ++i) {
614
		alu_node *a = slots[i];
615
		if (a) {
616
			a->bc.bank_swizzle = save_bs[i];
617
			bool b = gpr.try_reserve(a);
618
			assert(b);
619
		}
620
	}
621
 
622
	kc.unreserve(n);
623
	lt.unreserve(n);
624
	return false;
625
}
626
 
627
bool alu_group_tracker::try_reserve(alu_packed_node* p) {
628
	bool need_unreserve = false;
629
	node_iterator I(p->begin()), E(p->end());
630
 
631
	for (; I != E; ++I) {
632
		alu_node *n = static_cast(*I);
633
		if (!try_reserve(n))
634
			break;
635
		else
636
			need_unreserve = true;
637
	}
638
 
639
	if (I == E)  {
640
		packed_ops.push_back(p);
641
		return true;
642
	}
643
 
644
	if (need_unreserve) {
645
		while (--I != E) {
646
			alu_node *n = static_cast(*I);
647
			slots[n->bc.slot] = NULL;
648
		}
649
		reinit();
650
	}
651
	return false;
652
}
653
 
654
void alu_group_tracker::reinit() {
655
	alu_node * s[5];
656
	memcpy(s, slots, sizeof(slots));
657
 
658
	reset(true);
659
 
660
	for (int i = max_slots - 1; i >= 0; --i) {
661
		if (s[i] && !try_reserve(s[i])) {
662
			sblog << "alu_group_tracker: reinit error on slot " << i <<  "\n";
663
			for (unsigned i = 0; i < max_slots; ++i) {
664
				sblog << "  slot " << i << " : ";
665
				if (s[i])
666
					dump::dump_op(s[i]);
667
 
668
				sblog << "\n";
669
			}
670
			assert(!"alu_group_tracker: reinit error");
671
		}
672
	}
673
}
674
 
675
void alu_group_tracker::reset(bool keep_packed) {
676
	kc.reset();
677
	gpr.reset();
678
	lt.reset();
679
	memset(slots, 0, sizeof(slots));
680
	vmap.clear();
681
	next_id = 0;
682
	has_mova = false;
683
	uses_ar = false;
684
	has_predset = false;
685
	has_kill = false;
686
	updates_exec_mask = false;
687
	available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
688
	interp_param = 0;
689
 
690
	chan_count[0] = 0;
691
	chan_count[1] = 0;
692
	chan_count[2] = 0;
693
	chan_count[3] = 0;
694
 
695
	if (!keep_packed)
696
		packed_ops.clear();
697
}
698
 
699
void alu_group_tracker::update_flags(alu_node* n) {
700
	unsigned flags = n->bc.op_ptr->flags;
701
	has_kill |= (flags & AF_KILL);
702
	has_mova |= (flags & AF_MOVA);
703
	has_predset |= (flags & AF_ANY_PRED);
704
	uses_ar |= n->uses_ar();
705
 
706
	if (flags & AF_ANY_PRED) {
707
		if (n->dst[2] != NULL)
708
			updates_exec_mask = true;
709
	}
710
}
711
 
712
int post_scheduler::run() {
713
	run_on(sh.root);
714
	return 0;
715
}
716
 
717
void post_scheduler::run_on(container_node* n) {
718
 
719
	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
720
		if (I->is_container()) {
721
			if (I->subtype == NST_BB) {
722
				bb_node* bb = static_cast(*I);
723
				schedule_bb(bb);
724
			} else {
725
				run_on(static_cast(*I));
726
			}
727
		}
728
	}
729
}
730
 
731
void post_scheduler::init_uc_val(container_node *c, value *v) {
732
	node *d = v->any_def();
733
	if (d && d->parent == c)
734
		++ucm[d];
735
}
736
 
737
void post_scheduler::init_uc_vec(container_node *c, vvec &vv, bool src) {
738
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
739
		value *v = *I;
740
		if (!v || v->is_readonly())
741
			continue;
742
 
743
		if (v->is_rel()) {
744
			init_uc_val(c, v->rel);
745
			init_uc_vec(c, v->muse, true);
746
		} if (src) {
747
			init_uc_val(c, v);
748
		}
749
	}
750
}
751
 
752
unsigned post_scheduler::init_ucm(container_node *c, node *n) {
753
	init_uc_vec(c, n->src, true);
754
	init_uc_vec(c, n->dst, false);
755
 
756
	uc_map::iterator F = ucm.find(n);
757
	return F == ucm.end() ? 0 : F->second;
758
}
759
 
760
void post_scheduler::schedule_bb(bb_node* bb) {
761
	PSC_DUMP(
762
		sblog << "scheduling BB " << bb->id << "\n";
763
		if (!pending.empty())
764
			dump::dump_op_list(&pending);
765
	);
766
 
767
	assert(pending.empty());
768
	assert(bb_pending.empty());
769
	assert(ready.empty());
770
 
771
	bb_pending.append_from(bb);
772
	cur_bb = bb;
773
 
774
	node *n;
775
 
776
	while ((n = bb_pending.back())) {
777
 
778
		PSC_DUMP(
779
			sblog << "post_sched_bb ";
780
			dump::dump_op(n);
781
			sblog << "\n";
782
		);
783
 
784
		if (n->subtype == NST_ALU_CLAUSE) {
785
			n->remove();
786
			process_alu(static_cast(n));
787
			continue;
788
		}
789
 
790
		n->remove();
791
		bb->push_front(n);
792
	}
793
 
794
	this->cur_bb = NULL;
795
}
796
 
797
void post_scheduler::init_regmap() {
798
 
799
	regmap.clear();
800
 
801
	PSC_DUMP(
802
		sblog << "init_regmap: live: ";
803
		dump::dump_set(sh, live);
804
		sblog << "\n";
805
	);
806
 
807
	for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
808
		value *v = *I;
809
		assert(v);
810
		if (!v->is_sgpr() || !v->is_prealloc())
811
			continue;
812
 
813
		sel_chan r = v->gpr;
814
 
815
		PSC_DUMP(
816
			sblog << "init_regmap:  " << r << " <= ";
817
			dump::dump_val(v);
818
			sblog << "\n";
819
		);
820
 
821
		assert(r);
822
		regmap[r] = v;
823
	}
824
}
825
 
826
void post_scheduler::process_alu(container_node *c) {
827
 
828
	ucm.clear();
829
	alu.reset();
830
 
831
	live = c->live_after;
832
 
833
	init_globals(c->live_after, true);
834
	init_globals(c->live_before, true);
835
 
836
	init_regmap();
837
 
838
	update_local_interferences();
839
 
840
	for (node_riterator N, I = c->rbegin(), E = c->rend(); I != E; I = N) {
841
		N = I;
842
		++N;
843
 
844
		node *n = *I;
845
		unsigned uc = init_ucm(c, n);
846
 
847
		PSC_DUMP(
848
			sblog << "process_alu uc=" << uc << "  ";
849
			dump::dump_op(n);
850
			sblog << "  ";
851
		);
852
 
853
		if (uc) {
854
			n->remove();
855
			pending.push_back(n);
856
			PSC_DUMP( sblog << "pending\n"; );
857
		} else {
858
			release_op(n);
859
		}
860
	}
861
 
862
	schedule_alu(c);
863
}
864
 
865
void post_scheduler::update_local_interferences() {
866
 
867
	PSC_DUMP(
868
		sblog << "update_local_interferences : ";
869
		dump::dump_set(sh, live);
870
		sblog << "\n";
871
	);
872
 
873
 
874
	for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
875
		value *v = *I;
876
		if (v->is_prealloc())
877
			continue;
878
 
879
		v->interferences.add_set(live);
880
	}
881
}
882
 
883
void post_scheduler::update_live_src_vec(vvec &vv, val_set *born, bool src) {
884
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
885
		value *v = *I;
886
 
887
		if (!v)
888
			continue;
889
 
890
		if (src && v->is_any_gpr()) {
891
			if (live.add_val(v)) {
892
				if (!v->is_prealloc()) {
893
					if (!cleared_interf.contains(v)) {
894
						PSC_DUMP(
895
							sblog << "clearing interferences for " << *v << "\n";
896
						);
897
						v->interferences.clear();
898
						cleared_interf.add_val(v);
899
					}
900
				}
901
				if (born)
902
					born->add_val(v);
903
			}
904
		} else if (v->is_rel()) {
905
			if (!v->rel->is_any_gpr())
906
				live.add_val(v->rel);
907
			update_live_src_vec(v->muse, born, true);
908
		}
909
	}
910
}
911
 
912
void post_scheduler::update_live_dst_vec(vvec &vv) {
913
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
914
		value *v = *I;
915
		if (!v)
916
			continue;
917
 
918
		if (v->is_rel()) {
919
			update_live_dst_vec(v->mdef);
920
		} else if (v->is_any_gpr()) {
921
			if (!live.remove_val(v)) {
922
				PSC_DUMP(
923
						sblog << "failed to remove ";
924
				dump::dump_val(v);
925
				sblog << " from live : ";
926
				dump::dump_set(sh, live);
927
				sblog << "\n";
928
				);
929
			}
930
		}
931
	}
932
}
933
 
934
void post_scheduler::update_live(node *n, val_set *born) {
935
	update_live_dst_vec(n->dst);
936
	update_live_src_vec(n->src, born, true);
937
	update_live_src_vec(n->dst, born, false);
938
}
939
 
940
void post_scheduler::process_group() {
941
	alu_group_tracker &rt = alu.grp();
942
 
943
	val_set vals_born;
944
 
945
	recolor_locals();
946
 
947
	PSC_DUMP(
948
		sblog << "process_group: live_before : ";
949
		dump::dump_set(sh, live);
950
		sblog << "\n";
951
	);
952
 
953
	for (unsigned s = 0; s < ctx.num_slots; ++s) {
954
		alu_node *n = rt.slot(s);
955
		if (!n)
956
			continue;
957
 
958
		update_live(n, &vals_born);
959
	}
960
 
961
	PSC_DUMP(
962
		sblog << "process_group: live_after : ";
963
		dump::dump_set(sh, live);
964
		sblog << "\n";
965
	);
966
 
967
	update_local_interferences();
968
 
969
	for (unsigned i = 0; i < 5; ++i) {
970
		node *n = rt.slot(i);
971
		if (n && !n->is_mova()) {
972
			release_src_values(n);
973
		}
974
	}
975
}
976
 
977
void post_scheduler::init_globals(val_set &s, bool prealloc) {
978
 
979
	PSC_DUMP(
980
		sblog << "init_globals: ";
981
		dump::dump_set(sh, s);
982
		sblog << "\n";
983
	);
984
 
985
	for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
986
		value *v = *I;
987
		if (v->is_sgpr() && !v->is_global()) {
988
			v->set_global();
989
 
990
			if (prealloc && v->is_fixed()) {
991
				v->set_prealloc();
992
			}
993
		}
994
	}
995
}
996
 
997
void post_scheduler::emit_clause() {
998
 
999
	if (alu.current_ar) {
1000
		emit_load_ar();
1001
		process_group();
1002
		alu.emit_group();
1003
	}
1004
 
1005
	alu.emit_clause(cur_bb);
1006
}
1007
 
1008
void post_scheduler::schedule_alu(container_node *c) {
1009
 
1010
	assert(!ready.empty() || !ready_copies.empty());
1011
 
1012
	while (1) {
1013
 
1014
		prev_regmap = regmap;
1015
 
1016
		if (!prepare_alu_group()) {
1017
			if (alu.current_ar) {
1018
				emit_load_ar();
1019
				continue;
1020
			} else
1021
				break;
1022
		}
1023
 
1024
		if (!alu.check_clause_limits()) {
1025
			regmap = prev_regmap;
1026
			emit_clause();
1027
			init_globals(live, false);
1028
			continue;
1029
		}
1030
 
1031
		process_group();
1032
		alu.emit_group();
1033
	};
1034
 
1035
	if (!alu.is_empty()) {
1036
		emit_clause();
1037
	}
1038
 
1039
	if (!ready.empty()) {
1040
		sblog << "##post_scheduler: unscheduled ready instructions :";
1041
		dump::dump_op_list(&ready);
1042
		assert(!"unscheduled ready instructions");
1043
	}
1044
 
1045
	if (!pending.empty()) {
1046
		sblog << "##post_scheduler: unscheduled pending instructions :";
1047
		dump::dump_op_list(&pending);
1048
		assert(!"unscheduled pending instructions");
1049
	}
1050
}
1051
 
1052
void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
1053
	unsigned chan = v->gpr.chan();
1054
 
1055
	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh);
1056
			I != E; ++I) {
1057
		value *vi = *I;
1058
		sel_chan gpr = vi->get_final_gpr();
1059
 
1060
		if (vi->is_any_gpr() && gpr && vi != v &&
1061
				(!v->chunk || v->chunk != vi->chunk) &&
1062
				vi->is_fixed() && gpr.chan() == chan) {
1063
 
1064
			unsigned r = gpr.sel();
1065
 
1066
			PSC_DUMP(
1067
				sblog << "\tadd_interferences: " << *vi << "\n";
1068
			);
1069
 
1070
			if (rb.size() <= r)
1071
				rb.resize(r + 32);
1072
			rb.set(r);
1073
		}
1074
	}
1075
}
1076
 
1077
void post_scheduler::set_color_local_val(value *v, sel_chan color) {
1078
	v->gpr = color;
1079
 
1080
	PSC_DUMP(
1081
		sblog << "     recolored: ";
1082
		dump::dump_val(v);
1083
		sblog << "\n";
1084
	);
1085
}
1086
 
1087
void post_scheduler::set_color_local(value *v, sel_chan color) {
1088
	if (v->chunk) {
1089
		vvec &vv = v->chunk->values;
1090
		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
1091
			value *v2 =*I;
1092
			set_color_local_val(v2, color);
1093
		}
1094
		v->chunk->fix();
1095
	} else {
1096
		set_color_local_val(v, color);
1097
		v->fix();
1098
	}
1099
}
1100
 
1101
bool post_scheduler::recolor_local(value *v) {
1102
 
1103
	sb_bitset rb;
1104
 
1105
	assert(v->is_sgpr());
1106
	assert(!v->is_prealloc());
1107
	assert(v->gpr);
1108
 
1109
	unsigned chan = v->gpr.chan();
1110
 
1111
	PSC_DUMP(
1112
		sblog << "recolor_local: ";
1113
		dump::dump_val(v);
1114
		sblog << "   interferences: ";
1115
		dump::dump_set(sh, v->interferences);
1116
		sblog << "\n";
1117
		if (v->chunk) {
1118
			sblog << "     in chunk: ";
1119
			coalescer::dump_chunk(v->chunk);
1120
			sblog << "\n";
1121
		}
1122
	);
1123
 
1124
	if (v->chunk) {
1125
		for (vvec::iterator I = v->chunk->values.begin(),
1126
				E = v->chunk->values.end(); I != E; ++I) {
1127
			value *v2 = *I;
1128
 
1129
			PSC_DUMP( sblog << "   add_interferences for " << *v2 << " :\n"; );
1130
 
1131
			add_interferences(v, rb, v2->interferences);
1132
		}
1133
	} else {
1134
		add_interferences(v, rb, v->interferences);
1135
	}
1136
 
1137
	PSC_DUMP(
1138
		unsigned sz = rb.size();
1139
		sblog << "registers bits: " << sz;
1140
		for (unsigned r = 0; r < sz; ++r) {
1141
			if ((r & 7) == 0)
1142
				sblog << "\n  " << r << "   ";
1143
			sblog << (rb.get(r) ? 1 : 0);
1144
		}
1145
	);
1146
 
1147
	bool no_temp_gprs = v->is_global();
1148
	unsigned rs, re, pass = no_temp_gprs ? 1 : 0;
1149
 
1150
	while (pass < 2) {
1151
 
1152
		if (pass == 0) {
1153
			rs = sh.first_temp_gpr();
1154
			re = MAX_GPR;
1155
		} else {
1156
			rs = 0;
1157
			re = sh.num_nontemp_gpr();
1158
		}
1159
 
1160
		for (unsigned reg = rs; reg < re; ++reg) {
1161
			if (reg >= rb.size() || !rb.get(reg)) {
1162
				// color found
1163
				set_color_local(v, sel_chan(reg, chan));
1164
				return true;
1165
			}
1166
		}
1167
		++pass;
1168
	}
1169
 
1170
	assert(!"recolor_local failed");
1171
	return true;
1172
}
1173
 
1174
void post_scheduler::emit_load_ar() {
1175
 
1176
	regmap = prev_regmap;
1177
	alu.discard_current_group();
1178
 
1179
	alu_group_tracker &rt = alu.grp();
1180
	alu_node *a = alu.create_ar_load();
1181
 
1182
	if (!rt.try_reserve(a)) {
1183
		sblog << "can't emit AR load : ";
1184
		dump::dump_op(a);
1185
		sblog << "\n";
1186
	}
1187
 
1188
	alu.current_ar = 0;
1189
}
1190
 
1191
bool post_scheduler::unmap_dst_val(value *d) {
1192
 
1193
	if (d == alu.current_ar) {
1194
		emit_load_ar();
1195
		return false;
1196
	}
1197
 
1198
	if (d->is_prealloc()) {
1199
		sel_chan gpr = d->get_final_gpr();
1200
		rv_map::iterator F = regmap.find(gpr);
1201
		value *c = NULL;
1202
		if (F != regmap.end())
1203
			c = F->second;
1204
 
1205
		if (c && c!=d && (!c->chunk || c->chunk != d->chunk)) {
1206
			PSC_DUMP(
1207
				sblog << "dst value conflict : ";
1208
				dump::dump_val(d);
1209
				sblog << "   regmap contains ";
1210
				dump::dump_val(c);
1211
				sblog << "\n";
1212
			);
1213
			assert(!"scheduler error");
1214
			return false;
1215
		} else if (c) {
1216
			regmap.erase(F);
1217
		}
1218
	}
1219
	return true;
1220
}
1221
 
1222
bool post_scheduler::unmap_dst(alu_node *n) {
1223
	value *d = n->dst.empty() ? NULL : n->dst[0];
1224
 
1225
	if (!d)
1226
		return true;
1227
 
1228
	if (!d->is_rel()) {
1229
		if (d && d->is_any_reg()) {
1230
 
1231
			if (d->is_AR()) {
1232
				if (alu.current_ar != d) {
1233
					sblog << "loading wrong ar value\n";
1234
					assert(0);
1235
				} else {
1236
					alu.current_ar = NULL;
1237
				}
1238
 
1239
			} else if (d->is_any_gpr()) {
1240
				if (!unmap_dst_val(d))
1241
					return false;
1242
			}
1243
		}
1244
	} else {
1245
		for (vvec::iterator I = d->mdef.begin(), E = d->mdef.end();
1246
				I != E; ++I) {
1247
			d = *I;
1248
			if (!d)
1249
				continue;
1250
 
1251
			assert(d->is_any_gpr());
1252
 
1253
			if (!unmap_dst_val(d))
1254
				return false;
1255
		}
1256
	}
1257
	return true;
1258
}
1259
 
1260
bool post_scheduler::map_src_val(value *v) {
1261
 
1262
	if (!v->is_prealloc())
1263
		return true;
1264
 
1265
	sel_chan gpr = v->get_final_gpr();
1266
	rv_map::iterator F = regmap.find(gpr);
1267
	value *c = NULL;
1268
	if (F != regmap.end()) {
1269
		c = F->second;
1270
		if (!v->v_equal(c)) {
1271
			PSC_DUMP(
1272
				sblog << "can't map src value ";
1273
				dump::dump_val(v);
1274
				sblog << ", regmap contains ";
1275
				dump::dump_val(c);
1276
				sblog << "\n";
1277
			);
1278
			return false;
1279
		}
1280
	} else {
1281
		regmap.insert(std::make_pair(gpr, v));
1282
	}
1283
	return true;
1284
}
1285
 
1286
// Map all gpr source values of vector vv into the regmap. Handles relative
// (AR-indexed) operands by mapping their possible uses and tracking the
// current AR value. 'src' is false when vv is actually a dst vector (only
// the relative-addressing bookkeeping applies then).
// Returns false on any mapping conflict.
bool post_scheduler::map_src_vec(vvec &vv, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if ((!v->is_any_gpr() || !v->is_fixed()) && !v->is_rel())
			continue;

		if (v->is_rel()) {
			value *rel = v->rel;
			assert(rel);

			if (!rel->is_const()) {
				// gpr-relative access: map every value that may be read
				if (!map_src_vec(v->muse, true))
					return false;

				if (rel != alu.current_ar) {
					if (alu.current_ar) {
						// only one AR value can be live per group
						PSC_DUMP(
							sblog << "  current_AR is " << *alu.current_ar
								<< "  trying to use " << *rel << "\n";
						);
						return false;
					}

					alu.current_ar = rel;

					PSC_DUMP(
						sblog << "  new current_AR assigned: " << *alu.current_ar
							<< "\n";
					);
				}
			}

		} else if (src) {
			if (!map_src_val(v)) {
				return false;
			}
		}
	}
	return true;
}
1329
 
1330
// Map all source values of instruction n (the dst vector is walked too,
// but only for its relative-addressing uses). Returns false on conflict.
bool post_scheduler::map_src(alu_node *n) {
	if (!map_src_vec(n->dst, false))
		return false;

	if (!map_src_vec(n->src, true))
		return false;

	return true;
}
1339
 
1340
void post_scheduler::dump_regmap() {
1341
 
1342
	sblog << "# REGMAP :\n";
1343
 
1344
	for(rv_map::iterator I = regmap.begin(), E = regmap.end(); I != E; ++I) {
1345
		sblog << "  # " << I->first << " => " << *(I->second) << "\n";
1346
	}
1347
 
1348
	if (alu.current_ar)
1349
		sblog << "    current_AR: " << *alu.current_ar << "\n";
1350
	if (alu.current_pr)
1351
		sblog << "    current_PR: " << *alu.current_pr << "\n";
1352
}
1353
 
1354
void post_scheduler::recolor_locals() {
1355
	alu_group_tracker &rt = alu.grp();
1356
 
1357
	for (unsigned s = 0; s < ctx.num_slots; ++s) {
1358
		alu_node *n = rt.slot(s);
1359
		if (n) {
1360
			value *d = n->dst[0];
1361
			if (d && d->is_sgpr() && !d->is_prealloc()) {
1362
				recolor_local(d);
1363
			}
1364
		}
1365
	}
1366
}
1367
 
1368
// returns true if there are interferences
1369
bool post_scheduler::check_interferences() {
1370
 
1371
	alu_group_tracker &rt = alu.grp();
1372
 
1373
	unsigned interf_slots;
1374
 
1375
	bool discarded = false;
1376
 
1377
	PSC_DUMP(
1378
			sblog << "check_interferences: before: \n";
1379
	dump_regmap();
1380
	);
1381
 
1382
	do {
1383
 
1384
		interf_slots = 0;
1385
 
1386
		for (unsigned s = 0; s < ctx.num_slots; ++s) {
1387
			alu_node *n = rt.slot(s);
1388
			if (n) {
1389
				if (!unmap_dst(n)) {
1390
					return true;
1391
				}
1392
			}
1393
		}
1394
 
1395
		for (unsigned s = 0; s < ctx.num_slots; ++s) {
1396
			alu_node *n = rt.slot(s);
1397
			if (n) {
1398
				if (!map_src(n)) {
1399
					interf_slots |= (1 << s);
1400
				}
1401
			}
1402
		}
1403
 
1404
		PSC_DUMP(
1405
				for (unsigned i = 0; i < 5; ++i) {
1406
					if (interf_slots & (1 << i)) {
1407
						sblog << "!!!!!! interf slot: " << i << "  : ";
1408
						dump::dump_op(rt.slot(i));
1409
						sblog << "\n";
1410
					}
1411
				}
1412
		);
1413
 
1414
		if (!interf_slots)
1415
			break;
1416
 
1417
		PSC_DUMP( sblog << "ci: discarding slots " << interf_slots << "\n"; );
1418
 
1419
		rt.discard_slots(interf_slots, alu.conflict_nodes);
1420
		regmap = prev_regmap;
1421
		discarded = true;
1422
 
1423
	} while(1);
1424
 
1425
	PSC_DUMP(
1426
		sblog << "check_interferences: after: \n";
1427
		dump_regmap();
1428
	);
1429
 
1430
	return discarded;
1431
}
1432
 
1433
// add instruction(s) (alu_node or contents of alu_packed_node) to current group
1434
// returns the number of added instructions on success
1435
// add instruction(s) (alu_node or contents of alu_packed_node) to current group
// returns the number of added instructions on success
unsigned post_scheduler::try_add_instruction(node *n) {

	alu_group_tracker &rt = alu.grp();

	unsigned avail_slots = rt.avail_slots();

	if (n->is_alu_packed()) {
		// NOTE: template argument restored - the scraped source had the
		// angle brackets of both static_casts eaten by HTML rendering.
		alu_packed_node *p = static_cast<alu_packed_node*>(n);
		unsigned slots = p->get_slot_mask();
		unsigned cnt = __builtin_popcount(slots);

		// a packed op needs all its slots free at once
		if ((slots & avail_slots) != slots) {
			PSC_DUMP( sblog << "   no slots \n"; );
			return 0;
		}

		p->update_packed_items(ctx);

		if (!rt.try_reserve(p)) {
			PSC_DUMP( sblog << "   reservation failed \n"; );
			return 0;
		}

		p->remove();
		return cnt;

	} else {
		alu_node *a = static_cast<alu_node*>(n);
		value *d = a->dst.empty() ? NULL : a->dst[0];

		if (d && d->is_special_reg()) {
			assert(a->bc.op_ptr->flags & AF_MOVA);
			d = NULL;
		}

		unsigned allowed_slots = ctx.alu_slots_mask(a->bc.op_ptr);
		unsigned slot;

		allowed_slots &= avail_slots;

		if (!allowed_slots)
			return 0;

		if (d) {
			// dst chan fixes the vector slot; 0x10 keeps trans allowed
			slot = d->get_final_chan();
			a->bc.dst_chan = slot;
			allowed_slots &= (1 << slot) | 0x10;
		} else {
			if (a->bc.op_ptr->flags & AF_MOVA) {
				if (a->bc.slot_flags & AF_V)
					allowed_slots &= (1 << SLOT_X);
				else
					allowed_slots &= (1 << SLOT_TRANS);
			}
		}

		// FIXME workaround for some problems with MULADD in trans slot on r700,
		// (is it really needed on r600?)
		if ((a->bc.op == ALU_OP3_MULADD || a->bc.op == ALU_OP3_MULADD_IEEE) &&
				!ctx.is_egcm()) {
			allowed_slots &= 0x0F;
		}

		if (!allowed_slots) {
			PSC_DUMP( sblog << "   no suitable slots\n"; );
			return 0;
		}

		slot = __builtin_ctz(allowed_slots);
		a->bc.slot = slot;

		PSC_DUMP( sblog << "slot: " << slot << "\n"; );

		if (!rt.try_reserve(a)) {
			PSC_DUMP( sblog << "   reservation failed\n"; );
			return 0;
		}

		a->remove();
		return 1;
	}
}
1517
 
1518
// Try to coalesce a copy MOV whose src and dst were colored to the same
// gpr. Returns true if n was handled here (coalesced or deferred as "not
// ready"), false if it must be scheduled as a normal instruction.
bool post_scheduler::check_copy(node *n) {
	if (!n->is_copy_mov())
		return false;

	value *s = n->src[0];
	value *d = n->dst[0];

	if (!s->is_sgpr() || !d->is_sgpr())
		return false;

	if (!s->is_prealloc()) {
		recolor_local(s);
	}

	if (s->gpr == d->gpr) {

		PSC_DUMP(
			sblog << "check_copy: ";
			dump::dump_op(n);
			sblog << "\n";
		);

		rv_map::iterator F = regmap.find(d->gpr);
		bool gpr_free = (F == regmap.end());

		if (d->is_prealloc()) {
			if (gpr_free) {
				PSC_DUMP( sblog << "    copy not ready...\n";);
				return true;
			}

			value *rv = F->second;
			if (rv != d && (!rv->chunk || rv->chunk != d->chunk)) {
				PSC_DUMP( sblog << "    copy not ready(2)...\n";);
				return true;
			}

			// template argument restored (lost in the HTML scrape)
			unmap_dst(static_cast<alu_node*>(n));
		}

		if (s->is_prealloc() && !map_src_val(s))
			return true;

		update_live(n, NULL);

		release_src_values(n);
		n->remove();
		PSC_DUMP( sblog << "    copy coalesced...\n";);
		return true;
	}
	return false;
}
1570
 
1571
// Debug helper: print the instruction occupying each of the 5 alu slots.
void post_scheduler::dump_group(alu_group_tracker &rt) {
	for (unsigned i = 0; i < 5; ++i) {
		node *n = rt.slot(i);
		if (n) {
			sblog << "slot " << i << " : ";
			dump::dump_op(n);
			sblog << "\n";
		}
	}
}
1581
 
1582
void post_scheduler::process_ready_copies() {
1583
 
1584
	node *last;
1585
 
1586
	do {
1587
		last = ready_copies.back();
1588
 
1589
		for (node_iterator N, I = ready_copies.begin(), E = ready_copies.end();
1590
				I != E; I = N) {
1591
			N = I; ++N;
1592
 
1593
			node *n = *I;
1594
 
1595
			if (!check_copy(n)) {
1596
				n->remove();
1597
				ready.push_back(n);
1598
			}
1599
		}
1600
	} while (last != ready_copies.back());
1601
 
1602
	update_local_interferences();
1603
}
1604
 
1605
 
1606
bool post_scheduler::prepare_alu_group() {
1607
 
1608
	alu_group_tracker &rt = alu.grp();
1609
 
1610
	unsigned i1 = 0;
1611
 
1612
	PSC_DUMP(
1613
		sblog << "prepare_alu_group: starting...\n";
1614
		dump_group(rt);
1615
	);
1616
 
1617
	ready.append_from(&alu.conflict_nodes);
1618
 
1619
	// FIXME rework this loop
1620
 
1621
	do {
1622
 
1623
		process_ready_copies();
1624
 
1625
		++i1;
1626
 
1627
		for (node_iterator N, I = ready.begin(), E = ready.end(); I != E;
1628
				I = N) {
1629
			N = I; ++N;
1630
			node *n = *I;
1631
 
1632
			PSC_DUMP(
1633
				sblog << "p_a_g: ";
1634
				dump::dump_op(n);
1635
				sblog << "\n";
1636
			);
1637
 
1638
 
1639
			unsigned cnt = try_add_instruction(n);
1640
 
1641
			if (!cnt)
1642
				continue;
1643
 
1644
			PSC_DUMP(
1645
				sblog << "current group:\n";
1646
				dump_group(rt);
1647
			);
1648
 
1649
			if (rt.inst_count() == ctx.num_slots) {
1650
				PSC_DUMP( sblog << " all slots used\n"; );
1651
				break;
1652
			}
1653
		}
1654
 
1655
		if (!check_interferences())
1656
			break;
1657
 
1658
		// don't try to add more instructions to the group with mova if this
1659
		// can lead to breaking clause slot count limit - we don't want mova to
1660
		// end up in the end of the new clause instead of beginning of the
1661
		// current clause.
1662
		if (rt.has_ar_load() && alu.total_slots() > 121)
1663
			break;
1664
 
1665
		if (rt.inst_count() && i1 > 50)
1666
			break;
1667
 
1668
		regmap = prev_regmap;
1669
 
1670
	} while (1);
1671
 
1672
	PSC_DUMP(
1673
		sblog << " prepare_alu_group done, " << rt.inst_count()
1674
	          << " slot(s) \n";
1675
 
1676
		sblog << "$$$$$$$$PAG i1=" << i1
1677
				<< "  ready " << ready.count()
1678
				<< "  pending " << pending.count()
1679
				<< "  conflicting " << alu.conflict_nodes.count()
1680
				<<"\n";
1681
 
1682
	);
1683
 
1684
	return rt.inst_count();
1685
}
1686
 
1687
// Release all values an instruction reads: real sources plus the
// relative-addressing uses hidden in the dst vector.
void post_scheduler::release_src_values(node* n) {
	release_src_vec(n->src, true);
	release_src_vec(n->dst, false);
}
1691
 
1692
// Move a node whose uses are all satisfied onto the appropriate ready
// queue: copy MOVs go to ready_copies, MOVA/predicate-set ops to the front
// of ready (scheduled sooner), everything else to the back.
void post_scheduler::release_op(node *n) {
	PSC_DUMP(
		sblog << "release_op ";
		dump::dump_op(n);
		sblog << "\n";
	);

	n->remove();

	if (n->is_copy_mov()) {
		ready_copies.push_back(n);
	} else if (n->is_mova() || n->is_pred_set()) {
		ready.push_front(n);
	} else {
		ready.push_back(n);
	}
}
1709
 
1710
// Decrement the pending-use count of v's defining node; release the node
// once its last use is scheduled.
void post_scheduler::release_src_val(value *v) {
	node *d = v->any_def();
	if (d) {
		if (!--ucm[d])
			release_op(d);
	}
}
1717
 
1718
// Release every source value in vv. Relative operands also release their
// address register and possible-use values; for a dst vector (src ==
// false) only the relative parts are released.
void post_scheduler::release_src_vec(vvec& vv, bool src) {

	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			release_src_val(v->rel);
			release_src_vec(v->muse, true);

		} else if (src) {
			release_src_val(v);
		}
	}
}
1734
 
1735
void literal_tracker::reset() {
1736
	memset(lt, 0, sizeof(lt));
1737
	memset(uc, 0, sizeof(uc));
1738
}
1739
 
1740
void rp_gpr_tracker::reset() {
1741
	memset(rp, 0, sizeof(rp));
1742
	memset(uc, 0, sizeof(uc));
1743
}
1744
 
1745
void rp_kcache_tracker::reset() {
1746
	memset(rp, 0, sizeof(rp));
1747
	memset(uc, 0, sizeof(uc));
1748
}
1749
 
1750
void alu_kcache_tracker::reset() {
1751
	memset(kc, 0, sizeof(kc));
1752
	lines.clear();
1753
}
1754
 
1755
void alu_clause_tracker::reset() {
1756
	group = 0;
1757
	slot_count = 0;
1758
	grp0.reset();
1759
	grp1.reset();
1760
}
1761
 
1762
// Construct an alu clause tracker for shader sh; the kcache tracker is
// initialized for the target hw class, everything else value-initialized.
alu_clause_tracker::alu_clause_tracker(shader &sh)
	: sh(sh), kt(sh.get_ctx().hw_class), slot_count(),
	  grp0(sh), grp1(sh),
	  group(), clause(),
	  push_exec_mask(),
	  current_ar(), current_pr() {}
1768
 
1769
void alu_clause_tracker::emit_group() {
1770
 
1771
	assert(grp().inst_count());
1772
 
1773
	alu_group_node *g = grp().emit();
1774
 
1775
	if (grp().has_update_exec_mask()) {
1776
		assert(!push_exec_mask);
1777
		push_exec_mask = true;
1778
	}
1779
 
1780
	assert(g);
1781
 
1782
	if (!clause) {
1783
		clause = sh.create_clause(NST_ALU_CLAUSE);
1784
	}
1785
 
1786
	clause->push_front(g);
1787
 
1788
	slot_count += grp().slot_count();
1789
 
1790
	new_group();
1791
 
1792
	PSC_DUMP( sblog << "   #### group emitted\n"; );
1793
}
1794
 
1795
// Finalize the pending clause (kcache setup, PUSH_BEFORE variant if the
// exec mask is updated inside), attach it to container c, and reset the
// per-clause state for the next clause.
void alu_clause_tracker::emit_clause(container_node *c) {
	assert(clause);

	kt.init_clause(clause->bc);

	// AR/PR loads must have been emitted before the clause ends
	assert(!current_ar);
	assert(!current_pr);

	if (push_exec_mask)
		clause->bc.set_op(CF_OP_ALU_PUSH_BEFORE);

	c->push_front(clause);

	clause = NULL;
	push_exec_mask = false;
	slot_count = 0;
	kt.reset();

	PSC_DUMP( sblog << "######### ALU clause emitted\n"; );
}
1815
 
1816
bool alu_clause_tracker::check_clause_limits() {
1817
 
1818
	alu_group_tracker > = grp();
1819
 
1820
	unsigned slots = gt.slot_count();
1821
 
1822
	// reserving slots to load AR and PR values
1823
	unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
1824
 
1825
	if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
1826
		return false;
1827
 
1828
	if (!kt.try_reserve(gt))
1829
		return false;
1830
 
1831
	return true;
1832
}
1833
 
1834
void alu_clause_tracker::new_group() {
1835
	group = !group;
1836
	grp().reset();
1837
}
1838
 
1839
bool alu_clause_tracker::is_empty() {
1840
	return clause == NULL;
1841
}
1842
 
1843
// Copy the tracked literal slots (up to 4, terminated by the first unused
// one) into the group node's literal list.
void literal_tracker::init_group_literals(alu_group_node* g) {

	g->literals.clear();
	for (unsigned i = 0; i < 4; ++i) {
		if (!lt[i])
			break;

		g->literals.push_back(lt[i]);

		PSC_DUMP(
			sblog << "literal emitted: " << lt[i].f;
			sblog.print_zw_hex(lt[i].u, 8);
			sblog << "   " << lt[i].i << "\n";
		);
	}
}
1859
 
1860
// Try to reserve the kcache lines used by the group gt within the clause.
// Returns true if the clause's kcache settings can cover them; otherwise
// rolls the line set back and returns false.
// NOTE: the set declarations lost their template arguments in the HTML
// scrape ("sb_set group_lines;"); restored as kc_lines, the type
// rp_kcache_tracker::get_lines takes by reference.
bool alu_kcache_tracker::try_reserve(alu_group_tracker& gt) {
	rp_kcache_tracker &kt = gt.kcache();

	if (!kt.num_sels())
		return true;

	kc_lines group_lines;

	unsigned nl = kt.get_lines(group_lines);
	assert(nl);

	// snapshot current lines so we can roll back on failure
	kc_lines clause_lines(lines);
	lines.add_set(group_lines);

	if (clause_lines.size() == lines.size())
		return true;	// no new lines needed

	if (update_kc())
		return true;	// new lines fit into the kcache sets

	lines = clause_lines;

	return false;
}
1884
 
1885
// Collect the kcache lines referenced by the tracked const sels into
// 'lines'; returns the number of newly inserted lines. rp entries are
// stored biased by one, zero meaning unused; the shift converting a sel
// to a line index depends on sel_count (2 = r600-style const pairs).
unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
	unsigned cnt = 0;

	for (unsigned i = 0; i < sel_count; ++i) {
		unsigned line = rp[i];

		if (!line)
			return cnt;

		--line;
		line = (sel_count == 2) ? line >> 5 : line >> 6;

		if (lines.insert(line).second)
			++cnt;
	}
	return cnt;
}
1902
 
1903
bool alu_kcache_tracker::update_kc() {
1904
	unsigned c = 0;
1905
 
1906
	bc_kcache old_kc[4];
1907
	memcpy(old_kc, kc, sizeof(kc));
1908
 
1909
	for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
1910
		unsigned line = *I;
1911
		unsigned bank = line >> 8;
1912
 
1913
		line &= 0xFF;
1914
 
1915
		if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line))
1916
			++kc[c-1].mode;
1917
		else {
1918
			if (c == max_kcs) {
1919
				memcpy(kc, old_kc, sizeof(kc));
1920
				return false;
1921
			}
1922
 
1923
			kc[c].mode = KC_LOCK_1;
1924
 
1925
			kc[c].bank = bank;
1926
			kc[c].addr = line;
1927
			++c;
1928
		}
1929
	}
1930
	return true;
1931
}
1932
 
1933
// Create the MOVA instruction that loads the tracked AR value; op and slot
// depend on whether the target uses MOVA_GPR (see uses_mova_gpr).
alu_node* alu_clause_tracker::create_ar_load() {
	alu_node *a = sh.create_alu();

	// FIXME use MOVA_GPR on R6xx

	if (sh.get_ctx().uses_mova_gpr) {
		a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
		a->bc.slot = SLOT_TRANS;
	} else {
		a->bc.set_op(ALU_OP1_MOVA_INT);
		a->bc.slot = SLOT_X;
	}

	a->dst.resize(1);
	a->src.push_back(current_ar);

	PSC_DUMP(
		sblog << "created AR load: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	return a;
}
1957
 
1958
void alu_clause_tracker::discard_current_group() {
1959
	PSC_DUMP( sblog << "act::discard_current_group\n"; );
1960
	grp().discard_all_slots(conflict_nodes);
1961
}
1962
 
1963
void rp_gpr_tracker::dump() {
1964
	sblog << "=== gpr_tracker dump:\n";
1965
	for (int c = 0; c < 3; ++c) {
1966
		sblog << "cycle " << c << "      ";
1967
		for (int h = 0; h < 4; ++h) {
1968
			sblog << rp[c][h] << ":" << uc[c][h] << "   ";
1969
		}
1970
		sblog << "\n";
1971
	}
1972
}
1973
 
1974
} // namespace r600_sb