Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright 2013 Vadim Girlin 
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * on the rights to use, copy, modify, merge, publish, distribute, sub
8
 * license, and/or sell copies of the Software, and to permit persons to whom
9
 * the Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *      Vadim Girlin
25
 */
26
 
27
#define RA_DEBUG 0
28
 
29
#if RA_DEBUG
30
#define RA_DUMP(q) do { q } while (0)
31
#else
32
#define RA_DUMP(q)
33
#endif
34
 
35
#include "sb_shader.h"
36
#include "sb_pass.h"
37
 
38
namespace r600_sb {
39
 
40
int ra_coalesce::run() {
41
	return sh.coal.run();
42
}
43
 
44
void coalescer::add_edge(value* a, value* b, unsigned cost) {
45
	assert(a->is_sgpr() && b->is_sgpr());
46
	edges.insert(new ra_edge(a,b, cost));
47
}
48
 
49
void coalescer::create_chunk(value *v) {
50
 
51
	assert(v->is_sgpr());
52
 
53
	ra_chunk *c = new ra_chunk();
54
 
55
	c->values.push_back(v);
56
 
57
	if (v->is_chan_pinned())
58
		c->flags |= RCF_PIN_CHAN;
59
	if (v->is_reg_pinned()) {
60
		c->flags |= RCF_PIN_REG;
61
	}
62
 
63
	c->pin = v->pin_gpr;
64
 
65
	RA_DUMP(
66
		sblog << "create_chunk: ";
67
		dump_chunk(c);
68
	);
69
 
70
	all_chunks.push_back(c);
71
	v->chunk = c;
72
 
73
}
74
 
75
void coalescer::unify_chunks(ra_edge *e) {
76
	ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
77
 
78
	RA_DUMP(
79
		sblog << "unify_chunks: ";
80
		dump_chunk(c1);
81
		dump_chunk(c2);
82
	);
83
 
84
	if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
85
		c1->flags |= RCF_PIN_CHAN;
86
		c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
87
	}
88
 
89
	if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
90
		c1->flags |= RCF_PIN_REG;
91
		c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
92
	}
93
 
94
	c1->values.reserve(c1->values.size() + c2->values.size());
95
 
96
	for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
97
			++I) {
98
		(*I)->chunk = c1;
99
		c1->values.push_back(*I);
100
	}
101
 
102
	chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
103
	assert(F != all_chunks.end());
104
 
105
	all_chunks.erase(F);
106
 
107
	c1->cost += c2->cost + e->cost;
108
	delete c2;
109
}
110
 
111
bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
112
	unsigned pin_flags = (c1->flags & c2->flags) &
113
			(RCF_PIN_CHAN | RCF_PIN_REG);
114
 
115
	if ((pin_flags & RCF_PIN_CHAN) &&
116
			c1->pin.chan() != c2->pin.chan())
117
		return true;
118
 
119
	if ((pin_flags & RCF_PIN_REG) &&
120
			c1->pin.sel() != c2->pin.sel())
121
		return true;
122
 
123
	for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
124
			++I) {
125
		value *v1 = *I;
126
 
127
		for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
128
				++I) {
129
			value *v2 = *I;
130
 
131
			if (!v1->v_equal(v2) && v1->interferences.contains(v2))
132
				return true;
133
		}
134
	}
135
	return false;
136
}
137
 
138
void coalescer::build_chunks() {
139
 
140
	for (edge_queue::iterator I = edges.begin(), E = edges.end();
141
			I != E; ++I) {
142
 
143
		ra_edge *e = *I;
144
 
145
		if (!e->a->chunk)
146
			create_chunk(e->a);
147
 
148
		if (!e->b->chunk)
149
			create_chunk(e->b);
150
 
151
		ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
152
 
153
		if (c1 == c2) {
154
			c1->cost += e->cost;
155
		} else if (!chunks_interference(c1, c2))
156
			unify_chunks(e);
157
	}
158
}
159
 
160
ra_constraint* coalescer::create_constraint(constraint_kind kind) {
161
	ra_constraint *c = new ra_constraint(kind);
162
	all_constraints.push_back(c);
163
	return c;
164
}
165
 
166
void coalescer::dump_edges() {
167
	sblog << "######## affinity edges\n";
168
 
169
	for (edge_queue::iterator I = edges.begin(), E = edges.end();
170
			I != E; ++I) {
171
		ra_edge* e = *I;
172
		sblog << "  ra_edge ";
173
		dump::dump_val(e->a);
174
		sblog << " <-> ";
175
		dump::dump_val(e->b);
176
		sblog << "   cost = " << e->cost << "\n";
177
	}
178
}
179
 
180
void coalescer::dump_chunks() {
181
	sblog << "######## chunks\n";
182
 
183
	for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
184
			I != E; ++I) {
185
		ra_chunk* c = *I;
186
		dump_chunk(c);
187
	}
188
}
189
 
190
 
191
void coalescer::dump_constraint_queue() {
192
	sblog << "######## constraints\n";
193
 
194
	for (constraint_queue::iterator I = constraints.begin(),
195
			E = constraints.end(); I != E; ++I) {
196
		ra_constraint* c = *I;
197
		dump_constraint(c);
198
	}
199
}
200
 
201
void coalescer::dump_chunk(ra_chunk* c) {
202
	sblog << "  ra_chunk cost = " << c->cost << "  :  ";
203
	dump::dump_vec(c->values);
204
 
205
	if (c->flags & RCF_PIN_REG)
206
		sblog << "   REG = " << c->pin.sel();
207
 
208
	if (c->flags & RCF_PIN_CHAN)
209
		sblog << "   CHAN = " << c->pin.chan();
210
 
211
	sblog << (c->flags & RCF_GLOBAL ? "  GLOBAL" : "");
212
 
213
	sblog << "\n";
214
}
215
 
216
void coalescer::dump_constraint(ra_constraint* c) {
217
	sblog << "  ra_constraint: ";
218
	switch (c->kind) {
219
		case CK_PACKED_BS: sblog << "PACKED_BS"; break;
220
		case CK_PHI: sblog << "PHI"; break;
221
		case CK_SAME_REG: sblog << "SAME_REG"; break;
222
		default: sblog << "UNKNOWN_KIND"; assert(0); break;
223
	}
224
 
225
	sblog << "  cost = " << c->cost << "  : ";
226
	dump::dump_vec(c->values);
227
 
228
	sblog << "\n";
229
}
230
 
231
void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
232
 
233
	for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
234
			++I) {
235
		value *v = *I;
236
		s.add_set(v->interferences);
237
	}
238
	s.remove_vec(c->values);
239
}
240
 
241
void coalescer::build_chunk_queue() {
242
	for (chunk_vec::iterator I = all_chunks.begin(),
243
			E = all_chunks.end(); I != E; ++I) {
244
		ra_chunk *c = *I;
245
 
246
		if (!c->is_fixed())
247
			chunks.insert(c);
248
	}
249
}
250
 
251
void coalescer::build_constraint_queue() {
252
	for (constraint_vec::iterator I = all_constraints.begin(),
253
			E = all_constraints.end(); I != E; ++I) {
254
		ra_constraint *c = *I;
255
		unsigned cost = 0;
256
 
257
		if (c->values.empty() || !c->values.front()->is_sgpr())
258
			continue;
259
 
260
		if (c->kind != CK_SAME_REG)
261
			continue;
262
 
263
		for (vvec::iterator I = c->values.begin(), E = c->values.end();
264
				I != E; ++I) {
265
			value *v = *I;
266
			if (!v->chunk)
267
				create_chunk(v);
268
			else
269
				cost += v->chunk->cost;
270
		}
271
		c->cost = cost;
272
		constraints.insert(c);
273
	}
274
}
275
 
276
void coalescer::color_chunks() {
277
 
278
	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
279
			I != E; ++I) {
280
		ra_chunk *c = *I;
281
		if (c->is_fixed() || c->values.size() == 1)
282
			continue;
283
 
284
		sb_bitset rb;
285
		val_set interf;
286
 
287
		get_chunk_interferences(c, interf);
288
 
289
		RA_DUMP(
290
			sblog << "color_chunks: ";
291
			dump_chunk(c);
292
			sblog << "\n interferences: ";
293
			dump::dump_set(sh,interf);
294
			sblog << "\n";
295
		);
296
 
297
		init_reg_bitset(rb, interf);
298
 
299
		unsigned pass = c->is_reg_pinned() ? 0 : 1;
300
 
301
		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
302
		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
303
 
304
		unsigned color = 0;
305
 
306
		while (pass < 2) {
307
 
308
			unsigned rs, re;
309
 
310
			if (pass == 0) {
311
				rs = c->pin.sel();
312
				re = rs + 1;
313
			} else {
314
				rs = 0;
315
				re = sh.num_nontemp_gpr();
316
			}
317
 
318
			for (unsigned reg = rs; reg < re; ++reg) {
319
				for (unsigned chan = cs; chan < ce; ++chan) {
320
					unsigned bit = sel_chan(reg, chan);
321
					if (bit >= rb.size() || !rb.get(bit)) {
322
						color = bit;
323
						break;
324
					}
325
				}
326
				if (color)
327
					break;
328
			}
329
 
330
			if (color)
331
				break;
332
 
333
			++pass;
334
		}
335
 
336
		assert(color);
337
		color_chunk(c, color);
338
	}
339
}
340
 
341
void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
342
 
343
	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
344
		value *v = *I;
345
 
346
		if (!v->is_any_gpr())
347
			continue;
348
 
349
		unsigned gpr = v->get_final_gpr();
350
		if (!gpr)
351
			continue;
352
 
353
		if (gpr) {
354
			if (gpr >= bs.size())
355
				bs.resize(gpr + 64);
356
			bs.set(gpr, 1);
357
		}
358
	}
359
}
360
 
361
void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
362
 
363
	vvec vv = c->values;
364
 
365
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
366
			++I) {
367
		value *v = *I;
368
 
369
		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
370
			detach_value(v);
371
			continue;
372
		}
373
 
374
		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
375
			detach_value(v);
376
			continue;
377
		}
378
 
379
		v->gpr = color;
380
 
381
		if (v->constraint && v->constraint->kind == CK_PHI)
382
			v->fix();
383
 
384
 
385
		RA_DUMP(
386
			sblog << " assigned " << color << " to ";
387
			dump::dump_val(v);
388
			sblog << "\n";
389
		);
390
	}
391
 
392
	c->pin = color;
393
 
394
	if (c->is_reg_pinned()) {
395
		c->fix();
396
	}
397
}
398
 
399
coalescer::~coalescer() {
400
 
401
	// FIXME use pool allocator ??
402
 
403
	for (constraint_vec::iterator I = all_constraints.begin(),
404
			E = all_constraints.end(); I != E; ++I) {
405
		delete (*I);
406
	}
407
 
408
	for (chunk_vec::iterator I = all_chunks.begin(),
409
			E = all_chunks.end(); I != E; ++I) {
410
		delete (*I);
411
	}
412
 
413
	for (edge_queue::iterator I = edges.begin(), E = edges.end();
414
			I != E; ++I) {
415
		delete (*I);
416
	}
417
}
418
 
419
int coalescer::run() {
420
	int r;
421
 
422
	RA_DUMP( dump_edges(); );
423
 
424
	build_chunks();
425
	RA_DUMP( dump_chunks(); );
426
 
427
	build_constraint_queue();
428
	RA_DUMP( dump_constraint_queue(); );
429
 
430
	if ((r = color_constraints()))
431
		return r;
432
 
433
	build_chunk_queue();
434
	color_chunks();
435
 
436
	return 0;
437
}
438
 
439
void coalescer::color_phi_constraint(ra_constraint* c) {
440
}
441
 
442
ra_chunk* coalescer::detach_value(value *v) {
443
 
444
	vvec::iterator F = std::find(v->chunk->values.begin(),
445
	                             v->chunk->values.end(), v);
446
 
447
	assert(F != v->chunk->values.end());
448
	v->chunk->values.erase(F);
449
	create_chunk(v);
450
 
451
	if (v->is_reg_pinned()) {
452
		v->chunk->fix();
453
	}
454
 
455
	RA_DUMP(
456
		sblog << "           detached : ";
457
		dump_chunk(v->chunk);
458
	);
459
 
460
	return v->chunk;
461
 
462
}
463
 
464
int coalescer::color_reg_constraint(ra_constraint *c) {
465
	unsigned k, cnt = c->values.size();
466
	vvec & cv = c->values;
467
 
468
	ra_chunk *ch[4];
469
	unsigned swz[4] = {0, 1, 2, 3};
470
	val_set interf[4];
471
	sb_bitset rb[4];
472
 
473
	bool reg_pinned = false;
474
	unsigned pin_reg = ~0;
475
 
476
	unsigned chan_mask = 0;
477
 
478
	k = 0;
479
	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
480
		value *v = *I;
481
 
482
		if (!v->chunk)
483
			create_chunk(v);
484
 
485
		ch[k] = v->chunk;
486
 
487
		if (v->chunk->is_chan_pinned()) {
488
			unsigned chan = 1 << v->chunk->pin.chan();
489
 
490
			if (chan & chan_mask) { // channel already in use
491
				ch[k] = detach_value(v);
492
				assert(!ch[k]->is_chan_pinned());
493
			} else {
494
				chan_mask |= chan;
495
			}
496
		}
497
 
498
		if (v->chunk->is_reg_pinned()) {
499
			if (!reg_pinned) {
500
				reg_pinned = true;
501
				pin_reg = v->chunk->pin.sel();
502
			}
503
		}
504
 
505
		get_chunk_interferences(ch[k], interf[k]);
506
		init_reg_bitset(rb[k], interf[k]);
507
	}
508
 
509
	unsigned start_reg, end_reg;
510
 
511
	start_reg = 0;
512
	end_reg = sh.num_nontemp_gpr();
513
 
514
	unsigned min_reg = end_reg;
515
	unsigned min_swz[4];
516
	unsigned i, pass = reg_pinned ? 0 : 1;
517
 
518
	bool done = false;
519
 
520
	while (pass < 2) {
521
 
522
		unsigned rs, re;
523
 
524
		if (pass == 0) {
525
			re = pin_reg + 1;
526
			rs = pin_reg;
527
		} else {
528
			re = end_reg;
529
			rs = start_reg;
530
		}
531
 
532
		min_reg = re;
533
 
534
		// cycle on swizzle combinations
535
		do {
536
			for (i = 0; i < cnt; ++i) {
537
				if (ch[i]->flags & RCF_PIN_CHAN)
538
					if (ch[i]->pin.chan() != swz[i])
539
						break;
540
			}
541
			if (i != cnt)
542
				continue;
543
 
544
			// looking for minimal reg number such that the constrained chunks
545
			// may be colored with the current swizzle combination
546
			for (unsigned reg = rs; reg < min_reg; ++reg) {
547
				for (i = 0; i < cnt; ++i) {
548
					unsigned bit = sel_chan(reg, swz[i]);
549
					if (bit < rb[i].size() && rb[i].get(bit))
550
						break;
551
				}
552
				if (i == cnt) {
553
					done = true;
554
					min_reg = reg;
555
					std::copy(swz, swz + 4, min_swz);
556
					break;
557
				}
558
			}
559
 
560
			if (pass == 0 && done)
561
				break;
562
 
563
		} while (std::next_permutation(swz, swz + 4));
564
 
565
		if (!done && pass) {
566
			sblog << "sb: ra_coalesce - out of registers\n";
567
			return -1;
568
		}
569
 
570
		if (pass == 0 && done)
571
			break;
572
 
573
		++pass;
574
	};
575
 
576
	assert(done);
577
 
578
	RA_DUMP(
579
	sblog << "min reg = " << min_reg << "   min_swz = "
580
			<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
581
	);
582
 
583
	for (i = 0; i < cnt; ++i) {
584
		sel_chan color(min_reg, min_swz[i]);
585
		ra_chunk *cc = ch[i];
586
 
587
		if (cc->is_fixed()) {
588
			if (cc->pin != color)
589
				cc = detach_value(cv[i]);
590
			else
591
				continue;
592
		}
593
 
594
		color_chunk(cc, color);
595
		cc->fix();
596
		cc->set_prealloc();
597
	}
598
 
599
	return 0;
600
}
601
 
602
int coalescer::color_constraints() {
603
	int r;
604
 
605
	for (constraint_queue::iterator I = constraints.begin(),
606
			E = constraints.end(); I != E; ++I) {
607
 
608
		ra_constraint *c = *I;
609
 
610
		RA_DUMP(
611
			sblog << "color_constraints: ";
612
			dump_constraint(c);
613
		);
614
 
615
		if (c->kind == CK_SAME_REG) {
616
			if ((r = color_reg_constraint(c)))
617
				return r;
618
		} else if (c->kind == CK_PHI)
619
			color_phi_constraint(c);
620
	}
621
	return 0;
622
}
623
 
624
} // namespace r600_sb