Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright 2013 Vadim Girlin 
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * on the rights to use, copy, modify, merge, publish, distribute, sub
8
 * license, and/or sell copies of the Software, and to permit persons to whom
9
 * the Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *      Vadim Girlin
25
 */
26
 
27
// Set to a non-zero value to compile in the register-allocation trace output
// wrapped by RA_DUMP throughout this file.
#define RA_DEBUG 0

// RA_DUMP(q) expands to the statements `q` (wrapped in do/while(0) so it acts
// as a single statement) when RA_DEBUG is non-zero, and to nothing otherwise.
#if !RA_DEBUG
#define RA_DUMP(q)
#else
#define RA_DUMP(q) do { q } while (0)
#endif
34
 
35
#include <cstring>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
40
 
41
namespace r600_sb {
42
 
43
class regbits {
44
	typedef uint32_t basetype;
45
	static const unsigned bt_bytes = sizeof(basetype);
46
	static const unsigned bt_index_shift = 5;
47
	static const unsigned bt_index_mask = (1u << bt_index_shift) - 1;
48
	static const unsigned bt_bits = bt_bytes << 3;
49
	static const unsigned size = MAX_GPR * 4 / bt_bits;
50
 
51
	basetype dta[size];
52
 
53
	unsigned num_temps;
54
 
55
public:
56
 
57
	regbits(unsigned num_temps) : dta(), num_temps(num_temps) {}
58
	regbits(unsigned num_temps, unsigned value)	: num_temps(num_temps)
59
	{ set_all(value); }
60
 
61
	regbits(shader &sh, val_set &vs) : num_temps(sh.get_ctx().alu_temp_gprs)
62
	{ set_all(1); from_val_set(sh, vs); }
63
 
64
	void set_all(unsigned val);
65
	void from_val_set(shader &sh, val_set &vs);
66
 
67
	void set(unsigned index);
68
	void clear(unsigned index);
69
	bool get(unsigned index);
70
 
71
	void set(unsigned index, unsigned val);
72
 
73
	sel_chan find_free_bit();
74
	sel_chan find_free_chans(unsigned mask);
75
	sel_chan find_free_chan_by_mask(unsigned mask);
76
	sel_chan find_free_array(unsigned size, unsigned mask);
77
 
78
	void dump();
79
};
80
 
81
// =======================================
82
 
83
void regbits::dump() {
84
	for (unsigned i = 0; i < size * bt_bits; ++i) {
85
 
86
		if (!(i & 31))
87
			sblog << "\n";
88
 
89
		if (!(i & 3)) {
90
			sblog.print_w(i / 4, 7);
91
			sblog << " ";
92
		}
93
 
94
		sblog << (get(i) ? 1 : 0);
95
	}
96
}
97
 
98
 
99
void regbits::set_all(unsigned v) {
100
	memset(&dta, v ? 0xFF : 0x00, size * bt_bytes);
101
}
102
 
103
// Clears the bit of every value in `vs` that is a GPR with a non-zero final
// GPR id. The id is 1-based, so bit (id - 1) is cleared.
void regbits::from_val_set(shader &sh, val_set& vs) {
	for (val_set::iterator it = vs.begin(sh), last = vs.end(sh);
			it != last; ++it) {
		value *val = *it;

		if (!val->is_any_gpr())
			continue;

		unsigned id = val->get_final_gpr();
		if (!id)
			continue;

		// convert the 1-based id to a bit index
		const unsigned bit = id - 1;
		assert(bit < 512);
		clear(bit);
	}
}
121
 
122
void regbits::set(unsigned index) {
123
	unsigned ih = index >> bt_index_shift;
124
	unsigned il = index & bt_index_mask;
125
	dta[ih] |= ((basetype)1u << il);
126
}
127
 
128
void regbits::clear(unsigned index) {
129
	unsigned ih = index >> bt_index_shift;
130
	unsigned il = index & bt_index_mask;
131
	assert(ih < size);
132
	dta[ih] &= ~((basetype)1u << il);
133
}
134
 
135
bool regbits::get(unsigned index) {
136
	unsigned ih = index >> bt_index_shift;
137
	unsigned il = index & bt_index_mask;
138
	return dta[ih] & ((basetype)1u << il);
139
}
140
 
141
void regbits::set(unsigned index, unsigned val) {
142
	unsigned ih = index >> bt_index_shift;
143
	unsigned il = index & bt_index_mask;
144
	basetype bm = 1u << il;
145
	dta[ih] = (dta[ih] & ~bm) | (val << il);
146
}
147
 
148
// free register for ra means the bit is set
149
sel_chan regbits::find_free_bit() {
150
	unsigned elt = 0;
151
	unsigned bit = 0;
152
 
153
	while (elt < size && !dta[elt])
154
		++elt;
155
 
156
	if (elt >= size)
157
		return 0;
158
 
159
	bit = __builtin_ctz(dta[elt]) + (elt << bt_index_shift);
160
 
161
	assert(bit < ((MAX_GPR - num_temps) << 2));
162
 
163
	return bit + 1;
164
}
165
 
166
// find free gpr component to use as indirectly addressable array
167
sel_chan regbits::find_free_array(unsigned length, unsigned mask) {
168
	unsigned cc[4] = {};
169
 
170
	// FIXME optimize this. though hopefully we won't have a lot of arrays
171
	for (unsigned a = 0; a < MAX_GPR - num_temps; ++a) {
172
		for(unsigned c = 0; c < MAX_CHAN; ++c) {
173
			if (mask & (1 << c)) {
174
				if (get((a << 2) | c)) {
175
					if (++cc[c] == length)
176
						return sel_chan(a - length + 1, c);
177
				} else {
178
					cc[c] = 0;
179
				}
180
			}
181
		}
182
	}
183
	return 0;
184
}
185
 
186
// Finds the first 4-bit group (one register) in which ALL bits of `mask` are
// set. Returns the index of the group's first bit plus one, or 0 when no
// group matches. `mask` must fit in four bits.
sel_chan regbits::find_free_chans(unsigned mask) {
	assert (!(mask & ~0xF));

	unsigned word = 0;
	unsigned pos = 0;
	basetype cur = dta[word];

	for (;;) {
		if (!cur) {
			// nothing left in this word, move to the next one
			if (++word >= size)
				return 0;
			cur = dta[word];
			pos = 0;
			continue;
		}

		// jump to the 4-bit group holding the lowest set bit
		const unsigned skip = __builtin_ctz(cur) & ~(basetype)3u;

		assert (skip <= bt_bits - pos);
		pos += skip;
		cur >>= skip;

		if ((cur & mask) == mask)
			return ((word << bt_index_shift) | pos) + 1;

		// this group does not fit, try the next one
		pos += 4;
		cur >>= 4;
	}
}
220
 
221
// Finds the first 4-bit group (one register) in which ANY bit of `mask` is
// set and returns the index of the lowest such bit plus one, or 0 when no
// group matches. `mask` must fit in four bits.
sel_chan regbits::find_free_chan_by_mask(unsigned mask) {
	assert (!(mask & ~0xF));

	unsigned word = 0;
	unsigned pos = 0;
	basetype cur = dta[word];

	for (;;) {
		if (!cur) {
			// nothing left in this word, move to the next one
			if (++word >= size)
				return 0;
			cur = dta[word];
			pos = 0;
			continue;
		}

		// jump to the 4-bit group holding the lowest set bit
		const unsigned skip = __builtin_ctz(cur) & ~(basetype)3u;

		assert (skip <= bt_bits - pos);
		pos += skip;
		cur >>= skip;

		const basetype hits = cur & mask;
		if (hits) {
			const unsigned group_base = (word << bt_index_shift) | pos;
			const unsigned chan = __builtin_ctz(hits);
			return chan + group_base + 1;
		}

		// no wanted channel free in this group, try the next one
		pos += 4;
		cur >>= 4;
	}
}
257
 
258
// ================================
259
 
260
void ra_init::alloc_arrays() {
261
 
262
	gpr_array_vec &ga = sh.arrays();
263
 
264
	for(gpr_array_vec::iterator I = ga.begin(), E = ga.end(); I != E; ++I) {
265
		gpr_array *a = *I;
266
 
267
		RA_DUMP(
268
			sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
269
			sblog << "\n";
270
		);
271
 
272
		// skip preallocated arrays (e.g. with preloaded inputs)
273
		if (a->gpr) {
274
			RA_DUMP( sblog << "   FIXED at " << a->gpr << "\n"; );
275
			continue;
276
		}
277
 
278
		bool dead = a->is_dead();
279
 
280
		if (dead) {
281
			RA_DUMP( sblog << "   DEAD\n"; );
282
			continue;
283
		}
284
 
285
		val_set &s = a->interferences;
286
 
287
 
288
		for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
289
			value *v = *I;
290
			if (v->array == a)
291
				s.remove_val(v);
292
		}
293
 
294
		RA_DUMP(
295
			sblog << "  interf: ";
296
			dump::dump_set(sh, s);
297
			sblog << "\n";
298
		);
299
 
300
		regbits rb(sh, s);
301
 
302
		sel_chan base = rb.find_free_array(a->array_size,
303
		                                   (1 << a->base_gpr.chan()));
304
 
305
		RA_DUMP( sblog << "  found base: " << base << "\n"; );
306
 
307
		a->gpr = base;
308
	}
309
}
310
 
311
 
312
int ra_init::run() {
313
 
314
	alloc_arrays();
315
 
316
	ra_node(sh.root);
317
	return 0;
318
}
319
 
320
// Recursively walks container `c`: every NT_OP child is handed to
// process_op(), and every child container that is not a packed ALU node is
// descended into.
void ra_init::ra_node(container_node* c) {

	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;
		if (n->type == NT_OP) {
			process_op(n);
		}
		if (n->is_container() && !n->is_alu_packed()) {
			// is_container() was checked above, so the downcast is valid
			ra_node(static_cast<container_node*>(n));
		}
	}
}
332
 
333
// Colours the values touched by operation `n`:
//  - for packed ALU nodes, the first CK_PACKED_BS constraint found on an sgpr
//    source is coloured as a whole;
//  - for fetch and CF instructions, every sgpr source is coloured;
//  - every still-uncoloured sgpr destination is coloured; for an
//    unconstrained copy it takes the GPR of the matching sgpr source instead.
void ra_init::process_op(node* n) {

	const bool is_copy = n->is_copy_mov();

	RA_DUMP(
		sblog << "ra_init: process_op : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	if (n->is_alu_packed()) {
		for (vvec::iterator SI = n->src.begin(), SE = n->src.end();
				SI != SE; ++SI) {
			value *src = *SI;
			if (!src || !src->is_sgpr())
				continue;
			if (src->constraint && src->constraint->kind == CK_PACKED_BS) {
				color_bs_constraint(src->constraint);
				break;
			}
		}
	}

	if (n->is_fetch_inst() || n->is_cf_inst()) {
		for (vvec::iterator SI = n->src.begin(), SE = n->src.end();
				SI != SE; ++SI) {
			value *src = *SI;
			if (src && src->is_sgpr())
				color(src);
		}
	}

	for (vvec::iterator DI = n->dst.begin(), DE = n->dst.end();
			DI != DE; ++DI) {
		value *dst = *DI;

		// only uncoloured sgpr destinations need work
		if (!dst || !dst->is_sgpr() || dst->gpr)
			continue;

		if (!is_copy || dst->constraint) {
			color(dst);
			continue;
		}

		// unconstrained copy: reuse the colour of the source at the same
		// position, if that source is an sgpr
		value *src = *(n->src.begin() + (DI - n->dst.begin()));
		assert(src);
		if (src->is_sgpr())
			assign_color(dst, src->gpr);
	}
}
380
 
381
// Colours the values of a CK_PACKED_BS constraint so that no channel is used
// by more than three of them.
//
// Values that already have a final GPR keep it while their channel still has
// room; otherwise they lose VLF_FIXED and are re-coloured. A value that needs
// a colour gets the first free component (per its interference set, or its
// chunk's interferences) whose channel is not yet full. When a value is
// coloured this way it is detached from its chunk.
//
// Fixes over the previous version:
//  - `v` is checked for NULL before it is dereferenced;
//  - the scan for a free component stays below sh.num_nontemp_gpr() instead
//    of running unbounded through the bitmap;
//  - failure is detected with an explicit flag (the old `!gpr` test could
//    never be true because gpr only counts upwards from 1).
void ra_init::color_bs_constraint(ra_constraint* c) {
	vvec &vv = c->values;
	assert(vv.size() <= 8);

	RA_DUMP(
		sblog << "color_bs_constraint: ";
		dump::dump_vec(vv);
		sblog << "\n";
	);

	regbits rb(ctx.alu_temp_gprs);

	// number of constraint values already placed in each channel
	unsigned chan_count[4] = {};
	unsigned allowed_chans = 0x0F;

	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;

		if (!v || v->is_dead())
			continue;

		sel_chan gpr = v->get_final_gpr();

		val_set interf;

		if (v->chunk)
			sh.coal.get_chunk_interferences(v->chunk, interf);
		else
			interf = v->interferences;

		RA_DUMP(
			sblog << "   processing " << *v << "  interferences : ";
			dump::dump_set(sh, interf);
			sblog << "\n";
		);

		if (gpr) {
			unsigned chan = gpr.chan();
			if (chan_count[chan] < 3) {
				// existing colour is acceptable, keep it
				++chan_count[chan];
				continue;
			} else {
				v->flags &= ~VLF_FIXED;
				allowed_chans &= ~(1 << chan);
				assert(allowed_chans);
			}
		}

		v->gpr = 0;

		// start the search at the first component (ids are 1-based)
		gpr = 1;
		rb.set_all(1);

		rb.from_val_set(sh, interf);

		RA_DUMP(
			sblog << "   regbits : ";
			rb.dump();
			sblog << "\n";
		);

		bool colored = false;

		while (allowed_chans && gpr.sel() < sh.num_nontemp_gpr()) {

			// skip occupied components; the loop condition re-checks the
			// register limit on every step
			if (!rb.get(gpr - 1)) {
				gpr = gpr + 1;
				continue;
			}

			RA_DUMP(
				sblog << "    trying " << gpr << "\n";
			);

			unsigned chan = gpr.chan();
			if (chan_count[chan] < 3) {
				++chan_count[chan];

				if (v->chunk) {
					vvec::iterator F = std::find(v->chunk->values.begin(),
					                             v->chunk->values.end(),
					                             v);
					// erase(end()) is undefined, so only erase a real hit
					if (F != v->chunk->values.end())
						v->chunk->values.erase(F);
					v->chunk = NULL;
				}

				assign_color(v, gpr);
				colored = true;
				break;
			} else {
				allowed_chans &= ~(1 << chan);
			}
			gpr = gpr + 1;
		}

		if (!colored) {
			sblog << "color_bs_constraint: failed...\n";
			assert(!"coloring failed");
		}
	}
}
477
 
478
// Assigns a GPR component to value `v`.
//
//  - Values under a CK_PACKED_BS constraint are coloured together with the
//    whole constraint.
//  - Values in a fixed chunk are left alone.
//  - Register-pinned values get exactly their pinned GPR.
//  - Channel-pinned values get the first free register in the pinned channel.
//  - Everything else gets the first free component in a preferable channel.
//
// Fixes over the previous version:
//  - in the channel-pinned path the channel offset is only added when the
//    search succeeded; adding it to a failed (0) result produced a bogus
//    non-zero id that slipped past the assert below;
//  - if no component is free in the preferable channels, the search is
//    retried over all four channels before giving up.
//    NOTE(review): this assumes the channel mask is only a distribution
//    heuristic - confirm.
void ra_init::color(value* v) {

	if (v->constraint && v->constraint->kind == CK_PACKED_BS) {
		color_bs_constraint(v->constraint);
		return;
	}

	if (v->chunk && v->chunk->is_fixed())
		return;

	RA_DUMP(
		sblog << "coloring ";
		dump::dump_val(v);
		sblog << "   interferences ";
		dump::dump_set(sh, v->interferences);
		sblog << "\n";
	);

	if (v->is_reg_pinned()) {
		assert(v->is_chan_pinned());
		assign_color(v, v->pin_gpr);
		return;
	}

	regbits rb(sh, v->interferences);
	sel_chan c;

	if (v->is_chan_pinned()) {
		RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << "  ";	);
		unsigned chan = v->pin_gpr.chan();
		unsigned mask = 1 << chan;
		// find_free_chans returns the id of channel 0 of the register found
		c = rb.find_free_chans(mask);
		if (c)
			c = c + chan;
	} else {
		unsigned cm = get_preferable_chan_mask();
		RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; );
		c = rb.find_free_chan_by_mask(cm);
		if (!c && cm != 0xF)
			c = rb.find_free_chan_by_mask(0xF);
	}

	assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed");
	assign_color(v, c);
}
518
 
519
// Records `c` as the GPR of `v` and pushes its channel onto the history of
// recently used channels.
void ra_init::assign_color(value* v, sel_chan c) {
	v->gpr = c;
	add_prev_chan(c.chan());

	RA_DUMP(
		sblog << "colored ";
		dump::dump_val(v);
		sblog << " to " << c << "\n";
	);
}
528
 
529
// ===================================================
530
 
531
int ra_split::run() {
532
	split(sh.root);
533
	return 0;
534
}
535
 
536
void ra_split::split_phi_src(container_node *loc, container_node *c,
537
                             unsigned id, bool loop) {
538
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
539
		node *p = *I;
540
		value* &v = p->src[id], *d = p->dst[0];
541
		assert(v);
542
 
543
		if (!d->is_sgpr() || v->is_undef())
544
			continue;
545
 
546
		value *t = sh.create_temp_value();
547
		if (loop && id == 0)
548
			loc->insert_before(sh.create_copy_mov(t, v));
549
		else
550
			loc->push_back(sh.create_copy_mov(t, v));
551
		v = t;
552
 
553
		sh.coal.add_edge(v, d, coalescer::phi_cost);
554
	}
555
}
556
 
557
// For every phi node in `c` with an sgpr destination, replaces the
// destination with a fresh temporary and emits a copy
// "original destination = temp". With `loop` set the copy is pushed to the
// front of `loc` (which is then used as a container); otherwise it is
// inserted right after `loc`.
void ra_split::split_phi_dst(node* loc, container_node *c, bool loop) {
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *p = *I;
		value* &v = p->dst[0];
		assert(v);

		if (!v->is_sgpr())
			continue;

		value *t = sh.create_temp_value();
		node *cp = sh.create_copy_mov(v, t);
		if (loop)
			// NOTE(review): assumes callers pass a container_node when
			// loop is true - confirm
			static_cast<container_node*>(loc)->push_front(cp);
		else
			loc->insert_after(cp);
		v = t;
	}
}
575
 
576
 
577
// Creates one CK_PHI constraint per phi node in `c`, containing the phi
// destination followed by all of its sgpr sources.
void ra_split::init_phi_constraints(container_node *c) {
	for (node_iterator PI = c->begin(), PE = c->end(); PI != PE; ++PI) {
		node *phi = *PI;

		ra_constraint *pc = sh.coal.create_constraint(CK_PHI);
		pc->values.push_back(phi->dst[0]);

		for (vvec::iterator SI = phi->src.begin(), SE = phi->src.end();
				SI != SE; ++SI) {
			value *src = *SI;
			if (!src->is_sgpr())
				continue;
			pc->values.push_back(src);
		}

		pc->update_values();
	}
}
592
 
593
// Recursively processes container `n`:
//  1. depart/repeat nodes split the phi sources of their target region,
//     region nodes split their own phi destinations (and the loop phi's
//     source 0);
//  2. children are visited in reverse order - operations go to split_op(),
//     containers are recursed into;
//  3. for regions, phi constraints are created afterwards.
// The downcasts rely on the node type tag checked immediately before each.
void ra_split::split(container_node* n) {

	if (n->type == NT_DEPART) {
		depart_node *d = static_cast<depart_node*>(n);
		if (d->target->phi)
			split_phi_src(d, d->target->phi, d->dep_id, false);
	} else if (n->type == NT_REPEAT) {
		repeat_node *r = static_cast<repeat_node*>(n);
		if (r->target->loop_phi)
			split_phi_src(r, r->target->loop_phi, r->rep_id, true);
	} else if (n->type == NT_REGION) {
		region_node *r = static_cast<region_node*>(n);
		if (r->phi) {
			split_phi_dst(r, r->phi, false);
		}
		if (r->loop_phi) {
			split_phi_dst(r->get_entry_code_location(), r->loop_phi,
					true);
			split_phi_src(r, r->loop_phi, 0, true);
		}
	}

	// the next iterator is taken before the current child is processed
	for (node_riterator N, I = n->rbegin(), E = n->rend(); I != E; I = N) {
		N = I;
		++N;
		node *o = *I;
		if (o->type == NT_OP) {
			split_op(o);
		} else if (o->is_container()) {
			split(static_cast<container_node*>(o));
		}
	}

	if (n->type == NT_REGION) {
		region_node *r = static_cast<region_node*>(n);
		if (r->phi)
			init_phi_constraints(r->phi);
		if (r->loop_phi)
			init_phi_constraints(r->loop_phi);
	}
}
634
 
635
// Dispatches operation `n` by subtype: packed ALU instructions go to
// split_alu_packed(), fetch and CF instructions to split_vector_inst();
// everything else is left untouched.
void ra_split::split_op(node* n) {
	switch(n->subtype) {
		case NST_ALU_PACKED_INST:
			// subtype was just checked, so the downcast is valid
			split_alu_packed(static_cast<alu_packed_node*>(n));
			break;
		case NST_FETCH_INST:
		case NST_CF_INST:
			split_vector_inst(n);
			// explicit break instead of falling through into default
			break;
		default:
			break;
	}
}
647
 
648
// Replaces every defined GPR source of packed node `n` with a temporary
// (one temporary per distinct source value), inserts the copies
// "temp = source" before `n`, and ties the temporaries together with a
// CK_PACKED_BS constraint. Does nothing when no source qualifies.
void ra_split::split_packed_ins(alu_packed_node *n) {
	vvec new_src = n->src;
	vvec origs, temps;

	for (vvec::iterator I = new_src.begin(), E = new_src.end(); I != E; ++I) {

		value *&slot = *I;

		if (!slot || !slot->is_any_gpr() || slot->is_undef())
			continue;

		// reuse the temporary if this source value was seen before
		vvec::iterator F = std::find(origs.begin(), origs.end(), slot);
		value *t;

		if (F == origs.end()) {
			t = sh.create_temp_value();
			origs.push_back(slot);
			temps.push_back(t);
		} else {
			t = *(temps.begin() + (F - origs.begin()));
		}
		slot = t;
	}

	if (origs.empty())
		return;

	n->src = new_src;

	vvec::iterator TI = temps.begin();
	for (vvec::iterator OI = origs.begin(), OE = origs.end();
			OI != OE; ++OI, ++TI) {
		n->insert_before(sh.create_copy_mov(*TI, *OI));
	}

	ra_constraint *pc = sh.coal.create_constraint(CK_PACKED_BS);
	pc->values = temps;
	pc->update_values();
}
686
 
687
// TODO handle other packed ops for cayman
688
void ra_split::split_alu_packed(alu_packed_node* n) {
689
	switch (n->op()) {
690
		case ALU_OP2_DOT4:
691
		case ALU_OP2_CUBE:
692
			split_packed_ins(n);
693
			break;
694
		default:
695
			break;
696
	}
697
}
698
 
699
// Replaces the live, defined entries of `vv` with temporaries.
//
// For each replaced entry the temporary is appended to `v1` and the original
// value to `v2` (same position in both). With `allow_swz` set, float 0/1
// constants are left in place and a value that occurs more than once reuses
// its temporary. Without it, every entry gets its own temporary, pinned to
// the channel given by the entry's position in `vv`.
void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) {
	unsigned ch = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I, ++ch) {

		value* &slot = *I;

		if (!slot)
			continue;

		assert(!slot->is_dead());

		if (slot->is_undef())
			continue;

		if (allow_swz && slot->is_float_0_or_1())
			continue;

		value *t;
		bool reused = false;

		if (allow_swz) {
			vvec::iterator F = std::find(v2.begin(), v2.end(), slot);
			if (F != v2.end()) {
				t = *(v1.begin() + (F - v2.begin()));
				reused = true;
			}
		}

		if (!reused) {
			t = sh.create_temp_value();

			if (!allow_swz) {
				t->flags |= VLF_PIN_CHAN;
				t->pin_gpr = sel_chan(0, ch);
			}

			v2.push_back(slot);
			v1.push_back(t);
		}

		slot = t;
	}
}
736
 
737
// Isolates the operands of a fetch/CF instruction behind temporaries.
//
// Sources (skipped for CALL_FS): each group of four sources is split with
// split_vec(), copies "temp = source" are inserted before `n`, and the
// temporaries of the group get a CK_SAME_REG constraint. Source swizzling is
// disallowed for CF instructions with the CF_MEM flag and for
// VFETCH/SEMFETCH.
//
// Destinations: split with swizzling allowed, copies "dest = temp" are
// inserted after `n` in order. For CALL_FS the temporaries are pinned and
// fixed to the GPR selected by the original destination; otherwise they get
// a CK_SAME_REG constraint.
void ra_split::split_vector_inst(node* n) {
	ra_constraint *c;

	const bool is_call_fs = n->is_cf_op(CF_OP_CALL_FS);

	bool fixed_src_chans = n->is_cf_inst() && (n->cf_op_flags() & CF_MEM);
	if (n->is_fetch_op(FETCH_OP_VFETCH) || n->is_fetch_op(FETCH_OP_SEMFETCH))
		fixed_src_chans = true;

	if (!n->src.empty() && !is_call_fs) {

		// we may have more than one source vector -
		// fetch instructions with FF_USEGRAD have gradient values in
		// src vectors 1 (src[4-7]) and 2 (src[8-11])

		const unsigned group_count = n->src.size() >> 2;
		assert(group_count << 2 == n->src.size());

		for (unsigned g = 0; g < group_count; ++g) {
			vvec origs, temps, group(4);
			const unsigned first = g << 2;

			std::copy(n->src.begin() + first,
			          n->src.begin() + first + 4,
			          group.begin());

			split_vec(group, temps, origs, !fixed_src_chans);

			// nothing was replaced and no constraint is needed
			if (!fixed_src_chans && origs.empty())
				continue;

			std::copy(group.begin(), group.end(), n->src.begin() + first);

			for (unsigned i = 0, cnt = temps.size(); i < cnt; ++i)
				n->insert_before(sh.create_copy_mov(temps[i], origs[i]));

			c = sh.coal.create_constraint(CK_SAME_REG);
			c->values = temps;
			c->update_values();
		}
	}

	if (n->dst.empty())
		return;

	vvec origs, temps, new_dst = n->dst;

	split_vec(new_dst, temps, origs, true);

	if (origs.empty())
		return;

	n->dst = new_dst;

	// chain the copies after n, keeping them in operand order
	node *tail = n;
	for (unsigned i = 0, cnt = temps.size(); i < cnt; ++i) {
		tail->insert_after(sh.create_copy_mov(origs[i], temps[i]));
		tail = tail->next;
	}

	if (!is_call_fs) {
		c = sh.coal.create_constraint(CK_SAME_REG);
		c->values = temps;
		c->update_values();
		return;
	}

	for (unsigned i = 0, cnt = temps.size(); i < cnt; ++i) {
		value *tmp = temps[i];
		value *orig = origs[i];
		if (!tmp)
			continue;

		// the pin moves from the original destination to the temporary
		tmp->flags |= VLF_PIN_REG | VLF_PIN_CHAN;
		orig->flags &= ~(VLF_PIN_REG | VLF_PIN_CHAN);
		sel_chan sel;

		if (orig->is_rel()) {
			assert(orig->rel->is_const());
			sel = sel_chan(orig->select.sel() +
							 orig->rel->get_const_value().u,
			             orig->select.chan());
		} else
			sel = orig->select;

		tmp->gpr = tmp->pin_gpr = sel;
		tmp->fix();
	}
}
826
 
827
void ra_init::add_prev_chan(unsigned chan) {
828
	prev_chans = (prev_chans << 4) | (1 << chan);
829
}
830
 
831
unsigned ra_init::get_preferable_chan_mask() {
832
	unsigned i, used_chans = 0;
833
	unsigned chans = prev_chans;
834
 
835
	for (i = 0; i < ra_tune; ++i) {
836
		used_chans |= chans;
837
		chans >>= 4;
838
	}
839
 
840
	return (~used_chans) & 0xF;
841
}
842
 
843
} // namespace r600_sb