Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright 2013 Vadim Girlin 
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * on the rights to use, copy, modify, merge, publish, distribute, sub
8
 * license, and/or sell copies of the Software, and to permit persons to whom
9
 * the Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *      Vadim Girlin
25
 */
26
 
27
#include "sb_bc.h"
28
#include "sb_shader.h"
29
#include "sb_pass.h"
30
 
31
namespace r600_sb {
32
 
33
// Builds a shader for target `t` with identifier `id`, bound to context `sctx`.
// Apart from the members that receive an explicit argument below, everything
// is value-initialized. The initializer order is kept as in the original list.
shader::shader(sb_context &sctx, shader_target t, unsigned id)
	: ctx(sctx),
	  next_temp_value_index(temp_regid_offset),
	  prep_regs_count(),
	  pred_sels(),
	  regions(),
	  inputs(),
	  undef(),
	  val_pool(sizeof(value)),
	  pool(),
	  all_nodes(),
	  src_stats(),
	  opt_stats(),
	  errors(),
	  optimized(),
	  id(id),
	  coal(*this),
	  bbs(),
	  target(t),
	  vt(ex),
	  ex(*this),
	  root(),
	  compute_interferences(),
	  has_alu_predication(),
	  uses_gradients(),
	  safe_math(),
	  ngpr(),
	  nstack(),
	  dce_flags()
{
}
44
 
45
// Chooses an ALU slot for `n` and registers the node in `slots`.
//
// The slot defaults to the destination channel. On non-Cayman chips an op
// flagged AF_S is moved to SLOT_TRANS when it either lacks AF_V or the
// channel's slot is already occupied.
//
// Returns false, leaving `n` and `slots` untouched, if the chosen slot is
// already taken; otherwise stores the slot in n->bc.slot and returns true.
bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {
	const unsigned op_slots = ctx.alu_slots(n->bc.op);
	unsigned chosen = n->bc.dst_chan;

	if (!ctx.is_cayman() && (op_slots & AF_S)) {
		const bool vector_ok = (op_slots & AF_V) && !slots[chosen];
		if (!vector_ok)
			chosen = SLOT_TRANS;
	}

	if (slots[chosen])
		return false;

	slots[chosen] = n;
	n->bc.slot = chosen;
	return true;
}
61
 
62
void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
63
                            bool src) {
64
	unsigned chan = 0;
65
	while (comp_mask) {
66
		if (comp_mask & 1) {
67
			value *v = get_gpr_value(src, gpr, chan, false);
68
			v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
69
			if (!v->is_rel()) {
70
				v->gpr = v->pin_gpr = v->select;
71
				v->fix();
72
			}
73
			if (v->array && !v->array->gpr) {
74
				// if pinned value can be accessed with indirect addressing
75
				// pin the entire array to its original location
76
				v->array->gpr = v->array->base_gpr;
77
			}
78
			vec.push_back(v);
79
		}
80
		comp_mask >>= 1;
81
		++chan;
82
	}
83
}
84
 
85
// Creates a CF node of subtype `nst` and sets the matching clause opcode
// (ALU, TEX or VTX). Any other subtype trips an assertion and leaves the
// opcode unset. The barrier bit is set on the returned node.
cf_node* shader::create_clause(node_subtype nst) {
	cf_node *clause = create_cf();
	clause->subtype = nst;

	if (nst == NST_ALU_CLAUSE)
		clause->bc.set_op(CF_OP_ALU);
	else if (nst == NST_TEX_CLAUSE)
		clause->bc.set_op(CF_OP_TEX);
	else if (nst == NST_VTX_CLAUSE)
		clause->bc.set_op(CF_OP_VTX);
	else
		assert(!"invalid clause type");

	clause->bc.barrier = 1;
	return clause;
}
100
 
101
void shader::create_bbs() {
102
	create_bbs(root, bbs);
103
}
104
 
105
void shader::expand_bbs() {
106
	expand_bbs(bbs);
107
}
108
 
109
// Creates an ALU_OP1_MOV instruction with `dst` as its only destination and
// `src` as its only source, and records the new node as the definition of
// `dst`.
alu_node* shader::create_mov(value* dst, value* src) {
	alu_node *mov = create_alu();

	mov->bc.set_op(ALU_OP1_MOV);
	mov->dst.push_back(dst);
	mov->src.push_back(src);

	dst->def = mov;
	return mov;
}
118
 
119
// Creates a MOV from `src` to `dst` flagged as a copy that must not be
// hoisted, and makes `src` the assigned source of `dst`.
// When `affcost` is non-zero and both values are sgprs, an affinity edge with
// that cost is added to the coalescer.
alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
	alu_node *copy = create_mov(dst, src);

	dst->assign_source(src);
	copy->flags |= NF_COPY_MOV | NF_DONT_HOIST;

	const bool want_edge = affcost && dst->is_sgpr() && src->is_sgpr();
	if (want_edge)
		coal.add_edge(src, dst, affcost);

	return copy;
}
130
 
131
// Returns the value identified by (kind, id, version), creating it on first
// request.
//
// Version-0 registers whose select is below prep_regs_count are served
// directly from the value pool at index id - 1. Everything else is looked up
// in reg_values under a key packed from kind, version and id.
value* shader::get_value(value_kind kind, sel_chan id,
                         unsigned version) {
	const bool preallocated =
			version == 0 && kind == VLK_REG && id.sel() < prep_regs_count;
	if (preallocated)
		return val_pool[id - 1];

	unsigned key = (kind << 28) | (version << 16) | id;

	value_map::iterator found = reg_values.find(key);
	if (found == reg_values.end()) {
		value *fresh = create_value(kind, id, version);
		reg_values.insert(std::make_pair(key, fresh));
		return fresh;
	}

	return found->second;
}
146
 
147
// Returns the special-register value `sv_id` (channel 0) at the requested
// version.
value* shader::get_special_value(unsigned sv_id, unsigned version) {
	return get_value(VLK_SPECIAL_REG, sel_chan(sv_id, 0), version);
}
151
 
152
// Resizes `vv` to the array size of `a` and fills it with the (non-relative,
// source) gpr values of consecutive registers starting at the array's base
// register, all on the array's channel.
void shader::fill_array_values(gpr_array *a, vvec &vv) {
	const unsigned count = a->array_size;
	const unsigned first_reg = a->base_gpr.sel();
	const unsigned chan = a->base_gpr.chan();

	vv.resize(count);

	for (unsigned idx = 0; idx != count; ++idx)
		vv[idx] = get_gpr_value(true, first_reg + idx, chan, false);
}
160
 
161
// Returns the value for gpr `reg`, channel `chan`.
//
// rel == false: version-0 registers below prep_regs_count come straight from
//   the value pool (returned as-is); others are obtained via get_value().
// rel == true: a new VLK_REL_REG value is created, indexed by SV_AR_INDEX,
//   whose may-use set (and may-def set, when `src` is false) is filled with
//   all elements of the enclosing array. The register must belong to an array.
//
// Except for the pool shortcut, the result gets its array pointer and pin_gpr
// set before being returned.
value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                             unsigned version) {
	sel_chan id(reg, chan);
	gpr_array *arr = get_gpr_array(reg, chan);
	value *result;

	if (!rel) {
		if (version == 0 && reg < prep_regs_count)
			return val_pool[id - 1];

		result = get_value(VLK_REG, id, version);
	} else {
		assert(arr);
		result = create_value(VLK_REL_REG, id, 0);
		result->rel = get_special_value(SV_AR_INDEX);
		fill_array_values(arr, result->muse);
		if (!src)
			fill_array_values(arr, result->mdef);
	}

	result->array = arr;
	result->pin_gpr = result->select;
	return result;
}
185
 
186
// Advances the temp value counter and returns the VLK_TEMP value (channel 0,
// version 0) for the new index.
value* shader::create_temp_value() {
	const unsigned index = ++next_temp_value_index;
	return get_value(VLK_TEMP, sel_chan(index, 0), 0);
}
190
 
191
// Returns the read-only kcache value for (bank, index, chan). The bank is
// packed into the select above bit 12, the index below it.
value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) {
	const unsigned sel = (bank << 12) | index;
	return get_ro_value(kcache_values, VLK_KCACHE, sel_chan(sel, chan));
}
195
 
196
void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
197
	if (inputs.size() <= gpr)
198
		inputs.resize(gpr+1);
199
 
200
	shader_input &i = inputs[gpr];
201
	i.preloaded = preloaded;
202
	i.comp_mask = comp_mask;
203
 
204
	if (preloaded) {
205
		add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
206
	}
207
 
208
}
209
 
210
void shader::init() {
211
	assert(!root);
212
	root = create_container();
213
}
214
 
215
// Attaches the pinned input registers to the CF node `cf`: preloaded inputs
// become sources of `cf`, all other inputs become its destinations.
// Only valid for VS and ES targets.
void shader::init_call_fs(cf_node* cf) {
	assert(target == TARGET_VS || target == TARGET_ES);

	unsigned reg = 0;
	inputs_vec::const_iterator it = inputs.begin();
	inputs_vec::const_iterator last = inputs.end();

	while (it != last) {
		if (it->preloaded)
			add_pinned_gpr_values(cf->src, reg, it->comp_mask, true);
		else
			add_pinned_gpr_values(cf->dst, reg, it->comp_mask, false);

		++it;
		++reg;
	}
}
228
 
229
// Gives every value in `s` the gvn_source of the shared undef value. The
// undef value is registered in the value table first if it has no gvn_source
// yet. Values in `s` must be neither read-only nor relative.
void shader::set_undef(val_set& s) {
	value *undef_val = get_undef_value();
	if (!undef_val->gvn_source)
		vt.add_value(undef_val);

	val_set::iterator it = s.begin(*this);
	val_set::iterator stop = s.end(*this);

	while (it != stop) {
		value *cur = *it;

		assert(!cur->is_readonly() && !cur->is_rel());

		cur->gvn_source = undef_val->gvn_source;
		++it;
	}
}
244
 
245
// Allocates a new value of kind `k` with id `regid` and version `ver` from
// the value pool.
value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
	return val_pool.create(k, regid, ver);
}
249
 
250
// Returns the shader's single VLK_UNDEF value, creating it on first use.
value* shader::get_undef_value() {
	if (undef)
		return undef;

	undef = create_value(VLK_UNDEF, 0, 0);
	return undef;
}
255
 
256
// Constructs a plain node in pool storage and registers it in all_nodes
// (the destructor walks that list to run node destructors).
node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
	void *storage = pool.allocate(sizeof(node));
	node *created = new (storage) node(nt, nst, flags);
	all_nodes.push_back(created);
	return created;
}
261
 
262
// Constructs an alu_node in pool storage and registers it in all_nodes.
alu_node* shader::create_alu() {
	void *storage = pool.allocate(sizeof(alu_node));
	alu_node *created = new (storage) alu_node();
	all_nodes.push_back(created);
	return created;
}
267
 
268
// Constructs an alu_group_node in pool storage and registers it in all_nodes.
alu_group_node* shader::create_alu_group() {
	void *storage = pool.allocate(sizeof(alu_group_node));
	alu_group_node *created = new (storage) alu_group_node();
	all_nodes.push_back(created);
	return created;
}
274
 
275
// Constructs an alu_packed_node in pool storage and registers it in
// all_nodes.
alu_packed_node* shader::create_alu_packed() {
	void *storage = pool.allocate(sizeof(alu_packed_node));
	alu_packed_node *created = new (storage) alu_packed_node();
	all_nodes.push_back(created);
	return created;
}
281
 
282
// Constructs a cf_node in pool storage with its barrier bit set and registers
// it in all_nodes.
cf_node* shader::create_cf() {
	void *storage = pool.allocate(sizeof(cf_node));
	cf_node *created = new (storage) cf_node();
	created->bc.barrier = 1;
	all_nodes.push_back(created);
	return created;
}
288
 
289
// Constructs a fetch_node in pool storage and registers it in all_nodes.
fetch_node* shader::create_fetch() {
	void *storage = pool.allocate(sizeof(fetch_node));
	fetch_node *created = new (storage) fetch_node();
	all_nodes.push_back(created);
	return created;
}
294
 
295
// Constructs a region_node in pool storage, passing the current number of
// regions to its constructor, then appends it to both `regions` and
// `all_nodes`.
region_node* shader::create_region() {
	void *storage = pool.allocate(sizeof(region_node));
	region_node *created = new (storage) region_node(regions.size());
	regions.push_back(created);
	all_nodes.push_back(created);
	return created;
}
302
 
303
// Constructs a depart_node for region `target`, passing the region's current
// depart count as the second constructor argument, and records it in the
// region's depart list and in all_nodes.
depart_node* shader::create_depart(region_node* target) {
	void *storage = pool.allocate(sizeof(depart_node));
	depart_node *created =
			new (storage) depart_node(target, target->departs.size());
	target->departs.push_back(created);
	all_nodes.push_back(created);
	return created;
}
310
 
311
// Constructs a repeat_node for region `target`, passing the region's current
// repeat count plus one as the second constructor argument, and records it in
// the region's repeat list and in all_nodes.
repeat_node* shader::create_repeat(region_node* target) {
	void *storage = pool.allocate(sizeof(repeat_node));
	repeat_node *created =
			new (storage) repeat_node(target, target->repeats.size() + 1);
	target->repeats.push_back(created);
	all_nodes.push_back(created);
	return created;
}
318
 
319
// Constructs a container_node with the given type, subtype and flags in pool
// storage and registers it in all_nodes.
container_node* shader::create_container(node_type nt, node_subtype nst,
		                                 node_flags flags) {
	void *storage = pool.allocate(sizeof(container_node));
	container_node *created = new (storage) container_node(nt, nst, flags);
	all_nodes.push_back(created);
	return created;
}
326
 
327
// Constructs an if_node in pool storage and registers it in all_nodes.
if_node* shader::create_if() {
	void *storage = pool.allocate(sizeof(if_node));
	if_node *created = new (storage) if_node();
	all_nodes.push_back(created);
	return created;
}
332
 
333
// Constructs a bb_node with the given id and loop level in pool storage and
// registers it in all_nodes. The caller is responsible for adding it to a
// bbs vector.
bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
	void *storage = pool.allocate(sizeof(bb_node));
	bb_node *created = new (storage) bb_node(id, loop_level);
	all_nodes.push_back(created);
	return created;
}
338
 
339
// Returns the read-only VLK_PARAM value keyed by `sel`, creating it on first
// use.
value* shader::get_special_ro_value(unsigned sel)
{
	value *param = get_ro_value(special_ro_values, VLK_PARAM, sel);
	return param;
}
342
 
343
// Returns the read-only VLK_CONST value keyed by literal `v`, creating it on
// first use. The literal is (re)stored in the value on every call.
value* shader::get_const_value(const literal &v) {
	value *constant = get_ro_value(const_values, VLK_CONST, v);
	constant->literal_value = v;
	return constant;
}
348
 
349
// Runs the destructor of every node created by this shader (nodes were
// placement-constructed in pool storage, so they are destroyed explicitly
// rather than deleted), then deletes the heap-allocated gpr arrays.
shader::~shader() {
	node_vec::iterator n = all_nodes.begin();
	node_vec::iterator n_end = all_nodes.end();
	while (n != n_end) {
		(*n)->~node();
		++n;
	}

	gpr_array_vec::iterator a = gpr_arrays.begin();
	gpr_array_vec::iterator a_end = gpr_arrays.end();
	while (a != a_end) {
		delete *a;
		++a;
	}
}
359
 
360
void shader::dump_ir() {
361
	if (ctx.dump_pass)
362
		dump(*this).run();
363
}
364
 
365
// Returns version `ver` of the value with the same kind and select as `v`.
// If `v` belongs to an array, the returned value is attached to that array
// too. `v` must be neither read-only nor relative.
value* shader::get_value_version(value* v, unsigned ver) {
	assert(!v->is_readonly() && !v->is_rel());

	value *versioned = get_value(v->kind, v->select, ver);
	assert(versioned);

	if (v->array)
		versioned->array = v->array;

	return versioned;
}
376
 
377
// Finds the registered gpr array on channel `chan` whose register range
// [base, base + array_size) contains `reg`. Returns the first match, or NULL
// when the register is not part of any array.
gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {
	regarray_vec::iterator it = gpr_arrays.begin();
	regarray_vec::iterator stop = gpr_arrays.end();

	for (; it != stop; ++it) {
		gpr_array *candidate = *it;

		if (candidate->base_gpr.chan() != chan)
			continue;

		unsigned first = candidate->base_gpr.sel();
		if (reg >= first && reg < first + candidate->array_size)
			return candidate;
	}

	return NULL;
}
389
 
390
void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
391
					   unsigned comp_mask) {
392
	unsigned chan = 0;
393
	while (comp_mask) {
394
		if (comp_mask & 1) {
395
			gpr_array *a = new gpr_array(
396
					sel_chan(gpr_start, chan), gpr_count);
397
 
398
			SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
399
			         << " [" << a->array_size << "]\n";
400
			);
401
 
402
			gpr_arrays.push_back(a);
403
		}
404
		comp_mask >>= 1;
405
		++chan;
406
	}
407
}
408
 
409
// Returns the cached constant value used as predicate select `sel`, creating
// and caching it on first use. `sel` must be 0 or 1.
value* shader::get_pred_sel(int sel) {
	assert(sel == 0 || sel == 1);

	if (pred_sels[sel])
		return pred_sels[sel];

	pred_sels[sel] = get_const_value(sel);
	return pred_sels[sel];
}
416
 
417
// Creates a CF node with opcode `op`; the barrier bit is set on the result.
cf_node* shader::create_cf(unsigned op) {
	cf_node *cf = create_cf();

	cf->bc.set_op(op);
	cf->bc.barrier = 1;

	return cf;
}
423
 
424
std::string shader::get_full_target_name() {
425
	std::string s = get_shader_target_name();
426
	s += "/";
427
	s += ctx.get_hw_chip_name();
428
	s += "/";
429
	s += ctx.get_hw_class_name();
430
	return s;
431
}
432
 
433
const char* shader::get_shader_target_name() {
434
	switch (target) {
435
		case TARGET_VS: return "VS";
436
		case TARGET_ES: return "ES";
437
		case TARGET_PS: return "PS";
438
		case TARGET_GS: return "GS";
439
		case TARGET_COMPUTE: return "COMPUTE";
440
		case TARGET_FETCH: return "FETCH";
441
		default:
442
			return "INVALID_TARGET";
443
	}
444
}
445
 
446
// Simplifies the depart/repeat node that directly contains `dr`.
//
// If the parent of `dr` is a repeat node, the target region expands that
// repeat; if it is a depart node, the target region expands that depart.
// Afterwards every node following `dr` in its (current) parent is cut off.
void shader::simplify_dep_rep(node* dr) {
	container_node *p = dr->parent;
	if (p->is_repeat()) {
		// The is_repeat() check above guards this downcast.
		repeat_node *r = static_cast<repeat_node*>(p);
		r->target->expand_repeat(r);
	} else if (p->is_depart()) {
		// The is_depart() check above guards this downcast.
		depart_node *d = static_cast<depart_node*>(p);
		d->target->expand_depart(d);
	}
	// dr->parent is deliberately re-read instead of reusing `p`
	// (presumably the expansion above can reparent `dr` - confirm).
	if (dr->next)
		dr->parent->cut(dr->next, NULL);
}
458
 
459
 
460
// FIXME this is used in some places as the max non-temp gpr,
461
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
462
unsigned shader::first_temp_gpr() {
463
	return MAX_GPR - ctx.alu_temp_gprs;
464
}
465
 
466
unsigned shader::num_nontemp_gpr() {
467
	return MAX_GPR - 2 * ctx.alu_temp_gprs;
468
}
469
 
470
void shader::set_uses_kill() {
471
	if (root->src.empty())
472
		root->src.resize(1);
473
 
474
	if (!root->src[0])
475
		root->src[0] = get_special_value(SV_VALID_MASK);
476
}
477
 
478
// Creates a new ALU node that copies the bytecode, predicate, sources and
// destinations of `n`. The operand vectors are copied element-wise, so the
// clone refers to the same value objects as the original.
alu_node* shader::clone(alu_node* n) {
	alu_node *copy = create_alu();

	copy->bc = n->bc;
	copy->pred = n->pred;

	// FIXME: this may be wrong with indirect operands
	copy->src = n->src;
	copy->dst = n->dst;

	return copy;
}
490
 
491
void shader::collect_stats(bool opt) {
492
	if (!sb_context::dump_stat)
493
		return;
494
 
495
	shader_stats &s = opt ? opt_stats : src_stats;
496
 
497
	s.shaders = 1;
498
	s.ngpr = ngpr;
499
	s.nstack = nstack;
500
	s.collect(root);
501
 
502
	if (opt)
503
		ctx.opt_stats.accumulate(s);
504
	else
505
		ctx.src_stats.accumulate(s);
506
}
507
 
508
// Looks up `key` in the map `vm`; when absent, creates a value of kind `vk`
// (version 0) with that key as id, marks it VLF_READONLY, stores it in the
// map and returns it.
value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
	value_map::iterator hit = vm.find(key);

	if (hit == vm.end()) {
		value *fresh = create_value(vk, key, 0);
		fresh->flags = VLF_READONLY;
		vm.insert(std::make_pair(key, fresh));
		return fresh;
	}

	return hit->second;
}
517
 
518
// Partitions the children of container `n` into basic-block nodes and
// recurses into nested containers.
//
// Runs of consecutive NT_OP children are moved into a newly created bb_node
// inserted at the position of the run. Every created block gets the next
// index in `bbs` as id, carries `loop_level`, and is appended to `bbs`.
// Nested containers are processed recursively; the loop level is incremented
// when the container is a region that reports is_loop().
//
// Scanning of `n` stops at the first NT_DEPART child (after it has been
// processed), in which case no trailing block is created.
void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {

	bool inside_bb = false;
	// Starts as true so that a block is also created when the container
	// begins with a non-op child, or is empty.
	bool last_inside_bb = true;
	node_iterator bb_start(n->begin()), I(bb_start), E(n->end());

	for (; I != E; ++I) {
		node *k = *I;
		inside_bb = k->type == NT_OP;

		if (inside_bb && !last_inside_bb)
			// First op after a non-op child: a new run starts here.
			bb_start = I;
		else if (!inside_bb) {
			// A non-op child ends the current run. No block is created
			// directly in front of repeat, depart or if nodes.
			if (last_inside_bb
					&& I->type != NT_REPEAT
					&& I->type != NT_DEPART
					&& I->type != NT_IF) {
				bb_node *bb = create_bb(bbs.size(), loop_level);
				bbs.push_back(bb);
				n->insert_node_before(*bb_start, bb);
				// An empty run yields an empty block.
				if (bb_start != I)
					bb->move(bb_start, I);
			}

			if (k->is_container()) {

				bool loop = false;
				if (k->type == NT_REGION) {
					loop = static_cast<region_node*>(k)->is_loop();
				}

				create_bbs(static_cast<container_node*>(k), bbs,
				           loop_level + loop);
			}
		}

		if (k->type == NT_DEPART)
			return;

		last_inside_bb = inside_bb;
	}

	if (last_inside_bb) {
		// Trailing run of ops (or an empty container): wrap it in a block.
		bb_node *bb = create_bb(bbs.size(), loop_level);
		bbs.push_back(bb);
		if (n->empty())
			n->push_back(bb);
		else {
			n->insert_node_before(*bb_start, bb);
			if (bb_start != n->end())
				bb->move(bb_start, n->end());
		}
	} else {
		// A container ending in an if node gets an empty block after it.
		if (n->last && n->last->type == NT_IF) {
			bb_node *bb = create_bb(bbs.size(), loop_level);
			bbs.push_back(bb);
			n->push_back(bb);
		}
	}
}
578
 
579
// Calls expand() on every basic block in `bbs`, in order.
void shader::expand_bbs(bbs_vec &bbs) {
	bbs_vec::iterator it = bbs.begin();
	bbs_vec::iterator stop = bbs.end();

	while (it != stop) {
		(*it)->expand();
		++it;
	}
}
586
 
587
// Maps a node to the scheduler queue it belongs to, based on its subtype:
//   ALU / packed ALU / copy / psi  -> SQ_ALU
//   fetch                          -> SQ_VTX on r600 when the op has FF_VTX,
//                                     SQ_TEX otherwise
//   CF                             -> SQ_CF
// Any other subtype trips an assertion and yields SQ_NUM.
sched_queue_id shader::get_queue_id(node* n) {
	switch (n->subtype) {
		case NST_ALU_INST:
		case NST_ALU_PACKED_INST:
		case NST_COPY:
		case NST_PSI:
			return SQ_ALU;
		case NST_FETCH_INST: {
			// The NST_FETCH_INST subtype guards this downcast.
			fetch_node *f = static_cast<fetch_node*>(n);
			if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
				return SQ_VTX;
			return SQ_TEX;
		}
		case NST_CF_INST:
			return SQ_CF;
		default:
			assert(0);
			return SQ_NUM;
	}
}
607
 
608
// Recursively counts the nodes below (and including) `n`.
//
// ALU and fetch instructions bump their counters and are not descended into.
// A container bumps at most one of the group / clause / cf counters,
// depending on what it reports to be, and then all of its children are
// visited.
void shader_stats::collect(node *n) {
	if (n->is_alu_inst())
		++alu;
	else if (n->is_fetch_inst())
		++fetch;
	else if (n->is_container()) {
		// The is_container() check above guards this downcast.
		container_node *c = static_cast<container_node*>(n);

		if (n->is_alu_group())
			++alu_groups;
		else if (n->is_alu_clause())
			++alu_clauses;
		else if (n->is_fetch_clause())
			++fetch_clauses;
		else if (n->is_cf_inst())
			++cf;

		if (!c->empty()) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				collect(*I);
			}
		}
	}
}
632
 
633
// Adds the counters of `s` to this record and counts one more shader.
// Note that s.shaders itself is not added; each call counts as one shader.
void shader_stats::accumulate(shader_stats& s) {
	shaders += 1;

	// Size / resource totals.
	ndw    += s.ndw;
	ngpr   += s.ngpr;
	nstack += s.nstack;

	// Instruction and clause counts.
	alu           += s.alu;
	alu_groups    += s.alu_groups;
	alu_clauses   += s.alu_clauses;
	fetch         += s.fetch;
	fetch_clauses += s.fetch_clauses;
	cf            += s.cf;
}
646
 
647
void shader_stats::dump() {
648
	sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
649
			<< ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
650
			<< ", alu:" << alu << ", fetch:" << fetch
651
			<< ", fetch clauses:" << fetch_clauses
652
			<< ", cf:" << cf;
653
 
654
	if (shaders > 1)
655
		sblog << ", shaders:" << shaders;
656
 
657
	sblog << "\n";
658
}
659
 
660
static void print_diff(unsigned d1, unsigned d2) {
661
	if (d1)
662
		sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
663
	else if (d2)
664
		sblog << "N/A";
665
	else
666
		sblog << "0%";
667
}
668
 
669
// Writes, as one line to sblog, the percentage change of every counter going
// from this record to `s`.
void shader_stats::dump_diff(shader_stats& s) {
	sblog << "dw:";
	print_diff(ndw, s.ndw);

	sblog << ", gpr:";
	print_diff(ngpr, s.ngpr);

	sblog << ", stk:";
	print_diff(nstack, s.nstack);

	sblog << ", alu groups:";
	print_diff(alu_groups, s.alu_groups);

	sblog << ", alu clauses: ";
	print_diff(alu_clauses, s.alu_clauses);

	sblog << ", alu:";
	print_diff(alu, s.alu);

	sblog << ", fetch:";
	print_diff(fetch, s.fetch);

	sblog << ", fetch clauses:";
	print_diff(fetch_clauses, s.fetch_clauses);

	sblog << ", cf:";
	print_diff(cf, s.cf);

	sblog << "\n";
}
681
 
682
} // namespace r600_sb