Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5563 serge 1
/*
 * Copyright 2013 Vadim Girlin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */
26
 
27
#ifndef SB_PASS_H_
28
#define SB_PASS_H_
29
 
30
#include <stdint.h>
#include <cassert>
#include <map>
#include <stack>
#include <string>
#include <vector>
31
 
32
namespace r600_sb {
33
 
34
class pass {
35
protected:
36
	sb_context &ctx;
37
	shader &sh;
38
 
39
public:
40
	pass(shader &s);
41
 
42
	virtual int run();
43
 
44
	virtual ~pass() {}
45
};
46
 
47
class vpass : public pass {
48
 
49
public:
50
 
51
	vpass(shader &s) : pass(s) {}
52
 
53
	virtual int init();
54
	virtual int done();
55
 
56
	virtual int run();
57
	virtual void run_on(container_node &n);
58
 
59
	virtual bool visit(node &n, bool enter);
60
	virtual bool visit(container_node &n, bool enter);
61
	virtual bool visit(alu_group_node &n, bool enter);
62
	virtual bool visit(cf_node &n, bool enter);
63
	virtual bool visit(alu_node &n, bool enter);
64
	virtual bool visit(alu_packed_node &n, bool enter);
65
	virtual bool visit(fetch_node &n, bool enter);
66
	virtual bool visit(region_node &n, bool enter);
67
	virtual bool visit(repeat_node &n, bool enter);
68
	virtual bool visit(depart_node &n, bool enter);
69
	virtual bool visit(if_node &n, bool enter);
70
	virtual bool visit(bb_node &n, bool enter);
71
 
72
};
73
 
74
class rev_vpass : public vpass {
75
 
76
public:
77
	rev_vpass(shader &s) : vpass(s) {}
78
 
79
	virtual void run_on(container_node &n);
80
};
81
 
82
 
83
// =================== PASSES
84
 
85
class bytecode;
86
 
87
class bc_dump : public vpass {
88
	using vpass::visit;
89
 
90
	uint32_t *bc_data;
91
	unsigned ndw;
92
 
93
	unsigned id;
94
 
95
	unsigned new_group, group_index;
96
 
97
public:
98
 
99
	bc_dump(shader &s, bytecode *bc = NULL);
100
 
101
	bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) :
102
		vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {}
103
 
104
	virtual int init();
105
	virtual int done();
106
 
107
	virtual bool visit(cf_node &n, bool enter);
108
	virtual bool visit(alu_node &n, bool enter);
109
	virtual bool visit(fetch_node &n, bool enter);
110
 
111
	void dump_dw(unsigned dw_id, unsigned count = 2);
112
 
113
	void dump(cf_node& n);
114
	void dump(alu_node& n);
115
	void dump(fetch_node& n);
116
};
117
 
118
 
119
class dce_cleanup : public vpass {
120
	using vpass::visit;
121
 
122
public:
123
 
124
	dce_cleanup(shader &s) : vpass(s) {}
125
 
126
	virtual bool visit(node &n, bool enter);
127
	virtual bool visit(alu_group_node &n, bool enter);
128
	virtual bool visit(cf_node &n, bool enter);
129
	virtual bool visit(alu_node &n, bool enter);
130
	virtual bool visit(alu_packed_node &n, bool enter);
131
	virtual bool visit(fetch_node &n, bool enter);
132
	virtual bool visit(region_node &n, bool enter);
133
	virtual bool visit(container_node &n, bool enter);
134
 
135
private:
136
 
137
	void cleanup_dst(node &n);
138
	void cleanup_dst_vec(vvec &vv);
139
 
140
};
141
 
142
 
143
class def_use : public pass {
144
 
145
public:
146
 
147
	def_use(shader &sh) : pass(sh) {}
148
 
149
	virtual int run();
150
	void run_on(node *n, bool defs);
151
 
152
private:
153
 
154
	void process_uses(node *n);
155
	void process_defs(node *n, vvec &vv, bool arr_def);
156
	void process_phi(container_node *c, bool defs, bool uses);
157
};
158
 
159
 
160
 
161
class dump : public vpass {
162
	using vpass::visit;
163
 
164
	int level;
165
 
166
public:
167
 
168
	dump(shader &s) : vpass(s), level(0) {}
169
 
170
	virtual bool visit(node &n, bool enter);
171
	virtual bool visit(container_node &n, bool enter);
172
	virtual bool visit(alu_group_node &n, bool enter);
173
	virtual bool visit(cf_node &n, bool enter);
174
	virtual bool visit(alu_node &n, bool enter);
175
	virtual bool visit(alu_packed_node &n, bool enter);
176
	virtual bool visit(fetch_node &n, bool enter);
177
	virtual bool visit(region_node &n, bool enter);
178
	virtual bool visit(repeat_node &n, bool enter);
179
	virtual bool visit(depart_node &n, bool enter);
180
	virtual bool visit(if_node &n, bool enter);
181
	virtual bool visit(bb_node &n, bool enter);
182
 
183
 
184
	static void dump_op(node &n, const char *name);
185
	static void dump_vec(const vvec & vv);
186
	static void dump_set(shader &sh, val_set & v);
187
 
188
	static void dump_rels(vvec & vv);
189
 
190
	static void dump_val(value *v);
191
	static void dump_op(node *n);
192
 
193
	static void dump_op_list(container_node *c);
194
	static void dump_queue(sched_queue &q);
195
 
196
	static void dump_alu(alu_node *n);
197
 
198
private:
199
 
200
	void indent();
201
 
202
	void dump_common(node &n);
203
	void dump_flags(node &n);
204
 
205
	void dump_live_values(container_node &n, bool before);
206
};
207
 
208
 
209
// Global Code Motion
210
 
211
class gcm : public pass {
212
 
213
	sched_queue bu_ready[SQ_NUM];
214
	sched_queue bu_ready_next[SQ_NUM];
215
	sched_queue bu_ready_early[SQ_NUM];
216
	sched_queue ready;
217
	sched_queue ready_above;
218
 
219
	container_node pending;
220
 
221
	struct op_info {
222
		bb_node* top_bb;
223
		bb_node* bottom_bb;
224
		op_info() : top_bb(), bottom_bb() {}
225
	};
226
 
227
	typedef std::map op_info_map;
228
 
229
	typedef std::map nuc_map;
230
 
231
	op_info_map op_map;
232
	nuc_map uses;
233
 
234
	typedef std::vector nuc_stack;
235
 
236
	nuc_stack nuc_stk;
237
	unsigned ucs_level;
238
 
239
	bb_node * bu_bb;
240
 
241
	vvec pending_defs;
242
 
243
	node_list pending_nodes;
244
 
245
	unsigned cur_sq;
246
 
247
	// for register pressure tracking in bottom-up pass
248
	val_set live;
249
	int live_count;
250
 
251
	static const int rp_threshold = 100;
252
 
253
	bool pending_exec_mask_update;
254
 
255
public:
256
 
257
	gcm(shader &sh) : pass(sh),
258
		bu_ready(), bu_ready_next(), bu_ready_early(),
259
		ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
260
		bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
261
		live(), live_count(), pending_exec_mask_update() {}
262
 
263
	virtual int run();
264
 
265
private:
266
 
267
	void collect_instructions(container_node *c, bool early_pass);
268
 
269
	void sched_early(container_node *n);
270
	void td_sched_bb(bb_node *bb);
271
	bool td_is_ready(node *n);
272
	void td_release_uses(vvec &v);
273
	void td_release_val(value *v);
274
	void td_schedule(bb_node *bb, node *n);
275
 
276
	void sched_late(container_node *n);
277
	void bu_sched_bb(bb_node *bb);
278
	void bu_release_defs(vvec &v, bool src);
279
	void bu_release_phi_defs(container_node *p, unsigned op);
280
	bool bu_is_ready(node *n);
281
	void bu_release_val(value *v);
282
	void bu_release_op(node * n);
283
	void bu_find_best_bb(node *n, op_info &oi);
284
	void bu_schedule(container_node *bb, node *n);
285
 
286
	void push_uc_stack();
287
	void pop_uc_stack();
288
 
289
	void init_def_count(nuc_map &m, container_node &s);
290
	void init_use_count(nuc_map &m, container_node &s);
291
	unsigned get_uc_vec(vvec &vv);
292
	unsigned get_dc_vec(vvec &vv, bool src);
293
 
294
	void add_ready(node *n);
295
 
296
	void dump_uc_stack();
297
 
298
	unsigned real_alu_count(sched_queue &q, unsigned max);
299
 
300
	// check if we have not less than threshold ready alu instructions
301
	bool check_alu_ready_count(unsigned threshold);
302
};
303
 
304
 
305
class gvn : public vpass {
306
	using vpass::visit;
307
 
308
public:
309
 
310
	gvn(shader &sh) : vpass(sh) {}
311
 
312
	virtual bool visit(node &n, bool enter);
313
	virtual bool visit(cf_node &n, bool enter);
314
	virtual bool visit(alu_node &n, bool enter);
315
	virtual bool visit(alu_packed_node &n, bool enter);
316
	virtual bool visit(fetch_node &n, bool enter);
317
	virtual bool visit(region_node &n, bool enter);
318
 
319
private:
320
 
321
	void process_op(node &n, bool rewrite = true);
322
 
323
	// returns true if the value was rewritten
324
	bool process_src(value* &v, bool rewrite);
325
 
326
 
327
	void process_alu_src_constants(node &n, value* &v);
328
};
329
 
330
 
331
class if_conversion : public pass {
332
 
333
public:
334
 
335
	if_conversion(shader &sh) : pass(sh) {}
336
 
337
	virtual int run();
338
 
339
	bool run_on(region_node *r);
340
 
341
	void convert_kill_instructions(region_node *r, value *em, bool branch,
342
	                               container_node *c);
343
 
344
	bool check_and_convert(region_node *r);
345
 
346
	alu_node* convert_phi(value *select, node *phi);
347
 
348
};
349
 
350
 
351
class liveness : public rev_vpass {
352
	using vpass::visit;
353
 
354
	val_set live;
355
	bool live_changed;
356
 
357
public:
358
 
359
	liveness(shader &s) : rev_vpass(s), live_changed(false) {}
360
 
361
	virtual int init();
362
 
363
	virtual bool visit(node &n, bool enter);
364
	virtual bool visit(bb_node &n, bool enter);
365
	virtual bool visit(container_node &n, bool enter);
366
	virtual bool visit(alu_group_node &n, bool enter);
367
	virtual bool visit(cf_node &n, bool enter);
368
	virtual bool visit(alu_node &n, bool enter);
369
	virtual bool visit(alu_packed_node &n, bool enter);
370
	virtual bool visit(fetch_node &n, bool enter);
371
	virtual bool visit(region_node &n, bool enter);
372
	virtual bool visit(repeat_node &n, bool enter);
373
	virtual bool visit(depart_node &n, bool enter);
374
	virtual bool visit(if_node &n, bool enter);
375
 
376
private:
377
 
378
	void update_interferences();
379
	void process_op(node &n);
380
 
381
	bool remove_val(value *v);
382
	bool remove_vec(vvec &v);
383
	bool process_outs(node& n);
384
	void process_ins(node& n);
385
 
386
	void process_phi_outs(container_node *phi);
387
	void process_phi_branch(container_node *phi, unsigned id);
388
 
389
	bool process_maydef(value *v);
390
 
391
	bool add_vec(vvec &vv, bool src);
392
 
393
	void update_src_vec(vvec &vv, bool src);
394
};
395
 
396
 
397
struct bool_op_info {
398
	bool invert;
399
	unsigned int_cvt;
400
 
401
	alu_node *n;
402
};
403
 
404
class peephole : public pass {
405
 
406
public:
407
 
408
	peephole(shader &sh) : pass(sh) {}
409
 
410
	virtual int run();
411
 
412
	void run_on(container_node *c);
413
 
414
	void optimize_cc_op(alu_node *a);
415
 
416
	void optimize_cc_op2(alu_node *a);
417
	void optimize_CNDcc_op(alu_node *a);
418
 
419
	bool get_bool_op_info(value *b, bool_op_info& bop);
420
	bool get_bool_flt_to_int_source(alu_node* &a);
421
	void convert_float_setcc(alu_node *f2i, alu_node *s);
422
};
423
 
424
 
425
class psi_ops : public rev_vpass {
426
	using rev_vpass::visit;
427
 
428
public:
429
 
430
	psi_ops(shader &s) : rev_vpass(s) {}
431
 
432
	virtual bool visit(node &n, bool enter);
433
	virtual bool visit(alu_node &n, bool enter);
434
 
435
	bool try_inline(node &n);
436
	bool try_reduce(node &n);
437
	bool eliminate(node &n);
438
 
439
	void unpredicate(node *n);
440
};
441
 
442
 
443
// check correctness of the generated code, e.g.:
444
// - expected source operand value is the last value written to its gpr,
445
// - all arguments of phi node should be allocated to the same gpr,
446
// TODO other tests
447
class ra_checker : public pass {
448
 
449
	typedef std::map reg_value_map;
450
 
451
	typedef std::vector regmap_stack;
452
 
453
	regmap_stack rm_stack;
454
	unsigned rm_stk_level;
455
 
456
	value* prev_dst[5];
457
 
458
public:
459
 
460
	ra_checker(shader &sh) : pass(sh), rm_stk_level(0), prev_dst() {}
461
 
462
	virtual int run();
463
 
464
	void run_on(container_node *c);
465
 
466
	void dump_error(const error_info &e);
467
	void dump_all_errors();
468
 
469
private:
470
 
471
	reg_value_map& rmap() { return rm_stack[rm_stk_level]; }
472
 
473
	void push_stack();
474
	void pop_stack();
475
 
476
	// when going out of the alu clause, values in the clause temporary gprs,
477
	// AR, predicate values, PS/PV are destroyed
478
	void kill_alu_only_regs();
479
	void error(node *n, unsigned id, std::string msg);
480
 
481
	void check_phi_src(container_node *p, unsigned id);
482
	void process_phi_dst(container_node *p);
483
	void check_alu_group(alu_group_node *g);
484
	void process_op_dst(node *n);
485
	void check_op_src(node *n);
486
	void check_src_vec(node *n, unsigned id, vvec &vv, bool src);
487
	void check_value_gpr(node *n, unsigned id, value *v);
488
};
489
 
490
// =======================================
491
 
492
 
493
class ra_coalesce : public pass {
494
 
495
public:
496
 
497
	ra_coalesce(shader &sh) : pass(sh) {}
498
 
499
	virtual int run();
500
};
501
 
502
 
503
 
504
// =======================================
505
 
506
class ra_init : public pass {
507
 
508
public:
509
 
510
	ra_init(shader &sh) : pass(sh), prev_chans() {
511
 
512
		// The parameter below affects register channels distribution.
513
		// For cayman (VLIW-4) we're trying to distribute the channels
514
		// uniformly, this means significantly better alu slots utilization
515
		// at the expense of higher gpr usage. Hopefully this will improve
516
		// performance, though it has to be proven with real benchmarks yet.
517
		// For VLIW-5 this method could also slightly improve slots
518
		// utilization, but increased register pressure seems more significant
519
		// and overall performance effect is negative according to some
520
		// benchmarks, so it's not used currently. Basically, VLIW-5 doesn't
521
		// really need it because trans slot (unrestricted by register write
522
		// channel) allows to consume most deviations from uniform channel
523
		// distribution.
524
		// Value 3 means that for new allocation we'll use channel that differs
525
		// from 3 last used channels. 0 for VLIW-5 effectively turns this off.
526
 
527
		ra_tune = sh.get_ctx().is_cayman() ? 3 : 0;
528
	}
529
 
530
	virtual int run();
531
 
532
private:
533
 
534
	unsigned prev_chans;
535
	unsigned ra_tune;
536
 
537
	void add_prev_chan(unsigned chan);
538
	unsigned get_preferable_chan_mask();
539
 
540
	void ra_node(container_node *c);
541
	void process_op(node *n);
542
 
543
	void color(value *v);
544
 
545
	void color_bs_constraint(ra_constraint *c);
546
 
547
	void assign_color(value *v, sel_chan c);
548
	void alloc_arrays();
549
};
550
 
551
// =======================================
552
 
553
class ra_split : public pass {
554
 
555
public:
556
 
557
	ra_split(shader &sh) : pass(sh) {}
558
 
559
	virtual int run();
560
 
561
	void split(container_node *n);
562
	void split_op(node *n);
563
	void split_alu_packed(alu_packed_node *n);
564
	void split_vector_inst(node *n);
565
 
566
	void split_packed_ins(alu_packed_node *n);
567
 
568
#if 0
569
	void split_pinned_outs(node *n);
570
#endif
571
 
572
	void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz);
573
 
574
	void split_phi_src(container_node *loc, container_node *c, unsigned id,
575
	                   bool loop);
576
	void split_phi_dst(node *loc, container_node *c, bool loop);
577
	void init_phi_constraints(container_node *c);
578
};
579
 
580
 
581
 
582
class ssa_prepare : public vpass {
583
	using vpass::visit;
584
 
585
	typedef std::vector vd_stk;
586
	vd_stk stk;
587
 
588
	unsigned level;
589
 
590
public:
591
	ssa_prepare(shader &s) : vpass(s), level(0) {}
592
 
593
	virtual bool visit(cf_node &n, bool enter);
594
	virtual bool visit(alu_node &n, bool enter);
595
	virtual bool visit(fetch_node &n, bool enter);
596
	virtual bool visit(region_node &n, bool enter);
597
	virtual bool visit(repeat_node &n, bool enter);
598
	virtual bool visit(depart_node &n, bool enter);
599
 
600
private:
601
 
602
	void push_stk() {
603
		++level;
604
		if (level + 1 > stk.size())
605
			stk.resize(level+1);
606
		else
607
			stk[level].clear();
608
	}
609
	void pop_stk() {
610
		assert(level);
611
		--level;
612
		stk[level].add_set(stk[level + 1]);
613
	}
614
 
615
	void add_defs(node &n);
616
 
617
	val_set & cur_set() { return stk[level]; }
618
 
619
	container_node* create_phi_nodes(int count);
620
};
621
 
622
class ssa_rename : public vpass {
623
	using vpass::visit;
624
 
625
	typedef sb_map def_map;
626
 
627
	def_map def_count;
628
	std::stack rename_stack;
629
 
630
	typedef std::map val_map;
631
	val_map values;
632
 
633
public:
634
 
635
	ssa_rename(shader &s) : vpass(s) {}
636
 
637
	virtual int init();
638
 
639
	virtual bool visit(container_node &n, bool enter);
640
	virtual bool visit(node &n, bool enter);
641
	virtual bool visit(alu_group_node &n, bool enter);
642
	virtual bool visit(cf_node &n, bool enter);
643
	virtual bool visit(alu_node &n, bool enter);
644
	virtual bool visit(alu_packed_node &n, bool enter);
645
	virtual bool visit(fetch_node &n, bool enter);
646
	virtual bool visit(region_node &n, bool enter);
647
	virtual bool visit(repeat_node &n, bool enter);
648
	virtual bool visit(depart_node &n, bool enter);
649
	virtual bool visit(if_node &n, bool enter);
650
 
651
private:
652
 
653
	void push(node *phi);
654
	void pop();
655
 
656
	unsigned get_index(def_map& m, value* v);
657
	void set_index(def_map& m, value* v, unsigned index);
658
	unsigned new_index(def_map& m, value* v);
659
 
660
	value* rename_use(node *n, value* v);
661
	value* rename_def(node *def, value* v);
662
 
663
	void rename_src_vec(node *n, vvec &vv, bool src);
664
	void rename_dst_vec(node *def, vvec &vv, bool set_def);
665
 
666
	void rename_src(node *n);
667
	void rename_dst(node *n);
668
 
669
	void rename_phi_args(container_node *phi, unsigned op, bool def);
670
 
671
	void rename_virt(node *n);
672
	void rename_virt_val(node *n, value *v);
673
};
674
 
675
class bc_finalizer : public pass {
676
 
677
	cf_node *last_export[EXP_TYPE_COUNT];
678
	cf_node *last_cf;
679
 
680
	unsigned ngpr;
681
	unsigned nstack;
682
 
683
public:
684
 
685
	bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(),
686
		nstack() {}
687
 
688
	virtual int run();
689
 
690
	void finalize_loop(region_node *r);
691
	void finalize_if(region_node *r);
692
 
693
	void run_on(container_node *c);
694
 
695
	void finalize_alu_group(alu_group_node *g);
696
	void finalize_alu_src(alu_group_node *g, alu_node *a);
697
 
698
	void emit_set_grad(fetch_node* f);
699
	void finalize_fetch(fetch_node *f);
700
 
701
	void finalize_cf(cf_node *c);
702
 
703
	sel_chan translate_kcache(cf_node *alu, value *v);
704
 
705
	void update_ngpr(unsigned gpr);
706
	void update_nstack(region_node *r, unsigned add = 0);
707
 
708
	void cf_peephole();
709
 
710
};
711
 
712
 
713
} // namespace r600_sb
714
 
715
#endif /* SB_PASS_H_ */