Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5563 serge 1
/*
2
 * Copyright 2013 Vadim Girlin 
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * on the rights to use, copy, modify, merge, publish, distribute, sub
8
 * license, and/or sell copies of the Software, and to permit persons to whom
9
 * the Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *      Vadim Girlin
25
 */
26
 
27
// Set FBC_DEBUG to a non-zero value to compile in the finalizer's debug dumps.
#define FBC_DEBUG 0

// FBC_DUMP(...) wraps debug-only statements: they are emitted inside a
// do { } while (0) block when FBC_DEBUG is non-zero and vanish otherwise.
// The macro is variadic so that wrapped statements may contain top-level
// commas (e.g. multi-argument calls or multi-variable declarations), which
// a single-parameter macro would mis-parse as extra macro arguments.
#if FBC_DEBUG
#define FBC_DUMP(...) do { __VA_ARGS__ } while (0)
#else
#define FBC_DUMP(...)
#endif
34
 
35
#include "sb_bc.h"
36
#include "sb_shader.h"
37
#include "sb_pass.h"
38
 
39
namespace r600_sb {
40
 
41
int bc_finalizer::run() {
42
 
43
	regions_vec &rv = sh.get_regions();
44
 
45
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
46
			++I) {
47
		region_node *r = *I;
48
 
49
		assert(r);
50
 
51
		bool loop = r->is_loop();
52
 
53
		if (loop)
54
			finalize_loop(r);
55
		else
56
			finalize_if(r);
57
 
58
		r->expand();
59
	}
60
 
61
	run_on(sh.root);
62
 
63
	cf_peephole();
64
 
65
	// workaround for some problems on r6xx/7xx
66
	// add ALU NOP to each vertex shader
67
	if (!ctx.is_egcm() && sh.target == TARGET_VS) {
68
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
69
 
70
		alu_group_node *g = sh.create_alu_group();
71
 
72
		alu_node *a = sh.create_alu();
73
		a->bc.set_op(ALU_OP0_NOP);
74
		a->bc.last = 1;
75
 
76
		g->push_back(a);
77
		c->push_back(g);
78
 
79
		sh.root->push_back(c);
80
 
81
		c = sh.create_cf(CF_OP_NOP);
82
		sh.root->push_back(c);
83
 
84
		last_cf = c;
85
	}
86
 
87
	if (last_cf->bc.op_ptr->flags & CF_ALU) {
88
		last_cf = sh.create_cf(CF_OP_NOP);
89
		sh.root->push_back(last_cf);
90
	}
91
 
92
	if (ctx.is_cayman())
93
		last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
94
	else
95
		last_cf->bc.end_of_program = 1;
96
 
97
	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
98
		cf_node *le = last_export[t];
99
		if (le)
100
			le->bc.set_op(CF_OP_EXPORT_DONE);
101
	}
102
 
103
	sh.ngpr = ngpr;
104
	sh.nstack = nstack;
105
	return 0;
106
}
107
 
108
// Lowers a loop region to CF instructions.
//
// The region is bracketed by LOOP_START_DX10 / LOOP_END, each of which jumps
// after the other. Every depart node of the region gets a LOOP_BREAK and
// every repeat node (except a repeat that is the first child of the region
// itself) gets a LOOP_CONTINUE; both target LOOP_END. Depart and repeat
// nodes are expanded as they are processed.
void bc_finalizer::finalize_loop(region_node* r) {

	cf_node *start_cf = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *end_cf = sh.create_cf(CF_OP_LOOP_END);

	start_cf->jump_after(end_cf);
	end_cf->jump_after(start_cf);

	depart_vec::iterator dep_it = r->departs.begin();
	depart_vec::iterator dep_end = r->departs.end();

	for (; dep_it != dep_end; ++dep_it) {
		depart_node *depart = *dep_it;

		cf_node *brk = sh.create_cf(CF_OP_LOOP_BREAK);
		brk->jump(end_cf);

		depart->push_back(brk);
		depart->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	repeat_vec::iterator rep_it = r->repeats.begin();
	repeat_vec::iterator rep_end = r->repeats.end();

	for (; rep_it != rep_end; ++rep_it) {
		repeat_node *repeat = *rep_it;

		// no LOOP_CONTINUE for a repeat that sits first in the region itself
		if (repeat->parent != r || repeat->prev != NULL) {
			cf_node *cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			cont->jump(end_cf);
			repeat->push_back(cont);
		}

		repeat->expand();
	}

	r->push_front(start_cf);
	r->push_back(end_cf);
}
140
 
141
// Lowers a non-loop region to JUMP / ELSE / POP control flow instructions.
//
// The stack size estimate is updated for the region first. If the first
// child of the region's leading depart/repeat node is present (it must be an
// if-node), the region is bracketed by a JUMP at the front and a POP at the
// back, and an ELSE is inserted right after the if-node when that node has a
// following sibling. The if-node is then expanded. Finally all depart nodes
// of the region are expanded and the depart list is cleared; the region is
// asserted to have no repeats.
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		// "jump_after" targets are resolved to the node following the target
		// by cf_peephole(), so the POP continues with whatever follows it
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		// anything following the if-node is treated as the else part
		bool has_else = n_if->next;

		if (has_else) {
			// JUMP -> ELSE, ELSE -> after POP
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			// no else part: JUMP goes straight past the POP
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}
207
 
208
// Walks the children of container c and finalizes each node by kind.
//
// ALU groups go to finalize_alu_group() and are not descended into. Fetch
// and CF instructions go to finalize_fetch() / finalize_cf(); ALU and fetch
// clauses need no work of their own. Any other node kind trips an assert.
// Every non-ALU-group child that is a container is then processed
// recursively.
void bc_finalizer::run_on(container_node* c) {

	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n));
		} else {
			if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			} else if (n->is_alu_clause()) {
				// nothing to finalize on the clause itself

			} else if (n->is_fetch_clause()) {
				// nothing to finalize on the clause itself

			} else {
				assert(!"unexpected node");
			}

			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
	}
}
233
 
234
// Fills in the bytecode destination fields of every ALU instruction in
// group g, finalizes its sources via finalize_alu_src(), and marks the final
// instruction of the group with bc.last = 1.
//
// An empty group is left untouched.
void bc_finalizer::finalize_alu_group(alu_group_node* g) {

	alu_node *last = NULL;

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;

		value *d = n->dst.empty() ? NULL : n->dst[0];

		// a special-register destination is only expected for MOVA-class ops
		// and is not encoded as a GPR write
		if (d && d->is_special_reg()) {
			assert(n->bc.op_ptr->flags & AF_MOVA);
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			// destination channel must match the slot, except in the trans slot
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		n->bc.dst_gpr = fdst.sel();
		// without a destination the channel is derived from the slot
		// (0 for the trans slot and above)
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			// relative write with a non-constant index: account for the
			// whole array in the GPR count
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			// for predicate ops dst[1] / dst[2] select the predicate and
			// exec mask updates
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		finalize_alu_src(g, n);

		last = n;
	}

	// guard against an empty group, where there is no instruction to mark
	if (last)
		last->bc.last = 1;
}
286
 
287
// Encodes the source operands of ALU instruction a (a member of group g)
// into a->bc.src[], according to the kind of each source value:
//   - registers and temporaries: final GPR select / channel, with the GPR
//     count updated (for relative registers with a non-constant index the
//     whole array is accounted for);
//   - constants and undefs: one of the inline constant selects, or a
//     literal whose channel is obtained from the group;
//   - kcache values: translated through translate_kcache();
//   - params and special constants: their select is used directly.
// The select of every source slot beyond the instruction's sources (up to
// three slots) is cleared to 0.
void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
	vvec &sv = a->src;

	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				// non-constant index: the whole array must be allocated
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			// prefer the inline constants, fall back to a literal slot
			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			// the kcache lock set lives on the ALU clause that owns the group
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
	}

	// clear the select of the unused source slots
	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
}
379
 
380
// Emits the two SET_GRADIENTS fetch instructions (V, then H) in front of
// fetch instruction f.
//
// f must have exactly 12 sources. Sources 4..7 feed SET_GRADIENTS_V and
// sources 8..11 feed SET_GRADIENTS_H. For each of them the four source
// selects are built from the operand kinds (missing/undef -> masked,
// constant 0 / 1.0f -> SEL_0 / SEL_1, GPR -> its channel); all GPR operands
// of one instruction must live in the same register. Any other operand is
// reported to sblog and aborts the process.
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12);

	unsigned grad_ops[2] = { FETCH_OP_SET_GRADIENTS_V,
			FETCH_OP_SET_GRADIENTS_H };

	for (unsigned i = 0; i < 2; ++i) {
		fetch_node *grad = sh.create_fetch();
		grad->bc.set_op(grad_ops[i]);

		// FIXME extract this loop into a separate method and reuse it

		// first operand of this gradient: 4 for V, 8 for H
		unsigned first_arg = 4 * (i + 1);
		int src_reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			grad->bc.dst_sel[chan] = SEL_MASK;

			value *v = f->src[first_arg + chan];
			unsigned sel = SEL_MASK;

			if (v && !v->is_undef()) {

				if (v->is_const()) {
					literal l = v->literal_value;

					if (l == literal(0)) {
						sel = SEL_0;
					} else if (l == literal(1.0f)) {
						sel = SEL_1;
					} else {
						sblog << "invalid fetch constant operand  " << chan << " ";
						dump::dump_op(f);
						sblog << "\n";
						abort();
					}

				} else if (v->is_any_gpr()) {
					unsigned vreg = v->gpr.sel();
					unsigned vchan = v->gpr.chan();

					// all GPR operands have to come from one register
					if (src_reg == -1) {
						src_reg = vreg;
					} else if ((unsigned)src_reg != vreg) {
						sblog << "invalid fetch source operand  " << chan << " ";
						dump::dump_op(f);
						sblog << "\n";
						abort();
					}

					sel = vchan;

				} else {
					sblog << "invalid fetch source operand  " << chan << " ";
					dump::dump_op(f);
					sblog << "\n";
					abort();
				}
			}

			grad->bc.src_sel[chan] = sel;
		}

		if (src_reg >= 0) {
			update_ngpr(src_reg);
			grad->bc.src_gpr = src_reg;
		} else {
			grad->bc.src_gpr = 0;
		}

		f->insert_before(grad);
	}

}
454
 
455
// Finalizes the source and destination encoding of fetch instruction f.
//
// Sources: one operand for FF_VTX ops, four otherwise (for FF_USEGRAD ops
// the SET_GRADIENTS instructions are emitted first). Operands whose current
// select is above SEL_W are skipped; the rest are mapped to a select
// (undef -> masked, constant 0 / 1.0f -> SEL_0 / SEL_1, GPR -> channel) and
// all GPR operands must share one register, stored in bc.src_gpr.
//
// Destinations: every unmasked channel with a value must be a GPR, all in
// the same register; the destination swizzle is rebuilt so that it is
// indexed by the register channel and the register is stored in bc.dst_gpr.
//
// Invalid operands are reported to sblog and abort the process.
void bc_finalizer::finalize_fetch(fetch_node* f) {

	unsigned flags = f->bc.op_ptr->flags;

	unsigned nsrc = 4;

	if (flags & FF_VTX)
		nsrc = 1;
	else if (flags & FF_USEGRAD)
		emit_set_grad(f);

	// src

	int src_reg = -1;

	for (unsigned chan = 0; chan < nsrc; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		// only selects up to SEL_W refer to an operand
		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;

		} else if (v->is_const()) {
			literal l = v->literal_value;

			if (l == literal(0)) {
				sel = SEL_0;
			} else if (l == literal(1.0f)) {
				sel = SEL_1;
			} else {
				sblog << "invalid fetch constant operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			// all GPR sources have to come from one register
			if (src_reg == -1) {
				src_reg = vreg;
			} else if ((unsigned)src_reg != vreg) {
				sblog << "invalid fetch source operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (src_reg >= 0) {
		update_ngpr(src_reg);
		f->bc.src_gpr = src_reg;
	} else {
		f->bc.src_gpr = 0;
	}

	// dst

	int dst_reg = -1;

	unsigned swizzle[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];

		if (!v)
			continue;

		if (!v->is_any_gpr()) {
			sblog << "invalid fetch dst operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		unsigned vreg = v->gpr.sel();
		unsigned vchan = v->gpr.chan();

		// all destinations have to land in one register
		if (dst_reg == -1) {
			dst_reg = vreg;
		} else if ((unsigned)dst_reg != vreg) {
			sblog << "invalid fetch dst operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		// the swizzle is indexed by the channel of the allocated register
		swizzle[vchan] = sel;
	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = swizzle[i];

	assert(dst_reg >= 0);

	if (dst_reg >= 0) {
		update_ngpr(dst_reg);
		f->bc.dst_gpr = dst_reg;
	} else {
		f->bc.dst_gpr = 0;
	}
}
576
 
577
// Finalizes CF instruction c and records it as the last CF seen.
//
//   - CF_CALL ops bump the stack size estimate (by 1 on Cayman, 2 otherwise).
//   - CF_EXP ops are reset to CF_OP_EXPORT, remembered as the last export of
//     their type, and get their source selects and rw_gpr encoded.
//   - CF_MEM ops get rw_gpr and comp_mask from sources 0..3; each source must
//     be a GPR sitting in the channel equal to its index, all in one
//     register. For CF_RAT ops with an odd bc.type, sources 4..7 are checked
//     the same way and provide index_gpr.
//   - other ops need nothing further.
//
// Invalid operands are reported to sblog and abort the process.
void bc_finalizer::finalize_cf(cf_node* c) {

	// note: the flags are sampled before the opcode may be changed below
	unsigned flags = c->bc.op_ptr->flags;

	if (flags & CF_CALL)
		update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2);

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int src_reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			// only selects up to SEL_W refer to an operand
			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;

			} else if (v->is_const()) {
				literal l = v->literal_value;

				if (l == literal(0)) {
					sel = SEL_0;
				} else if (l == literal(1.0f)) {
					sel = SEL_1;
				} else {
					sblog << "invalid export constant operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				// all GPR sources have to come from one register
				if (src_reg == -1) {
					src_reg = vreg;
				} else if ((unsigned)src_reg != vreg) {
					sblog << "invalid export source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (src_reg >= 0) {
			update_ngpr(src_reg);
			c->bc.rw_gpr = src_reg;
		} else {
			c->bc.rw_gpr = 0;
		}

		return;
	}

	if (!(flags & CF_MEM)) {

#if 0
		if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) {
			c->bc.valid_pixel_mode = 1;
		}
#endif

		return;
	}

	// CF_MEM: data operands

	int data_reg = -1;
	unsigned comp_mask = 0;

	for (unsigned chan = 0; chan < 4; ++chan) {
		value *v = c->src[chan];

		if (!v || v->is_undef())
			continue;

		// each operand has to be a GPR in the channel matching its index
		bool in_place = v->is_any_gpr() && v->gpr.chan() == chan;

		if (!in_place) {
			sblog << "invalid source operand  " << chan << " ";
			dump::dump_op(c);
			sblog << "\n";
			abort();
		}

		unsigned vreg = v->gpr.sel();

		if (data_reg == -1) {
			data_reg = vreg;
		} else if ((unsigned)data_reg != vreg) {
			sblog << "invalid source operand  " << chan << " ";
			dump::dump_op(c);
			sblog << "\n";
			abort();
		}

		comp_mask |= (1 << chan);
	}

	assert(data_reg >= 0 && comp_mask);

	if (data_reg >= 0)
		update_ngpr(data_reg);

	c->bc.rw_gpr = data_reg >= 0 ? data_reg : 0;
	c->bc.comp_mask = comp_mask;

	if (!((flags & CF_RAT) && (c->bc.type & 1)))
		return;

	// CF_RAT with odd type: index operands are taken from sources 4..7

	int index_reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {
		value *v = c->src[4 + chan];

		if (!v || v->is_undef())
			continue;

		bool in_place = v->is_any_gpr() && v->gpr.chan() == chan;

		if (!in_place) {
			sblog << "invalid source operand  " << chan << " ";
			dump::dump_op(c);
			sblog << "\n";
			abort();
		}

		unsigned vreg = v->gpr.sel();

		if (index_reg == -1) {
			index_reg = vreg;
		} else if ((unsigned)index_reg != vreg) {
			sblog << "invalid source operand  " << chan << " ";
			dump::dump_op(c);
			sblog << "\n";
			abort();
		}
	}

	assert(index_reg >= 0);

	if (index_reg >= 0)
		update_ngpr(index_reg);

	c->bc.index_gpr = index_reg >= 0 ? index_reg : 0;
}
731
 
732
// Translates kcache value v into the ALU source select / channel it has
// under the kcache locks of ALU clause alu.
//
// The value's select holds the bank in the bits above bit 11 and the
// constant index in the low 12 bits; a kcache line covers 16 constants.
// The clause's lock sets are scanned in order until an unused one
// (KC_LOCK_NONE) is met. A set matches when its bank equals the value's bank
// and its address is the value's line, or the line before it for KC_LOCK_2.
// The result is the base select of the matching set plus the offset of the
// constant from the start of the locked area.
//
// Asserts and returns 0 when no lock set covers the value.
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	static const unsigned kc_base[] = {128, 160, 256, 288};

	unsigned raw_sel = v->select.sel();
	unsigned chan = v->select.chan();

	unsigned bank = raw_sel >> 12;
	unsigned index = raw_sel & 4095;
	unsigned line = index >> 4;

	for (unsigned set = 0; set < 4; ++set) {
		bc_kcache &kc = alu->bc.kc[set];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (!(kc.bank == bank))
			continue;

		bool first_line = (kc.addr == line);
		bool second_line = (kc.mode == KC_LOCK_2 && kc.addr + 1 == line);

		if (first_line || second_line) {
			unsigned translated = kc_base[set] + (index - (kc.addr << 4));
			return sel_chan(translated, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}
760
 
761
void bc_finalizer::update_ngpr(unsigned gpr) {
762
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
763
		ngpr = gpr + 1;
764
}
765
 
766
// Raises the stack size estimate for code located in region r.
//
// Walks from r up through its parent regions counting loops and non-loop
// regions. Each loop costs ctx.stack_entry_size elements, each other region
// one element, plus `add` extra elements and a chip-dependent reserve
// (1 on Evergreen, 3 on Cayman, 2 otherwise). Four elements make up one
// stack entry; nstack only ever grows.
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;

	for (region_node *cur = r; cur; cur = cur->get_parent_region()) {
		if (cur->is_loop())
			++loops;
		else
			++ifs;
	}

	unsigned elements = (loops * ctx.stack_entry_size) + ifs + add;

	// FIXME calculate more precisely
	if (ctx.is_evergreen()) {
		elements += 1;
	} else if (ctx.is_cayman()) {
		elements += 3;
	} else {
		elements += 2;
	}

	// round up to whole entries of four elements
	unsigned entries = (elements + 3) / 4;

	if (entries > nstack)
		nstack = entries;
}
795
 
796
void bc_finalizer::cf_peephole() {
797
 
798
	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
799
			I = N) {
800
		N = I; ++N;
801
 
802
		cf_node *c = static_cast(*I);
803
 
804
		if (c->jump_after_target) {
805
			c->jump_target = static_cast(c->jump_target->next);
806
			c->jump_after_target = false;
807
		}
808
 
809
		if (c->is_cf_op(CF_OP_POP)) {
810
			node *p = c->prev;
811
			if (p->is_alu_clause()) {
812
				cf_node *a = static_cast(p);
813
 
814
				if (a->bc.op == CF_OP_ALU) {
815
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
816
					c->remove();
817
				}
818
			}
819
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
820
			// if JUMP is immediately followed by its jump target,
821
			// then JUMP is useless and we can eliminate it
822
			c->remove();
823
		}
824
	}
825
}
826
 
827
} // namespace r600_sb