Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin <vadimgirlin@gmail.com>
 */
26
 
27
// Set to 1 to enable verbose bytecode-parser debug dumps via BCP_DUMP().
#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

extern "C" {
#include "r600_pipe.h"
#include "r600_shader.h"
}

// std::stack is used for the loop_stack member tracking nested loops
// (the extracted copy had lost the header name from this directive)
#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
45
 
46
namespace r600_sb {
47
 
48
int bc_parser::decode() {
49
 
50
	dw = bc->bytecode;
51
	bc_ndw = bc->ndw;
52
	max_cf = 0;
53
 
54
	dec = new bc_decoder(ctx, dw, bc_ndw);
55
 
56
	shader_target t = TARGET_UNKNOWN;
57
 
58
	if (pshader) {
59
		switch (bc->type) {
60
		case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
61
		case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break;
62
		case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
63
		default: assert(!"unknown shader target"); return -1; break;
64
		}
65
	} else {
66
		if (bc->type == TGSI_PROCESSOR_COMPUTE)
67
			t = TARGET_COMPUTE;
68
		else
69
			t = TARGET_FETCH;
70
	}
71
 
72
	sh = new shader(ctx, t, bc->debug_id);
73
	sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);
74
 
75
	int r = decode_shader();
76
 
77
	delete dec;
78
 
79
	sh->ngpr = bc->ngpr;
80
	sh->nstack = bc->nstack;
81
 
82
	return r;
83
}
84
 
85
int bc_parser::decode_shader() {
86
	int r = 0;
87
	unsigned i = 0;
88
	bool eop = false;
89
 
90
	sh->init();
91
 
92
	do {
93
		eop = false;
94
		if ((r = decode_cf(i, eop)))
95
			return r;
96
 
97
	} while (!eop || (i >> 1) <= max_cf);
98
 
99
	return 0;
100
}
101
 
102
int bc_parser::prepare() {
103
	int r = 0;
104
	if ((r = parse_decls()))
105
		return r;
106
	if ((r = prepare_ir()))
107
		return r;
108
	return 0;
109
}
110
 
111
// Translate pipe-shader declarations into sb shader state: preloaded
// inputs, indirectly addressed GPR arrays, and (for evergreen+ pixel
// shaders) the GPRs holding interpolation parameters.  Without a pipe
// shader (fetch helper / standalone compute bytecode) only R0/R1
// preloads and an optional whole-file array are declared.
int bc_parser::parse_decls() {

	if (!pshader) {
		// relative addressing seen during decode: reserve the whole
		// register file as one array so reladdr accesses stay valid
		if (gpr_reladdr)
			sh->add_gpr_array(0, bc->ngpr, 0x0F);

		// compute shaders have some values preloaded in R0, R1
		sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
		return 0;
	}

	// indirect access to anything other than the constant file means
	// GPR arrays must be declared
	if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {

		assert(pshader->num_arrays);

		if (pshader->num_arrays) {
			for (unsigned i = 0; i < pshader->num_arrays; ++i) {
				r600_shader_array &a = pshader->arrays[i];
				sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
			}
		} else {
			// fallback: treat the entire register file as one array
			sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
		}
	}

	// vertex shaders have the vertex data preloaded in R0 by the fetch shader
	if (sh->target == TARGET_VS)
		sh->add_input(0, 1, 0x0F);

	bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
			&& sh->target == TARGET_PS;

	unsigned linear = 0, persp = 0, centroid = 1;

	for (unsigned i = 0; i < pshader->ninput; ++i) {
		r600_shader_io & in = pshader->input[i];
		// on evergreen+ PS, interpolated inputs are computed in-shader,
		// so only non-interpolated inputs count as preloaded
		bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
		sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
		if (ps_interp && in.spi_sid) {
			if (in.interpolate == TGSI_INTERPOLATE_LINEAR ||
					in.interpolate == TGSI_INTERPOLATE_COLOR)
				linear = 1;
			else if (in.interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
				persp = 1;
			if (in.centroid)
				centroid = 2;
		}
	}

	if (ps_interp) {
		// mark the GPR channels holding interpolation parameters as
		// preloaded: two channels per enabled mode (linear/persp),
		// doubled when centroid variants are present — TODO confirm
		// exact layout against the hardware docs
		unsigned mask = (1 << (2 * (linear + persp) * centroid)) - 1;
		unsigned gpr = 0;

		while (mask) {
			sh->add_input(gpr, true, mask & 0x0F);
			++gpr;
			mask >>= 4;
		}
	}

	return 0;
}
173
 
174
int bc_parser::decode_cf(unsigned &i, bool &eop) {
175
 
176
	int r;
177
 
178
	cf_node *cf = sh->create_cf();
179
	sh->root->push_back(cf);
180
 
181
	unsigned id = i >> 1;
182
 
183
	cf->bc.id = id;
184
 
185
	if (cf_map.size() < id + 1)
186
		cf_map.resize(id + 1);
187
 
188
	cf_map[id] = cf;
189
 
190
	if ((r = dec->decode_cf(i, cf->bc)))
191
		return r;
192
 
193
	cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
194
 
195
	if (flags & CF_ALU) {
196
		if ((r = decode_alu_clause(cf)))
197
			return r;
198
	} else if (flags & CF_FETCH) {
199
		if ((r = decode_fetch_clause(cf)))
200
			return r;;
201
	} else if (flags & CF_EXP) {
202
		if (cf->bc.rw_rel)
203
			gpr_reladdr = true;
204
		assert(!cf->bc.rw_rel);
205
	} else if (flags & (CF_STRM | CF_RAT)) {
206
		if (cf->bc.rw_rel)
207
			gpr_reladdr = true;
208
		assert(!cf->bc.rw_rel);
209
	} else if (flags & CF_BRANCH) {
210
		if (cf->bc.addr > max_cf)
211
			max_cf = cf->bc.addr;
212
	}
213
 
214
	eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
215
			cf->bc.op == CF_OP_RET;
216
	return 0;
217
}
218
 
219
// Decode every ALU group of an ALU clause.  The clause starts at
// cf->bc.addr (in CF-instruction units, hence << 1 for dwords) and
// spans cf->bc.count + 1 slots including literal dword pairs.
int bc_parser::decode_alu_clause(cf_node* cf) {
	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
	int r;

	cf->subtype = NST_ALU_CLAUSE;

	// reset the slot table of the "current" group buffer
	cgroup = 0;
	memset(slots[0], 0, 5*sizeof(slots[0][0]));

	do {
		// fixed: the group decoder's error code was silently discarded
		if ((r = decode_alu_group(cf, i, gcnt)))
			return r;
		assert(gcnt <= cnt);
		cnt -= gcnt;
	} while (cnt);

	return 0;
}
238
 
239
// Decode one ALU instruction group (up to 5 slots plus trailing literal
// constants) starting at dword offset i, assign hardware slots, patch
// literal operand values, and append the group to the clause CF node.
// gcnt returns the number of 64-bit instruction slots consumed.
int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
	int r;
	alu_node *n;
	alu_group_node *g = sh->create_alu_group();

	// slots[] is double-buffered: cgroup flips each group so PV/PS
	// sources can later refer to the previous group's slot assignments
	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
	gcnt = 0;

	unsigned literal_mask = 0;

	do {
		n = sh->create_alu();
		g->push_back(n);

		if ((r = dec->decode_alu(i, n->bc)))
			return r;

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		gcnt++;

	} while (gcnt <= 5 && !n->bc.last);

	assert(n->bc.last);

	// fix up literal sources: the literal dwords follow the group,
	// indexed by the source channel
	// (restored the static_cast template argument lost in extraction)
	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (n->bc.dst_rel)
			gpr_reladdr = true;

		for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
			bc_alu_src &src = n->bc.src[k];
			if (src.rel)
				gpr_reladdr = true;
			if (src.sel == ALU_SRC_LITERAL) {
				literal_mask |= (1 << src.chan);
				src.value.u = dw[i + src.chan];
			}
		}
	}

	unsigned literal_ndw = 0;
	while (literal_mask) {
		g->literals.push_back(dw[i + literal_ndw]);
		literal_ndw += 1;
		literal_mask >>= 1;
	}

	// literals are encoded in pairs of dwords — round up to even
	literal_ndw = (literal_ndw + 1) & ~1u;

	i += literal_ndw;
	gcnt += literal_ndw >> 1;

	cf->push_back(g);
	return 0;
}
300
 
301
// Run IR preparation over every ALU group of an ALU clause.
// Fixes vs. the extracted copy: restored the static_cast template
// argument, and prepare_alu_group errors are now propagated instead of
// being silently dropped.
int bc_parser::prepare_alu_clause(cf_node* cf) {
	int r;

	// loop over alu groups
	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
		assert(I->subtype == NST_ALU_GROUP);
		alu_group_node *g = static_cast<alu_group_node*>(*I);
		if ((r = prepare_alu_group(cf, g)))
			return r;
	}

	return 0;
}
312
 
313
// Build IR values for one ALU group: assign slots, create destination
// values (including the special predicate / exec-mask / valid-mask and
// AR destinations), resolve every source operand (literals, PV/PS from
// the previous group, kcache, GPRs, interpolation params, inline
// constants), and finally pack multislot (vector / cayman scalar)
// instructions into an alu_packed_node.
// Only change vs. the original: the four static_cast template
// arguments lost in extraction are restored.
int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

	alu_node *n;

	// flip the double-buffered slot table; slots[!cgroup] still holds
	// the previous group, needed to resolve PV/PS sources below
	cgroup = !cgroup;
	memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

	for (node_iterator I = g->begin(), E = g->end();
			I != E; ++I) {
		n = static_cast<alu_node*>(*I);

		if (!sh->assign_slot(n, slots[cgroup])) {
			assert(!"alu slot assignment failed");
			return -1;
		}

		unsigned src_count = n->bc.op_ptr->src_count;

		if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
			n->flags |= NF_ALU_4SLOT;

		n->src.resize(src_count);

		unsigned flags = n->bc.op_ptr->flags;

		if (flags & AF_PRED) {
			// predicate ops: dst[1] = predicate, dst[2] = exec mask
			n->dst.resize(3);
			if (n->bc.update_pred)
				n->dst[1] = sh->get_special_value(SV_ALU_PRED);
			if (n->bc.update_exec_mask)
				n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

			n->flags |= NF_DONT_HOIST;

		} else if (flags & AF_KILL) {

			n->dst.resize(2);
			n->dst[1] = sh->get_special_value(SV_VALID_MASK);
			sh->set_uses_kill();

			// kills must stay put and run early
			n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
					NF_DONT_KILL | NF_SCHEDULE_EARLY;

		} else {
			n->dst.resize(1);
		}

		if (flags & AF_MOVA) {

			// MOVA writes the address register, not a GPR
			n->dst[0] = sh->get_special_value(SV_AR_INDEX);

			n->flags |= NF_DONT_HOIST;

		} else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) {
			assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

			value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
					n->bc.dst_rel);

			n->dst[0] = v;
		}

		if (n->bc.pred_sel) {
			sh->has_alu_predication = true;
			n->pred = sh->get_special_value(SV_ALU_PRED);
		}

		for (unsigned s = 0; s < src_count; ++s) {
			bc_alu_src &src = n->bc.src[s];

			if (src.sel == ALU_SRC_LITERAL) {
				n->src[s] = sh->get_const_value(src.value);
			} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
				// PV/PS refer to the previous group's slot results
				unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
						SLOT_TRANS : src.chan;

				// XXX shouldn't happen but llvm backend uses PS on cayman
				if (prev_slot == SLOT_TRANS && ctx.is_cayman())
					prev_slot = SLOT_X;

				alu_node *prev_alu = slots[pgroup][prev_slot];

				assert(prev_alu);

				if (!prev_alu->dst[0]) {
					// the producer had no dst value yet — give it a temp
					value * t = sh->create_temp_value();
					prev_alu->dst[0] = t;
				}

				value *d = prev_alu->dst[0];

				if (d->is_rel()) {
					d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
					                      prev_alu->bc.dst_chan,
					                      prev_alu->bc.dst_rel);
				}

				n->src[s] = d;
			} else if (ctx.is_kcache_sel(src.sel)) {
				// decode the kcache set/address from the source selector
				unsigned sel = src.sel, kc_addr;
				unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

				bc_kcache &kc = cf->bc.kc[kc_set];
				kc_addr = (kc.addr << 4) + (sel & 0x1F);
				n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan);
			} else if (src.sel < MAX_GPR) {
				value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

				n->src[s] = v;

			} else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
				// using slot for value channel because in fact the slot
				// determines the channel that is loaded by INTERP_LOAD_P0
				// (and maybe some others).
				// otherwise GVN will consider INTERP_LOAD_P0s with the same
				// param index as equal instructions and leave only one of them
				n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
				                                              n->bc.slot));
			} else {
				switch (src.sel) {
				case ALU_SRC_0:
					n->src[s] = sh->get_const_value(0);
					break;
				case ALU_SRC_0_5:
					n->src[s] = sh->get_const_value(0.5f);
					break;
				case ALU_SRC_1:
					n->src[s] = sh->get_const_value(1.0f);
					break;
				case ALU_SRC_1_INT:
					n->src[s] = sh->get_const_value(1);
					break;
				case ALU_SRC_M_1_INT:
					n->src[s] = sh->get_const_value(-1);
					break;
				default:
					n->src[s] = sh->get_special_ro_value(src.sel);
					break;
				}
			}
		}
	}

	// pack multislot instructions into alu_packed_node

	alu_packed_node *p = NULL;
	for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
		N = I + 1;
		alu_node *a = static_cast<alu_node*>(*I);
		unsigned sflags = a->bc.slot_flags;

		if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
			if (!p)
				p = sh->create_alu_packed();

			a->remove();
			p->push_back(a);
		}
	}

	if (p) {
		g->push_front(p);

		if (p->count() == 3 && ctx.is_cayman()) {
			// cayman's scalar instruction that can use 3 or 4 slots

			// FIXME for simplicity we'll always add 4th slot,
			// but probably we might want to always remove 4th slot and make
			// sure that regalloc won't choose 'w' component for dst

			alu_node *f = static_cast<alu_node*>(p->first);
			alu_node *a = sh->create_alu();
			a->src = f->src;
			a->dst.resize(f->dst.size());
			a->bc = f->bc;
			a->bc.slot = SLOT_W;
			p->push_back(a);
		}
	}

	return 0;
}
495
 
496
// Decode all fetch (TEX/VTX) instructions of a fetch clause and append
// them to the clause CF node; returns the decoder error code, if any.
int bc_parser::decode_fetch_clause(cf_node* cf) {
	unsigned addr = cf->bc.addr << 1;
	unsigned count = cf->bc.count + 1;

	cf->subtype = NST_TEX_CLAUSE;

	for (unsigned k = 0; k < count; ++k) {
		fetch_node *fn = sh->create_fetch();
		cf->push_back(fn);
		int r = dec->decode_fetch(addr, fn->bc);
		if (r)
			return r;
		// any relative addressing forces conservative array handling
		if (fn->bc.src_rel || fn->bc.dst_rel)
			gpr_reladdr = true;
	}
	return 0;
}
513
 
514
// Build IR source/destination values for every fetch instruction of a
// clause, carrying SET_GRADIENTS_H/V state across to gradient-using
// texture fetches.  Only change vs. the original: the static_cast
// template argument lost in extraction is restored.
int bc_parser::prepare_fetch_clause(cf_node *cf) {

	vvec grad_v, grad_h;

	for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

		fetch_node *n = static_cast<fetch_node*>(*I);
		assert(n->is_valid());

		unsigned flags = n->bc.op_ptr->flags;

		unsigned vtx = flags & FF_VTX;
		unsigned num_src = vtx ? ctx.vtx_src_num : 4;

		n->dst.resize(4);

		if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
			sh->uses_gradients = true;
		}

		if (flags & FF_SETGRAD) {

			vvec *grad = NULL;

			switch (n->bc.op) {
				case FETCH_OP_SET_GRADIENTS_V:
					grad = &grad_v;
					break;
				case FETCH_OP_SET_GRADIENTS_H:
					grad = &grad_h;
					break;
				default:
					assert(!"unexpected SET_GRAD instruction");
					return -1;
			}

			if (grad->empty())
				grad->resize(4);

			// capture the gradient sources per swizzle component
			for(unsigned s = 0; s < 4; ++s) {
				unsigned sw = n->bc.src_sel[s];
				if (sw <= SEL_W)
					(*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
					                               sw, false);
				else if (sw == SEL_0)
					(*grad)[s] = sh->get_const_value(0.0f);
				else if (sw == SEL_1)
					(*grad)[s] = sh->get_const_value(1.0f);
			}
		} else {

			if (flags & FF_USEGRAD) {
				// src layout: [0..3] coords, [4..7] grad_v, [8..11] grad_h
				n->src.resize(12);
				std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
				std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
			} else {
				n->src.resize(4);
			}

			for(int s = 0; s < 4; ++s) {
				if (n->bc.dst_sel[s] != SEL_MASK)
					n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
				// NOTE: it doesn't matter here which components of the result we
				// are using, but original n->bc.dst_sel should be taken into
				// account when building the bytecode
			}
			for(unsigned s = 0; s < num_src; ++s) {
				if (n->bc.src_sel[s] <= SEL_W)
					n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
					                              n->bc.src_sel[s], false);
			}

		}
	}

	return 0;
}
591
 
592
// Third pass over the decoded CF list: turn flat CF instructions into
// structured IR (regions/repeats/departs for loops, ifs and
// break/continue), build export / stream / RAT source vectors, and
// unroll burst exports into one CF node per element.
int bc_parser::prepare_ir() {

	for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
		cf_node *c = *I;

		if (!c)
			continue;

		unsigned flags = c->bc.op_ptr->flags;

		if (flags & CF_ALU) {
			prepare_alu_clause(c);
		} else if (flags & CF_FETCH) {
			prepare_fetch_clause(c);
		} else if (c->bc.op == CF_OP_CALL_FS) {
			sh->init_call_fs(c);
			c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
		} else if (flags & CF_LOOP_START) {
			// pushes the loop region onto loop_stack; the matching
			// LOOP_END pops it below
			prepare_loop(c);
		} else if (c->bc.op == CF_OP_JUMP) {
			prepare_if(c);
		} else if (c->bc.op == CF_OP_LOOP_END) {
			loop_stack.pop();
		} else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
			assert(!loop_stack.empty());
			// CONTINUE becomes a repeat of the innermost loop region,
			// absorbing the preceding CFs at this nesting level
			repeat_node *rep = sh->create_repeat(loop_stack.top());
			if (c->parent->first != c)
				rep->move(c->parent->first, c);
			c->replace_with(rep);
			sh->simplify_dep_rep(rep);
		} else if (c->bc.op == CF_OP_LOOP_BREAK) {
			assert(!loop_stack.empty());
			// BREAK becomes a departure from the innermost loop region
			depart_node *dep = sh->create_depart(loop_stack.top());
			if (c->parent->first != c)
				dep->move(c->parent->first, c);
			c->replace_with(dep);
			sh->simplify_dep_rep(dep);
		} else if (flags & CF_EXP) {

			// unroll burst exports

			assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

			c->bc.set_op(CF_OP_EXPORT);

			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			// cleared while unrolling; restored on the last clone below
			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {
				c->src.resize(4);

				for(int s = 0; s < 4; ++s) {
					switch (c->bc.sel[s]) {
					case SEL_0:
						c->src[s] = sh->get_const_value(0.0f);
						break;
					case SEL_1:
						c->src[s] = sh->get_const_value(1.0f);
						break;
					case SEL_MASK:
						break;
					default:
						if (c->bc.sel[s] <= SEL_W)
							c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
									c->bc.sel[s], false);
						else
							assert(!"invalid src_sel for export");
					}
				}

				if (!burst_count--)
					break;

				// clone the CF for the next element of the burst
				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;
				++cf_next->bc.array_base;

				c->insert_after(cf_next);
				c = cf_next;

			} while (1);

			// only the last unrolled export keeps the EOP bit
			c->bc.end_of_program = eop;
		} else if (flags & (CF_STRM | CF_RAT)) {

			// stream-out / RAT writes: same burst-unrolling scheme
			unsigned burst_count = c->bc.burst_count;
			unsigned eop = c->bc.end_of_program;

			c->bc.end_of_program = 0;
			c->bc.burst_count = 0;

			do {

				c->src.resize(4);

				for(int s = 0; s < 4; ++s) {
					if (c->bc.comp_mask & (1 << s))
						c->src[s] =
								sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
				}

				if ((flags & CF_RAT) && (c->bc.type & 1)) { // indexed write
					// src[4..6] hold the index GPR components
					c->src.resize(8);
					for(int s = 0; s < 3; ++s) {
						c->src[4 + s] =
							sh->get_gpr_value(true, c->bc.index_gpr, s, false);
					}

					// FIXME probably we can relax it a bit
					c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
				}

				if (!burst_count--)
					break;

				cf_node *cf_next = sh->create_cf();
				cf_next->bc = c->bc;
				++cf_next->bc.rw_gpr;

				// FIXME is it correct?
				cf_next->bc.array_base += cf_next->bc.elem_size + 1;

				c->insert_after(cf_next);
				c = cf_next;
			} while (1);

			c->bc.end_of_program = eop;

		}
	}

	assert(loop_stack.empty());
	return 0;
}
730
 
731
// Wrap a LOOP_START .. LOOP_END range into a region/repeat pair and
// push the region so nested CONTINUE/BREAK CFs can find their target.
int bc_parser::prepare_loop(cf_node* c) {

	// the loop-start address points one CF past the matching LOOP_END
	cf_node *loop_end = cf_map[c->bc.addr - 1];
	assert(loop_end->bc.op == CF_OP_LOOP_END);
	assert(c->parent == loop_end->parent);

	region_node *body_region = sh->create_region();
	repeat_node *body_repeat = sh->create_repeat(body_region);

	body_region->push_back(body_repeat);
	c->insert_before(body_region);
	body_repeat->move(c, loop_end->next);

	loop_stack.push(body_region);
	return 0;
}
747
 
748
// Convert a JUMP (and optional ELSE) CF pair into structured IR:
// a region containing two depart nodes and an if node conditioned on
// the exec mask.
int bc_parser::prepare_if(cf_node* c) {
	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

	BCP_DUMP(
		sblog << "parsing JUMP @" << c->bc.id;
		sblog << "\n";
	);

	if (end->bc.op == CF_OP_ELSE) {
		BCP_DUMP(
			sblog << "  found ELSE : ";
			dump::dump_op(end);
			sblog << "\n";
		);

		c_else = end;
		// the ELSE's own target is the end of the whole if/else construct
		end = cf_map[c_else->bc.addr];
	} else {
		BCP_DUMP(
			sblog << "  no else\n";
		);

		c_else = end;
	}

	// targets that are not at the same nesting level are dropped here;
	// NOTE(review): presumably they are handled by an enclosing
	// construct — confirm against the structuring passes
	if (c_else->parent != c->parent)
		c_else = NULL;

	if (end->parent != c->parent)
		end = NULL;

	region_node *reg = sh->create_region();

	depart_node *dep2 = sh->create_depart(reg);
	depart_node *dep = sh->create_depart(reg);
	if_node *n_if = sh->create_if();

	c->insert_before(reg);

	// dep receives the else-range CFs, dep2 the if-range CFs
	if (c_else != end)
		dep->move(c_else, end);
	dep2->move(c, end);

	reg->push_back(dep);
	dep->push_front(n_if);
	n_if->push_back(dep2);

	n_if->cond = sh->get_special_value(SV_EXEC_MASK);

	return 0;
}
799
 
800
 
801
} // namespace r600_sb