Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2013 Vadim Girlin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Vadim Girlin |
||
25 | */ |
||
26 | |||
// Compile-time switch for the parser's debug output. It defaults to off and
// may be overridden from the build command line (e.g. -DBCP_DEBUG=1).
#ifndef BCP_DEBUG
#define BCP_DEBUG 0
#endif

// BCP_DUMP(q) runs the statements `q` only when BCP_DEBUG is non-zero;
// otherwise it expands to nothing, so `q` is not compiled at all.
#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif
||
34 | |||
extern "C" {
#include "r600_pipe.h"
#include "r600_shader.h"
}

#include <algorithm>
#include <cassert>
#include <cstring>
#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
||
45 | |||
46 | namespace r600_sb { |
||
47 | |||
48 | int bc_parser::decode() { |
||
49 | |||
50 | dw = bc->bytecode; |
||
51 | bc_ndw = bc->ndw; |
||
52 | max_cf = 0; |
||
53 | |||
54 | dec = new bc_decoder(ctx, dw, bc_ndw); |
||
55 | |||
56 | shader_target t = TARGET_UNKNOWN; |
||
57 | |||
58 | if (pshader) { |
||
59 | switch (bc->type) { |
||
60 | case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break; |
||
61 | case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break; |
||
62 | case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break; |
||
63 | default: assert(!"unknown shader target"); return -1; break; |
||
64 | } |
||
65 | } else { |
||
66 | if (bc->type == TGSI_PROCESSOR_COMPUTE) |
||
67 | t = TARGET_COMPUTE; |
||
68 | else |
||
69 | t = TARGET_FETCH; |
||
70 | } |
||
71 | |||
72 | sh = new shader(ctx, t, bc->debug_id); |
||
73 | sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE); |
||
74 | |||
75 | int r = decode_shader(); |
||
76 | |||
77 | delete dec; |
||
78 | |||
79 | sh->ngpr = bc->ngpr; |
||
80 | sh->nstack = bc->nstack; |
||
81 | |||
82 | return r; |
||
83 | } |
||
84 | |||
85 | int bc_parser::decode_shader() { |
||
86 | int r = 0; |
||
87 | unsigned i = 0; |
||
88 | bool eop = false; |
||
89 | |||
90 | sh->init(); |
||
91 | |||
92 | do { |
||
93 | eop = false; |
||
94 | if ((r = decode_cf(i, eop))) |
||
95 | return r; |
||
96 | |||
97 | } while (!eop || (i >> 1) <= max_cf); |
||
98 | |||
99 | return 0; |
||
100 | } |
||
101 | |||
102 | int bc_parser::prepare() { |
||
103 | int r = 0; |
||
104 | if ((r = parse_decls())) |
||
105 | return r; |
||
106 | if ((r = prepare_ir())) |
||
107 | return r; |
||
108 | return 0; |
||
109 | } |
||
110 | |||
111 | int bc_parser::parse_decls() { |
||
112 | |||
113 | if (!pshader) { |
||
114 | if (gpr_reladdr) |
||
115 | sh->add_gpr_array(0, bc->ngpr, 0x0F); |
||
116 | |||
117 | // compute shaders have some values preloaded in R0, R1 |
||
118 | sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */); |
||
119 | sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */); |
||
120 | return 0; |
||
121 | } |
||
122 | |||
123 | if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) { |
||
124 | |||
125 | assert(pshader->num_arrays); |
||
126 | |||
127 | if (pshader->num_arrays) { |
||
128 | for (unsigned i = 0; i < pshader->num_arrays; ++i) { |
||
129 | r600_shader_array &a = pshader->arrays[i]; |
||
130 | sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); |
||
131 | } |
||
132 | } else { |
||
133 | sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F); |
||
134 | } |
||
135 | } |
||
136 | |||
137 | if (sh->target == TARGET_VS) |
||
138 | sh->add_input(0, 1, 0x0F); |
||
139 | |||
140 | bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN |
||
141 | && sh->target == TARGET_PS; |
||
142 | |||
143 | unsigned linear = 0, persp = 0, centroid = 1; |
||
144 | |||
145 | for (unsigned i = 0; i < pshader->ninput; ++i) { |
||
146 | r600_shader_io & in = pshader->input[i]; |
||
147 | bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid); |
||
148 | sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F); |
||
149 | if (ps_interp && in.spi_sid) { |
||
150 | if (in.interpolate == TGSI_INTERPOLATE_LINEAR || |
||
151 | in.interpolate == TGSI_INTERPOLATE_COLOR) |
||
152 | linear = 1; |
||
153 | else if (in.interpolate == TGSI_INTERPOLATE_PERSPECTIVE) |
||
154 | persp = 1; |
||
155 | if (in.centroid) |
||
156 | centroid = 2; |
||
157 | } |
||
158 | } |
||
159 | |||
160 | if (ps_interp) { |
||
161 | unsigned mask = (1 << (2 * (linear + persp) * centroid)) - 1; |
||
162 | unsigned gpr = 0; |
||
163 | |||
164 | while (mask) { |
||
165 | sh->add_input(gpr, true, mask & 0x0F); |
||
166 | ++gpr; |
||
167 | mask >>= 4; |
||
168 | } |
||
169 | } |
||
170 | |||
171 | return 0; |
||
172 | } |
||
173 | |||
174 | int bc_parser::decode_cf(unsigned &i, bool &eop) { |
||
175 | |||
176 | int r; |
||
177 | |||
178 | cf_node *cf = sh->create_cf(); |
||
179 | sh->root->push_back(cf); |
||
180 | |||
181 | unsigned id = i >> 1; |
||
182 | |||
183 | cf->bc.id = id; |
||
184 | |||
185 | if (cf_map.size() < id + 1) |
||
186 | cf_map.resize(id + 1); |
||
187 | |||
188 | cf_map[id] = cf; |
||
189 | |||
190 | if ((r = dec->decode_cf(i, cf->bc))) |
||
191 | return r; |
||
192 | |||
193 | cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags; |
||
194 | |||
195 | if (flags & CF_ALU) { |
||
196 | if ((r = decode_alu_clause(cf))) |
||
197 | return r; |
||
198 | } else if (flags & CF_FETCH) { |
||
199 | if ((r = decode_fetch_clause(cf))) |
||
200 | return r;; |
||
201 | } else if (flags & CF_EXP) { |
||
202 | if (cf->bc.rw_rel) |
||
203 | gpr_reladdr = true; |
||
204 | assert(!cf->bc.rw_rel); |
||
205 | } else if (flags & (CF_STRM | CF_RAT)) { |
||
206 | if (cf->bc.rw_rel) |
||
207 | gpr_reladdr = true; |
||
208 | assert(!cf->bc.rw_rel); |
||
209 | } else if (flags & CF_BRANCH) { |
||
210 | if (cf->bc.addr > max_cf) |
||
211 | max_cf = cf->bc.addr; |
||
212 | } |
||
213 | |||
214 | eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END || |
||
215 | cf->bc.op == CF_OP_RET; |
||
216 | return 0; |
||
217 | } |
||
218 | |||
219 | int bc_parser::decode_alu_clause(cf_node* cf) { |
||
220 | unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt; |
||
221 | |||
222 | cf->subtype = NST_ALU_CLAUSE; |
||
223 | |||
224 | cgroup = 0; |
||
225 | memset(slots[0], 0, 5*sizeof(slots[0][0])); |
||
226 | |||
227 | unsigned ng = 0; |
||
228 | |||
229 | do { |
||
230 | decode_alu_group(cf, i, gcnt); |
||
231 | assert(gcnt <= cnt); |
||
232 | cnt -= gcnt; |
||
233 | ng++; |
||
234 | } while (cnt); |
||
235 | |||
236 | return 0; |
||
237 | } |
||
238 | |||
239 | int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) { |
||
240 | int r; |
||
241 | alu_node *n; |
||
242 | alu_group_node *g = sh->create_alu_group(); |
||
243 | |||
244 | cgroup = !cgroup; |
||
245 | memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); |
||
246 | gcnt = 0; |
||
247 | |||
248 | unsigned literal_mask = 0; |
||
249 | |||
250 | do { |
||
251 | n = sh->create_alu(); |
||
252 | g->push_back(n); |
||
253 | |||
254 | if ((r = dec->decode_alu(i, n->bc))) |
||
255 | return r; |
||
256 | |||
257 | if (!sh->assign_slot(n, slots[cgroup])) { |
||
258 | assert(!"alu slot assignment failed"); |
||
259 | return -1; |
||
260 | } |
||
261 | |||
262 | gcnt++; |
||
263 | |||
264 | } while (gcnt <= 5 && !n->bc.last); |
||
265 | |||
266 | assert(n->bc.last); |
||
267 | |||
268 | for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { |
||
269 | n = static_cast |
||
270 | |||
271 | if (n->bc.dst_rel) |
||
272 | gpr_reladdr = true; |
||
273 | |||
274 | for (int k = 0; k < n->bc.op_ptr->src_count; ++k) { |
||
275 | bc_alu_src &src = n->bc.src[k]; |
||
276 | if (src.rel) |
||
277 | gpr_reladdr = true; |
||
278 | if (src.sel == ALU_SRC_LITERAL) { |
||
279 | literal_mask |= (1 << src.chan); |
||
280 | src.value.u = dw[i + src.chan]; |
||
281 | } |
||
282 | } |
||
283 | } |
||
284 | |||
285 | unsigned literal_ndw = 0; |
||
286 | while (literal_mask) { |
||
287 | g->literals.push_back(dw[i + literal_ndw]); |
||
288 | literal_ndw += 1; |
||
289 | literal_mask >>= 1; |
||
290 | } |
||
291 | |||
292 | literal_ndw = (literal_ndw + 1) & ~1u; |
||
293 | |||
294 | i += literal_ndw; |
||
295 | gcnt += literal_ndw >> 1; |
||
296 | |||
297 | cf->push_back(g); |
||
298 | return 0; |
||
299 | } |
||
300 | |||
301 | int bc_parser::prepare_alu_clause(cf_node* cf) { |
||
302 | |||
303 | // loop over alu groups |
||
304 | for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { |
||
305 | assert(I->subtype == NST_ALU_GROUP); |
||
306 | alu_group_node *g = static_cast |
||
307 | prepare_alu_group(cf, g); |
||
308 | } |
||
309 | |||
310 | return 0; |
||
311 | } |
||
312 | |||
313 | int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { |
||
314 | |||
315 | alu_node *n; |
||
316 | |||
317 | cgroup = !cgroup; |
||
318 | memset(slots[cgroup], 0, 5*sizeof(slots[0][0])); |
||
319 | |||
320 | for (node_iterator I = g->begin(), E = g->end(); |
||
321 | I != E; ++I) { |
||
322 | n = static_cast |
||
323 | |||
324 | if (!sh->assign_slot(n, slots[cgroup])) { |
||
325 | assert(!"alu slot assignment failed"); |
||
326 | return -1; |
||
327 | } |
||
328 | |||
329 | unsigned src_count = n->bc.op_ptr->src_count; |
||
330 | |||
331 | if (ctx.alu_slots(n->bc.op) & AF_4SLOT) |
||
332 | n->flags |= NF_ALU_4SLOT; |
||
333 | |||
334 | n->src.resize(src_count); |
||
335 | |||
336 | unsigned flags = n->bc.op_ptr->flags; |
||
337 | |||
338 | if (flags & AF_PRED) { |
||
339 | n->dst.resize(3); |
||
340 | if (n->bc.update_pred) |
||
341 | n->dst[1] = sh->get_special_value(SV_ALU_PRED); |
||
342 | if (n->bc.update_exec_mask) |
||
343 | n->dst[2] = sh->get_special_value(SV_EXEC_MASK); |
||
344 | |||
345 | n->flags |= NF_DONT_HOIST; |
||
346 | |||
347 | } else if (flags & AF_KILL) { |
||
348 | |||
349 | n->dst.resize(2); |
||
350 | n->dst[1] = sh->get_special_value(SV_VALID_MASK); |
||
351 | sh->set_uses_kill(); |
||
352 | |||
353 | n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | |
||
354 | NF_DONT_KILL | NF_SCHEDULE_EARLY; |
||
355 | |||
356 | } else { |
||
357 | n->dst.resize(1); |
||
358 | } |
||
359 | |||
360 | if (flags & AF_MOVA) { |
||
361 | |||
362 | n->dst[0] = sh->get_special_value(SV_AR_INDEX); |
||
363 | |||
364 | n->flags |= NF_DONT_HOIST; |
||
365 | |||
366 | } else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) { |
||
367 | assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X); |
||
368 | |||
369 | value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan, |
||
370 | n->bc.dst_rel); |
||
371 | |||
372 | n->dst[0] = v; |
||
373 | } |
||
374 | |||
375 | if (n->bc.pred_sel) { |
||
376 | sh->has_alu_predication = true; |
||
377 | n->pred = sh->get_special_value(SV_ALU_PRED); |
||
378 | } |
||
379 | |||
380 | for (unsigned s = 0; s < src_count; ++s) { |
||
381 | bc_alu_src &src = n->bc.src[s]; |
||
382 | |||
383 | if (src.sel == ALU_SRC_LITERAL) { |
||
384 | n->src[s] = sh->get_const_value(src.value); |
||
385 | } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) { |
||
386 | unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ? |
||
387 | SLOT_TRANS : src.chan; |
||
388 | |||
389 | // XXX shouldn't happen but llvm backend uses PS on cayman |
||
390 | if (prev_slot == SLOT_TRANS && ctx.is_cayman()) |
||
391 | prev_slot = SLOT_X; |
||
392 | |||
393 | alu_node *prev_alu = slots[pgroup][prev_slot]; |
||
394 | |||
395 | assert(prev_alu); |
||
396 | |||
397 | if (!prev_alu->dst[0]) { |
||
398 | value * t = sh->create_temp_value(); |
||
399 | prev_alu->dst[0] = t; |
||
400 | } |
||
401 | |||
402 | value *d = prev_alu->dst[0]; |
||
403 | |||
404 | if (d->is_rel()) { |
||
405 | d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr, |
||
406 | prev_alu->bc.dst_chan, |
||
407 | prev_alu->bc.dst_rel); |
||
408 | } |
||
409 | |||
410 | n->src[s] = d; |
||
411 | } else if (ctx.is_kcache_sel(src.sel)) { |
||
412 | unsigned sel = src.sel, kc_addr; |
||
413 | unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1); |
||
414 | |||
415 | bc_kcache &kc = cf->bc.kc[kc_set]; |
||
416 | kc_addr = (kc.addr << 4) + (sel & 0x1F); |
||
417 | n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan); |
||
418 | } else if (src.sel < MAX_GPR) { |
||
419 | value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel); |
||
420 | |||
421 | n->src[s] = v; |
||
422 | |||
423 | } else if (src.sel >= ALU_SRC_PARAM_OFFSET) { |
||
424 | // using slot for value channel because in fact the slot |
||
425 | // determines the channel that is loaded by INTERP_LOAD_P0 |
||
426 | // (and maybe some others). |
||
427 | // otherwise GVN will consider INTERP_LOAD_P0s with the same |
||
428 | // param index as equal instructions and leave only one of them |
||
429 | n->src[s] = sh->get_special_ro_value(sel_chan(src.sel, |
||
430 | n->bc.slot)); |
||
431 | } else { |
||
432 | switch (src.sel) { |
||
433 | case ALU_SRC_0: |
||
434 | n->src[s] = sh->get_const_value(0); |
||
435 | break; |
||
436 | case ALU_SRC_0_5: |
||
437 | n->src[s] = sh->get_const_value(0.5f); |
||
438 | break; |
||
439 | case ALU_SRC_1: |
||
440 | n->src[s] = sh->get_const_value(1.0f); |
||
441 | break; |
||
442 | case ALU_SRC_1_INT: |
||
443 | n->src[s] = sh->get_const_value(1); |
||
444 | break; |
||
445 | case ALU_SRC_M_1_INT: |
||
446 | n->src[s] = sh->get_const_value(-1); |
||
447 | break; |
||
448 | default: |
||
449 | n->src[s] = sh->get_special_ro_value(src.sel); |
||
450 | break; |
||
451 | } |
||
452 | } |
||
453 | } |
||
454 | } |
||
455 | |||
456 | // pack multislot instructions into alu_packed_node |
||
457 | |||
458 | alu_packed_node *p = NULL; |
||
459 | for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) { |
||
460 | N = I + 1; |
||
461 | alu_node *a = static_cast |
||
462 | unsigned sflags = a->bc.slot_flags; |
||
463 | |||
464 | if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) { |
||
465 | if (!p) |
||
466 | p = sh->create_alu_packed(); |
||
467 | |||
468 | a->remove(); |
||
469 | p->push_back(a); |
||
470 | } |
||
471 | } |
||
472 | |||
473 | if (p) { |
||
474 | g->push_front(p); |
||
475 | |||
476 | if (p->count() == 3 && ctx.is_cayman()) { |
||
477 | // cayman's scalar instruction that can use 3 or 4 slots |
||
478 | |||
479 | // FIXME for simplicity we'll always add 4th slot, |
||
480 | // but probably we might want to always remove 4th slot and make |
||
481 | // sure that regalloc won't choose 'w' component for dst |
||
482 | |||
483 | alu_node *f = static_cast |
||
484 | alu_node *a = sh->create_alu(); |
||
485 | a->src = f->src; |
||
486 | a->dst.resize(f->dst.size()); |
||
487 | a->bc = f->bc; |
||
488 | a->bc.slot = SLOT_W; |
||
489 | p->push_back(a); |
||
490 | } |
||
491 | } |
||
492 | |||
493 | return 0; |
||
494 | } |
||
495 | |||
496 | int bc_parser::decode_fetch_clause(cf_node* cf) { |
||
497 | int r; |
||
498 | unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1; |
||
499 | |||
500 | cf->subtype = NST_TEX_CLAUSE; |
||
501 | |||
502 | while (cnt--) { |
||
503 | fetch_node *n = sh->create_fetch(); |
||
504 | cf->push_back(n); |
||
505 | if ((r = dec->decode_fetch(i, n->bc))) |
||
506 | return r; |
||
507 | if (n->bc.src_rel || n->bc.dst_rel) |
||
508 | gpr_reladdr = true; |
||
509 | |||
510 | } |
||
511 | return 0; |
||
512 | } |
||
513 | |||
514 | int bc_parser::prepare_fetch_clause(cf_node *cf) { |
||
515 | |||
516 | vvec grad_v, grad_h; |
||
517 | |||
518 | for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) { |
||
519 | |||
520 | fetch_node *n = static_cast |
||
521 | assert(n->is_valid()); |
||
522 | |||
523 | unsigned flags = n->bc.op_ptr->flags; |
||
524 | |||
525 | unsigned vtx = flags & FF_VTX; |
||
526 | unsigned num_src = vtx ? ctx.vtx_src_num : 4; |
||
527 | |||
528 | n->dst.resize(4); |
||
529 | |||
530 | if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) { |
||
531 | sh->uses_gradients = true; |
||
532 | } |
||
533 | |||
534 | if (flags & FF_SETGRAD) { |
||
535 | |||
536 | vvec *grad = NULL; |
||
537 | |||
538 | switch (n->bc.op) { |
||
539 | case FETCH_OP_SET_GRADIENTS_V: |
||
540 | grad = &grad_v; |
||
541 | break; |
||
542 | case FETCH_OP_SET_GRADIENTS_H: |
||
543 | grad = &grad_h; |
||
544 | break; |
||
545 | default: |
||
546 | assert(!"unexpected SET_GRAD instruction"); |
||
547 | return -1; |
||
548 | } |
||
549 | |||
550 | if (grad->empty()) |
||
551 | grad->resize(4); |
||
552 | |||
553 | for(unsigned s = 0; s < 4; ++s) { |
||
554 | unsigned sw = n->bc.src_sel[s]; |
||
555 | if (sw <= SEL_W) |
||
556 | (*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr, |
||
557 | sw, false); |
||
558 | else if (sw == SEL_0) |
||
559 | (*grad)[s] = sh->get_const_value(0.0f); |
||
560 | else if (sw == SEL_1) |
||
561 | (*grad)[s] = sh->get_const_value(1.0f); |
||
562 | } |
||
563 | } else { |
||
564 | |||
565 | if (flags & FF_USEGRAD) { |
||
566 | n->src.resize(12); |
||
567 | std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4); |
||
568 | std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8); |
||
569 | } else { |
||
570 | n->src.resize(4); |
||
571 | } |
||
572 | |||
573 | for(int s = 0; s < 4; ++s) { |
||
574 | if (n->bc.dst_sel[s] != SEL_MASK) |
||
575 | n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false); |
||
576 | // NOTE: it doesn't matter here which components of the result we |
||
577 | // are using, but original n->bc.dst_sel should be taken into |
||
578 | // account when building the bytecode |
||
579 | } |
||
580 | for(unsigned s = 0; s < num_src; ++s) { |
||
581 | if (n->bc.src_sel[s] <= SEL_W) |
||
582 | n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr, |
||
583 | n->bc.src_sel[s], false); |
||
584 | } |
||
585 | |||
586 | } |
||
587 | } |
||
588 | |||
589 | return 0; |
||
590 | } |
||
591 | |||
592 | int bc_parser::prepare_ir() { |
||
593 | |||
594 | for(id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) { |
||
595 | cf_node *c = *I; |
||
596 | |||
597 | if (!c) |
||
598 | continue; |
||
599 | |||
600 | unsigned flags = c->bc.op_ptr->flags; |
||
601 | |||
602 | if (flags & CF_ALU) { |
||
603 | prepare_alu_clause(c); |
||
604 | } else if (flags & CF_FETCH) { |
||
605 | prepare_fetch_clause(c); |
||
606 | } else if (c->bc.op == CF_OP_CALL_FS) { |
||
607 | sh->init_call_fs(c); |
||
608 | c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE; |
||
609 | } else if (flags & CF_LOOP_START) { |
||
610 | prepare_loop(c); |
||
611 | } else if (c->bc.op == CF_OP_JUMP) { |
||
612 | prepare_if(c); |
||
613 | } else if (c->bc.op == CF_OP_LOOP_END) { |
||
614 | loop_stack.pop(); |
||
615 | } else if (c->bc.op == CF_OP_LOOP_CONTINUE) { |
||
616 | assert(!loop_stack.empty()); |
||
617 | repeat_node *rep = sh->create_repeat(loop_stack.top()); |
||
618 | if (c->parent->first != c) |
||
619 | rep->move(c->parent->first, c); |
||
620 | c->replace_with(rep); |
||
621 | sh->simplify_dep_rep(rep); |
||
622 | } else if (c->bc.op == CF_OP_LOOP_BREAK) { |
||
623 | assert(!loop_stack.empty()); |
||
624 | depart_node *dep = sh->create_depart(loop_stack.top()); |
||
625 | if (c->parent->first != c) |
||
626 | dep->move(c->parent->first, c); |
||
627 | c->replace_with(dep); |
||
628 | sh->simplify_dep_rep(dep); |
||
629 | } else if (flags & CF_EXP) { |
||
630 | |||
631 | // unroll burst exports |
||
632 | |||
633 | assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE); |
||
634 | |||
635 | c->bc.set_op(CF_OP_EXPORT); |
||
636 | |||
637 | unsigned burst_count = c->bc.burst_count; |
||
638 | unsigned eop = c->bc.end_of_program; |
||
639 | |||
640 | c->bc.end_of_program = 0; |
||
641 | c->bc.burst_count = 0; |
||
642 | |||
643 | do { |
||
644 | c->src.resize(4); |
||
645 | |||
646 | for(int s = 0; s < 4; ++s) { |
||
647 | switch (c->bc.sel[s]) { |
||
648 | case SEL_0: |
||
649 | c->src[s] = sh->get_const_value(0.0f); |
||
650 | break; |
||
651 | case SEL_1: |
||
652 | c->src[s] = sh->get_const_value(1.0f); |
||
653 | break; |
||
654 | case SEL_MASK: |
||
655 | break; |
||
656 | default: |
||
657 | if (c->bc.sel[s] <= SEL_W) |
||
658 | c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr, |
||
659 | c->bc.sel[s], false); |
||
660 | else |
||
661 | assert(!"invalid src_sel for export"); |
||
662 | } |
||
663 | } |
||
664 | |||
665 | if (!burst_count--) |
||
666 | break; |
||
667 | |||
668 | cf_node *cf_next = sh->create_cf(); |
||
669 | cf_next->bc = c->bc; |
||
670 | ++cf_next->bc.rw_gpr; |
||
671 | ++cf_next->bc.array_base; |
||
672 | |||
673 | c->insert_after(cf_next); |
||
674 | c = cf_next; |
||
675 | |||
676 | } while (1); |
||
677 | |||
678 | c->bc.end_of_program = eop; |
||
679 | } else if (flags & (CF_STRM | CF_RAT)) { |
||
680 | |||
681 | unsigned burst_count = c->bc.burst_count; |
||
682 | unsigned eop = c->bc.end_of_program; |
||
683 | |||
684 | c->bc.end_of_program = 0; |
||
685 | c->bc.burst_count = 0; |
||
686 | |||
687 | do { |
||
688 | |||
689 | c->src.resize(4); |
||
690 | |||
691 | for(int s = 0; s < 4; ++s) { |
||
692 | if (c->bc.comp_mask & (1 << s)) |
||
693 | c->src[s] = |
||
694 | sh->get_gpr_value(true, c->bc.rw_gpr, s, false); |
||
695 | } |
||
696 | |||
697 | if ((flags & CF_RAT) && (c->bc.type & 1)) { // indexed write |
||
698 | c->src.resize(8); |
||
699 | for(int s = 0; s < 3; ++s) { |
||
700 | c->src[4 + s] = |
||
701 | sh->get_gpr_value(true, c->bc.index_gpr, s, false); |
||
702 | } |
||
703 | |||
704 | // FIXME probably we can relax it a bit |
||
705 | c->flags |= NF_DONT_HOIST | NF_DONT_MOVE; |
||
706 | } |
||
707 | |||
708 | if (!burst_count--) |
||
709 | break; |
||
710 | |||
711 | cf_node *cf_next = sh->create_cf(); |
||
712 | cf_next->bc = c->bc; |
||
713 | ++cf_next->bc.rw_gpr; |
||
714 | |||
715 | // FIXME is it correct? |
||
716 | cf_next->bc.array_base += cf_next->bc.elem_size + 1; |
||
717 | |||
718 | c->insert_after(cf_next); |
||
719 | c = cf_next; |
||
720 | } while (1); |
||
721 | |||
722 | c->bc.end_of_program = eop; |
||
723 | |||
724 | } |
||
725 | } |
||
726 | |||
727 | assert(loop_stack.empty()); |
||
728 | return 0; |
||
729 | } |
||
730 | |||
731 | int bc_parser::prepare_loop(cf_node* c) { |
||
732 | |||
733 | cf_node *end = cf_map[c->bc.addr - 1]; |
||
734 | assert(end->bc.op == CF_OP_LOOP_END); |
||
735 | assert(c->parent == end->parent); |
||
736 | |||
737 | region_node *reg = sh->create_region(); |
||
738 | repeat_node *rep = sh->create_repeat(reg); |
||
739 | |||
740 | reg->push_back(rep); |
||
741 | c->insert_before(reg); |
||
742 | rep->move(c, end->next); |
||
743 | |||
744 | loop_stack.push(reg); |
||
745 | return 0; |
||
746 | } |
||
747 | |||
748 | int bc_parser::prepare_if(cf_node* c) { |
||
749 | cf_node *c_else = NULL, *end = cf_map[c->bc.addr]; |
||
750 | |||
751 | BCP_DUMP( |
||
752 | sblog << "parsing JUMP @" << c->bc.id; |
||
753 | sblog << "\n"; |
||
754 | ); |
||
755 | |||
756 | if (end->bc.op == CF_OP_ELSE) { |
||
757 | BCP_DUMP( |
||
758 | sblog << " found ELSE : "; |
||
759 | dump::dump_op(end); |
||
760 | sblog << "\n"; |
||
761 | ); |
||
762 | |||
763 | c_else = end; |
||
764 | end = cf_map[c_else->bc.addr]; |
||
765 | } else { |
||
766 | BCP_DUMP( |
||
767 | sblog << " no else\n"; |
||
768 | ); |
||
769 | |||
770 | c_else = end; |
||
771 | } |
||
772 | |||
773 | if (c_else->parent != c->parent) |
||
774 | c_else = NULL; |
||
775 | |||
776 | if (end->parent != c->parent) |
||
777 | end = NULL; |
||
778 | |||
779 | region_node *reg = sh->create_region(); |
||
780 | |||
781 | depart_node *dep2 = sh->create_depart(reg); |
||
782 | depart_node *dep = sh->create_depart(reg); |
||
783 | if_node *n_if = sh->create_if(); |
||
784 | |||
785 | c->insert_before(reg); |
||
786 | |||
787 | if (c_else != end) |
||
788 | dep->move(c_else, end); |
||
789 | dep2->move(c, end); |
||
790 | |||
791 | reg->push_back(dep); |
||
792 | dep->push_front(n_if); |
||
793 | n_if->push_back(dep2); |
||
794 | |||
795 | n_if->cond = sh->get_special_value(SV_EXEC_MASK); |
||
796 | |||
797 | return 0; |
||
798 | } |
||
799 | |||
800 | |||
} // namespace r600_sb