Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright 2013 Vadim Girlin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Vadim Girlin |
||
25 | */ |
||
26 | |||
27 | #include "sb_bc.h" |
||
28 | #include "sb_shader.h" |
||
29 | #include "sb_pass.h" |
||
30 | |||
31 | namespace r600_sb { |
||
32 | |||
33 | shader::shader(sb_context &sctx, shader_target t, unsigned id) |
||
34 | : ctx(sctx), next_temp_value_index(temp_regid_offset), |
||
35 | prep_regs_count(), pred_sels(), |
||
36 | regions(), inputs(), undef(), val_pool(sizeof(value)), |
||
37 | pool(), all_nodes(), src_stats(), opt_stats(), errors(), |
||
38 | optimized(), id(id), |
||
39 | coal(*this), bbs(), |
||
40 | target(t), vt(ex), ex(*this), root(), |
||
41 | compute_interferences(), |
||
42 | has_alu_predication(), |
||
43 | uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {} |
||
44 | |||
45 | bool shader::assign_slot(alu_node* n, alu_node *slots[5]) { |
||
46 | |||
47 | unsigned slot_flags = ctx.alu_slots(n->bc.op); |
||
48 | unsigned slot = n->bc.dst_chan; |
||
49 | |||
50 | if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) && |
||
51 | (slot_flags & AF_S)) |
||
52 | slot = SLOT_TRANS; |
||
53 | |||
54 | if (slots[slot]) |
||
55 | return false; |
||
56 | |||
57 | n->bc.slot = slot; |
||
58 | slots[slot] = n; |
||
59 | return true; |
||
60 | } |
||
61 | |||
62 | void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, |
||
63 | bool src) { |
||
64 | unsigned chan = 0; |
||
65 | while (comp_mask) { |
||
66 | if (comp_mask & 1) { |
||
67 | value *v = get_gpr_value(src, gpr, chan, false); |
||
68 | v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN); |
||
69 | if (!v->is_rel()) { |
||
70 | v->gpr = v->pin_gpr = v->select; |
||
71 | v->fix(); |
||
72 | } |
||
73 | if (v->array && !v->array->gpr) { |
||
74 | // if pinned value can be accessed with indirect addressing |
||
75 | // pin the entire array to its original location |
||
76 | v->array->gpr = v->array->base_gpr; |
||
77 | } |
||
78 | vec.push_back(v); |
||
79 | } |
||
80 | comp_mask >>= 1; |
||
81 | ++chan; |
||
82 | } |
||
83 | } |
||
84 | |||
85 | cf_node* shader::create_clause(node_subtype nst) { |
||
86 | cf_node *n = create_cf(); |
||
87 | |||
88 | n->subtype = nst; |
||
89 | |||
90 | switch (nst) { |
||
91 | case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break; |
||
92 | case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break; |
||
93 | case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break; |
||
94 | default: assert(!"invalid clause type"); break; |
||
95 | } |
||
96 | |||
97 | n->bc.barrier = 1; |
||
98 | return n; |
||
99 | } |
||
100 | |||
101 | void shader::create_bbs() { |
||
102 | create_bbs(root, bbs); |
||
103 | } |
||
104 | |||
105 | void shader::expand_bbs() { |
||
106 | expand_bbs(bbs); |
||
107 | } |
||
108 | |||
109 | alu_node* shader::create_mov(value* dst, value* src) { |
||
110 | alu_node *n = create_alu(); |
||
111 | n->bc.set_op(ALU_OP1_MOV); |
||
112 | n->dst.push_back(dst); |
||
113 | n->src.push_back(src); |
||
114 | dst->def = n; |
||
115 | |||
116 | return n; |
||
117 | } |
||
118 | |||
119 | alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) { |
||
120 | alu_node *n = create_mov(dst, src); |
||
121 | |||
122 | dst->assign_source(src); |
||
123 | n->flags |= NF_COPY_MOV | NF_DONT_HOIST; |
||
124 | |||
125 | if (affcost && dst->is_sgpr() && src->is_sgpr()) |
||
126 | coal.add_edge(src, dst, affcost); |
||
127 | |||
128 | return n; |
||
129 | } |
||
130 | |||
131 | value* shader::get_value(value_kind kind, sel_chan id, |
||
132 | unsigned version) { |
||
133 | if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count) |
||
134 | return val_pool[id - 1]; |
||
135 | |||
136 | |||
137 | unsigned key = (kind << 28) | (version << 16) | id; |
||
138 | value_map::iterator i = reg_values.find(key); |
||
139 | if (i != reg_values.end()) { |
||
140 | return i->second; |
||
141 | } |
||
142 | value *v = create_value(kind, id, version); |
||
143 | reg_values.insert(std::make_pair(key, v)); |
||
144 | return v; |
||
145 | } |
||
146 | |||
147 | value* shader::get_special_value(unsigned sv_id, unsigned version) { |
||
148 | sel_chan id(sv_id, 0); |
||
149 | return get_value(VLK_SPECIAL_REG, id, version); |
||
150 | } |
||
151 | |||
152 | void shader::fill_array_values(gpr_array *a, vvec &vv) { |
||
153 | unsigned sz = a->array_size; |
||
154 | vv.resize(sz); |
||
155 | for (unsigned i = 0; i < a->array_size; ++i) { |
||
156 | vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(), |
||
157 | false); |
||
158 | } |
||
159 | } |
||
160 | |||
161 | value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel, |
||
162 | unsigned version) { |
||
163 | sel_chan id(reg, chan); |
||
164 | value *v; |
||
165 | gpr_array *a = get_gpr_array(reg, chan); |
||
166 | if (rel) { |
||
167 | assert(a); |
||
168 | v = create_value(VLK_REL_REG, id, 0); |
||
169 | v->rel = get_special_value(SV_AR_INDEX); |
||
170 | fill_array_values(a, v->muse); |
||
171 | if (!src) |
||
172 | fill_array_values(a, v->mdef); |
||
173 | } else { |
||
174 | if (version == 0 && reg < prep_regs_count) |
||
175 | return (val_pool[id - 1]); |
||
176 | |||
177 | v = get_value(VLK_REG, id, version); |
||
178 | } |
||
179 | |||
180 | v->array = a; |
||
181 | v->pin_gpr = v->select; |
||
182 | |||
183 | return v; |
||
184 | } |
||
185 | |||
186 | value* shader::create_temp_value() { |
||
187 | sel_chan id(++next_temp_value_index, 0); |
||
188 | return get_value(VLK_TEMP, id, 0); |
||
189 | } |
||
190 | |||
191 | value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) { |
||
192 | return get_ro_value(kcache_values, VLK_KCACHE, |
||
193 | sel_chan((bank << 12) | index, chan)); |
||
194 | } |
||
195 | |||
196 | void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) { |
||
197 | if (inputs.size() <= gpr) |
||
198 | inputs.resize(gpr+1); |
||
199 | |||
200 | shader_input &i = inputs[gpr]; |
||
201 | i.preloaded = preloaded; |
||
202 | i.comp_mask = comp_mask; |
||
203 | |||
204 | if (preloaded) { |
||
205 | add_pinned_gpr_values(root->dst, gpr, comp_mask, true); |
||
206 | } |
||
207 | |||
208 | } |
||
209 | |||
210 | void shader::init() { |
||
211 | assert(!root); |
||
212 | root = create_container(); |
||
213 | } |
||
214 | |||
215 | void shader::init_call_fs(cf_node* cf) { |
||
216 | unsigned gpr = 0; |
||
217 | |||
218 | assert(target == TARGET_VS || target == TARGET_ES); |
||
219 | |||
220 | for(inputs_vec::const_iterator I = inputs.begin(), |
||
221 | E = inputs.end(); I != E; ++I, ++gpr) { |
||
222 | if (!I->preloaded) |
||
223 | add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false); |
||
224 | else |
||
225 | add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true); |
||
226 | } |
||
227 | } |
||
228 | |||
229 | void shader::set_undef(val_set& s) { |
||
230 | value *undefined = get_undef_value(); |
||
231 | if (!undefined->gvn_source) |
||
232 | vt.add_value(undefined); |
||
233 | |||
234 | val_set &vs = s; |
||
235 | |||
236 | for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) { |
||
237 | value *v = *I; |
||
238 | |||
239 | assert(!v->is_readonly() && !v->is_rel()); |
||
240 | |||
241 | v->gvn_source = undefined->gvn_source; |
||
242 | } |
||
243 | } |
||
244 | |||
245 | value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) { |
||
246 | value *v = val_pool.create(k, regid, ver); |
||
247 | return v; |
||
248 | } |
||
249 | |||
250 | value* shader::get_undef_value() { |
||
251 | if (!undef) |
||
252 | undef = create_value(VLK_UNDEF, 0, 0); |
||
253 | return undef; |
||
254 | } |
||
255 | |||
256 | node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) { |
||
257 | node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags); |
||
258 | all_nodes.push_back(n); |
||
259 | return n; |
||
260 | } |
||
261 | |||
262 | alu_node* shader::create_alu() { |
||
263 | alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node(); |
||
264 | all_nodes.push_back(n); |
||
265 | return n; |
||
266 | } |
||
267 | |||
268 | alu_group_node* shader::create_alu_group() { |
||
269 | alu_group_node* n = |
||
270 | new (pool.allocate(sizeof(alu_group_node))) alu_group_node(); |
||
271 | all_nodes.push_back(n); |
||
272 | return n; |
||
273 | } |
||
274 | |||
275 | alu_packed_node* shader::create_alu_packed() { |
||
276 | alu_packed_node* n = |
||
277 | new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node(); |
||
278 | all_nodes.push_back(n); |
||
279 | return n; |
||
280 | } |
||
281 | |||
282 | cf_node* shader::create_cf() { |
||
283 | cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node(); |
||
284 | n->bc.barrier = 1; |
||
285 | all_nodes.push_back(n); |
||
286 | return n; |
||
287 | } |
||
288 | |||
289 | fetch_node* shader::create_fetch() { |
||
290 | fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node(); |
||
291 | all_nodes.push_back(n); |
||
292 | return n; |
||
293 | } |
||
294 | |||
295 | region_node* shader::create_region() { |
||
296 | region_node *n = new (pool.allocate(sizeof(region_node))) |
||
297 | region_node(regions.size()); |
||
298 | regions.push_back(n); |
||
299 | all_nodes.push_back(n); |
||
300 | return n; |
||
301 | } |
||
302 | |||
303 | depart_node* shader::create_depart(region_node* target) { |
||
304 | depart_node* n = new (pool.allocate(sizeof(depart_node))) |
||
305 | depart_node(target, target->departs.size()); |
||
306 | target->departs.push_back(n); |
||
307 | all_nodes.push_back(n); |
||
308 | return n; |
||
309 | } |
||
310 | |||
311 | repeat_node* shader::create_repeat(region_node* target) { |
||
312 | repeat_node* n = new (pool.allocate(sizeof(repeat_node))) |
||
313 | repeat_node(target, target->repeats.size() + 1); |
||
314 | target->repeats.push_back(n); |
||
315 | all_nodes.push_back(n); |
||
316 | return n; |
||
317 | } |
||
318 | |||
319 | container_node* shader::create_container(node_type nt, node_subtype nst, |
||
320 | node_flags flags) { |
||
321 | container_node *n = new (pool.allocate(sizeof(container_node))) |
||
322 | container_node(nt, nst, flags); |
||
323 | all_nodes.push_back(n); |
||
324 | return n; |
||
325 | } |
||
326 | |||
327 | if_node* shader::create_if() { |
||
328 | if_node* n = new (pool.allocate(sizeof(if_node))) if_node(); |
||
329 | all_nodes.push_back(n); |
||
330 | return n; |
||
331 | } |
||
332 | |||
333 | bb_node* shader::create_bb(unsigned id, unsigned loop_level) { |
||
334 | bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level); |
||
335 | all_nodes.push_back(n); |
||
336 | return n; |
||
337 | } |
||
338 | |||
339 | value* shader::get_special_ro_value(unsigned sel) { |
||
340 | return get_ro_value(special_ro_values, VLK_PARAM, sel); |
||
341 | } |
||
342 | |||
343 | value* shader::get_const_value(const literal &v) { |
||
344 | value *val = get_ro_value(const_values, VLK_CONST, v); |
||
345 | val->literal_value = v; |
||
346 | return val; |
||
347 | } |
||
348 | |||
349 | shader::~shader() { |
||
350 | for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end(); |
||
351 | I != E; ++I) |
||
352 | (*I)->~node(); |
||
353 | |||
354 | for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end(); |
||
355 | I != E; ++I) { |
||
356 | delete *I; |
||
357 | } |
||
358 | } |
||
359 | |||
360 | void shader::dump_ir() { |
||
361 | if (ctx.dump_pass) |
||
362 | dump(*this).run(); |
||
363 | } |
||
364 | |||
365 | value* shader::get_value_version(value* v, unsigned ver) { |
||
366 | assert(!v->is_readonly() && !v->is_rel()); |
||
367 | value *vv = get_value(v->kind, v->select, ver); |
||
368 | assert(vv); |
||
369 | |||
370 | if (v->array) { |
||
371 | vv->array = v->array; |
||
372 | } |
||
373 | |||
374 | return vv; |
||
375 | } |
||
376 | |||
377 | gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) { |
||
378 | |||
379 | for (regarray_vec::iterator I = gpr_arrays.begin(), |
||
380 | E = gpr_arrays.end(); I != E; ++I) { |
||
381 | gpr_array* a = *I; |
||
382 | unsigned achan = a->base_gpr.chan(); |
||
383 | unsigned areg = a->base_gpr.sel(); |
||
384 | if (achan == chan && (reg >= areg && reg < areg+a->array_size)) |
||
385 | return a; |
||
386 | } |
||
387 | return NULL; |
||
388 | } |
||
389 | |||
390 | void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count, |
||
391 | unsigned comp_mask) { |
||
392 | unsigned chan = 0; |
||
393 | while (comp_mask) { |
||
394 | if (comp_mask & 1) { |
||
395 | gpr_array *a = new gpr_array( |
||
396 | sel_chan(gpr_start, chan), gpr_count); |
||
397 | |||
398 | SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr |
||
399 | << " [" << a->array_size << "]\n"; |
||
400 | ); |
||
401 | |||
402 | gpr_arrays.push_back(a); |
||
403 | } |
||
404 | comp_mask >>= 1; |
||
405 | ++chan; |
||
406 | } |
||
407 | } |
||
408 | |||
409 | value* shader::get_pred_sel(int sel) { |
||
410 | assert(sel == 0 || sel == 1); |
||
411 | if (!pred_sels[sel]) |
||
412 | pred_sels[sel] = get_const_value(sel); |
||
413 | |||
414 | return pred_sels[sel]; |
||
415 | } |
||
416 | |||
417 | cf_node* shader::create_cf(unsigned op) { |
||
418 | cf_node *c = create_cf(); |
||
419 | c->bc.set_op(op); |
||
420 | c->bc.barrier = 1; |
||
421 | return c; |
||
422 | } |
||
423 | |||
424 | std::string shader::get_full_target_name() { |
||
425 | std::string s = get_shader_target_name(); |
||
426 | s += "/"; |
||
427 | s += ctx.get_hw_chip_name(); |
||
428 | s += "/"; |
||
429 | s += ctx.get_hw_class_name(); |
||
430 | return s; |
||
431 | } |
||
432 | |||
433 | const char* shader::get_shader_target_name() { |
||
434 | switch (target) { |
||
435 | case TARGET_VS: return "VS"; |
||
436 | case TARGET_ES: return "ES"; |
||
437 | case TARGET_PS: return "PS"; |
||
438 | case TARGET_GS: return "GS"; |
||
439 | case TARGET_COMPUTE: return "COMPUTE"; |
||
440 | case TARGET_FETCH: return "FETCH"; |
||
441 | default: |
||
442 | return "INVALID_TARGET"; |
||
443 | } |
||
444 | } |
||
445 | |||
446 | void shader::simplify_dep_rep(node* dr) { |
||
447 | container_node *p = dr->parent; |
||
448 | if (p->is_repeat()) { |
||
449 | repeat_node *r = static_cast |
||
450 | r->target->expand_repeat(r); |
||
451 | } else if (p->is_depart()) { |
||
452 | depart_node *d = static_cast |
||
453 | d->target->expand_depart(d); |
||
454 | } |
||
455 | if (dr->next) |
||
456 | dr->parent->cut(dr->next, NULL); |
||
457 | } |
||
458 | |||
459 | |||
460 | // FIXME this is used in some places as the max non-temp gpr, |
||
461 | // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead. |
||
462 | unsigned shader::first_temp_gpr() { |
||
463 | return MAX_GPR - ctx.alu_temp_gprs; |
||
464 | } |
||
465 | |||
466 | unsigned shader::num_nontemp_gpr() { |
||
467 | return MAX_GPR - 2 * ctx.alu_temp_gprs; |
||
468 | } |
||
469 | |||
470 | void shader::set_uses_kill() { |
||
471 | if (root->src.empty()) |
||
472 | root->src.resize(1); |
||
473 | |||
474 | if (!root->src[0]) |
||
475 | root->src[0] = get_special_value(SV_VALID_MASK); |
||
476 | } |
||
477 | |||
478 | alu_node* shader::clone(alu_node* n) { |
||
479 | alu_node *c = create_alu(); |
||
480 | |||
481 | // FIXME: this may be wrong with indirect operands |
||
482 | c->src = n->src; |
||
483 | c->dst = n->dst; |
||
484 | |||
485 | c->bc = n->bc; |
||
486 | c->pred = n->pred; |
||
487 | |||
488 | return c; |
||
489 | } |
||
490 | |||
491 | void shader::collect_stats(bool opt) { |
||
492 | if (!sb_context::dump_stat) |
||
493 | return; |
||
494 | |||
495 | shader_stats &s = opt ? opt_stats : src_stats; |
||
496 | |||
497 | s.shaders = 1; |
||
498 | s.ngpr = ngpr; |
||
499 | s.nstack = nstack; |
||
500 | s.collect(root); |
||
501 | |||
502 | if (opt) |
||
503 | ctx.opt_stats.accumulate(s); |
||
504 | else |
||
505 | ctx.src_stats.accumulate(s); |
||
506 | } |
||
507 | |||
508 | value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) { |
||
509 | value_map::iterator I = vm.find(key); |
||
510 | if (I != vm.end()) |
||
511 | return I->second; |
||
512 | value *v = create_value(vk, key, 0); |
||
513 | v->flags = VLF_READONLY; |
||
514 | vm.insert(std::make_pair(key, v)); |
||
515 | return v; |
||
516 | } |
||
517 | |||
518 | void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) { |
||
519 | |||
520 | bool inside_bb = false; |
||
521 | bool last_inside_bb = true; |
||
522 | node_iterator bb_start(n->begin()), I(bb_start), E(n->end()); |
||
523 | |||
524 | for (; I != E; ++I) { |
||
525 | node *k = *I; |
||
526 | inside_bb = k->type == NT_OP; |
||
527 | |||
528 | if (inside_bb && !last_inside_bb) |
||
529 | bb_start = I; |
||
530 | else if (!inside_bb) { |
||
531 | if (last_inside_bb |
||
532 | && I->type != NT_REPEAT |
||
533 | && I->type != NT_DEPART |
||
534 | && I->type != NT_IF) { |
||
535 | bb_node *bb = create_bb(bbs.size(), loop_level); |
||
536 | bbs.push_back(bb); |
||
537 | n->insert_node_before(*bb_start, bb); |
||
538 | if (bb_start != I) |
||
539 | bb->move(bb_start, I); |
||
540 | } |
||
541 | |||
542 | if (k->is_container()) { |
||
543 | |||
544 | bool loop = false; |
||
545 | if (k->type == NT_REGION) { |
||
546 | loop = static_cast |
||
547 | } |
||
548 | |||
549 | create_bbs(static_cast |
||
550 | loop_level + loop); |
||
551 | } |
||
552 | } |
||
553 | |||
554 | if (k->type == NT_DEPART) |
||
555 | return; |
||
556 | |||
557 | last_inside_bb = inside_bb; |
||
558 | } |
||
559 | |||
560 | if (last_inside_bb) { |
||
561 | bb_node *bb = create_bb(bbs.size(), loop_level); |
||
562 | bbs.push_back(bb); |
||
563 | if (n->empty()) |
||
564 | n->push_back(bb); |
||
565 | else { |
||
566 | n->insert_node_before(*bb_start, bb); |
||
567 | if (bb_start != n->end()) |
||
568 | bb->move(bb_start, n->end()); |
||
569 | } |
||
570 | } else { |
||
571 | if (n->last && n->last->type == NT_IF) { |
||
572 | bb_node *bb = create_bb(bbs.size(), loop_level); |
||
573 | bbs.push_back(bb); |
||
574 | n->push_back(bb); |
||
575 | } |
||
576 | } |
||
577 | } |
||
578 | |||
579 | void shader::expand_bbs(bbs_vec &bbs) { |
||
580 | |||
581 | for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) { |
||
582 | bb_node *b = *I; |
||
583 | b->expand(); |
||
584 | } |
||
585 | } |
||
586 | |||
587 | sched_queue_id shader::get_queue_id(node* n) { |
||
588 | switch (n->subtype) { |
||
589 | case NST_ALU_INST: |
||
590 | case NST_ALU_PACKED_INST: |
||
591 | case NST_COPY: |
||
592 | case NST_PSI: |
||
593 | return SQ_ALU; |
||
594 | case NST_FETCH_INST: { |
||
595 | fetch_node *f = static_cast |
||
596 | if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX)) |
||
597 | return SQ_VTX; |
||
598 | return SQ_TEX; |
||
599 | } |
||
600 | case NST_CF_INST: |
||
601 | return SQ_CF; |
||
602 | default: |
||
603 | assert(0); |
||
604 | return SQ_NUM; |
||
605 | } |
||
606 | } |
||
607 | |||
608 | void shader_stats::collect(node *n) { |
||
609 | if (n->is_alu_inst()) |
||
610 | ++alu; |
||
611 | else if (n->is_fetch_inst()) |
||
612 | ++fetch; |
||
613 | else if (n->is_container()) { |
||
614 | container_node *c = static_cast |
||
615 | |||
616 | if (n->is_alu_group()) |
||
617 | ++alu_groups; |
||
618 | else if (n->is_alu_clause()) |
||
619 | ++alu_clauses; |
||
620 | else if (n->is_fetch_clause()) |
||
621 | ++fetch_clauses; |
||
622 | else if (n->is_cf_inst()) |
||
623 | ++cf; |
||
624 | |||
625 | if (!c->empty()) { |
||
626 | for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { |
||
627 | collect(*I); |
||
628 | } |
||
629 | } |
||
630 | } |
||
631 | } |
||
632 | |||
633 | void shader_stats::accumulate(shader_stats& s) { |
||
634 | ++shaders; |
||
635 | ndw += s.ndw; |
||
636 | ngpr += s.ngpr; |
||
637 | nstack += s.nstack; |
||
638 | |||
639 | alu += s.alu; |
||
640 | alu_groups += s.alu_groups; |
||
641 | alu_clauses += s.alu_clauses; |
||
642 | fetch += s.fetch; |
||
643 | fetch_clauses += s.fetch_clauses; |
||
644 | cf += s.cf; |
||
645 | } |
||
646 | |||
647 | void shader_stats::dump() { |
||
648 | sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack |
||
649 | << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses |
||
650 | << ", alu:" << alu << ", fetch:" << fetch |
||
651 | << ", fetch clauses:" << fetch_clauses |
||
652 | << ", cf:" << cf; |
||
653 | |||
654 | if (shaders > 1) |
||
655 | sblog << ", shaders:" << shaders; |
||
656 | |||
657 | sblog << "\n"; |
||
658 | } |
||
659 | |||
660 | static void print_diff(unsigned d1, unsigned d2) { |
||
661 | if (d1) |
||
662 | sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%"; |
||
663 | else if (d2) |
||
664 | sblog << "N/A"; |
||
665 | else |
||
666 | sblog << "0%"; |
||
667 | } |
||
668 | |||
669 | void shader_stats::dump_diff(shader_stats& s) { |
||
670 | sblog << "dw:"; print_diff(ndw, s.ndw); |
||
671 | sblog << ", gpr:" ; print_diff(ngpr, s.ngpr); |
||
672 | sblog << ", stk:" ; print_diff(nstack, s.nstack); |
||
673 | sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups); |
||
674 | sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses); |
||
675 | sblog << ", alu:" ; print_diff(alu, s.alu); |
||
676 | sblog << ", fetch:" ; print_diff(fetch, s.fetch); |
||
677 | sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses); |
||
678 | sblog << ", cf:" ; print_diff(cf, s.cf); |
||
679 | sblog << "\n"; |
||
680 | } |
||
681 | |||
682 | } // namespace r600_sb><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>><>>=>><>>>><>><>> |