Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright 2013 Vadim Girlin |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Vadim Girlin |
||
25 | */ |
||
26 | |||
27 | #ifndef SB_SHADER_H_ |
||
28 | #define SB_SHADER_H_ |
||
29 | |||
30 | #include |
||
31 | #include |
||
32 | #include |
||
33 | |||
34 | #include "sb_ir.h" |
||
35 | #include "sb_expr.h" |
||
36 | |||
37 | namespace r600_sb { |
||
38 | |||
// Plain record describing one shader input.
// NOTE(review): presumably filled from shader::add_input(gpr, preloaded,
// comp_mask) — the implementation is not visible in this header.
struct shader_input {
    unsigned comp_mask;  // component mask for the input
    unsigned preloaded;  // non-zero when the input is flagged as preloaded
};
||
43 | |||
44 | struct error_info { |
||
45 | node *n; |
||
46 | unsigned arg_index; |
||
47 | std::string message; |
||
48 | }; |
||
49 | |||
50 | typedef std::multimap |
||
51 | |||
52 | class sb_context; |
||
53 | |||
54 | typedef std::vector |
||
55 | typedef std::vector |
||
56 | |||
57 | struct ra_edge { |
||
58 | value *a, *b; |
||
59 | unsigned cost; |
||
60 | |||
61 | ra_edge(value *a, value *b, unsigned cost) : a(a), b(b), cost(cost) {} |
||
62 | }; |
||
63 | |||
// Bit flags stored in ra_chunk::flags. Each enumerator is a distinct bit so
// they can be combined with the chunk_flags bitwise operators.
enum chunk_flags {
    RCF_GLOBAL = (1 << 0),
    RCF_PIN_CHAN = (1 << 1),
    RCF_PIN_REG = (1 << 2),

    RCF_FIXED = (1 << 3),

    RCF_PREALLOC = (1 << 4)
};
||
73 | |||
// Bit flags selecting dead-code-elimination behaviour; combine with
// operator| below.
// NOTE(review): the effect of each flag is implemented elsewhere.
enum dce_flags {
    DF_REMOVE_DEAD = (1 << 0),
    DF_REMOVE_UNUSED = (1 << 1),
    DF_EXPAND = (1 << 2)
};

// Combines two dce_flags values while keeping the enum type.
inline dce_flags operator |(dce_flags l, dce_flags r) {
    return static_cast<dce_flags>(static_cast<unsigned>(l) |
                                  static_cast<unsigned>(r));
}
||
83 | |||
84 | inline chunk_flags operator |(chunk_flags l, chunk_flags r) { |
||
85 | return (chunk_flags)((unsigned)l|(unsigned)r); |
||
86 | } |
||
87 | inline chunk_flags& operator |=(chunk_flags &l, chunk_flags r) { |
||
88 | l = l | r; |
||
89 | return l; |
||
90 | } |
||
91 | |||
92 | inline chunk_flags& operator &=(chunk_flags &l, chunk_flags r) { |
||
93 | l = (chunk_flags)((unsigned)l & (unsigned)r); |
||
94 | return l; |
||
95 | } |
||
96 | |||
97 | inline chunk_flags operator ~(chunk_flags r) { |
||
98 | return (chunk_flags)~(unsigned)r; |
||
99 | } |
||
100 | |||
101 | struct ra_chunk { |
||
102 | vvec values; |
||
103 | chunk_flags flags; |
||
104 | unsigned cost; |
||
105 | sel_chan pin; |
||
106 | |||
107 | ra_chunk() : values(), flags(), cost(), pin() {} |
||
108 | |||
109 | bool is_fixed() { return flags & RCF_FIXED; } |
||
110 | void fix() { flags |= RCF_FIXED; } |
||
111 | |||
112 | bool is_global() { return flags & RCF_GLOBAL; } |
||
113 | void set_global() { flags |= RCF_GLOBAL; } |
||
114 | |||
115 | bool is_reg_pinned() { return flags & RCF_PIN_REG; } |
||
116 | bool is_chan_pinned() { return flags & RCF_PIN_CHAN; } |
||
117 | |||
118 | bool is_prealloc() { return flags & RCF_PREALLOC; } |
||
119 | void set_prealloc() { flags |= RCF_PREALLOC; } |
||
120 | }; |
||
121 | |||
122 | typedef std::vector |
||
123 | |||
124 | class ra_constraint { |
||
125 | public: |
||
126 | ra_constraint(constraint_kind kind) : kind(kind), cost(0) {} |
||
127 | |||
128 | constraint_kind kind; |
||
129 | vvec values; |
||
130 | unsigned cost; |
||
131 | |||
132 | void update_values(); |
||
133 | bool check(); |
||
134 | }; |
||
135 | |||
136 | typedef std::vector |
||
137 | typedef std::vector |
||
138 | |||
139 | // priority queue |
||
140 | // FIXME use something more suitable or custom class ? |
||
141 | |||
// Ordering predicate for pointer-like T exposing a `cost` member: returns
// true when t1 costs more than t2, so sorted ranges run from highest to
// lowest cost.
template <class T>
struct cost_compare {
    bool operator ()(const T& t1, const T& t2) const {
        return t1->cost > t2->cost;
    }
};
||
148 | |||
// Sequence of T kept sorted according to Comp (a default-constructible strict
// weak ordering), backed by a std::vector.
template <class T, class Comp>
class queue {
    typedef std::vector<T> container;
    container cont;

public:
    queue() : cont() {}

    typedef typename container::iterator iterator;

    iterator begin() { return cont.begin(); }
    iterator end() { return cont.end(); }

    // Inserts t after any elements equivalent to it under Comp and returns an
    // iterator to the newly inserted element.
    iterator insert(const T& t) {
        iterator pos = std::upper_bound(begin(), end(), t, Comp());
        // vector::insert at end() appends, so no special case is needed.
        // Its return value is used because `pos` itself may be invalidated
        // when the insertion reallocates the storage.
        return cont.insert(pos, t);
    }

    // Removes one element that compares equal (operator==) to t, searching
    // only among the elements equivalent to t under Comp. No-op if absent.
    void erase(const T& t) {
        std::pair<iterator, iterator> range =
                std::equal_range(begin(), end(), t, Comp());
        iterator found = std::find(range.first, range.second, t);
        if (found != range.second)
            cont.erase(found);
    }
};
||
180 | |||
181 | typedef queue |
||
182 | typedef queue |
||
183 | typedef queue |
||
184 | |||
185 | typedef std::set |
||
186 | |||
187 | class shader; |
||
188 | |||
189 | class coalescer { |
||
190 | |||
191 | shader &sh; |
||
192 | |||
193 | edge_queue edges; |
||
194 | chunk_queue chunks; |
||
195 | constraint_queue constraints; |
||
196 | |||
197 | constraint_vec all_constraints; |
||
198 | chunk_vec all_chunks; |
||
199 | |||
200 | public: |
||
201 | |||
202 | coalescer(shader &sh) : sh(sh), edges(), chunks(), constraints() {} |
||
203 | ~coalescer(); |
||
204 | |||
205 | int run(); |
||
206 | |||
207 | void add_edge(value *a, value *b, unsigned cost); |
||
208 | void build_chunks(); |
||
209 | void build_constraint_queue(); |
||
210 | void build_chunk_queue(); |
||
211 | int color_constraints(); |
||
212 | void color_chunks(); |
||
213 | |||
214 | ra_constraint* create_constraint(constraint_kind kind); |
||
215 | |||
216 | enum ac_cost { |
||
217 | phi_cost = 10000, |
||
218 | copy_cost = 1, |
||
219 | }; |
||
220 | |||
221 | void dump_edges(); |
||
222 | void dump_chunks(); |
||
223 | void dump_constraint_queue(); |
||
224 | |||
225 | static void dump_chunk(ra_chunk *c); |
||
226 | static void dump_constraint(ra_constraint* c); |
||
227 | |||
228 | void get_chunk_interferences(ra_chunk *c, val_set &s); |
||
229 | |||
230 | private: |
||
231 | |||
232 | void create_chunk(value *v); |
||
233 | void unify_chunks(ra_edge *e); |
||
234 | bool chunks_interference(ra_chunk *c1, ra_chunk *c2); |
||
235 | |||
236 | int color_reg_constraint(ra_constraint *c); |
||
237 | void color_phi_constraint(ra_constraint *c); |
||
238 | |||
239 | |||
240 | void init_reg_bitset(sb_bitset &bs, val_set &vs); |
||
241 | |||
242 | void color_chunk(ra_chunk *c, sel_chan color); |
||
243 | |||
244 | ra_chunk* detach_value(value *v); |
||
245 | }; |
||
246 | |||
247 | |||
248 | |||
249 | class shader { |
||
250 | |||
251 | sb_context &ctx; |
||
252 | |||
253 | typedef sb_map |
||
254 | value_map reg_values; |
||
255 | |||
256 | // read-only values |
||
257 | value_map const_values; // immediate constants key -const value (uint32_t) |
||
258 | value_map special_ro_values; // key - hw alu_sel & chan |
||
259 | value_map kcache_values; |
||
260 | |||
261 | gpr_array_vec gpr_arrays; |
||
262 | |||
263 | unsigned next_temp_value_index; |
||
264 | |||
265 | unsigned prep_regs_count; |
||
266 | |||
267 | value* pred_sels[2]; |
||
268 | |||
269 | regions_vec regions; |
||
270 | inputs_vec inputs; |
||
271 | |||
272 | value *undef; |
||
273 | |||
274 | sb_value_pool val_pool; |
||
275 | sb_pool pool; |
||
276 | |||
277 | std::vector |
||
278 | |||
279 | public: |
||
280 | shader_stats src_stats, opt_stats; |
||
281 | |||
282 | error_map errors; |
||
283 | |||
284 | bool optimized; |
||
285 | |||
286 | unsigned id; |
||
287 | |||
288 | coalescer coal; |
||
289 | |||
290 | static const unsigned temp_regid_offset = 512; |
||
291 | |||
292 | bbs_vec bbs; |
||
293 | |||
294 | const shader_target target; |
||
295 | |||
296 | value_table vt; |
||
297 | expr_handler ex; |
||
298 | |||
299 | container_node *root; |
||
300 | |||
301 | bool compute_interferences; |
||
302 | |||
303 | bool has_alu_predication; |
||
304 | bool uses_gradients; |
||
305 | |||
306 | bool safe_math; |
||
307 | |||
308 | unsigned ngpr, nstack; |
||
309 | |||
310 | unsigned dce_flags; |
||
311 | |||
312 | shader(sb_context &sctx, shader_target t, unsigned id); |
||
313 | |||
314 | ~shader(); |
||
315 | |||
316 | sb_context &get_ctx() const { return ctx; } |
||
317 | |||
318 | value* get_const_value(const literal & v); |
||
319 | value* get_special_value(unsigned sv_id, unsigned version = 0); |
||
320 | value* create_temp_value(); |
||
321 | value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel, |
||
322 | unsigned version = 0); |
||
323 | |||
324 | |||
325 | value* get_special_ro_value(unsigned sel); |
||
326 | value* get_kcache_value(unsigned bank, unsigned index, unsigned chan); |
||
327 | |||
328 | value* get_value_version(value* v, unsigned ver); |
||
329 | |||
330 | void init(); |
||
331 | void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src); |
||
332 | |||
333 | void dump_ir(); |
||
334 | |||
335 | void add_gpr_array(unsigned gpr_start, unsigned gpr_count, |
||
336 | unsigned comp_mask); |
||
337 | |||
338 | value* get_pred_sel(int sel); |
||
339 | bool assign_slot(alu_node *n, alu_node *slots[5]); |
||
340 | |||
341 | gpr_array* get_gpr_array(unsigned reg, unsigned chan); |
||
342 | |||
343 | void add_input(unsigned gpr, bool preloaded = false, |
||
344 | unsigned comp_mask = 0xF); |
||
345 | |||
346 | const inputs_vec & get_inputs() {return inputs; } |
||
347 | |||
348 | regions_vec & get_regions() { return regions; } |
||
349 | |||
350 | void init_call_fs(cf_node *cf); |
||
351 | |||
352 | value *get_undef_value(); |
||
353 | void set_undef(val_set &s); |
||
354 | |||
355 | node* create_node(node_type nt, node_subtype nst, |
||
356 | node_flags flags = NF_EMPTY); |
||
357 | alu_node* create_alu(); |
||
358 | alu_group_node* create_alu_group(); |
||
359 | alu_packed_node* create_alu_packed(); |
||
360 | cf_node* create_cf(); |
||
361 | cf_node* create_cf(unsigned op); |
||
362 | fetch_node* create_fetch(); |
||
363 | region_node* create_region(); |
||
364 | depart_node* create_depart(region_node *target); |
||
365 | repeat_node* create_repeat(region_node *target); |
||
366 | container_node* create_container(node_type nt = NT_LIST, |
||
367 | node_subtype nst = NST_LIST, |
||
368 | node_flags flags = NF_EMPTY); |
||
369 | if_node* create_if(); |
||
370 | bb_node* create_bb(unsigned id, unsigned loop_level); |
||
371 | |||
372 | value* get_value_by_uid(unsigned id) { return val_pool[id - 1]; } |
||
373 | |||
374 | cf_node* create_clause(node_subtype nst); |
||
375 | |||
376 | void create_bbs(); |
||
377 | void expand_bbs(); |
||
378 | |||
379 | alu_node* create_mov(value* dst, value* src); |
||
380 | alu_node* create_copy_mov(value *dst, value *src, unsigned affcost = 1); |
||
381 | |||
382 | const char * get_shader_target_name(); |
||
383 | |||
384 | std::string get_full_target_name(); |
||
385 | |||
386 | void create_bbs(container_node* n, bbs_vec &bbs, int loop_level = 0); |
||
387 | void expand_bbs(bbs_vec &bbs); |
||
388 | |||
389 | sched_queue_id get_queue_id(node* n); |
||
390 | |||
391 | void simplify_dep_rep(node *dr); |
||
392 | |||
393 | unsigned first_temp_gpr(); |
||
394 | unsigned num_nontemp_gpr(); |
||
395 | |||
396 | gpr_array_vec& arrays() { return gpr_arrays; } |
||
397 | |||
398 | void set_uses_kill(); |
||
399 | |||
400 | void fill_array_values(gpr_array *a, vvec &vv); |
||
401 | |||
402 | alu_node* clone(alu_node *n); |
||
403 | |||
404 | sb_value_pool& get_value_pool() { return val_pool; } |
||
405 | |||
406 | void collect_stats(bool opt); |
||
407 | |||
408 | private: |
||
409 | value* create_value(value_kind k, sel_chan regid, unsigned ver); |
||
410 | value* get_value(value_kind kind, sel_chan id, |
||
411 | unsigned version = 0); |
||
412 | value* get_ro_value(value_map &vm, value_kind vk, unsigned key); |
||
413 | }; |
||
414 | |||
415 | } |
||
416 | |||
417 | #endif /* SB_SHADER_H_ */ |