Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright (c) 2013 Rob Clark |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
21 | * SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | #ifndef IR3_H_ |
||
25 | #define IR3_H_ |
||
26 | |||
#include <stdint.h>
#include <stdbool.h>

#include "util/u_debug.h"

#include "instr-a3xx.h"
#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */
||
34 | |||
35 | /* low level intermediate representation of an adreno shader program */ |
||
36 | |||
37 | struct ir3; |
||
38 | struct ir3_instruction; |
||
39 | struct ir3_block; |
||
40 | |||
/* Size and register-usage statistics for a shader (this is the 'info'
 * argument type of ir3_assemble()).
 */
struct ir3_info {
	uint16_t sizedwords;
	uint16_t instrs_count;   /* expanded to account for rpt's */

	/* NOTE: max_reg and friends only cover registers that the shader
	 * itself touches.  A register that is merely vertex fetched via
	 * VFD_DECODE, but never accessed by the shader, is not counted.
	 */
	int8_t   max_reg;        /* highest GPR # used by shader */
	int8_t   max_half_reg;
	int16_t  max_const;
};
||
52 | |||
/* One register operand (src or dst) of an ir3_instruction. */
struct ir3_register {
	enum {
		IR3_REG_CONST   = 0x001,
		IR3_REG_IMMED   = 0x002,
		IR3_REG_HALF    = 0x004,
		IR3_REG_RELATIV = 0x008,
		IR3_REG_R       = 0x010,

		/* Float abs/neg is available on most instructions, it seems,
		 * while integer abs/neg is not.  Since the CP pass has to know
		 * which one (int or float) is intended in order to do the right
		 * thing, the abs/neg flags come in separate float and int
		 * variants.  On top of that, for .b (bitwise) operations the
		 * "negate" is really a bitwise not, so that gets a flag of its
		 * own to make things more clear.
		 */
		IR3_REG_FNEG    = 0x020,
		IR3_REG_FABS    = 0x040,
		IR3_REG_SNEG    = 0x080,
		IR3_REG_SABS    = 0x100,
		IR3_REG_BNOT    = 0x200,

		IR3_REG_EVEN    = 0x400,
		IR3_REG_POS_INF = 0x800,

		/* (ei) flag, end-input?  Set on the last bary, presumably to
		 * signal that the shader needs no more input:
		 */
		IR3_REG_EI      = 0x1000,

		/* meta-flags, only meaningful for intermediate stages of the
		 * IR, ie. before register assignment is done:
		 */
		IR3_REG_SSA     = 0x2000,   /* 'instr' is ptr to assigning instr */
		IR3_REG_IA      = 0x4000,   /* meta-input dst is "assigned" */
		IR3_REG_ADDR    = 0x8000,   /* register is a0.x */
	} flags;

	union {
		/* normal registers:
		 * the low two bits of the reg # hold the component, so
		 * rN.x becomes: (N << 2) | x
		 */
		int      num;
		/* immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
		/* relative: */
		int      offset;
	};

	/* With IR3_REG_SSA set, a src register points back at the
	 * instruction that assigns it.
	 */
	struct ir3_instruction *instr;

	union {
		/* Used by cat5 instructions, and also for internal/IR level
		 * tracking of which registers an instruction reads/writes.
		 * 'wrmask' may be a bad name, because it describes both src
		 * and dst operands that touch multiple adjacent registers.
		 */
		unsigned wrmask;
		/* For relative addressing a 32 bit mask is too small to
		 * describe the array size, but otoh there are no disjoint sets
		 * to deal with, so a simple size (number of scalar components)
		 * is used instead.
		 */
		unsigned size;
	};
};
||
119 | |||
120 | struct ir3_instruction { |
||
121 | struct ir3_block *block; |
||
122 | int category; |
||
123 | opc_t opc; |
||
124 | enum { |
||
125 | /* (sy) flag is set on first instruction, and after sample |
||
126 | * instructions (probably just on RAW hazard). |
||
127 | */ |
||
128 | IR3_INSTR_SY = 0x001, |
||
129 | /* (ss) flag is set on first instruction, and first instruction |
||
130 | * to depend on the result of "long" instructions (RAW hazard): |
||
131 | * |
||
132 | * rcp, rsq, log2, exp2, sin, cos, sqrt |
||
133 | * |
||
134 | * It seems to synchronize until all in-flight instructions are |
||
135 | * completed, for example: |
||
136 | * |
||
137 | * rsq hr1.w, hr1.w |
||
138 | * add.f hr2.z, (neg)hr2.z, hc0.y |
||
139 | * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y |
||
140 | * rsq hr2.x, hr2.x |
||
141 | * (rpt1)nop |
||
142 | * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w |
||
143 | * nop |
||
144 | * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w |
||
145 | * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w |
||
146 | * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x |
||
147 | * |
||
148 | * The last mul.f does not have (ss) set, presumably because the |
||
149 | * (ss) on the previous instruction does the job. |
||
150 | * |
||
151 | * The blob driver also seems to set it on WAR hazards, although |
||
152 | * not really clear if this is needed or just blob compiler being |
||
153 | * sloppy. So far I haven't found a case where removing the (ss) |
||
154 | * causes problems for WAR hazard, but I could just be getting |
||
155 | * lucky: |
||
156 | * |
||
157 | * rcp r1.y, r3.y |
||
158 | * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z |
||
159 | * |
||
160 | */ |
||
161 | IR3_INSTR_SS = 0x002, |
||
162 | /* (jp) flag is set on jump targets: |
||
163 | */ |
||
164 | IR3_INSTR_JP = 0x004, |
||
165 | IR3_INSTR_UL = 0x008, |
||
166 | IR3_INSTR_3D = 0x010, |
||
167 | IR3_INSTR_A = 0x020, |
||
168 | IR3_INSTR_O = 0x040, |
||
169 | IR3_INSTR_P = 0x080, |
||
170 | IR3_INSTR_S = 0x100, |
||
171 | IR3_INSTR_S2EN = 0x200, |
||
172 | /* meta-flags, for intermediate stages of IR, ie. |
||
173 | * before register assignment is done: |
||
174 | */ |
||
175 | IR3_INSTR_MARK = 0x1000, |
||
176 | } flags; |
||
177 | int repeat; |
||
178 | #ifdef DEBUG |
||
179 | unsigned regs_max; |
||
180 | #endif |
||
181 | unsigned regs_count; |
||
182 | struct ir3_register **regs; |
||
183 | union { |
||
184 | struct { |
||
185 | char inv; |
||
186 | char comp; |
||
187 | int immed; |
||
188 | } cat0; |
||
189 | struct { |
||
190 | type_t src_type, dst_type; |
||
191 | } cat1; |
||
192 | struct { |
||
193 | enum { |
||
194 | IR3_COND_LT = 0, |
||
195 | IR3_COND_LE = 1, |
||
196 | IR3_COND_GT = 2, |
||
197 | IR3_COND_GE = 3, |
||
198 | IR3_COND_EQ = 4, |
||
199 | IR3_COND_NE = 5, |
||
200 | } condition; |
||
201 | } cat2; |
||
202 | struct { |
||
203 | unsigned samp, tex; |
||
204 | type_t type; |
||
205 | } cat5; |
||
206 | struct { |
||
207 | type_t type; |
||
208 | int offset; |
||
209 | int iim_val; |
||
210 | } cat6; |
||
211 | /* for meta-instructions, just used to hold extra data |
||
212 | * before instruction scheduling, etc |
||
213 | */ |
||
214 | struct { |
||
215 | int off; /* component/offset */ |
||
216 | } fo; |
||
217 | struct { |
||
218 | int aid; |
||
219 | } fi; |
||
220 | struct { |
||
221 | struct ir3_block *if_block, *else_block; |
||
222 | } flow; |
||
223 | struct { |
||
224 | struct ir3_block *block; |
||
225 | } inout; |
||
226 | |||
227 | /* XXX keep this as big as all other union members! */ |
||
228 | uint32_t info[3]; |
||
229 | }; |
||
230 | |||
231 | /* transient values used during various algorithms: */ |
||
232 | union { |
||
233 | /* The instruction depth is the max dependency distance to output. |
||
234 | * |
||
235 | * You can also think of it as the "cost", if we did any sort of |
||
236 | * optimization for register footprint. Ie. a value that is just |
||
237 | * result of moving a const to a reg would have a low cost, so to |
||
238 | * it could make sense to duplicate the instruction at various |
||
239 | * points where the result is needed to reduce register footprint. |
||
240 | * |
||
241 | * DEPTH_UNUSED used to mark unused instructions after depth |
||
242 | * calculation pass. |
||
243 | */ |
||
244 | #define DEPTH_UNUSED ~0 |
||
245 | unsigned depth; |
||
246 | }; |
||
247 | |||
248 | /* Used during CP and RA stages. For fanin and shader inputs/ |
||
249 | * outputs where we need a sequence of consecutive registers, |
||
250 | * keep track of each src instructions left (ie 'n-1') and right |
||
251 | * (ie 'n+1') neighbor. The front-end must insert enough mov's |
||
252 | * to ensure that each instruction has at most one left and at |
||
253 | * most one right neighbor. During the copy-propagation pass, |
||
254 | * we only remove mov's when we can preserve this constraint. |
||
255 | * And during the RA stage, we use the neighbor information to |
||
256 | * allocate a block of registers in one shot. |
||
257 | * |
||
258 | * TODO: maybe just add something like: |
||
259 | * struct ir3_instruction_ref { |
||
260 | * struct ir3_instruction *instr; |
||
261 | * unsigned cnt; |
||
262 | * } |
||
263 | * |
||
264 | * Or can we get away without the refcnt stuff? It seems like |
||
265 | * it should be overkill.. the problem is if, potentially after |
||
266 | * already eliminating some mov's, if you have a single mov that |
||
267 | * needs to be grouped with it's neighbors in two different |
||
268 | * places (ex. shader output and a fanin). |
||
269 | */ |
||
270 | struct { |
||
271 | struct ir3_instruction *left, *right; |
||
272 | uint16_t left_cnt, right_cnt; |
||
273 | } cp; |
||
274 | |||
275 | /* an instruction can reference at most one address register amongst |
||
276 | * it's src/dst registers. Beyond that, you need to insert mov's. |
||
277 | */ |
||
278 | struct ir3_instruction *address; |
||
279 | |||
280 | /* in case of a instruction with relative dst instruction, we need to |
||
281 | * capture the dependency on the fanin for the previous values of |
||
282 | * the array elements. Since we don't know at compile time actually |
||
283 | * which array elements are written, this serves to preserve the |
||
284 | * unconditional write to array elements prior to the conditional |
||
285 | * write. |
||
286 | * |
||
287 | * TODO only cat1 can do indirect write.. we could maybe move this |
||
288 | * into instr->cat1.fanin (but would require the frontend to insert |
||
289 | * the extra mov) |
||
290 | */ |
||
291 | struct ir3_instruction *fanin; |
||
292 | |||
293 | struct ir3_instruction *next; |
||
294 | #ifdef DEBUG |
||
295 | uint32_t serialno; |
||
296 | #endif |
||
297 | }; |
||
298 | |||
299 | static inline struct ir3_instruction * |
||
300 | ir3_neighbor_first(struct ir3_instruction *instr) |
||
301 | { |
||
302 | while (instr->cp.left) |
||
303 | instr = instr->cp.left; |
||
304 | return instr; |
||
305 | } |
||
306 | |||
307 | static inline int ir3_neighbor_count(struct ir3_instruction *instr) |
||
308 | { |
||
309 | int num = 1; |
||
310 | |||
311 | debug_assert(!instr->cp.left); |
||
312 | |||
313 | while (instr->cp.right) { |
||
314 | num++; |
||
315 | instr = instr->cp.right; |
||
316 | } |
||
317 | |||
318 | return num; |
||
319 | } |
||
320 | |||
struct ir3_heap_chunk;

/* Top level shader object.
 *
 * Each growable array below is a 'foo', 'foo_count', 'foo_sz' triplet,
 * which is the naming the array_insert() macro builds via token pasting.
 */
struct ir3 {
	unsigned instrs_count, instrs_sz;
	struct ir3_instruction **instrs;

	/* Track bary.f (and ldlv) instructions.. scheduling needs this to
	 * make sure that all varying fetches happen before any potential
	 * kill instructions.  The hw gets grumpy if all threads in a group
	 * are killed before the last bary.f gets a chance to signal end of
	 * input (ei).
	 */
	unsigned baryfs_count, baryfs_sz;
	struct ir3_instruction **baryfs;

	/* Track all indirect instructions (read and write).  There is a
	 * deadlock scenario where an address register gets scheduled, but
	 * other dependent src instructions cannot be scheduled because they
	 * depend on a *different* address register value.  To avoid it, the
	 * scheduler has to make sure that all of an instruction's
	 * dependencies other than the address register are scheduled before
	 * the one that writes the address register.  Having a convenient
	 * list of the instructions that reference some address register
	 * simplifies this.
	 */
	unsigned indirects_count, indirects_sz;
	struct ir3_instruction **indirects;

	struct ir3_block *block;
	unsigned heap_idx;
	struct ir3_heap_chunk *chunk;
};
||
353 | |||
/* A block of instructions belonging to a shader. */
struct ir3_block {
	struct ir3 *shader;

	/* element counts; by their names these size the three arrays
	 * declared right below:
	 */
	unsigned ntemporaries, ninputs, noutputs;

	/* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
	struct ir3_instruction **temporaries;
	struct ir3_instruction **inputs;
	struct ir3_instruction **outputs;

	/* only a single address register: */
	struct ir3_instruction *address;

	struct ir3_block *parent;
	struct ir3_instruction *head;
};
||
366 | |||
367 | struct ir3 * ir3_create(void); |
||
368 | void ir3_destroy(struct ir3 *shader); |
||
369 | void * ir3_assemble(struct ir3 *shader, |
||
370 | struct ir3_info *info, uint32_t gpu_id); |
||
371 | void * ir3_alloc(struct ir3 *shader, int sz); |
||
372 | |||
373 | struct ir3_block * ir3_block_create(struct ir3 *shader, |
||
374 | unsigned ntmp, unsigned nin, unsigned nout); |
||
375 | |||
376 | struct ir3_instruction * ir3_instr_create(struct ir3_block *block, |
||
377 | int category, opc_t opc); |
||
378 | struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, |
||
379 | int category, opc_t opc, int nreg); |
||
380 | struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); |
||
381 | const char *ir3_instr_name(struct ir3_instruction *instr); |
||
382 | |||
383 | struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, |
||
384 | int num, int flags); |
||
385 | |||
386 | |||
387 | static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) |
||
388 | { |
||
389 | if (instr->flags & IR3_INSTR_MARK) |
||
390 | return true; /* already visited */ |
||
391 | instr->flags |= IR3_INSTR_MARK; |
||
392 | return false; |
||
393 | } |
||
394 | |||
395 | static inline void ir3_clear_mark(struct ir3 *shader) |
||
396 | { |
||
397 | /* TODO would be nice to drop the instruction array.. for |
||
398 | * new compiler, _clear_mark() is all we use it for, and |
||
399 | * we could probably manage a linked list instead.. |
||
400 | * |
||
401 | * Also, we'll probably want to mark instructions within |
||
402 | * a block, so tracking the list of instrs globally is |
||
403 | * unlikely to be what we want. |
||
404 | */ |
||
405 | unsigned i; |
||
406 | for (i = 0; i < shader->instrs_count; i++) { |
||
407 | struct ir3_instruction *instr = shader->instrs[i]; |
||
408 | instr->flags &= ~IR3_INSTR_MARK; |
||
409 | } |
||
410 | } |
||
411 | |||
412 | static inline int ir3_instr_regno(struct ir3_instruction *instr, |
||
413 | struct ir3_register *reg) |
||
414 | { |
||
415 | unsigned i; |
||
416 | for (i = 0; i < instr->regs_count; i++) |
||
417 | if (reg == instr->regs[i]) |
||
418 | return i; |
||
419 | return -1; |
||
420 | } |
||
421 | |||
422 | |||
#define MAX_ARRAYS 16

/* Pack a register number and a component into a register id, with the
 * component in the low two bits: rN.c becomes (N << 2) | c.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * Only the low two bits of 'comp' are used.  The shift is carried out
 * on an unsigned value: left-shifting a negative signed int is
 * undefined behavior in C, whereas the unsigned shift is well defined
 * for every 'num' and yields the same bits for non-negative input.
 */
static inline uint32_t regid(int num, int comp)
{
	return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3);
}
||
435 | |||
436 | static inline uint32_t reg_num(struct ir3_register *reg) |
||
437 | { |
||
438 | return reg->num >> 2; |
||
439 | } |
||
440 | |||
441 | static inline uint32_t reg_comp(struct ir3_register *reg) |
||
442 | { |
||
443 | return reg->num & 0x3; |
||
444 | } |
||
445 | |||
446 | static inline bool is_flow(struct ir3_instruction *instr) |
||
447 | { |
||
448 | return (instr->category == 0); |
||
449 | } |
||
450 | |||
451 | static inline bool is_kill(struct ir3_instruction *instr) |
||
452 | { |
||
453 | return is_flow(instr) && (instr->opc == OPC_KILL); |
||
454 | } |
||
455 | |||
456 | static inline bool is_nop(struct ir3_instruction *instr) |
||
457 | { |
||
458 | return is_flow(instr) && (instr->opc == OPC_NOP); |
||
459 | } |
||
460 | |||
461 | /* Is it a non-transformative (ie. not type changing) mov? This can |
||
462 | * also include absneg.s/absneg.f, which for the most part can be |
||
463 | * treated as a mov (single src argument). |
||
464 | */ |
||
465 | static inline bool is_same_type_mov(struct ir3_instruction *instr) |
||
466 | { |
||
467 | struct ir3_register *dst = instr->regs[0]; |
||
468 | |||
469 | /* mov's that write to a0.x or p0.x are special: */ |
||
470 | if (dst->num == regid(REG_P0, 0)) |
||
471 | return false; |
||
472 | if (dst->num == regid(REG_A0, 0)) |
||
473 | return false; |
||
474 | |||
475 | if ((instr->category == 1) && |
||
476 | (instr->cat1.src_type == instr->cat1.dst_type)) |
||
477 | return true; |
||
478 | if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) || |
||
479 | (instr->opc == OPC_ABSNEG_S))) |
||
480 | return true; |
||
481 | return false; |
||
482 | } |
||
483 | |||
484 | static inline bool is_alu(struct ir3_instruction *instr) |
||
485 | { |
||
486 | return (1 <= instr->category) && (instr->category <= 3); |
||
487 | } |
||
488 | |||
489 | static inline bool is_sfu(struct ir3_instruction *instr) |
||
490 | { |
||
491 | return (instr->category == 4); |
||
492 | } |
||
493 | |||
494 | static inline bool is_tex(struct ir3_instruction *instr) |
||
495 | { |
||
496 | return (instr->category == 5); |
||
497 | } |
||
498 | |||
499 | static inline bool is_mem(struct ir3_instruction *instr) |
||
500 | { |
||
501 | return (instr->category == 6); |
||
502 | } |
||
503 | |||
504 | static inline bool is_input(struct ir3_instruction *instr) |
||
505 | { |
||
506 | /* in some cases, ldlv is used to fetch varying without |
||
507 | * interpolation.. fortunately inloc is the first src |
||
508 | * register in either case |
||
509 | */ |
||
510 | if (is_mem(instr) && (instr->opc == OPC_LDLV)) |
||
511 | return true; |
||
512 | return (instr->category == 2) && (instr->opc == OPC_BARY_F); |
||
513 | } |
||
514 | |||
515 | static inline bool is_meta(struct ir3_instruction *instr) |
||
516 | { |
||
517 | /* TODO how should we count PHI (and maybe fan-in/out) which |
||
518 | * might actually contribute some instructions to the final |
||
519 | * result? |
||
520 | */ |
||
521 | return (instr->category == -1); |
||
522 | } |
||
523 | |||
524 | static inline bool writes_addr(struct ir3_instruction *instr) |
||
525 | { |
||
526 | if (instr->regs_count > 0) { |
||
527 | struct ir3_register *dst = instr->regs[0]; |
||
528 | return !!(dst->flags & IR3_REG_ADDR); |
||
529 | } |
||
530 | return false; |
||
531 | } |
||
532 | |||
533 | static inline bool writes_pred(struct ir3_instruction *instr) |
||
534 | { |
||
535 | if (instr->regs_count > 0) { |
||
536 | struct ir3_register *dst = instr->regs[0]; |
||
537 | return reg_num(dst) == REG_P0; |
||
538 | } |
||
539 | return false; |
||
540 | } |
||
541 | |||
542 | /* returns defining instruction for reg */ |
||
543 | /* TODO better name */ |
||
544 | static inline struct ir3_instruction *ssa(struct ir3_register *reg) |
||
545 | { |
||
546 | if (reg->flags & IR3_REG_SSA) |
||
547 | return reg->instr; |
||
548 | return NULL; |
||
549 | } |
||
550 | |||
/* Two instruction pointers conflict when both are non-NULL and they
 * refer to different instructions.
 */
static inline bool conflicts(struct ir3_instruction *a,
		struct ir3_instruction *b)
{
	if (!a || !b)
		return false;

	return a != b;
}
||
556 | |||
557 | static inline bool reg_gpr(struct ir3_register *r) |
||
558 | { |
||
559 | if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR)) |
||
560 | return false; |
||
561 | if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) |
||
562 | return false; |
||
563 | return true; |
||
564 | } |
||
565 | |||
566 | /* some cat2 instructions (ie. those which are not float) can embed an |
||
567 | * immediate: |
||
568 | */ |
||
569 | static inline bool ir3_cat2_int(opc_t opc) |
||
570 | { |
||
571 | switch (opc) { |
||
572 | case OPC_ADD_U: |
||
573 | case OPC_ADD_S: |
||
574 | case OPC_SUB_U: |
||
575 | case OPC_SUB_S: |
||
576 | case OPC_CMPS_U: |
||
577 | case OPC_CMPS_S: |
||
578 | case OPC_MIN_U: |
||
579 | case OPC_MIN_S: |
||
580 | case OPC_MAX_U: |
||
581 | case OPC_MAX_S: |
||
582 | case OPC_CMPV_U: |
||
583 | case OPC_CMPV_S: |
||
584 | case OPC_MUL_U: |
||
585 | case OPC_MUL_S: |
||
586 | case OPC_MULL_U: |
||
587 | case OPC_CLZ_S: |
||
588 | case OPC_ABSNEG_S: |
||
589 | case OPC_AND_B: |
||
590 | case OPC_OR_B: |
||
591 | case OPC_NOT_B: |
||
592 | case OPC_XOR_B: |
||
593 | case OPC_BFREV_B: |
||
594 | case OPC_CLZ_B: |
||
595 | case OPC_SHL_B: |
||
596 | case OPC_SHR_B: |
||
597 | case OPC_ASHR_B: |
||
598 | case OPC_MGEN_B: |
||
599 | case OPC_GETBIT_B: |
||
600 | case OPC_CBITS_B: |
||
601 | case OPC_BARY_F: |
||
602 | return true; |
||
603 | |||
604 | default: |
||
605 | return false; |
||
606 | } |
||
607 | } |
||
608 | |||
609 | |||
610 | /* map cat2 instruction to valid abs/neg flags: */ |
||
611 | static inline unsigned ir3_cat2_absneg(opc_t opc) |
||
612 | { |
||
613 | switch (opc) { |
||
614 | case OPC_ADD_F: |
||
615 | case OPC_MIN_F: |
||
616 | case OPC_MAX_F: |
||
617 | case OPC_MUL_F: |
||
618 | case OPC_SIGN_F: |
||
619 | case OPC_CMPS_F: |
||
620 | case OPC_ABSNEG_F: |
||
621 | case OPC_CMPV_F: |
||
622 | case OPC_FLOOR_F: |
||
623 | case OPC_CEIL_F: |
||
624 | case OPC_RNDNE_F: |
||
625 | case OPC_RNDAZ_F: |
||
626 | case OPC_TRUNC_F: |
||
627 | case OPC_BARY_F: |
||
628 | return IR3_REG_FABS | IR3_REG_FNEG; |
||
629 | |||
630 | case OPC_ADD_U: |
||
631 | case OPC_ADD_S: |
||
632 | case OPC_SUB_U: |
||
633 | case OPC_SUB_S: |
||
634 | case OPC_CMPS_U: |
||
635 | case OPC_CMPS_S: |
||
636 | case OPC_MIN_U: |
||
637 | case OPC_MIN_S: |
||
638 | case OPC_MAX_U: |
||
639 | case OPC_MAX_S: |
||
640 | case OPC_CMPV_U: |
||
641 | case OPC_CMPV_S: |
||
642 | case OPC_MUL_U: |
||
643 | case OPC_MUL_S: |
||
644 | case OPC_MULL_U: |
||
645 | case OPC_CLZ_S: |
||
646 | return 0; |
||
647 | |||
648 | case OPC_ABSNEG_S: |
||
649 | return IR3_REG_SABS | IR3_REG_SNEG; |
||
650 | |||
651 | case OPC_AND_B: |
||
652 | case OPC_OR_B: |
||
653 | case OPC_NOT_B: |
||
654 | case OPC_XOR_B: |
||
655 | case OPC_BFREV_B: |
||
656 | case OPC_CLZ_B: |
||
657 | case OPC_SHL_B: |
||
658 | case OPC_SHR_B: |
||
659 | case OPC_ASHR_B: |
||
660 | case OPC_MGEN_B: |
||
661 | case OPC_GETBIT_B: |
||
662 | case OPC_CBITS_B: |
||
663 | return IR3_REG_BNOT; |
||
664 | |||
665 | default: |
||
666 | return 0; |
||
667 | } |
||
668 | } |
||
669 | |||
670 | /* map cat3 instructions to valid abs/neg flags: */ |
||
671 | static inline unsigned ir3_cat3_absneg(opc_t opc) |
||
672 | { |
||
673 | switch (opc) { |
||
674 | case OPC_MAD_F16: |
||
675 | case OPC_MAD_F32: |
||
676 | case OPC_SEL_F16: |
||
677 | case OPC_SEL_F32: |
||
678 | return IR3_REG_FNEG; |
||
679 | |||
680 | case OPC_MAD_U16: |
||
681 | case OPC_MADSH_U16: |
||
682 | case OPC_MAD_S16: |
||
683 | case OPC_MADSH_M16: |
||
684 | case OPC_MAD_U24: |
||
685 | case OPC_MAD_S24: |
||
686 | case OPC_SEL_S16: |
||
687 | case OPC_SEL_S32: |
||
688 | case OPC_SAD_S16: |
||
689 | case OPC_SAD_S32: |
||
690 | /* neg *may* work on 3rd src.. */ |
||
691 | |||
692 | case OPC_SEL_B16: |
||
693 | case OPC_SEL_B32: |
||
694 | |||
695 | default: |
||
696 | return 0; |
||
697 | } |
||
698 | } |
||
699 | |||
/* Append 'val' to a growable array.
 *
 * 'arr' names the array; the macro pastes tokens to reach its
 * companions 'arr_count' (elements in use) and 'arr_sz' (allocated
 * elements).  When the array is full, the capacity is doubled (with a
 * minimum of 16) via realloc() before the value is stored.
 *
 * NOTE: 'arr' is expanded several times, and the realloc() result is
 * assigned straight back to 'arr' without a check for failure.
 */
#define array_insert(arr, val) do { \
		if (arr ## _count == arr ## _sz) { \
			arr ## _sz = MAX2(2 * arr ## _sz, 16); \
			arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
		} \
		arr[arr ##_count++] = val; \
	} while (0)
||
707 | |||
/* iterator for an instruction's sources (reg), also returns src #:
 *
 * Visits regs[1] .. regs[regs_count - 1] (regs[0] is skipped), storing
 * each register in __srcreg and its zero based src index in __n.  NULL
 * entries are skipped.  __n is declared by the macro itself.
 *
 * NOTE: this expands to 'if (..) for (..) if (..)', so a trailing
 * 'else' would bind to the macro's own 'if'.
 */
#define foreach_src_n(__srcreg, __n, __instr) \
	if ((__instr)->regs_count) \
		for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \
			if ((__srcreg = (__instr)->regs[__n + 1]))

/* iterator for an instruction's sources (reg), for when the src # is
 * not needed:
 */
#define foreach_src(__srcreg, __instr) \
	foreach_src_n(__srcreg, __i, __instr)
||
717 | |||
718 | static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr) |
||
719 | { |
||
720 | if (instr->fanin) |
||
721 | return instr->regs_count + 2; |
||
722 | if (instr->address) |
||
723 | return instr->regs_count + 1; |
||
724 | return instr->regs_count; |
||
725 | } |
||
726 | |||
727 | static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n) |
||
728 | { |
||
729 | if (n == (instr->regs_count + 1)) |
||
730 | return instr->fanin; |
||
731 | if (n == (instr->regs_count + 0)) |
||
732 | return instr->address; |
||
733 | return ssa(instr->regs[n]); |
||
734 | } |
||
735 | |||
/* regs_count when the instruction has an address dependency,
 * regs_count - 1 otherwise:
 */
#define __src_cnt(__instr) ((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1)

/* iterator for an instruction's SSA sources (instr), also returns src #:
 *
 * Walks the src registers and then the address/fanin slots (see
 * __ssa_src_cnt()/__ssa_src_n()), storing each non-NULL source
 * instruction in __srcinst and its zero based index in __n.  __n is
 * declared by the macro itself.
 */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
	if ((__instr)->regs_count) \
		for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
			if ((__srcinst = __ssa_src_n(__instr, __n + 1)))

/* iterator for an instruction's SSA sources (instr), for when the
 * src # is not needed:
 */
#define foreach_ssa_src(__srcinst, __instr) \
	foreach_ssa_src_n(__srcinst, __i, __instr)
||
747 | |||
748 | |||
/* dump: */
#include <stdio.h>   /* FILE */
void ir3_dump(struct ir3 *shader, const char *name,
		struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
		FILE *f);
void ir3_dump_instr_single(struct ir3_instruction *instr);
void ir3_dump_instr_list(struct ir3_instruction *instr);

/* flatten if/else: */
int ir3_block_flatten(struct ir3_block *block);

/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
		struct ir3_instruction *consumer, unsigned n);
void ir3_block_depth(struct ir3_block *block);

/* copy-propagate: */
void ir3_block_cp(struct ir3_block *block);

/* group neighbors and insert mov's to resolve conflicts: */
void ir3_block_group(struct ir3_block *block);

/* scheduling: */
int ir3_block_sched(struct ir3_block *block);

/* register assignment: */
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
		bool frag_coord, bool frag_face);

/* legalize: */
void ir3_block_legalize(struct ir3_block *block,
		bool *has_samp, int *max_bary);
||
781 | |||
782 | /* ************************************************************************* */ |
||
783 | /* instruction helpers */ |
||
784 | |||
785 | static inline struct ir3_instruction * |
||
786 | ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) |
||
787 | { |
||
788 | struct ir3_instruction *instr = |
||
789 | ir3_instr_create(block, 1, 0); |
||
790 | ir3_reg_create(instr, 0, 0); /* dst */ |
||
791 | ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; |
||
792 | instr->cat1.src_type = type; |
||
793 | instr->cat1.dst_type = type; |
||
794 | return instr; |
||
795 | } |
||
796 | |||
797 | static inline struct ir3_instruction * |
||
798 | ir3_COV(struct ir3_block *block, struct ir3_instruction *src, |
||
799 | type_t src_type, type_t dst_type) |
||
800 | { |
||
801 | struct ir3_instruction *instr = |
||
802 | ir3_instr_create(block, 1, 0); |
||
803 | ir3_reg_create(instr, 0, 0); /* dst */ |
||
804 | ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; |
||
805 | instr->cat1.src_type = src_type; |
||
806 | instr->cat1.dst_type = dst_type; |
||
807 | return instr; |
||
808 | } |
||
809 | |||
/* Define ir3_<name>(): a builder for a one-source instruction of
 * category CAT with opcode OPC_<name>.  The generated function creates
 * the dst register followed by one SSA src register that refers to 'a'
 * and additionally carries 'aflags'.
 */
#define INSTR1(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, CAT, OPC_##name); \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
	return instr; \
}
||
821 | |||
/* Define ir3_<name>(): a builder for a two-source instruction of
 * category CAT with opcode OPC_<name>.  The generated function creates
 * the dst register followed by SSA src registers for 'a' and 'b' (in
 * that order), each with its own extra flags.
 */
#define INSTR2(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags, \
		struct ir3_instruction *b, unsigned bflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, CAT, OPC_##name); \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
	ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
	return instr; \
}
||
835 | |||
/* Define ir3_<name>(): a builder for a three-source instruction of
 * category CAT with opcode OPC_<name>.  The generated function creates
 * the dst register followed by SSA src registers for 'a', 'b' and 'c'
 * (in that order), each with its own extra flags.
 */
#define INSTR3(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
		struct ir3_instruction *a, unsigned aflags, \
		struct ir3_instruction *b, unsigned bflags, \
		struct ir3_instruction *c, unsigned cflags) \
{ \
	struct ir3_instruction *instr = \
		ir3_instr_create(block, CAT, OPC_##name); \
	ir3_reg_create(instr, 0, 0);   /* dst */ \
	ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
	ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
	ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \
	return instr; \
}
||
851 | |||
852 | /* cat0 instructions: */ |
||
853 | INSTR1(0, KILL); |
||
854 | |||
855 | /* cat2 instructions, most 2 src but some 1 src: */ |
||
856 | INSTR2(2, ADD_F) |
||
857 | INSTR2(2, MIN_F) |
||
858 | INSTR2(2, MAX_F) |
||
859 | INSTR2(2, MUL_F) |
||
860 | INSTR1(2, SIGN_F) |
||
861 | INSTR2(2, CMPS_F) |
||
862 | INSTR1(2, ABSNEG_F) |
||
863 | INSTR2(2, CMPV_F) |
||
864 | INSTR1(2, FLOOR_F) |
||
865 | INSTR1(2, CEIL_F) |
||
866 | INSTR1(2, RNDNE_F) |
||
867 | INSTR1(2, RNDAZ_F) |
||
868 | INSTR1(2, TRUNC_F) |
||
869 | INSTR2(2, ADD_U) |
||
870 | INSTR2(2, ADD_S) |
||
871 | INSTR2(2, SUB_U) |
||
872 | INSTR2(2, SUB_S) |
||
873 | INSTR2(2, CMPS_U) |
||
874 | INSTR2(2, CMPS_S) |
||
875 | INSTR2(2, MIN_U) |
||
876 | INSTR2(2, MIN_S) |
||
877 | INSTR2(2, MAX_U) |
||
878 | INSTR2(2, MAX_S) |
||
879 | INSTR1(2, ABSNEG_S) |
||
880 | INSTR2(2, AND_B) |
||
881 | INSTR2(2, OR_B) |
||
882 | INSTR1(2, NOT_B) |
||
883 | INSTR2(2, XOR_B) |
||
884 | INSTR2(2, CMPV_U) |
||
885 | INSTR2(2, CMPV_S) |
||
886 | INSTR2(2, MUL_U) |
||
887 | INSTR2(2, MUL_S) |
||
888 | INSTR2(2, MULL_U) |
||
889 | INSTR1(2, BFREV_B) |
||
890 | INSTR1(2, CLZ_S) |
||
891 | INSTR1(2, CLZ_B) |
||
892 | INSTR2(2, SHL_B) |
||
893 | INSTR2(2, SHR_B) |
||
894 | INSTR2(2, ASHR_B) |
||
895 | INSTR2(2, BARY_F) |
||
896 | INSTR2(2, MGEN_B) |
||
897 | INSTR2(2, GETBIT_B) |
||
898 | INSTR1(2, SETRM) |
||
899 | INSTR1(2, CBITS_B) |
||
900 | INSTR2(2, SHB) |
||
901 | INSTR2(2, MSAD) |
||
902 | |||
903 | /* cat3 instructions: */ |
||
904 | INSTR3(3, MAD_U16) |
||
905 | INSTR3(3, MADSH_U16) |
||
906 | INSTR3(3, MAD_S16) |
||
907 | INSTR3(3, MADSH_M16) |
||
908 | INSTR3(3, MAD_U24) |
||
909 | INSTR3(3, MAD_S24) |
||
910 | INSTR3(3, MAD_F16) |
||
911 | INSTR3(3, MAD_F32) |
||
912 | INSTR3(3, SEL_B16) |
||
913 | INSTR3(3, SEL_B32) |
||
914 | INSTR3(3, SEL_S16) |
||
915 | INSTR3(3, SEL_S32) |
||
916 | INSTR3(3, SEL_F16) |
||
917 | INSTR3(3, SEL_F32) |
||
918 | INSTR3(3, SAD_S16) |
||
919 | INSTR3(3, SAD_S32) |
||
920 | |||
921 | /* cat4 instructions: */ |
||
922 | INSTR1(4, RCP) |
||
923 | INSTR1(4, RSQ) |
||
924 | INSTR1(4, LOG2) |
||
925 | INSTR1(4, EXP2) |
||
926 | INSTR1(4, SIN) |
||
927 | INSTR1(4, COS) |
||
928 | INSTR1(4, SQRT) |
||
929 | |||
930 | /* cat5 instructions: */ |
||
931 | INSTR1(5, DSX) |
||
932 | INSTR1(5, DSY) |
||
933 | |||
934 | static inline struct ir3_instruction * |
||
935 | ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, |
||
936 | unsigned wrmask, unsigned flags, unsigned samp, unsigned tex, |
||
937 | struct ir3_instruction *src0, struct ir3_instruction *src1) |
||
938 | { |
||
939 | struct ir3_instruction *sam; |
||
940 | struct ir3_register *reg; |
||
941 | |||
942 | sam = ir3_instr_create(block, 5, opc); |
||
943 | sam->flags |= flags; |
||
944 | ir3_reg_create(sam, 0, 0)->wrmask = wrmask; |
||
945 | if (src0) { |
||
946 | reg = ir3_reg_create(sam, 0, IR3_REG_SSA); |
||
947 | reg->wrmask = (1 << (src0->regs_count - 1)) - 1; |
||
948 | reg->instr = src0; |
||
949 | } |
||
950 | if (src1) { |
||
951 | reg = ir3_reg_create(sam, 0, IR3_REG_SSA); |
||
952 | reg->instr = src1; |
||
953 | reg->wrmask = (1 << (src1->regs_count - 1)) - 1; |
||
954 | } |
||
955 | sam->cat5.samp = samp; |
||
956 | sam->cat5.tex = tex; |
||
957 | sam->cat5.type = type; |
||
958 | |||
959 | return sam; |
||
960 | } |
||
961 | |||
962 | /* cat6 instructions: */ |
||
963 | INSTR2(6, LDLV) |
||
964 | INSTR2(6, LDG) |
||
965 | |||
966 | /* ************************************************************************* */ |
||
967 | /* TODO: split this out, or find an existing helper to use instead (e.g. main/bitset.h) */ |
||
968 | |||
969 | #include <string.h> |
||
970 | |||
/*
 * Number of register slots per bank.  regmask_idx() maps registers
 * without IR3_REG_HALF to [0, MAX_REG) and registers with it to
 * [MAX_REG, 2 * MAX_REG), hence the factor of two in the mask size.
 */
#define MAX_REG 256

/* Bitmask with one bit per slot, packed eight slots to a byte. */
typedef uint8_t regmask_t[(2 * MAX_REG) / 8];
||
974 | |||
975 | static inline unsigned regmask_idx(struct ir3_register *reg) |
||
976 | { |
||
977 | unsigned num = reg->num; |
||
978 | debug_assert(num < MAX_REG); |
||
979 | if (reg->flags & IR3_REG_HALF) |
||
980 | num += MAX_REG; |
||
981 | return num; |
||
982 | } |
||
983 | |||
984 | static inline void regmask_init(regmask_t *regmask) |
||
985 | { |
||
986 | memset(regmask, 0, sizeof(*regmask)); |
||
987 | } |
||
988 | |||
989 | static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) |
||
990 | { |
||
991 | unsigned idx = regmask_idx(reg); |
||
992 | if (reg->flags & IR3_REG_RELATIV) { |
||
993 | unsigned i; |
||
994 | for (i = 0; i < reg->size; i++, idx++) |
||
995 | (*regmask)[idx / 8] |= 1 << (idx % 8); |
||
996 | } else { |
||
997 | unsigned mask; |
||
998 | for (mask = reg->wrmask; mask; mask >>= 1, idx++) |
||
999 | if (mask & 1) |
||
1000 | (*regmask)[idx / 8] |= 1 << (idx % 8); |
||
1001 | } |
||
1002 | } |
||
1003 | |||
1004 | static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) |
||
1005 | { |
||
1006 | unsigned i; |
||
1007 | for (i = 0; i < ARRAY_SIZE(*dst); i++) |
||
1008 | (*dst)[i] = (*a)[i] | (*b)[i]; |
||
1009 | } |
||
1010 | |||
1011 | /* set bits in a if not set in b, conceptually: |
||
1012 | * a |= (reg & ~b) |
||
1013 | */ |
||
1014 | static inline void regmask_set_if_not(regmask_t *a, |
||
1015 | struct ir3_register *reg, regmask_t *b) |
||
1016 | { |
||
1017 | unsigned idx = regmask_idx(reg); |
||
1018 | if (reg->flags & IR3_REG_RELATIV) { |
||
1019 | unsigned i; |
||
1020 | for (i = 0; i < reg->size; i++, idx++) |
||
1021 | if (!((*b)[idx / 8] & (1 << (idx % 8)))) |
||
1022 | (*a)[idx / 8] |= 1 << (idx % 8); |
||
1023 | } else { |
||
1024 | unsigned mask; |
||
1025 | for (mask = reg->wrmask; mask; mask >>= 1, idx++) |
||
1026 | if (mask & 1) |
||
1027 | if (!((*b)[idx / 8] & (1 << (idx % 8)))) |
||
1028 | (*a)[idx / 8] |= 1 << (idx % 8); |
||
1029 | } |
||
1030 | } |
||
1031 | |||
1032 | static inline bool regmask_get(regmask_t *regmask, |
||
1033 | struct ir3_register *reg) |
||
1034 | { |
||
1035 | unsigned idx = regmask_idx(reg); |
||
1036 | if (reg->flags & IR3_REG_RELATIV) { |
||
1037 | unsigned i; |
||
1038 | for (i = 0; i < reg->size; i++, idx++) |
||
1039 | if ((*regmask)[idx / 8] & (1 << (idx % 8))) |
||
1040 | return true; |
||
1041 | } else { |
||
1042 | unsigned mask; |
||
1043 | for (mask = reg->wrmask; mask; mask >>= 1, idx++) |
||
1044 | if (mask & 1) |
||
1045 | if ((*regmask)[idx / 8] & (1 << (idx % 8))) |
||
1046 | return true; |
||
1047 | } |
||
1048 | return false; |
||
1049 | } |
||
1050 | |||
1051 | /* ************************************************************************* */ |
||
1052 | |||
1053 | #endif /* IR3_H_ */ |