Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2011 Christoph Bumiller |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | */ |
||
22 | |||
23 | #ifndef __NV50_IR_H__ |
||
24 | #define __NV50_IR_H__ |
||
25 | |||
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <deque>
#include <list>
#include <vector>
||
32 | |||
33 | #include "nv50_ir_util.h" |
||
34 | #include "nv50_ir_graph.h" |
||
35 | |||
36 | #include "nv50_ir_driver.h" |
||
37 | |||
38 | namespace nv50_ir { |
||
39 | |||
// Opcodes of the IR. Enumerators are numbered consecutively from OP_NOP = 0,
// so OP_LAST equals the number of opcodes. Opcodes ordered before OP_MOV are
// the ones Instruction::isPseudo() reports as pseudo-instructions.
enum operation
{
   OP_NOP = 0,
   OP_PHI,
   OP_UNION, // unify a new definition and several source values
   OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
   OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value
   OP_CONSTRAINT, // copy values into consecutive registers
   OP_MOV, // simple copy, no modifiers allowed
   OP_LOAD,
   OP_STORE,
   OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds
   OP_SUB,
   OP_MUL,
   OP_DIV,
   OP_MOD,
   OP_MAD,
   OP_FMA,
   OP_SAD, // abs(src0 - src1) + src2
   OP_ABS,
   OP_NEG,
   OP_NOT,
   OP_AND,
   OP_OR,
   OP_XOR,
   OP_SHL,
   OP_SHR,
   OP_MAX,
   OP_MIN,
   OP_SAT, // CLAMP(f32, 0.0, 1.0)
   OP_CEIL,
   OP_FLOOR,
   OP_TRUNC,
   OP_CVT,
   OP_SET_AND, // dst = (src0 CMP src1) & src2
   OP_SET_OR,
   OP_SET_XOR,
   OP_SET,
   OP_SELP, // dst = src2 ? src0 : src1
   OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1
   OP_RCP,
   OP_RSQ,
   OP_LG2,
   OP_SIN,
   OP_COS,
   OP_EX2,
   OP_EXP, // exponential (base M_E)
   OP_LOG, // natural logarithm
   OP_PRESIN,
   OP_PREEX2,
   OP_SQRT,
   OP_POW,
   OP_BRA,
   OP_CALL,
   OP_RET,
   OP_CONT,
   OP_BREAK,
   OP_PRERET,
   OP_PRECONT,
   OP_PREBREAK,
   OP_BRKPT, // breakpoint (not related to loops)
   OP_JOINAT, // push control flow convergence point
   OP_JOIN, // converge
   OP_DISCARD,
   OP_EXIT,
   OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
   OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
   OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
   OP_EXPORT,
   OP_LINTERP,
   OP_PINTERP,
   OP_EMIT, // emit vertex
   OP_RESTART, // restart primitive
   OP_TEX,
   OP_TXB, // texture bias
   OP_TXL, // texture lod
   OP_TXF, // texel fetch
   OP_TXQ, // texture size query
   OP_TXD, // texture derivatives
   OP_TXG, // texture gather
   OP_TEXCSAA, // texture op for coverage sampling
   OP_TEXPREP, // turn cube map array into 2d array coordinates
   OP_SULDB, // surface load (raw)
   OP_SULDP, // surface load (formatted)
   OP_SUSTB, // surface store (raw)
   OP_SUSTP, // surface store (formatted)
   OP_SUREDB,
   OP_SUREDP, // surface reduction (atomic op)
   OP_SULEA, // surface load effective address
   OP_SUBFM, // surface bitfield manipulation
   OP_SUCLAMP, // clamp surface coordinates
   OP_SUEAU, // surface effective address
   OP_MADSP, // special integer multiply-add
   OP_TEXBAR, // texture dependency barrier
   OP_DFDX,
   OP_DFDY,
   OP_RDSV, // read system value
   OP_WRSV, // write system value
   OP_QUADOP,
   OP_QUADON,
   OP_QUADPOP,
   OP_POPCNT, // bitcount(src0 & src1)
   OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
   OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
   OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
   OP_ATOM,
   OP_BAR, // execution barrier, sources = { id, thread count, predicate }
   OP_VADD, // byte/word vector operations
   OP_VAVG,
   OP_VMIN,
   OP_VMAX,
   OP_VSAD,
   OP_VSET,
   OP_VSHR,
   OP_VSHL,
   OP_VSEL,
   OP_CCTL, // cache control
   OP_LAST
};
||
159 | |||
// various instruction-specific modifier definitions Instruction::subOp
// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)
//
// The values are only unique per opcode family (note how many of them are 1).
// Function-like macros parenthesize their arguments so that expressions can
// be passed without operator-precedence surprises.
#define NV50_IR_SUBOP_MUL_HIGH     1
#define NV50_IR_SUBOP_EMIT_RESTART 1
#define NV50_IR_SUBOP_LDC_IL       1
#define NV50_IR_SUBOP_LDC_IS       2
#define NV50_IR_SUBOP_LDC_ISL      3
#define NV50_IR_SUBOP_SHIFT_WRAP   1
#define NV50_IR_SUBOP_EMU_PRERET   1
#define NV50_IR_SUBOP_TEXBAR(n)    (n)
#define NV50_IR_SUBOP_MOV_FINAL    1
#define NV50_IR_SUBOP_EXTBF_REV    1
#define NV50_IR_SUBOP_PERMT_F4E    1
#define NV50_IR_SUBOP_PERMT_B4E    2
#define NV50_IR_SUBOP_PERMT_RC8    3
#define NV50_IR_SUBOP_PERMT_ECL    4
#define NV50_IR_SUBOP_PERMT_ECR    5
#define NV50_IR_SUBOP_PERMT_RC16   6
#define NV50_IR_SUBOP_BAR_SYNC     0
#define NV50_IR_SUBOP_BAR_ARRIVE   1
#define NV50_IR_SUBOP_BAR_RED_AND  2
#define NV50_IR_SUBOP_BAR_RED_OR   3
#define NV50_IR_SUBOP_BAR_RED_POPC 4
// MEMBAR subOp = direction (bits 0-1) | scope (bits 2 and up)
#define NV50_IR_SUBOP_MEMBAR_L     1
#define NV50_IR_SUBOP_MEMBAR_S     2
#define NV50_IR_SUBOP_MEMBAR_M     3
#define NV50_IR_SUBOP_MEMBAR_CTA  (0 << 2)
#define NV50_IR_SUBOP_MEMBAR_GL   (1 << 2)
#define NV50_IR_SUBOP_MEMBAR_SYS  (2 << 2)
#define NV50_IR_SUBOP_MEMBAR_DIR(m)   ((m) & 0x3)
#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)
// d and s are name suffixes (L/S/M and CTA/GL/SYS), not expressions
#define NV50_IR_SUBOP_MEMBAR(d,s) \
   (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
#define NV50_IR_SUBOP_ATOM_ADD      0
#define NV50_IR_SUBOP_ATOM_MIN      1
#define NV50_IR_SUBOP_ATOM_MAX      2
#define NV50_IR_SUBOP_ATOM_INC      3
#define NV50_IR_SUBOP_ATOM_DEC      4
#define NV50_IR_SUBOP_ATOM_AND      5
#define NV50_IR_SUBOP_ATOM_OR       6
#define NV50_IR_SUBOP_ATOM_XOR      7
#define NV50_IR_SUBOP_ATOM_CAS      8
#define NV50_IR_SUBOP_ATOM_EXCH     9
#define NV50_IR_SUBOP_CCTL_IV       5
#define NV50_IR_SUBOP_CCTL_IVALL    6
#define NV50_IR_SUBOP_SUST_IGN      0
#define NV50_IR_SUBOP_SUST_TRAP     1
#define NV50_IR_SUBOP_SUST_SDCL     3
#define NV50_IR_SUBOP_SULD_ZERO     0
#define NV50_IR_SUBOP_SULD_TRAP     1
#define NV50_IR_SUBOP_SULD_SDCL     3
#define NV50_IR_SUBOP_SUBFM_3D      1
#define NV50_IR_SUBOP_SUCLAMP_2D    0x10
// r is added to the mode base (0, 5 or 10); d == 2 sets the SUCLAMP_2D bit
#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_MADSP_SD      0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
// This will have to do for now.
// The bitfields are supposed to correspond to nve4 ISA.
#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
#define NV50_IR_SUBOP_V1(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x0000)
#define NV50_IR_SUBOP_V2(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x4000)
#define NV50_IR_SUBOP_V4(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x8000)
// extracts bits 14 and up, i.e. 0 / 1 / 2 for a V1 / V2 / V4 encoding
#define NV50_IR_SUBOP_Vn(n)        ((n) >> 14)
||
226 | |||
// Operand / result data types. U = unsigned integer, S = signed integer,
// F = floating point; the number is the width in bits.
enum DataType
{
   TYPE_NONE,
   TYPE_U8,
   TYPE_S8,
   TYPE_U16,
   TYPE_S16,
   TYPE_U32,
   TYPE_S32,
   TYPE_U64, // 64 bit operations are only lowered after register allocation
   TYPE_S64,
   TYPE_F16,
   TYPE_F32,
   TYPE_F64,
   TYPE_B96,
   TYPE_B128
};
||
244 | |||
// Condition codes. Several enumerators are aliases of the same value; which
// name applies depends on the register file of the tested value (see the
// per-enumerator notes).
enum CondCode
{
   CC_FL = 0,
   CC_NEVER = CC_FL, // when used with FILE_FLAGS
   CC_LT = 1,
   CC_EQ = 2,
   CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE
   CC_LE = 3,
   CC_GT = 4,
   CC_NE = 5,
   CC_P = CC_NE,
   CC_GE = 6,
   CC_TR = 7,
   CC_ALWAYS = CC_TR,
   CC_U = 8,
   CC_LTU = 9,
   CC_EQU = 10,
   CC_LEU = 11,
   CC_GTU = 12,
   CC_NEU = 13,
   CC_GEU = 14,
   CC_NO = 0x10,
   CC_NC = 0x11,
   CC_NS = 0x12,
   CC_NA = 0x13,
   CC_A = 0x14,
   CC_S = 0x15,
   CC_C = 0x16,
   CC_O = 0x17
};
||
275 | |||
// Rounding modes; the *I variants round to an integer value.
enum RoundMode
{
   ROUND_N, // nearest
   ROUND_M, // towards -inf
   ROUND_Z, // towards 0
   ROUND_P, // towards +inf
   ROUND_NI, // nearest integer
   ROUND_MI, // to integer towards -inf
   ROUND_ZI, // to integer towards 0
   ROUND_PI  // to integer towards +inf
};
||
287 | |||
// Cache modes. CACHE_WB and CACHE_WT are aliases of CACHE_CA and CACHE_CV.
enum CacheMode
{
   CACHE_CA,            // cache at all levels
   CACHE_WB = CACHE_CA, // cache write back
   CACHE_CG,            // cache at global level
   CACHE_CS,            // cache streaming
   CACHE_CV,            // cache as volatile
   CACHE_WT = CACHE_CV  // cache write-through
};
||
297 | |||
// Register files and memory spaces a value can live in. Everything up to and
// including LAST_REGISTER_FILE is a register file; DATA_FILE_COUNT is the
// number of distinct files.
enum DataFile
{
   FILE_NULL = 0,
   FILE_GPR,
   FILE_PREDICATE,       // boolean predicate
   FILE_FLAGS,           // zero/sign/carry/overflow bits
   FILE_ADDRESS,
   LAST_REGISTER_FILE = FILE_ADDRESS,
   FILE_IMMEDIATE,
   FILE_MEMORY_CONST,
   FILE_SHADER_INPUT,
   FILE_SHADER_OUTPUT,
   FILE_MEMORY_GLOBAL,
   FILE_MEMORY_SHARED,
   FILE_MEMORY_LOCAL,
   FILE_SYSTEM_VALUE,
   DATA_FILE_COUNT
};
||
316 | |||
// Texture target kinds. TEX_TARGET_COUNT is the number of targets and sizes
// TexInstruction::Target::descTable, which is indexed by these values.
enum TexTarget
{
   TEX_TARGET_1D,
   TEX_TARGET_2D,
   TEX_TARGET_2D_MS,
   TEX_TARGET_3D,
   TEX_TARGET_CUBE,
   TEX_TARGET_1D_SHADOW,
   TEX_TARGET_2D_SHADOW,
   TEX_TARGET_CUBE_SHADOW,
   TEX_TARGET_1D_ARRAY,
   TEX_TARGET_2D_ARRAY,
   TEX_TARGET_2D_MS_ARRAY,
   TEX_TARGET_CUBE_ARRAY,
   TEX_TARGET_1D_ARRAY_SHADOW,
   TEX_TARGET_2D_ARRAY_SHADOW,
   TEX_TARGET_RECT,
   TEX_TARGET_RECT_SHADOW,
   TEX_TARGET_CUBE_ARRAY_SHADOW,
   TEX_TARGET_BUFFER,
   TEX_TARGET_COUNT
};
||
339 | |||
// System value semantics (stored in Storage::data.sv.sv). SV_LAST is the
// number of enumerators before it.
enum SVSemantic
{
   SV_POSITION, // WPOS
   SV_VERTEX_ID,
   SV_INSTANCE_ID,
   SV_INVOCATION_ID,
   SV_PRIMITIVE_ID,
   SV_VERTEX_COUNT, // gl_PatchVerticesIn
   SV_LAYER,
   SV_VIEWPORT_INDEX,
   SV_YDIR,
   SV_FACE,
   SV_POINT_SIZE,
   SV_POINT_COORD,
   SV_CLIP_DISTANCE,
   SV_SAMPLE_INDEX,
   SV_TESS_FACTOR,
   SV_TESS_COORD,
   SV_TID,
   SV_CTAID,
   SV_NTID,
   SV_GRIDID,
   SV_NCTAID,
   SV_LANEID,
   SV_PHYSID,
   SV_NPHYSID,
   SV_CLOCK,
   SV_LBASE,
   SV_SBASE,
   SV_UNDEFINED,
   SV_LAST
};
||
372 | |||
// Forward declarations, so the classes below can refer to each other through
// pointers and references before their definitions are seen.
class Program;
class Function;
class BasicBlock;

class Target;

class Instruction;
class CmpInstruction;
class TexInstruction;
class FlowInstruction;

class Value;
class LValue;
class Symbol;
class ImmediateValue;
||
388 | |||
389 | struct Storage |
||
390 | { |
||
391 | DataFile file; |
||
392 | int8_t fileIndex; // signed, may be indirect for CONST[] |
||
393 | uint8_t size; // this should match the Instruction type's size |
||
394 | DataType type; // mainly for pretty printing |
||
395 | union { |
||
396 | uint64_t u64; // immediate values |
||
397 | uint32_t u32; |
||
398 | uint16_t u16; |
||
399 | uint8_t u8; |
||
400 | int64_t s64; |
||
401 | int32_t s32; |
||
402 | int16_t s16; |
||
403 | int8_t s8; |
||
404 | float f32; |
||
405 | double f64; |
||
406 | int32_t offset; // offset from 0 (base of address space) |
||
407 | int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4) |
||
408 | struct { |
||
409 | SVSemantic sv; |
||
410 | int index; |
||
411 | } sv; |
||
412 | } data; |
||
413 | }; |
||
414 | |||
// Source modifier bits (combined in Modifier::bits).
// precedence: NOT after SAT after NEG after ABS
#define NV50_IR_MOD_ABS (1 << 0)
#define NV50_IR_MOD_NEG (1 << 1)
#define NV50_IR_MOD_SAT (1 << 2)
#define NV50_IR_MOD_NOT (1 << 3)
#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)

// Interpolation setting (Instruction::ipa): bits 0-1 hold the mode,
// bits 2-3 hold the sample location.
#define NV50_IR_INTERP_MODE_MASK   0x3
#define NV50_IR_INTERP_LINEAR      (0 << 0)
#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)
#define NV50_IR_INTERP_FLAT        (2 << 0)
#define NV50_IR_INTERP_SC          (3 << 0) // what exactly is that ?
#define NV50_IR_INTERP_SAMPLE_MASK 0xc
#define NV50_IR_INTERP_DEFAULT     (0 << 2)
#define NV50_IR_INTERP_CENTROID    (1 << 2)
#define NV50_IR_INTERP_OFFSET      (2 << 2)
#define NV50_IR_INTERP_SAMPLEID    (3 << 2)
||
432 | |||
433 | // do we really want this to be a class ? |
||
434 | class Modifier |
||
435 | { |
||
436 | public: |
||
437 | Modifier() : bits(0) { } |
||
438 | Modifier(unsigned int m) : bits(m) { } |
||
439 | Modifier(operation op); |
||
440 | |||
441 | // @return new Modifier applying a after b (asserts if unrepresentable) |
||
442 | Modifier operator*(const Modifier) const; |
||
443 | Modifier operator*=(const Modifier m) { *this = *this * m; return *this; } |
||
444 | Modifier operator==(const Modifier m) const { return m.bits == bits; } |
||
445 | Modifier operator!=(const Modifier m) const { return m.bits != bits; } |
||
446 | |||
447 | inline Modifier operator&(const Modifier m) const { return bits & m.bits; } |
||
448 | inline Modifier operator|(const Modifier m) const { return bits | m.bits; } |
||
449 | inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; } |
||
450 | |||
451 | operation getOp() const; |
||
452 | |||
453 | inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; } |
||
454 | inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; } |
||
455 | |||
456 | inline operator bool() const { return bits ? true : false; } |
||
457 | |||
458 | void applyTo(ImmediateValue &imm) const; |
||
459 | |||
460 | int print(char *buf, size_t size) const; |
||
461 | |||
462 | private: |
||
463 | uint8_t bits; |
||
464 | }; |
||
465 | |||
466 | class ValueRef |
||
467 | { |
||
468 | public: |
||
469 | ValueRef(Value * = NULL); |
||
470 | ValueRef(const ValueRef&); |
||
471 | ~ValueRef(); |
||
472 | |||
473 | inline bool exists() const { return value != NULL; } |
||
474 | |||
475 | void set(Value *); |
||
476 | void set(const ValueRef&); |
||
477 | inline Value *get() const { return value; } |
||
478 | inline Value *rep() const; |
||
479 | |||
480 | inline Instruction *getInsn() const { return insn; } |
||
481 | inline void setInsn(Instruction *inst) { insn = inst; } |
||
482 | |||
483 | inline bool isIndirect(int dim) const { return indirect[dim] >= 0; } |
||
484 | inline const ValueRef *getIndirect(int dim) const; |
||
485 | |||
486 | inline DataFile getFile() const; |
||
487 | inline unsigned getSize() const; |
||
488 | |||
489 | // SSA: return eventual (traverse MOVs) literal value, if it exists |
||
490 | bool getImmediate(ImmediateValue&) const; |
||
491 | |||
492 | public: |
||
493 | Modifier mod; |
||
494 | int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i]) |
||
495 | uint8_t swizzle; |
||
496 | |||
497 | bool usedAsPtr; // for printing |
||
498 | |||
499 | private: |
||
500 | Value *value; |
||
501 | Instruction *insn; |
||
502 | }; |
||
503 | |||
504 | class ValueDef |
||
505 | { |
||
506 | public: |
||
507 | ValueDef(Value * = NULL); |
||
508 | ValueDef(const ValueDef&); |
||
509 | ~ValueDef(); |
||
510 | |||
511 | inline bool exists() const { return value != NULL; } |
||
512 | |||
513 | inline Value *get() const { return value; } |
||
514 | inline Value *rep() const; |
||
515 | void set(Value *); |
||
516 | bool mayReplace(const ValueRef &); |
||
517 | void replace(const ValueRef &, bool doSet); // replace all uses of the old value |
||
518 | |||
519 | inline Instruction *getInsn() const { return insn; } |
||
520 | inline void setInsn(Instruction *inst) { insn = inst; } |
||
521 | |||
522 | inline DataFile getFile() const; |
||
523 | inline unsigned getSize() const; |
||
524 | |||
525 | inline void setSSA(LValue *); |
||
526 | inline const LValue *preSSA() const; |
||
527 | |||
528 | private: |
||
529 | Value *value; // should make this LValue * ... |
||
530 | LValue *origin; // pre SSA value |
||
531 | Instruction *insn; |
||
532 | }; |
||
533 | |||
534 | class Value |
||
535 | { |
||
536 | public: |
||
537 | Value(); |
||
538 | virtual ~Value() { } |
||
539 | |||
540 | virtual Value *clone(ClonePolicy |
||
541 | |||
542 | virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0; |
||
543 | |||
544 | virtual bool equals(const Value *, bool strict = false) const; |
||
545 | virtual bool interfers(const Value *) const; |
||
546 | virtual bool isUniform() const { return true; } |
||
547 | |||
548 | inline Value *rep() const { return join; } |
||
549 | |||
550 | inline Instruction *getUniqueInsn() const; |
||
551 | inline Instruction *getInsn() const; // use when uniqueness is certain |
||
552 | |||
553 | inline int refCount() { return uses.size(); } |
||
554 | |||
555 | inline LValue *asLValue(); |
||
556 | inline Symbol *asSym(); |
||
557 | inline ImmediateValue *asImm(); |
||
558 | inline const Symbol *asSym() const; |
||
559 | inline const ImmediateValue *asImm() const; |
||
560 | |||
561 | inline bool inFile(DataFile f) { return reg.file == f; } |
||
562 | |||
563 | static inline Value *get(Iterator&); |
||
564 | |||
565 | std::list |
||
566 | std::list |
||
567 | typedef std::list |
||
568 | typedef std::list |
||
569 | typedef std::list |
||
570 | typedef std::list |
||
571 | |||
572 | int id; |
||
573 | Storage reg; |
||
574 | |||
575 | // TODO: these should be in LValue: |
||
576 | Interval livei; |
||
577 | Value *join; |
||
578 | }; |
||
579 | |||
580 | class LValue : public Value |
||
581 | { |
||
582 | public: |
||
583 | LValue(Function *, DataFile file); |
||
584 | LValue(Function *, LValue *); |
||
585 | ~LValue() { } |
||
586 | |||
587 | virtual bool isUniform() const; |
||
588 | |||
589 | virtual LValue *clone(ClonePolicy |
||
590 | |||
591 | virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; |
||
592 | |||
593 | public: |
||
594 | unsigned compMask : 8; // compound/component mask |
||
595 | unsigned compound : 1; // used by RA, value involved in split/merge |
||
596 | unsigned ssa : 1; |
||
597 | unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0) |
||
598 | unsigned noSpill : 1; // do not spill (e.g. if spill temporary already) |
||
599 | }; |
||
600 | |||
601 | class Symbol : public Value |
||
602 | { |
||
603 | public: |
||
604 | Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0); |
||
605 | ~Symbol() { } |
||
606 | |||
607 | virtual Symbol *clone(ClonePolicy |
||
608 | |||
609 | virtual bool equals(const Value *that, bool strict) const; |
||
610 | |||
611 | virtual bool isUniform() const; |
||
612 | |||
613 | virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; |
||
614 | |||
615 | // print with indirect values |
||
616 | int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const; |
||
617 | |||
618 | inline void setFile(DataFile file, ubyte fileIndex = 0) |
||
619 | { |
||
620 | reg.file = file; |
||
621 | reg.fileIndex = fileIndex; |
||
622 | } |
||
623 | |||
624 | inline void setOffset(int32_t offset); |
||
625 | inline void setAddress(Symbol *base, int32_t offset); |
||
626 | inline void setSV(SVSemantic sv, uint32_t idx = 0); |
||
627 | |||
628 | inline const Symbol *getBase() const { return baseSym; } |
||
629 | |||
630 | private: |
||
631 | Symbol *baseSym; // array base for Symbols representing array elements |
||
632 | }; |
||
633 | |||
634 | class ImmediateValue : public Value |
||
635 | { |
||
636 | public: |
||
637 | ImmediateValue() { } |
||
638 | ImmediateValue(Program *, uint32_t); |
||
639 | ImmediateValue(Program *, float); |
||
640 | ImmediateValue(Program *, double); |
||
641 | // NOTE: not added to program with |
||
642 | ImmediateValue(const ImmediateValue *, DataType ty); |
||
643 | ~ImmediateValue() { }; |
||
644 | |||
645 | virtual ImmediateValue *clone(ClonePolicy |
||
646 | |||
647 | virtual bool equals(const Value *that, bool strict) const; |
||
648 | |||
649 | // these only work if 'type' is valid (we mostly use untyped literals): |
||
650 | bool isInteger(const int ival) const; // ival is cast to this' type |
||
651 | bool isNegative() const; |
||
652 | bool isPow2() const; |
||
653 | |||
654 | void applyLog2(); |
||
655 | |||
656 | // for constant folding: |
||
657 | ImmediateValue operator+(const ImmediateValue&) const; |
||
658 | ImmediateValue operator-(const ImmediateValue&) const; |
||
659 | ImmediateValue operator*(const ImmediateValue&) const; |
||
660 | ImmediateValue operator/(const ImmediateValue&) const; |
||
661 | |||
662 | ImmediateValue& operator=(const ImmediateValue&); // only sets value ! |
||
663 | |||
664 | bool compare(CondCode cc, float fval) const; |
||
665 | |||
666 | virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; |
||
667 | }; |
||
668 | |||
669 | class Instruction |
||
670 | { |
||
671 | public: |
||
672 | Instruction(); |
||
673 | Instruction(Function *, operation, DataType); |
||
674 | virtual ~Instruction(); |
||
675 | |||
676 | virtual Instruction *clone(ClonePolicy |
||
677 | Instruction * = NULL) const; |
||
678 | |||
679 | void setDef(int i, Value *); |
||
680 | void setSrc(int s, Value *); |
||
681 | void setSrc(int s, const ValueRef&); |
||
682 | void swapSources(int a, int b); |
||
683 | void moveSources(int s, int delta); |
||
684 | bool setIndirect(int s, int dim, Value *); |
||
685 | |||
686 | inline ValueRef& src(int s) { return srcs[s]; } |
||
687 | inline ValueDef& def(int s) { return defs[s]; } |
||
688 | inline const ValueRef& src(int s) const { return srcs[s]; } |
||
689 | inline const ValueDef& def(int s) const { return defs[s]; } |
||
690 | |||
691 | inline Value *getDef(int d) const { return defs[d].get(); } |
||
692 | inline Value *getSrc(int s) const { return srcs[s].get(); } |
||
693 | inline Value *getIndirect(int s, int dim) const; |
||
694 | |||
695 | inline bool defExists(unsigned d) const |
||
696 | { |
||
697 | return d < defs.size() && defs[d].exists(); |
||
698 | } |
||
699 | inline bool srcExists(unsigned s) const |
||
700 | { |
||
701 | return s < srcs.size() && srcs[s].exists(); |
||
702 | } |
||
703 | |||
704 | inline bool constrainedDefs() const; |
||
705 | |||
706 | bool setPredicate(CondCode ccode, Value *); |
||
707 | inline Value *getPredicate() const; |
||
708 | bool writesPredicate() const; |
||
709 | inline bool isPredicated() const { return predSrc >= 0; } |
||
710 | |||
711 | inline void setFlagsSrc(int s, Value *); |
||
712 | inline void setFlagsDef(int d, Value *); |
||
713 | inline bool usesFlags() const { return flagsSrc >= 0; } |
||
714 | |||
715 | unsigned int defCount() const { return defs.size(); }; |
||
716 | unsigned int defCount(unsigned int mask, bool singleFile = false) const; |
||
717 | unsigned int srcCount() const { return srcs.size(); }; |
||
718 | unsigned int srcCount(unsigned int mask, bool singleFile = false) const; |
||
719 | |||
720 | // save & remove / set indirect[0,1] and predicate source |
||
721 | void takeExtraSources(int s, Value *[3]); |
||
722 | void putExtraSources(int s, Value *[3]); |
||
723 | |||
724 | inline void setType(DataType type) { dType = sType = type; } |
||
725 | |||
726 | inline void setType(DataType dtype, DataType stype) |
||
727 | { |
||
728 | dType = dtype; |
||
729 | sType = stype; |
||
730 | } |
||
731 | |||
732 | inline bool isPseudo() const { return op < OP_MOV; } |
||
733 | bool isDead() const; |
||
734 | bool isNop() const; |
||
735 | bool isCommutationLegal(const Instruction *) const; // must be adjacent ! |
||
736 | bool isActionEqual(const Instruction *) const; |
||
737 | bool isResultEqual(const Instruction *) const; |
||
738 | |||
739 | void print() const; |
||
740 | |||
741 | inline CmpInstruction *asCmp(); |
||
742 | inline TexInstruction *asTex(); |
||
743 | inline FlowInstruction *asFlow(); |
||
744 | inline const TexInstruction *asTex() const; |
||
745 | inline const CmpInstruction *asCmp() const; |
||
746 | inline const FlowInstruction *asFlow() const; |
||
747 | |||
748 | public: |
||
749 | Instruction *next; |
||
750 | Instruction *prev; |
||
751 | int id; |
||
752 | int serial; // CFG order |
||
753 | |||
754 | operation op; |
||
755 | DataType dType; // destination or defining type |
||
756 | DataType sType; // source or secondary type |
||
757 | CondCode cc; |
||
758 | RoundMode rnd; |
||
759 | CacheMode cache; |
||
760 | |||
761 | uint16_t subOp; // quadop, 1 for mul-high, etc. |
||
762 | |||
763 | unsigned encSize : 4; // encoding size in bytes |
||
764 | unsigned saturate : 1; // to [0.0f, 1.0f] |
||
765 | unsigned join : 1; // converge control flow (use OP_JOIN until end) |
||
766 | unsigned fixed : 1; // prevent dead code elimination |
||
767 | unsigned terminator : 1; // end of basic block |
||
768 | unsigned ftz : 1; // flush denormal to zero |
||
769 | unsigned dnz : 1; // denormals, NaN are zero |
||
770 | unsigned ipa : 4; // interpolation mode |
||
771 | unsigned lanes : 4; |
||
772 | unsigned perPatch : 1; |
||
773 | unsigned exit : 1; // terminate program after insn |
||
774 | unsigned mask : 4; // for vector ops |
||
775 | |||
776 | int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor |
||
777 | |||
778 | int8_t predSrc; |
||
779 | int8_t flagsDef; |
||
780 | int8_t flagsSrc; |
||
781 | |||
782 | uint8_t sched; // scheduling data (NOTE: maybe move to separate storage) |
||
783 | |||
784 | BasicBlock *bb; |
||
785 | |||
786 | protected: |
||
787 | std::deque |
||
788 | std::deque |
||
789 | |||
790 | // instruction specific methods: |
||
791 | // (don't want to subclass, would need more constructors and memory pools) |
||
792 | public: |
||
793 | inline void setInterpolate(unsigned int mode) { ipa = mode; } |
||
794 | |||
795 | unsigned int getInterpMode() const { return ipa & 0x3; } |
||
796 | unsigned int getSampleMode() const { return ipa & 0xc; } |
||
797 | |||
798 | private: |
||
799 | void init(); |
||
800 | }; |
||
801 | |||
// Kinds of texture query (stored in TexInstruction::tex.query).
enum TexQuery
{
   TXQ_DIMS,
   TXQ_TYPE,
   TXQ_SAMPLE_POSITION,
   TXQ_FILTER,
   TXQ_LOD,
   TXQ_WRAP,
   TXQ_BORDER_COLOUR
};
||
812 | |||
813 | class TexInstruction : public Instruction |
||
814 | { |
||
815 | public: |
||
816 | class Target |
||
817 | { |
||
818 | public: |
||
819 | Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { } |
||
820 | |||
821 | const char *getName() const { return descTable[target].name; } |
||
822 | unsigned int getArgCount() const { return descTable[target].argc; } |
||
823 | unsigned int getDim() const { return descTable[target].dim; } |
||
824 | int isArray() const { return descTable[target].array ? 1 : 0; } |
||
825 | int isCube() const { return descTable[target].cube ? 1 : 0; } |
||
826 | int isShadow() const { return descTable[target].shadow ? 1 : 0; } |
||
827 | int isMS() const { |
||
828 | return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; } |
||
829 | |||
830 | Target& operator=(TexTarget targ) |
||
831 | { |
||
832 | assert(targ < TEX_TARGET_COUNT); |
||
833 | target = targ; |
||
834 | return *this; |
||
835 | } |
||
836 | |||
837 | inline bool operator==(TexTarget targ) const { return target == targ; } |
||
838 | inline bool operator!=(TexTarget targ) const { return target != targ; } |
||
839 | |||
840 | enum TexTarget getEnum() const { return target; } |
||
841 | |||
842 | private: |
||
843 | struct Desc |
||
844 | { |
||
845 | char name[19]; |
||
846 | uint8_t dim; |
||
847 | uint8_t argc; |
||
848 | bool array; |
||
849 | bool cube; |
||
850 | bool shadow; |
||
851 | }; |
||
852 | |||
853 | static const struct Desc descTable[TEX_TARGET_COUNT]; |
||
854 | |||
855 | private: |
||
856 | enum TexTarget target; |
||
857 | }; |
||
858 | |||
859 | public: |
||
860 | TexInstruction(Function *, operation); |
||
861 | virtual ~TexInstruction(); |
||
862 | |||
863 | virtual TexInstruction *clone(ClonePolicy |
||
864 | Instruction * = NULL) const; |
||
865 | |||
866 | inline void setTexture(Target targ, uint8_t r, uint8_t s) |
||
867 | { |
||
868 | tex.r = r; |
||
869 | tex.s = s; |
||
870 | tex.target = targ; |
||
871 | } |
||
872 | |||
873 | void setIndirectR(Value *); |
||
874 | void setIndirectS(Value *); |
||
875 | inline Value *getIndirectR() const; |
||
876 | inline Value *getIndirectS() const; |
||
877 | |||
878 | public: |
||
879 | struct { |
||
880 | Target target; |
||
881 | |||
882 | uint16_t r; |
||
883 | uint16_t s; |
||
884 | int8_t rIndirectSrc; |
||
885 | int8_t sIndirectSrc; |
||
886 | |||
887 | uint8_t mask; |
||
888 | uint8_t gatherComp; |
||
889 | |||
890 | bool liveOnly; // only execute on live pixels of a quad (optimization) |
||
891 | bool levelZero; |
||
892 | bool derivAll; |
||
893 | |||
894 | int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets |
||
895 | int8_t offset[4][3]; |
||
896 | |||
897 | enum TexQuery query; |
||
898 | } tex; |
||
899 | |||
900 | ValueRef dPdx[3]; |
||
901 | ValueRef dPdy[3]; |
||
902 | }; |
||
903 | |||
904 | class CmpInstruction : public Instruction |
||
905 | { |
||
906 | public: |
||
907 | CmpInstruction(Function *, operation); |
||
908 | |||
909 | virtual CmpInstruction *clone(ClonePolicy |
||
910 | Instruction * = NULL) const; |
||
911 | |||
912 | void setCondition(CondCode cond) { setCond = cond; } |
||
913 | CondCode getCondition() const { return setCond; } |
||
914 | |||
915 | public: |
||
916 | CondCode setCond; |
||
917 | }; |
||
918 | |||
919 | class FlowInstruction : public Instruction |
||
920 | { |
||
921 | public: |
||
922 | FlowInstruction(Function *, operation, void *target); |
||
923 | |||
924 | virtual FlowInstruction *clone(ClonePolicy |
||
925 | Instruction * = NULL) const; |
||
926 | |||
927 | public: |
||
928 | unsigned allWarp : 1; |
||
929 | unsigned absolute : 1; |
||
930 | unsigned limit : 1; |
||
931 | unsigned builtin : 1; // true for calls to emulation code |
||
932 | unsigned indirect : 1; // target in src(0) |
||
933 | |||
934 | union { |
||
935 | BasicBlock *bb; |
||
936 | int builtin; |
||
937 | Function *fn; |
||
938 | } target; |
||
939 | }; |
||
940 | |||
941 | class BasicBlock |
||
942 | { |
||
943 | public: |
||
944 | BasicBlock(Function *); |
||
945 | ~BasicBlock(); |
||
946 | |||
947 | BasicBlock *clone(ClonePolicy |
||
948 | |||
949 | inline int getId() const { return id; } |
||
950 | inline unsigned int getInsnCount() const { return numInsns; } |
||
951 | inline bool isTerminated() const { return exit && exit->terminator; } |
||
952 | |||
953 | bool dominatedBy(BasicBlock *bb); |
||
954 | inline bool reachableBy(const BasicBlock *by, const BasicBlock *term); |
||
955 | |||
956 | // returns mask of conditional out blocks |
||
957 | // e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF |
||
958 | unsigned int initiatesSimpleConditional() const; |
||
959 | |||
960 | public: |
||
961 | Function *getFunction() const { return func; } |
||
962 | Program *getProgram() const { return program; } |
||
963 | |||
964 | Instruction *getEntry() const { return entry; } // first non-phi instruction |
||
965 | Instruction *getPhi() const { return phi; } |
||
966 | Instruction *getFirst() const { return phi ? phi : entry; } |
||
967 | Instruction *getExit() const { return exit; } |
||
968 | |||
969 | void insertHead(Instruction *); |
||
970 | void insertTail(Instruction *); |
||
971 | void insertBefore(Instruction *, Instruction *); |
||
972 | void insertAfter(Instruction *, Instruction *); |
||
973 | void remove(Instruction *); |
||
974 | void permuteAdjacent(Instruction *, Instruction *); |
||
975 | |||
976 | BasicBlock *idom() const; |
||
977 | |||
978 | // NOTE: currently does not rebuild the dominator tree |
||
979 | BasicBlock *splitBefore(Instruction *, bool attach = true); |
||
980 | BasicBlock *splitAfter(Instruction *, bool attach = true); |
||
981 | |||
982 | DLList& getDF() { return df; } |
||
983 | DLList::Iterator iterDF() { return df.iterator(); } |
||
984 | |||
985 | static inline BasicBlock *get(Iterator&); |
||
986 | static inline BasicBlock *get(Graph::Node *); |
||
987 | |||
988 | public: |
||
989 | Graph::Node cfg; // first edge is branch *taken* (the ELSE branch) |
||
990 | Graph::Node dom; |
||
991 | |||
992 | BitSet liveSet; |
||
993 | BitSet defSet; |
||
994 | |||
995 | uint32_t binPos; |
||
996 | uint32_t binSize; |
||
997 | |||
998 | Instruction *joinAt; // for quick reference |
||
999 | |||
1000 | bool explicitCont; // loop headers: true if loop contains continue stmts |
||
1001 | |||
1002 | private: |
||
1003 | int id; |
||
1004 | DLList df; |
||
1005 | |||
1006 | Instruction *phi; |
||
1007 | Instruction *entry; |
||
1008 | Instruction *exit; |
||
1009 | |||
1010 | unsigned int numInsns; |
||
1011 | |||
1012 | private: |
||
1013 | Function *func; |
||
1014 | Program *program; |
||
1015 | |||
1016 | void splitCommon(Instruction *, BasicBlock *, bool attach); |
||
1017 | }; |
||
1018 | |||
1019 | class Function |
||
1020 | { |
||
1021 | public: |
||
1022 | Function(Program *, const char *name, uint32_t label); |
||
1023 | ~Function(); |
||
1024 | |||
1025 | static inline Function *get(Graph::Node *node); |
||
1026 | |||
1027 | inline Program *getProgram() const { return prog; } |
||
1028 | inline const char *getName() const { return name; } |
||
1029 | inline int getId() const { return id; } |
||
1030 | inline uint32_t getLabel() const { return label; } |
||
1031 | |||
1032 | void print(); |
||
1033 | void printLiveIntervals() const; |
||
1034 | void printCFGraph(const char *filePath); |
||
1035 | |||
1036 | bool setEntry(BasicBlock *); |
||
1037 | bool setExit(BasicBlock *); |
||
1038 | |||
1039 | unsigned int orderInstructions(ArrayList&); |
||
1040 | |||
1041 | inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); } |
||
1042 | inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); } |
||
1043 | inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); } |
||
1044 | |||
1045 | inline LValue *getLValue(int id); |
||
1046 | |||
1047 | void buildLiveSets(); |
||
1048 | void buildDefSets(); |
||
1049 | bool convertToSSA(); |
||
1050 | |||
1051 | public: |
||
1052 | std::deque |
||
1053 | std::deque |
||
1054 | std::deque |
||
1055 | |||
1056 | Graph cfg; |
||
1057 | Graph::Node *cfgExit; |
||
1058 | Graph *domTree; |
||
1059 | Graph::Node call; // node in the call graph |
||
1060 | |||
1061 | BasicBlock **bbArray; // BBs in emission order |
||
1062 | int bbCount; |
||
1063 | |||
1064 | unsigned int loopNestingBound; |
||
1065 | int regClobberMax; |
||
1066 | |||
1067 | uint32_t binPos; |
||
1068 | uint32_t binSize; |
||
1069 | |||
1070 | Value *stackPtr; |
||
1071 | |||
1072 | uint32_t tlsBase; // base address for l[] space (if no stack pointer is used) |
||
1073 | uint32_t tlsSize; |
||
1074 | |||
1075 | ArrayList allBBlocks; |
||
1076 | ArrayList allInsns; |
||
1077 | ArrayList allLValues; |
||
1078 | |||
1079 | private: |
||
1080 | void buildLiveSetsPreSSA(BasicBlock *, const int sequence); |
||
1081 | void buildDefSetsPreSSA(BasicBlock *bb, const int seq); |
||
1082 | |||
1083 | private: |
||
1084 | uint32_t label; |
||
1085 | int id; |
||
1086 | const char *const name; |
||
1087 | Program *prog; |
||
1088 | }; |
||
1089 | |||
// Code generation stages, numbered consecutively from 0 in declaration order.
enum CGStage
{
   CG_STAGE_PRE_SSA,
   CG_STAGE_SSA, // expected directly before register allocation
   CG_STAGE_POST_RA
};
||
1096 | |||
1097 | class Program |
||
1098 | { |
||
1099 | public: |
||
1100 | enum Type |
||
1101 | { |
||
1102 | TYPE_VERTEX, |
||
1103 | TYPE_TESSELLATION_CONTROL, |
||
1104 | TYPE_TESSELLATION_EVAL, |
||
1105 | TYPE_GEOMETRY, |
||
1106 | TYPE_FRAGMENT, |
||
1107 | TYPE_COMPUTE |
||
1108 | }; |
||
1109 | |||
1110 | Program(Type type, Target *targ); |
||
1111 | ~Program(); |
||
1112 | |||
1113 | void print(); |
||
1114 | |||
1115 | Type getType() const { return progType; } |
||
1116 | |||
1117 | inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); } |
||
1118 | inline void del(Function *fn, int& id) { allFuncs.remove(id); } |
||
1119 | inline void add(Value *rval, int& id) { allRValues.insert(rval, id); } |
||
1120 | |||
1121 | bool makeFromTGSI(struct nv50_ir_prog_info *); |
||
1122 | bool makeFromSM4(struct nv50_ir_prog_info *); |
||
1123 | bool convertToSSA(); |
||
1124 | bool optimizeSSA(int level); |
||
1125 | bool optimizePostRA(int level); |
||
1126 | bool registerAllocation(); |
||
1127 | bool emitBinary(struct nv50_ir_prog_info *); |
||
1128 | |||
1129 | const Target *getTarget() const { return target; } |
||
1130 | |||
1131 | private: |
||
1132 | void emitSymbolTable(struct nv50_ir_prog_info *); |
||
1133 | |||
1134 | Type progType; |
||
1135 | Target *target; |
||
1136 | |||
1137 | public: |
||
1138 | Function *main; |
||
1139 | Graph calls; |
||
1140 | |||
1141 | ArrayList allFuncs; |
||
1142 | ArrayList allRValues; |
||
1143 | |||
1144 | uint32_t *code; |
||
1145 | uint32_t binSize; |
||
1146 | uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL |
||
1147 | |||
1148 | int maxGPR; |
||
1149 | |||
1150 | MemoryPool mem_Instruction; |
||
1151 | MemoryPool mem_CmpInstruction; |
||
1152 | MemoryPool mem_TexInstruction; |
||
1153 | MemoryPool mem_FlowInstruction; |
||
1154 | MemoryPool mem_LValue; |
||
1155 | MemoryPool mem_Symbol; |
||
1156 | MemoryPool mem_ImmediateValue; |
||
1157 | |||
1158 | uint32_t dbgFlags; |
||
1159 | uint8_t optLevel; |
||
1160 | |||
1161 | void *targetPriv; // e.g. to carry information between passes |
||
1162 | |||
1163 | const struct nv50_ir_prog_info *driver; // for driver configuration |
||
1164 | |||
1165 | void releaseInstruction(Instruction *); |
||
1166 | void releaseValue(Value *); |
||
1167 | }; |
||
1168 | |||
1169 | // TODO: add const version |
||
1170 | class Pass |
||
1171 | { |
||
1172 | public: |
||
1173 | bool run(Program *, bool ordered = false, bool skipPhi = false); |
||
1174 | bool run(Function *, bool ordered = false, bool skipPhi = false); |
||
1175 | |||
1176 | private: |
||
1177 | // return false to continue with next entity on next higher level |
||
1178 | virtual bool visit(Function *) { return true; } |
||
1179 | virtual bool visit(BasicBlock *) { return true; } |
||
1180 | virtual bool visit(Instruction *) { return false; } |
||
1181 | |||
1182 | bool doRun(Program *, bool ordered, bool skipPhi); |
||
1183 | bool doRun(Function *, bool ordered, bool skipPhi); |
||
1184 | |||
1185 | protected: |
||
1186 | bool err; |
||
1187 | Function *func; |
||
1188 | Program *prog; |
||
1189 | }; |
||
1190 | |||
1191 | // ============================================================================= |
||
1192 | |||
1193 | #include "nv50_ir_inlines.h" |
||
1194 | |||
1195 | } // namespace nv50_ir |
||
1196 | |||
#endif // __NV50_IR_H__