Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright 2011 Christoph Bumiller |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | */ |
||
22 | |||
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_util.h"

#include <set>
#include <vector>

#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_util.h"
#include "codegen/nv50_ir_build_util.h"
||
32 | |||
33 | namespace tgsi { |
||
34 | |||
35 | class Source; |
||
36 | |||
37 | static nv50_ir::operation translateOpcode(uint opcode); |
||
38 | static nv50_ir::DataFile translateFile(uint file); |
||
39 | static nv50_ir::TexTarget translateTexture(uint texTarg); |
||
40 | static nv50_ir::SVSemantic translateSysVal(uint sysval); |
||
41 | |||
42 | class Instruction |
||
43 | { |
||
44 | public: |
||
45 | Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { } |
||
46 | |||
47 | class SrcRegister |
||
48 | { |
||
49 | public: |
||
50 | SrcRegister(const struct tgsi_full_src_register *src) |
||
51 | : reg(src->Register), |
||
52 | fsr(src) |
||
53 | { } |
||
54 | |||
55 | SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { } |
||
56 | |||
57 | SrcRegister(const struct tgsi_ind_register& ind) |
||
58 | : reg(tgsi_util_get_src_from_ind(&ind)), |
||
59 | fsr(NULL) |
||
60 | { } |
||
61 | |||
62 | struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off) |
||
63 | { |
||
64 | struct tgsi_src_register reg; |
||
65 | memset(®, 0, sizeof(reg)); |
||
66 | reg.Index = off.Index; |
||
67 | reg.File = off.File; |
||
68 | reg.SwizzleX = off.SwizzleX; |
||
69 | reg.SwizzleY = off.SwizzleY; |
||
70 | reg.SwizzleZ = off.SwizzleZ; |
||
71 | return reg; |
||
72 | } |
||
73 | |||
74 | SrcRegister(const struct tgsi_texture_offset& off) : |
||
75 | reg(offsetToSrc(off)), |
||
76 | fsr(NULL) |
||
77 | { } |
||
78 | |||
79 | uint getFile() const { return reg.File; } |
||
80 | |||
81 | bool is2D() const { return reg.Dimension; } |
||
82 | |||
83 | bool isIndirect(int dim) const |
||
84 | { |
||
85 | return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect; |
||
86 | } |
||
87 | |||
88 | int getIndex(int dim) const |
||
89 | { |
||
90 | return (dim && fsr) ? fsr->Dimension.Index : reg.Index; |
||
91 | } |
||
92 | |||
93 | int getSwizzle(int chan) const |
||
94 | { |
||
95 | return tgsi_util_get_src_register_swizzle(®, chan); |
||
96 | } |
||
97 | |||
98 | nv50_ir::Modifier getMod(int chan) const; |
||
99 | |||
100 | SrcRegister getIndirect(int dim) const |
||
101 | { |
||
102 | assert(fsr && isIndirect(dim)); |
||
103 | if (dim) |
||
104 | return SrcRegister(fsr->DimIndirect); |
||
105 | return SrcRegister(fsr->Indirect); |
||
106 | } |
||
107 | |||
108 | uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const |
||
109 | { |
||
110 | assert(reg.File == TGSI_FILE_IMMEDIATE); |
||
111 | assert(!reg.Absolute); |
||
112 | assert(!reg.Negate); |
||
113 | return info->immd.data[reg.Index * 4 + getSwizzle(c)]; |
||
114 | } |
||
115 | |||
116 | private: |
||
117 | const struct tgsi_src_register reg; |
||
118 | const struct tgsi_full_src_register *fsr; |
||
119 | }; |
||
120 | |||
121 | class DstRegister |
||
122 | { |
||
123 | public: |
||
124 | DstRegister(const struct tgsi_full_dst_register *dst) |
||
125 | : reg(dst->Register), |
||
126 | fdr(dst) |
||
127 | { } |
||
128 | |||
129 | DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { } |
||
130 | |||
131 | uint getFile() const { return reg.File; } |
||
132 | |||
133 | bool is2D() const { return reg.Dimension; } |
||
134 | |||
135 | bool isIndirect(int dim) const |
||
136 | { |
||
137 | return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect; |
||
138 | } |
||
139 | |||
140 | int getIndex(int dim) const |
||
141 | { |
||
142 | return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index; |
||
143 | } |
||
144 | |||
145 | unsigned int getMask() const { return reg.WriteMask; } |
||
146 | |||
147 | bool isMasked(int chan) const { return !(getMask() & (1 << chan)); } |
||
148 | |||
149 | SrcRegister getIndirect(int dim) const |
||
150 | { |
||
151 | assert(fdr && isIndirect(dim)); |
||
152 | if (dim) |
||
153 | return SrcRegister(fdr->DimIndirect); |
||
154 | return SrcRegister(fdr->Indirect); |
||
155 | } |
||
156 | |||
157 | private: |
||
158 | const struct tgsi_dst_register reg; |
||
159 | const struct tgsi_full_dst_register *fdr; |
||
160 | }; |
||
161 | |||
162 | inline uint getOpcode() const { return insn->Instruction.Opcode; } |
||
163 | |||
164 | unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; } |
||
165 | unsigned int dstCount() const { return insn->Instruction.NumDstRegs; } |
||
166 | |||
167 | // mask of used components of source s |
||
168 | unsigned int srcMask(unsigned int s) const; |
||
169 | |||
170 | SrcRegister getSrc(unsigned int s) const |
||
171 | { |
||
172 | assert(s < srcCount()); |
||
173 | return SrcRegister(&insn->Src[s]); |
||
174 | } |
||
175 | |||
176 | DstRegister getDst(unsigned int d) const |
||
177 | { |
||
178 | assert(d < dstCount()); |
||
179 | return DstRegister(&insn->Dst[d]); |
||
180 | } |
||
181 | |||
182 | SrcRegister getTexOffset(unsigned int i) const |
||
183 | { |
||
184 | assert(i < TGSI_FULL_MAX_TEX_OFFSETS); |
||
185 | return SrcRegister(insn->TexOffsets[i]); |
||
186 | } |
||
187 | |||
188 | unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; } |
||
189 | |||
190 | bool checkDstSrcAliasing() const; |
||
191 | |||
192 | inline nv50_ir::operation getOP() const { |
||
193 | return translateOpcode(getOpcode()); } |
||
194 | |||
195 | nv50_ir::DataType inferSrcType() const; |
||
196 | nv50_ir::DataType inferDstType() const; |
||
197 | |||
198 | nv50_ir::CondCode getSetCond() const; |
||
199 | |||
200 | nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; |
||
201 | |||
202 | inline uint getLabel() { return insn->Label.Label; } |
||
203 | |||
204 | unsigned getSaturate() const { return insn->Instruction.Saturate; } |
||
205 | |||
206 | void print() const |
||
207 | { |
||
208 | tgsi_dump_instruction(insn, 1); |
||
209 | } |
||
210 | |||
211 | private: |
||
212 | const struct tgsi_full_instruction *insn; |
||
213 | }; |
||
214 | |||
215 | unsigned int Instruction::srcMask(unsigned int s) const |
||
216 | { |
||
217 | unsigned int mask = insn->Dst[0].Register.WriteMask; |
||
218 | |||
219 | switch (insn->Instruction.Opcode) { |
||
220 | case TGSI_OPCODE_COS: |
||
221 | case TGSI_OPCODE_SIN: |
||
222 | return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); |
||
223 | case TGSI_OPCODE_DP2: |
||
224 | return 0x3; |
||
225 | case TGSI_OPCODE_DP3: |
||
226 | return 0x7; |
||
227 | case TGSI_OPCODE_DP4: |
||
228 | case TGSI_OPCODE_DPH: |
||
229 | case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */ |
||
230 | return 0xf; |
||
231 | case TGSI_OPCODE_DST: |
||
232 | return mask & (s ? 0xa : 0x6); |
||
233 | case TGSI_OPCODE_EX2: |
||
234 | case TGSI_OPCODE_EXP: |
||
235 | case TGSI_OPCODE_LG2: |
||
236 | case TGSI_OPCODE_LOG: |
||
237 | case TGSI_OPCODE_POW: |
||
238 | case TGSI_OPCODE_RCP: |
||
239 | case TGSI_OPCODE_RSQ: |
||
240 | case TGSI_OPCODE_SCS: |
||
241 | return 0x1; |
||
242 | case TGSI_OPCODE_IF: |
||
243 | case TGSI_OPCODE_UIF: |
||
244 | return 0x1; |
||
245 | case TGSI_OPCODE_LIT: |
||
246 | return 0xb; |
||
247 | case TGSI_OPCODE_TEX2: |
||
248 | case TGSI_OPCODE_TXB2: |
||
249 | case TGSI_OPCODE_TXL2: |
||
250 | return (s == 0) ? 0xf : 0x3; |
||
251 | case TGSI_OPCODE_TEX: |
||
252 | case TGSI_OPCODE_TXB: |
||
253 | case TGSI_OPCODE_TXD: |
||
254 | case TGSI_OPCODE_TXL: |
||
255 | case TGSI_OPCODE_TXP: |
||
256 | case TGSI_OPCODE_LODQ: |
||
257 | { |
||
258 | const struct tgsi_instruction_texture *tex = &insn->Texture; |
||
259 | |||
260 | assert(insn->Instruction.Texture); |
||
261 | |||
262 | mask = 0x7; |
||
263 | if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && |
||
264 | insn->Instruction.Opcode != TGSI_OPCODE_TXD) |
||
265 | mask |= 0x8; /* bias, lod or proj */ |
||
266 | |||
267 | switch (tex->Texture) { |
||
268 | case TGSI_TEXTURE_1D: |
||
269 | mask &= 0x9; |
||
270 | break; |
||
271 | case TGSI_TEXTURE_SHADOW1D: |
||
272 | mask &= 0xd; |
||
273 | break; |
||
274 | case TGSI_TEXTURE_1D_ARRAY: |
||
275 | case TGSI_TEXTURE_2D: |
||
276 | case TGSI_TEXTURE_RECT: |
||
277 | mask &= 0xb; |
||
278 | break; |
||
279 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
280 | case TGSI_TEXTURE_SHADOW2D_ARRAY: |
||
281 | case TGSI_TEXTURE_SHADOWCUBE: |
||
282 | case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
||
283 | mask |= 0x8; |
||
284 | break; |
||
285 | default: |
||
286 | break; |
||
287 | } |
||
288 | } |
||
289 | return mask; |
||
290 | case TGSI_OPCODE_XPD: |
||
291 | { |
||
292 | unsigned int x = 0; |
||
293 | if (mask & 1) x |= 0x6; |
||
294 | if (mask & 2) x |= 0x5; |
||
295 | if (mask & 4) x |= 0x3; |
||
296 | return x; |
||
297 | } |
||
298 | case TGSI_OPCODE_D2I: |
||
299 | case TGSI_OPCODE_D2U: |
||
300 | case TGSI_OPCODE_D2F: |
||
301 | case TGSI_OPCODE_DSLT: |
||
302 | case TGSI_OPCODE_DSGE: |
||
303 | case TGSI_OPCODE_DSEQ: |
||
304 | case TGSI_OPCODE_DSNE: |
||
305 | switch (util_bitcount(mask)) { |
||
306 | case 1: return 0x3; |
||
307 | case 2: return 0xf; |
||
308 | default: |
||
309 | assert(!"unexpected mask"); |
||
310 | return 0xf; |
||
311 | } |
||
312 | case TGSI_OPCODE_I2D: |
||
313 | case TGSI_OPCODE_U2D: |
||
314 | case TGSI_OPCODE_F2D: { |
||
315 | unsigned int x = 0; |
||
316 | if ((mask & 0x3) == 0x3) |
||
317 | x |= 1; |
||
318 | if ((mask & 0xc) == 0xc) |
||
319 | x |= 2; |
||
320 | return x; |
||
321 | } |
||
322 | default: |
||
323 | break; |
||
324 | } |
||
325 | |||
326 | return mask; |
||
327 | } |
||
328 | |||
329 | nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const |
||
330 | { |
||
331 | nv50_ir::Modifier m(0); |
||
332 | |||
333 | if (reg.Absolute) |
||
334 | m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS); |
||
335 | if (reg.Negate) |
||
336 | m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG); |
||
337 | return m; |
||
338 | } |
||
339 | |||
340 | static nv50_ir::DataFile translateFile(uint file) |
||
341 | { |
||
342 | switch (file) { |
||
343 | case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST; |
||
344 | case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT; |
||
345 | case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT; |
||
346 | case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR; |
||
347 | case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS; |
||
348 | case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; |
||
349 | case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; |
||
350 | case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; |
||
351 | case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL; |
||
352 | case TGSI_FILE_SAMPLER: |
||
353 | case TGSI_FILE_NULL: |
||
354 | default: |
||
355 | return nv50_ir::FILE_NULL; |
||
356 | } |
||
357 | } |
||
358 | |||
359 | static nv50_ir::SVSemantic translateSysVal(uint sysval) |
||
360 | { |
||
361 | switch (sysval) { |
||
362 | case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE; |
||
363 | case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE; |
||
364 | case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID; |
||
365 | case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID; |
||
366 | case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID; |
||
367 | case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID; |
||
368 | case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID; |
||
369 | case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID; |
||
370 | case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID; |
||
371 | case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX; |
||
372 | case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS; |
||
373 | case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK; |
||
374 | case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID; |
||
375 | default: |
||
376 | assert(0); |
||
377 | return nv50_ir::SV_CLOCK; |
||
378 | } |
||
379 | } |
||
380 | |||
381 | #define NV50_IR_TEX_TARG_CASE(a, b) \ |
||
382 | case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b; |
||
383 | |||
384 | static nv50_ir::TexTarget translateTexture(uint tex) |
||
385 | { |
||
386 | switch (tex) { |
||
387 | NV50_IR_TEX_TARG_CASE(1D, 1D); |
||
388 | NV50_IR_TEX_TARG_CASE(2D, 2D); |
||
389 | NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS); |
||
390 | NV50_IR_TEX_TARG_CASE(3D, 3D); |
||
391 | NV50_IR_TEX_TARG_CASE(CUBE, CUBE); |
||
392 | NV50_IR_TEX_TARG_CASE(RECT, RECT); |
||
393 | NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY); |
||
394 | NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY); |
||
395 | NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY); |
||
396 | NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY); |
||
397 | NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW); |
||
398 | NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW); |
||
399 | NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW); |
||
400 | NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW); |
||
401 | NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW); |
||
402 | NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW); |
||
403 | NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW); |
||
404 | NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER); |
||
405 | |||
406 | case TGSI_TEXTURE_UNKNOWN: |
||
407 | default: |
||
408 | assert(!"invalid texture target"); |
||
409 | return nv50_ir::TEX_TARGET_2D; |
||
410 | } |
||
411 | } |
||
412 | |||
413 | nv50_ir::DataType Instruction::inferSrcType() const |
||
414 | { |
||
415 | switch (getOpcode()) { |
||
416 | case TGSI_OPCODE_UIF: |
||
417 | case TGSI_OPCODE_AND: |
||
418 | case TGSI_OPCODE_OR: |
||
419 | case TGSI_OPCODE_XOR: |
||
420 | case TGSI_OPCODE_NOT: |
||
421 | case TGSI_OPCODE_SHL: |
||
422 | case TGSI_OPCODE_U2F: |
||
423 | case TGSI_OPCODE_U2D: |
||
424 | case TGSI_OPCODE_UADD: |
||
425 | case TGSI_OPCODE_UDIV: |
||
426 | case TGSI_OPCODE_UMOD: |
||
427 | case TGSI_OPCODE_UMAD: |
||
428 | case TGSI_OPCODE_UMUL: |
||
429 | case TGSI_OPCODE_UMUL_HI: |
||
430 | case TGSI_OPCODE_UMAX: |
||
431 | case TGSI_OPCODE_UMIN: |
||
432 | case TGSI_OPCODE_USEQ: |
||
433 | case TGSI_OPCODE_USGE: |
||
434 | case TGSI_OPCODE_USLT: |
||
435 | case TGSI_OPCODE_USNE: |
||
436 | case TGSI_OPCODE_USHR: |
||
437 | case TGSI_OPCODE_UCMP: |
||
438 | case TGSI_OPCODE_ATOMUADD: |
||
439 | case TGSI_OPCODE_ATOMXCHG: |
||
440 | case TGSI_OPCODE_ATOMCAS: |
||
441 | case TGSI_OPCODE_ATOMAND: |
||
442 | case TGSI_OPCODE_ATOMOR: |
||
443 | case TGSI_OPCODE_ATOMXOR: |
||
444 | case TGSI_OPCODE_ATOMUMIN: |
||
445 | case TGSI_OPCODE_ATOMUMAX: |
||
446 | case TGSI_OPCODE_UBFE: |
||
447 | case TGSI_OPCODE_UMSB: |
||
448 | return nv50_ir::TYPE_U32; |
||
449 | case TGSI_OPCODE_I2F: |
||
450 | case TGSI_OPCODE_I2D: |
||
451 | case TGSI_OPCODE_IDIV: |
||
452 | case TGSI_OPCODE_IMUL_HI: |
||
453 | case TGSI_OPCODE_IMAX: |
||
454 | case TGSI_OPCODE_IMIN: |
||
455 | case TGSI_OPCODE_IABS: |
||
456 | case TGSI_OPCODE_INEG: |
||
457 | case TGSI_OPCODE_ISGE: |
||
458 | case TGSI_OPCODE_ISHR: |
||
459 | case TGSI_OPCODE_ISLT: |
||
460 | case TGSI_OPCODE_ISSG: |
||
461 | case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version |
||
462 | case TGSI_OPCODE_MOD: |
||
463 | case TGSI_OPCODE_UARL: |
||
464 | case TGSI_OPCODE_ATOMIMIN: |
||
465 | case TGSI_OPCODE_ATOMIMAX: |
||
466 | case TGSI_OPCODE_IBFE: |
||
467 | case TGSI_OPCODE_IMSB: |
||
468 | return nv50_ir::TYPE_S32; |
||
469 | case TGSI_OPCODE_D2F: |
||
470 | case TGSI_OPCODE_D2I: |
||
471 | case TGSI_OPCODE_D2U: |
||
472 | case TGSI_OPCODE_DABS: |
||
473 | case TGSI_OPCODE_DNEG: |
||
474 | case TGSI_OPCODE_DADD: |
||
475 | case TGSI_OPCODE_DMUL: |
||
476 | case TGSI_OPCODE_DMAX: |
||
477 | case TGSI_OPCODE_DMIN: |
||
478 | case TGSI_OPCODE_DSLT: |
||
479 | case TGSI_OPCODE_DSGE: |
||
480 | case TGSI_OPCODE_DSEQ: |
||
481 | case TGSI_OPCODE_DSNE: |
||
482 | case TGSI_OPCODE_DRCP: |
||
483 | case TGSI_OPCODE_DSQRT: |
||
484 | case TGSI_OPCODE_DMAD: |
||
485 | case TGSI_OPCODE_DFRAC: |
||
486 | case TGSI_OPCODE_DRSQ: |
||
487 | case TGSI_OPCODE_DTRUNC: |
||
488 | case TGSI_OPCODE_DCEIL: |
||
489 | case TGSI_OPCODE_DFLR: |
||
490 | case TGSI_OPCODE_DROUND: |
||
491 | return nv50_ir::TYPE_F64; |
||
492 | default: |
||
493 | return nv50_ir::TYPE_F32; |
||
494 | } |
||
495 | } |
||
496 | |||
497 | nv50_ir::DataType Instruction::inferDstType() const |
||
498 | { |
||
499 | switch (getOpcode()) { |
||
500 | case TGSI_OPCODE_D2U: |
||
501 | case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32; |
||
502 | case TGSI_OPCODE_D2I: |
||
503 | case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32; |
||
504 | case TGSI_OPCODE_FSEQ: |
||
505 | case TGSI_OPCODE_FSGE: |
||
506 | case TGSI_OPCODE_FSLT: |
||
507 | case TGSI_OPCODE_FSNE: |
||
508 | case TGSI_OPCODE_DSEQ: |
||
509 | case TGSI_OPCODE_DSGE: |
||
510 | case TGSI_OPCODE_DSLT: |
||
511 | case TGSI_OPCODE_DSNE: |
||
512 | return nv50_ir::TYPE_U32; |
||
513 | case TGSI_OPCODE_I2F: |
||
514 | case TGSI_OPCODE_U2F: |
||
515 | case TGSI_OPCODE_D2F: |
||
516 | return nv50_ir::TYPE_F32; |
||
517 | case TGSI_OPCODE_I2D: |
||
518 | case TGSI_OPCODE_U2D: |
||
519 | case TGSI_OPCODE_F2D: |
||
520 | return nv50_ir::TYPE_F64; |
||
521 | default: |
||
522 | return inferSrcType(); |
||
523 | } |
||
524 | } |
||
525 | |||
526 | nv50_ir::CondCode Instruction::getSetCond() const |
||
527 | { |
||
528 | using namespace nv50_ir; |
||
529 | |||
530 | switch (getOpcode()) { |
||
531 | case TGSI_OPCODE_SLT: |
||
532 | case TGSI_OPCODE_ISLT: |
||
533 | case TGSI_OPCODE_USLT: |
||
534 | case TGSI_OPCODE_FSLT: |
||
535 | case TGSI_OPCODE_DSLT: |
||
536 | return CC_LT; |
||
537 | case TGSI_OPCODE_SLE: |
||
538 | return CC_LE; |
||
539 | case TGSI_OPCODE_SGE: |
||
540 | case TGSI_OPCODE_ISGE: |
||
541 | case TGSI_OPCODE_USGE: |
||
542 | case TGSI_OPCODE_FSGE: |
||
543 | case TGSI_OPCODE_DSGE: |
||
544 | return CC_GE; |
||
545 | case TGSI_OPCODE_SGT: |
||
546 | return CC_GT; |
||
547 | case TGSI_OPCODE_SEQ: |
||
548 | case TGSI_OPCODE_USEQ: |
||
549 | case TGSI_OPCODE_FSEQ: |
||
550 | case TGSI_OPCODE_DSEQ: |
||
551 | return CC_EQ; |
||
552 | case TGSI_OPCODE_SNE: |
||
553 | case TGSI_OPCODE_FSNE: |
||
554 | case TGSI_OPCODE_DSNE: |
||
555 | return CC_NEU; |
||
556 | case TGSI_OPCODE_USNE: |
||
557 | return CC_NE; |
||
558 | default: |
||
559 | return CC_ALWAYS; |
||
560 | } |
||
561 | } |
||
562 | |||
563 | #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b |
||
564 | |||
565 | static nv50_ir::operation translateOpcode(uint opcode) |
||
566 | { |
||
567 | switch (opcode) { |
||
568 | NV50_IR_OPCODE_CASE(ARL, SHL); |
||
569 | NV50_IR_OPCODE_CASE(MOV, MOV); |
||
570 | |||
571 | NV50_IR_OPCODE_CASE(RCP, RCP); |
||
572 | NV50_IR_OPCODE_CASE(RSQ, RSQ); |
||
573 | |||
574 | NV50_IR_OPCODE_CASE(MUL, MUL); |
||
575 | NV50_IR_OPCODE_CASE(ADD, ADD); |
||
576 | |||
577 | NV50_IR_OPCODE_CASE(MIN, MIN); |
||
578 | NV50_IR_OPCODE_CASE(MAX, MAX); |
||
579 | NV50_IR_OPCODE_CASE(SLT, SET); |
||
580 | NV50_IR_OPCODE_CASE(SGE, SET); |
||
581 | NV50_IR_OPCODE_CASE(MAD, MAD); |
||
582 | NV50_IR_OPCODE_CASE(SUB, SUB); |
||
583 | |||
584 | NV50_IR_OPCODE_CASE(FLR, FLOOR); |
||
585 | NV50_IR_OPCODE_CASE(ROUND, CVT); |
||
586 | NV50_IR_OPCODE_CASE(EX2, EX2); |
||
587 | NV50_IR_OPCODE_CASE(LG2, LG2); |
||
588 | NV50_IR_OPCODE_CASE(POW, POW); |
||
589 | |||
590 | NV50_IR_OPCODE_CASE(ABS, ABS); |
||
591 | |||
592 | NV50_IR_OPCODE_CASE(COS, COS); |
||
593 | NV50_IR_OPCODE_CASE(DDX, DFDX); |
||
594 | NV50_IR_OPCODE_CASE(DDX_FINE, DFDX); |
||
595 | NV50_IR_OPCODE_CASE(DDY, DFDY); |
||
596 | NV50_IR_OPCODE_CASE(DDY_FINE, DFDY); |
||
597 | NV50_IR_OPCODE_CASE(KILL, DISCARD); |
||
598 | |||
599 | NV50_IR_OPCODE_CASE(SEQ, SET); |
||
600 | NV50_IR_OPCODE_CASE(SGT, SET); |
||
601 | NV50_IR_OPCODE_CASE(SIN, SIN); |
||
602 | NV50_IR_OPCODE_CASE(SLE, SET); |
||
603 | NV50_IR_OPCODE_CASE(SNE, SET); |
||
604 | NV50_IR_OPCODE_CASE(TEX, TEX); |
||
605 | NV50_IR_OPCODE_CASE(TXD, TXD); |
||
606 | NV50_IR_OPCODE_CASE(TXP, TEX); |
||
607 | |||
608 | NV50_IR_OPCODE_CASE(CAL, CALL); |
||
609 | NV50_IR_OPCODE_CASE(RET, RET); |
||
610 | NV50_IR_OPCODE_CASE(CMP, SLCT); |
||
611 | |||
612 | NV50_IR_OPCODE_CASE(TXB, TXB); |
||
613 | |||
614 | NV50_IR_OPCODE_CASE(DIV, DIV); |
||
615 | |||
616 | NV50_IR_OPCODE_CASE(TXL, TXL); |
||
617 | |||
618 | NV50_IR_OPCODE_CASE(CEIL, CEIL); |
||
619 | NV50_IR_OPCODE_CASE(I2F, CVT); |
||
620 | NV50_IR_OPCODE_CASE(NOT, NOT); |
||
621 | NV50_IR_OPCODE_CASE(TRUNC, TRUNC); |
||
622 | NV50_IR_OPCODE_CASE(SHL, SHL); |
||
623 | |||
624 | NV50_IR_OPCODE_CASE(AND, AND); |
||
625 | NV50_IR_OPCODE_CASE(OR, OR); |
||
626 | NV50_IR_OPCODE_CASE(MOD, MOD); |
||
627 | NV50_IR_OPCODE_CASE(XOR, XOR); |
||
628 | NV50_IR_OPCODE_CASE(SAD, SAD); |
||
629 | NV50_IR_OPCODE_CASE(TXF, TXF); |
||
630 | NV50_IR_OPCODE_CASE(TXQ, TXQ); |
||
631 | NV50_IR_OPCODE_CASE(TG4, TXG); |
||
632 | NV50_IR_OPCODE_CASE(LODQ, TXLQ); |
||
633 | |||
634 | NV50_IR_OPCODE_CASE(EMIT, EMIT); |
||
635 | NV50_IR_OPCODE_CASE(ENDPRIM, RESTART); |
||
636 | |||
637 | NV50_IR_OPCODE_CASE(KILL_IF, DISCARD); |
||
638 | |||
639 | NV50_IR_OPCODE_CASE(F2I, CVT); |
||
640 | NV50_IR_OPCODE_CASE(FSEQ, SET); |
||
641 | NV50_IR_OPCODE_CASE(FSGE, SET); |
||
642 | NV50_IR_OPCODE_CASE(FSLT, SET); |
||
643 | NV50_IR_OPCODE_CASE(FSNE, SET); |
||
644 | NV50_IR_OPCODE_CASE(IDIV, DIV); |
||
645 | NV50_IR_OPCODE_CASE(IMAX, MAX); |
||
646 | NV50_IR_OPCODE_CASE(IMIN, MIN); |
||
647 | NV50_IR_OPCODE_CASE(IABS, ABS); |
||
648 | NV50_IR_OPCODE_CASE(INEG, NEG); |
||
649 | NV50_IR_OPCODE_CASE(ISGE, SET); |
||
650 | NV50_IR_OPCODE_CASE(ISHR, SHR); |
||
651 | NV50_IR_OPCODE_CASE(ISLT, SET); |
||
652 | NV50_IR_OPCODE_CASE(F2U, CVT); |
||
653 | NV50_IR_OPCODE_CASE(U2F, CVT); |
||
654 | NV50_IR_OPCODE_CASE(UADD, ADD); |
||
655 | NV50_IR_OPCODE_CASE(UDIV, DIV); |
||
656 | NV50_IR_OPCODE_CASE(UMAD, MAD); |
||
657 | NV50_IR_OPCODE_CASE(UMAX, MAX); |
||
658 | NV50_IR_OPCODE_CASE(UMIN, MIN); |
||
659 | NV50_IR_OPCODE_CASE(UMOD, MOD); |
||
660 | NV50_IR_OPCODE_CASE(UMUL, MUL); |
||
661 | NV50_IR_OPCODE_CASE(USEQ, SET); |
||
662 | NV50_IR_OPCODE_CASE(USGE, SET); |
||
663 | NV50_IR_OPCODE_CASE(USHR, SHR); |
||
664 | NV50_IR_OPCODE_CASE(USLT, SET); |
||
665 | NV50_IR_OPCODE_CASE(USNE, SET); |
||
666 | |||
667 | NV50_IR_OPCODE_CASE(DABS, ABS); |
||
668 | NV50_IR_OPCODE_CASE(DNEG, NEG); |
||
669 | NV50_IR_OPCODE_CASE(DADD, ADD); |
||
670 | NV50_IR_OPCODE_CASE(DMUL, MUL); |
||
671 | NV50_IR_OPCODE_CASE(DMAX, MAX); |
||
672 | NV50_IR_OPCODE_CASE(DMIN, MIN); |
||
673 | NV50_IR_OPCODE_CASE(DSLT, SET); |
||
674 | NV50_IR_OPCODE_CASE(DSGE, SET); |
||
675 | NV50_IR_OPCODE_CASE(DSEQ, SET); |
||
676 | NV50_IR_OPCODE_CASE(DSNE, SET); |
||
677 | NV50_IR_OPCODE_CASE(DRCP, RCP); |
||
678 | NV50_IR_OPCODE_CASE(DSQRT, SQRT); |
||
679 | NV50_IR_OPCODE_CASE(DMAD, MAD); |
||
680 | NV50_IR_OPCODE_CASE(D2I, CVT); |
||
681 | NV50_IR_OPCODE_CASE(D2U, CVT); |
||
682 | NV50_IR_OPCODE_CASE(I2D, CVT); |
||
683 | NV50_IR_OPCODE_CASE(U2D, CVT); |
||
684 | NV50_IR_OPCODE_CASE(DRSQ, RSQ); |
||
685 | NV50_IR_OPCODE_CASE(DTRUNC, TRUNC); |
||
686 | NV50_IR_OPCODE_CASE(DCEIL, CEIL); |
||
687 | NV50_IR_OPCODE_CASE(DFLR, FLOOR); |
||
688 | NV50_IR_OPCODE_CASE(DROUND, CVT); |
||
689 | |||
690 | NV50_IR_OPCODE_CASE(IMUL_HI, MUL); |
||
691 | NV50_IR_OPCODE_CASE(UMUL_HI, MUL); |
||
692 | |||
693 | NV50_IR_OPCODE_CASE(SAMPLE, TEX); |
||
694 | NV50_IR_OPCODE_CASE(SAMPLE_B, TXB); |
||
695 | NV50_IR_OPCODE_CASE(SAMPLE_C, TEX); |
||
696 | NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX); |
||
697 | NV50_IR_OPCODE_CASE(SAMPLE_D, TXD); |
||
698 | NV50_IR_OPCODE_CASE(SAMPLE_L, TXL); |
||
699 | NV50_IR_OPCODE_CASE(SAMPLE_I, TXF); |
||
700 | NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF); |
||
701 | NV50_IR_OPCODE_CASE(GATHER4, TXG); |
||
702 | NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ); |
||
703 | |||
704 | NV50_IR_OPCODE_CASE(ATOMUADD, ATOM); |
||
705 | NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM); |
||
706 | NV50_IR_OPCODE_CASE(ATOMCAS, ATOM); |
||
707 | NV50_IR_OPCODE_CASE(ATOMAND, ATOM); |
||
708 | NV50_IR_OPCODE_CASE(ATOMOR, ATOM); |
||
709 | NV50_IR_OPCODE_CASE(ATOMXOR, ATOM); |
||
710 | NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM); |
||
711 | NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM); |
||
712 | NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM); |
||
713 | NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM); |
||
714 | |||
715 | NV50_IR_OPCODE_CASE(TEX2, TEX); |
||
716 | NV50_IR_OPCODE_CASE(TXB2, TXB); |
||
717 | NV50_IR_OPCODE_CASE(TXL2, TXL); |
||
718 | |||
719 | NV50_IR_OPCODE_CASE(IBFE, EXTBF); |
||
720 | NV50_IR_OPCODE_CASE(UBFE, EXTBF); |
||
721 | NV50_IR_OPCODE_CASE(BFI, INSBF); |
||
722 | NV50_IR_OPCODE_CASE(BREV, EXTBF); |
||
723 | NV50_IR_OPCODE_CASE(POPC, POPCNT); |
||
724 | NV50_IR_OPCODE_CASE(LSB, BFIND); |
||
725 | NV50_IR_OPCODE_CASE(IMSB, BFIND); |
||
726 | NV50_IR_OPCODE_CASE(UMSB, BFIND); |
||
727 | |||
728 | NV50_IR_OPCODE_CASE(END, EXIT); |
||
729 | |||
730 | default: |
||
731 | return nv50_ir::OP_NOP; |
||
732 | } |
||
733 | } |
||
734 | |||
735 | static uint16_t opcodeToSubOp(uint opcode) |
||
736 | { |
||
737 | switch (opcode) { |
||
738 | case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL); |
||
739 | case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL); |
||
740 | case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL); |
||
741 | case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD; |
||
742 | case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH; |
||
743 | case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS; |
||
744 | case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND; |
||
745 | case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR; |
||
746 | case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR; |
||
747 | case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN; |
||
748 | case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; |
||
749 | case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; |
||
750 | case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; |
||
751 | case TGSI_OPCODE_IMUL_HI: |
||
752 | case TGSI_OPCODE_UMUL_HI: |
||
753 | return NV50_IR_SUBOP_MUL_HIGH; |
||
754 | default: |
||
755 | return 0; |
||
756 | } |
||
757 | } |
||
758 | |||
759 | bool Instruction::checkDstSrcAliasing() const |
||
760 | { |
||
761 | if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory |
||
762 | return false; |
||
763 | |||
764 | for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) { |
||
765 | if (insn->Src[s].Register.File == TGSI_FILE_NULL) |
||
766 | break; |
||
767 | if (insn->Src[s].Register.File == insn->Dst[0].Register.File && |
||
768 | insn->Src[s].Register.Index == insn->Dst[0].Register.Index) |
||
769 | return true; |
||
770 | } |
||
771 | return false; |
||
772 | } |
||
773 | |||
774 | class Source |
||
775 | { |
||
776 | public: |
||
777 | Source(struct nv50_ir_prog_info *); |
||
778 | ~Source(); |
||
779 | |||
780 | public: |
||
781 | bool scanSource(); |
||
782 | unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; } |
||
783 | |||
784 | public: |
||
785 | struct tgsi_shader_info scan; |
||
786 | struct tgsi_full_instruction *insns; |
||
787 | const struct tgsi_token *tokens; |
||
788 | struct nv50_ir_prog_info *info; |
||
789 | |||
790 | nv50_ir::DynArray tempArrays; |
||
791 | nv50_ir::DynArray immdArrays; |
||
792 | |||
793 | typedef nv50_ir::BuildUtil::Location Location; |
||
794 | // these registers are per-subroutine, cannot be used for parameter passing |
||
795 | std::set |
||
796 | |||
797 | bool mainTempsInLMem; |
||
798 | |||
799 | int clipVertexOutput; |
||
800 | |||
801 | struct TextureView { |
||
802 | uint8_t target; // TGSI_TEXTURE_* |
||
803 | }; |
||
804 | std::vector |
||
805 | |||
806 | struct Resource { |
||
807 | uint8_t target; // TGSI_TEXTURE_* |
||
808 | bool raw; |
||
809 | uint8_t slot; // $surface index |
||
810 | }; |
||
811 | std::vector |
||
812 | |||
813 | private: |
||
814 | int inferSysValDirection(unsigned sn) const; |
||
815 | bool scanDeclaration(const struct tgsi_full_declaration *); |
||
816 | bool scanInstruction(const struct tgsi_full_instruction *); |
||
817 | void scanProperty(const struct tgsi_full_property *); |
||
818 | void scanImmediate(const struct tgsi_full_immediate *); |
||
819 | |||
820 | inline bool isEdgeFlagPassthrough(const Instruction&) const; |
||
821 | }; |
||
822 | |||
823 | Source::Source(struct nv50_ir_prog_info *prog) : info(prog) |
||
824 | { |
||
825 | tokens = (const struct tgsi_token *)info->bin.source; |
||
826 | |||
827 | if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) |
||
828 | tgsi_dump(tokens, 0); |
||
829 | |||
830 | mainTempsInLMem = FALSE; |
||
831 | } |
||
832 | |||
833 | Source::~Source() |
||
834 | { |
||
835 | if (insns) |
||
836 | FREE(insns); |
||
837 | |||
838 | if (info->immd.data) |
||
839 | FREE(info->immd.data); |
||
840 | if (info->immd.type) |
||
841 | FREE(info->immd.type); |
||
842 | } |
||
843 | |||
844 | bool Source::scanSource() |
||
845 | { |
||
846 | unsigned insnCount = 0; |
||
847 | struct tgsi_parse_context parse; |
||
848 | |||
849 | tgsi_scan_shader(tokens, &scan); |
||
850 | |||
851 | insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions * |
||
852 | sizeof(insns[0])); |
||
853 | if (!insns) |
||
854 | return false; |
||
855 | |||
856 | clipVertexOutput = -1; |
||
857 | |||
858 | textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); |
||
859 | resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); |
||
860 | |||
861 | info->immd.bufSize = 0; |
||
862 | |||
863 | info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; |
||
864 | info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; |
||
865 | info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; |
||
866 | |||
867 | if (info->type == PIPE_SHADER_FRAGMENT) { |
||
868 | info->prop.fp.writesDepth = scan.writes_z; |
||
869 | info->prop.fp.usesDiscard = scan.uses_kill; |
||
870 | } else |
||
871 | if (info->type == PIPE_SHADER_GEOMETRY) { |
||
872 | info->prop.gp.instanceCount = 1; // default value |
||
873 | } |
||
874 | |||
875 | info->io.viewportId = -1; |
||
876 | |||
877 | info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); |
||
878 | info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); |
||
879 | |||
880 | tgsi_parse_init(&parse, tokens); |
||
881 | while (!tgsi_parse_end_of_tokens(&parse)) { |
||
882 | tgsi_parse_token(&parse); |
||
883 | |||
884 | switch (parse.FullToken.Token.Type) { |
||
885 | case TGSI_TOKEN_TYPE_IMMEDIATE: |
||
886 | scanImmediate(&parse.FullToken.FullImmediate); |
||
887 | break; |
||
888 | case TGSI_TOKEN_TYPE_DECLARATION: |
||
889 | scanDeclaration(&parse.FullToken.FullDeclaration); |
||
890 | break; |
||
891 | case TGSI_TOKEN_TYPE_INSTRUCTION: |
||
892 | insns[insnCount++] = parse.FullToken.FullInstruction; |
||
893 | scanInstruction(&parse.FullToken.FullInstruction); |
||
894 | break; |
||
895 | case TGSI_TOKEN_TYPE_PROPERTY: |
||
896 | scanProperty(&parse.FullToken.FullProperty); |
||
897 | break; |
||
898 | default: |
||
899 | INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type); |
||
900 | break; |
||
901 | } |
||
902 | } |
||
903 | tgsi_parse_free(&parse); |
||
904 | |||
905 | if (mainTempsInLMem) |
||
906 | info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; |
||
907 | |||
908 | if (info->io.genUserClip > 0) { |
||
909 | info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1; |
||
910 | |||
911 | const unsigned int nOut = (info->io.genUserClip + 3) / 4; |
||
912 | |||
913 | for (unsigned int n = 0; n < nOut; ++n) { |
||
914 | unsigned int i = info->numOutputs++; |
||
915 | info->out[i].id = i; |
||
916 | info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; |
||
917 | info->out[i].si = n; |
||
918 | info->out[i].mask = info->io.clipDistanceMask >> (n * 4); |
||
919 | } |
||
920 | } |
||
921 | |||
922 | return info->assignSlots(info) == 0; |
||
923 | } |
||
924 | |||
925 | void Source::scanProperty(const struct tgsi_full_property *prop) |
||
926 | { |
||
927 | switch (prop->Property.PropertyName) { |
||
928 | case TGSI_PROPERTY_GS_OUTPUT_PRIM: |
||
929 | info->prop.gp.outputPrim = prop->u[0].Data; |
||
930 | break; |
||
931 | case TGSI_PROPERTY_GS_INPUT_PRIM: |
||
932 | info->prop.gp.inputPrim = prop->u[0].Data; |
||
933 | break; |
||
934 | case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: |
||
935 | info->prop.gp.maxVertices = prop->u[0].Data; |
||
936 | break; |
||
937 | case TGSI_PROPERTY_GS_INVOCATIONS: |
||
938 | info->prop.gp.instanceCount = prop->u[0].Data; |
||
939 | break; |
||
940 | case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: |
||
941 | info->prop.fp.separateFragData = TRUE; |
||
942 | break; |
||
943 | case TGSI_PROPERTY_FS_COORD_ORIGIN: |
||
944 | case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: |
||
945 | // we don't care |
||
946 | break; |
||
947 | case TGSI_PROPERTY_VS_PROHIBIT_UCPS: |
||
948 | info->io.genUserClip = -1; |
||
949 | break; |
||
950 | default: |
||
951 | INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); |
||
952 | break; |
||
953 | } |
||
954 | } |
||
955 | |||
956 | void Source::scanImmediate(const struct tgsi_full_immediate *imm) |
||
957 | { |
||
958 | const unsigned n = info->immd.count++; |
||
959 | |||
960 | assert(n < scan.immediate_count); |
||
961 | |||
962 | for (int c = 0; c < 4; ++c) |
||
963 | info->immd.data[n * 4 + c] = imm->u[c].Uint; |
||
964 | |||
965 | info->immd.type[n] = imm->Immediate.DataType; |
||
966 | } |
||
967 | |||
968 | int Source::inferSysValDirection(unsigned sn) const |
||
969 | { |
||
970 | switch (sn) { |
||
971 | case TGSI_SEMANTIC_INSTANCEID: |
||
972 | case TGSI_SEMANTIC_VERTEXID: |
||
973 | return 1; |
||
974 | case TGSI_SEMANTIC_LAYER: |
||
975 | #if 0 |
||
976 | case TGSI_SEMANTIC_VIEWPORTINDEX: |
||
977 | return 0; |
||
978 | #endif |
||
979 | case TGSI_SEMANTIC_PRIMID: |
||
980 | return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0; |
||
981 | default: |
||
982 | return 0; |
||
983 | } |
||
984 | } |
||
985 | |||
986 | bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) |
||
987 | { |
||
988 | unsigned i, c; |
||
989 | unsigned sn = TGSI_SEMANTIC_GENERIC; |
||
990 | unsigned si = 0; |
||
991 | const unsigned first = decl->Range.First, last = decl->Range.Last; |
||
992 | |||
993 | if (decl->Declaration.Semantic) { |
||
994 | sn = decl->Semantic.Name; |
||
995 | si = decl->Semantic.Index; |
||
996 | } |
||
997 | |||
998 | if (decl->Declaration.Local) { |
||
999 | for (i = first; i <= last; ++i) { |
||
1000 | for (c = 0; c < 4; ++c) { |
||
1001 | locals.insert( |
||
1002 | Location(decl->Declaration.File, decl->Dim.Index2D, i, c)); |
||
1003 | } |
||
1004 | } |
||
1005 | } |
||
1006 | |||
1007 | switch (decl->Declaration.File) { |
||
1008 | case TGSI_FILE_INPUT: |
||
1009 | if (info->type == PIPE_SHADER_VERTEX) { |
||
1010 | // all vertex attributes are equal |
||
1011 | for (i = first; i <= last; ++i) { |
||
1012 | info->in[i].sn = TGSI_SEMANTIC_GENERIC; |
||
1013 | info->in[i].si = i; |
||
1014 | } |
||
1015 | } else { |
||
1016 | for (i = first; i <= last; ++i, ++si) { |
||
1017 | info->in[i].id = i; |
||
1018 | info->in[i].sn = sn; |
||
1019 | info->in[i].si = si; |
||
1020 | if (info->type == PIPE_SHADER_FRAGMENT) { |
||
1021 | // translate interpolation mode |
||
1022 | switch (decl->Interp.Interpolate) { |
||
1023 | case TGSI_INTERPOLATE_CONSTANT: |
||
1024 | info->in[i].flat = 1; |
||
1025 | break; |
||
1026 | case TGSI_INTERPOLATE_COLOR: |
||
1027 | info->in[i].sc = 1; |
||
1028 | break; |
||
1029 | case TGSI_INTERPOLATE_LINEAR: |
||
1030 | info->in[i].linear = 1; |
||
1031 | break; |
||
1032 | default: |
||
1033 | break; |
||
1034 | } |
||
1035 | if (decl->Interp.Location || info->io.sampleInterp) |
||
1036 | info->in[i].centroid = 1; |
||
1037 | } |
||
1038 | } |
||
1039 | } |
||
1040 | break; |
||
1041 | case TGSI_FILE_OUTPUT: |
||
1042 | for (i = first; i <= last; ++i, ++si) { |
||
1043 | switch (sn) { |
||
1044 | case TGSI_SEMANTIC_POSITION: |
||
1045 | if (info->type == PIPE_SHADER_FRAGMENT) |
||
1046 | info->io.fragDepth = i; |
||
1047 | else |
||
1048 | if (clipVertexOutput < 0) |
||
1049 | clipVertexOutput = i; |
||
1050 | break; |
||
1051 | case TGSI_SEMANTIC_COLOR: |
||
1052 | if (info->type == PIPE_SHADER_FRAGMENT) |
||
1053 | info->prop.fp.numColourResults++; |
||
1054 | break; |
||
1055 | case TGSI_SEMANTIC_EDGEFLAG: |
||
1056 | info->io.edgeFlagOut = i; |
||
1057 | break; |
||
1058 | case TGSI_SEMANTIC_CLIPVERTEX: |
||
1059 | clipVertexOutput = i; |
||
1060 | break; |
||
1061 | case TGSI_SEMANTIC_CLIPDIST: |
||
1062 | info->io.clipDistanceMask |= |
||
1063 | decl->Declaration.UsageMask << (si * 4); |
||
1064 | info->io.genUserClip = -1; |
||
1065 | break; |
||
1066 | case TGSI_SEMANTIC_SAMPLEMASK: |
||
1067 | info->io.sampleMask = i; |
||
1068 | break; |
||
1069 | case TGSI_SEMANTIC_VIEWPORT_INDEX: |
||
1070 | info->io.viewportId = i; |
||
1071 | break; |
||
1072 | default: |
||
1073 | break; |
||
1074 | } |
||
1075 | info->out[i].id = i; |
||
1076 | info->out[i].sn = sn; |
||
1077 | info->out[i].si = si; |
||
1078 | } |
||
1079 | break; |
||
1080 | case TGSI_FILE_SYSTEM_VALUE: |
||
1081 | switch (sn) { |
||
1082 | case TGSI_SEMANTIC_INSTANCEID: |
||
1083 | info->io.instanceId = first; |
||
1084 | break; |
||
1085 | case TGSI_SEMANTIC_VERTEXID: |
||
1086 | info->io.vertexId = first; |
||
1087 | break; |
||
1088 | default: |
||
1089 | break; |
||
1090 | } |
||
1091 | for (i = first; i <= last; ++i, ++si) { |
||
1092 | info->sv[i].sn = sn; |
||
1093 | info->sv[i].si = si; |
||
1094 | info->sv[i].input = inferSysValDirection(sn); |
||
1095 | } |
||
1096 | break; |
||
1097 | case TGSI_FILE_RESOURCE: |
||
1098 | for (i = first; i <= last; ++i) { |
||
1099 | resources[i].target = decl->Resource.Resource; |
||
1100 | resources[i].raw = decl->Resource.Raw; |
||
1101 | resources[i].slot = i; |
||
1102 | } |
||
1103 | break; |
||
1104 | case TGSI_FILE_SAMPLER_VIEW: |
||
1105 | for (i = first; i <= last; ++i) |
||
1106 | textureViews[i].target = decl->SamplerView.Resource; |
||
1107 | break; |
||
1108 | case TGSI_FILE_NULL: |
||
1109 | case TGSI_FILE_TEMPORARY: |
||
1110 | case TGSI_FILE_ADDRESS: |
||
1111 | case TGSI_FILE_CONSTANT: |
||
1112 | case TGSI_FILE_IMMEDIATE: |
||
1113 | case TGSI_FILE_PREDICATE: |
||
1114 | case TGSI_FILE_SAMPLER: |
||
1115 | break; |
||
1116 | default: |
||
1117 | ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); |
||
1118 | return false; |
||
1119 | } |
||
1120 | return true; |
||
1121 | } |
||
1122 | |||
1123 | inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const |
||
1124 | { |
||
1125 | return insn.getOpcode() == TGSI_OPCODE_MOV && |
||
1126 | insn.getDst(0).getIndex(0) == info->io.edgeFlagOut && |
||
1127 | insn.getSrc(0).getFile() == TGSI_FILE_INPUT; |
||
1128 | } |
||
1129 | |||
1130 | bool Source::scanInstruction(const struct tgsi_full_instruction *inst) |
||
1131 | { |
||
1132 | Instruction insn(inst); |
||
1133 | |||
1134 | if (insn.getOpcode() == TGSI_OPCODE_BARRIER) |
||
1135 | info->numBarriers = 1; |
||
1136 | |||
1137 | if (insn.dstCount()) { |
||
1138 | if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) { |
||
1139 | Instruction::DstRegister dst = insn.getDst(0); |
||
1140 | |||
1141 | if (dst.isIndirect(0)) |
||
1142 | for (unsigned i = 0; i < info->numOutputs; ++i) |
||
1143 | info->out[i].mask = 0xf; |
||
1144 | else |
||
1145 | info->out[dst.getIndex(0)].mask |= dst.getMask(); |
||
1146 | |||
1147 | if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE || |
||
1148 | info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID || |
||
1149 | info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER || |
||
1150 | info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX || |
||
1151 | info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG) |
||
1152 | info->out[dst.getIndex(0)].mask &= 1; |
||
1153 | |||
1154 | if (isEdgeFlagPassthrough(insn)) |
||
1155 | info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); |
||
1156 | } else |
||
1157 | if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { |
||
1158 | if (insn.getDst(0).isIndirect(0)) |
||
1159 | mainTempsInLMem = TRUE; |
||
1160 | } |
||
1161 | } |
||
1162 | |||
1163 | for (unsigned s = 0; s < insn.srcCount(); ++s) { |
||
1164 | Instruction::SrcRegister src = insn.getSrc(s); |
||
1165 | if (src.getFile() == TGSI_FILE_TEMPORARY) { |
||
1166 | if (src.isIndirect(0)) |
||
1167 | mainTempsInLMem = TRUE; |
||
1168 | } else |
||
1169 | if (src.getFile() == TGSI_FILE_RESOURCE) { |
||
1170 | if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL) |
||
1171 | info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? |
||
1172 | 0x1 : 0x2; |
||
1173 | } |
||
1174 | if (src.getFile() != TGSI_FILE_INPUT) |
||
1175 | continue; |
||
1176 | unsigned mask = insn.srcMask(s); |
||
1177 | |||
1178 | if (src.isIndirect(0)) { |
||
1179 | for (unsigned i = 0; i < info->numInputs; ++i) |
||
1180 | info->in[i].mask = 0xf; |
||
1181 | } else { |
||
1182 | const int i = src.getIndex(0); |
||
1183 | for (unsigned c = 0; c < 4; ++c) { |
||
1184 | if (!(mask & (1 << c))) |
||
1185 | continue; |
||
1186 | int k = src.getSwizzle(c); |
||
1187 | if (k <= TGSI_SWIZZLE_W) |
||
1188 | info->in[i].mask |= 1 << k; |
||
1189 | } |
||
1190 | switch (info->in[i].sn) { |
||
1191 | case TGSI_SEMANTIC_PSIZE: |
||
1192 | case TGSI_SEMANTIC_PRIMID: |
||
1193 | case TGSI_SEMANTIC_FOG: |
||
1194 | info->in[i].mask &= 0x1; |
||
1195 | break; |
||
1196 | case TGSI_SEMANTIC_PCOORD: |
||
1197 | info->in[i].mask &= 0x3; |
||
1198 | break; |
||
1199 | default: |
||
1200 | break; |
||
1201 | } |
||
1202 | } |
||
1203 | } |
||
1204 | return true; |
||
1205 | } |
||
1206 | |||
1207 | nv50_ir::TexInstruction::Target |
||
1208 | Instruction::getTexture(const tgsi::Source *code, int s) const |
||
1209 | { |
||
1210 | // XXX: indirect access |
||
1211 | unsigned int r; |
||
1212 | |||
1213 | switch (getSrc(s).getFile()) { |
||
1214 | case TGSI_FILE_RESOURCE: |
||
1215 | r = getSrc(s).getIndex(0); |
||
1216 | return translateTexture(code->resources.at(r).target); |
||
1217 | case TGSI_FILE_SAMPLER_VIEW: |
||
1218 | r = getSrc(s).getIndex(0); |
||
1219 | return translateTexture(code->textureViews.at(r).target); |
||
1220 | default: |
||
1221 | return translateTexture(insn->Texture.Texture); |
||
1222 | } |
||
1223 | } |
||
1224 | |||
1225 | } // namespace tgsi |
||
1226 | |||
1227 | namespace { |
||
1228 | |||
1229 | using namespace nv50_ir; |
||
1230 | |||
// Converter: builds nv50 IR for a Program from an already scanned
// tgsi::Source (held in `code`; `info` is the matching nv50_ir_prog_info).
// It derives from BuildUtil and declares the helpers used to fetch TGSI
// sources, store TGSI destinations and translate individual opcodes.
//
// NOTE(review): this listing appears to have lost the contents of angle
// brackets (template parameter/argument lists). The declarations flagged
// below are incomplete as shown and must be restored from the original file
// before this text can compile.
1231 | class Converter : public BuildUtil |
||
1232 | { |
||
1233 | public: |
||
1234 | Converter(Program *, const tgsi::Source *); |
||
1235 | ~Converter(); |
||
1236 | |||
1237 | bool run(); |
||
1238 | |||
1239 | private: |
||
1240 | struct Subroutine |
||
1241 | { |
||
1242 | Subroutine(Function *f) : f(f) { } |
||
1243 | Function *f; |
||
1244 | ValueMap values; |
||
1245 | }; |
||
1246 | |||
1247 | Value *shiftAddress(Value *); |
||
1248 | Value *getVertexBase(int s); |
||
1249 | DataArray *getArrayForFile(unsigned file, int idx); |
||
1250 | Value *fetchSrc(int s, int c); |
||
1251 | Value *acquireDst(int d, int c); |
||
1252 | void storeDst(int d, int c, Value *); |
||
1253 | |||
1254 | Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr); |
||
1255 | void storeDst(const tgsi::Instruction::DstRegister dst, int c, |
||
1256 | Value *val, Value *ptr); |
||
1257 | |||
1258 | Value *applySrcMod(Value *, int s, int c); |
||
1259 | |||
1260 | Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr); |
||
1261 | Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c); |
||
1262 | Symbol *dstToSym(tgsi::Instruction::DstRegister, int c); |
||
1263 | |||
1264 | bool handleInstruction(const struct tgsi_full_instruction *); |
||
1265 | void exportOutputs(); |
||
1266 | inline Subroutine *getSubroutine(unsigned ip); |
||
1267 | inline Subroutine *getSubroutine(Function *); |
||
1268 | inline bool isEndOfSubroutine(uint ip); |
||
1269 | |||
1270 | void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask); |
||
1271 | |||
1272 | // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto) |
||
1273 | void setTexRS(TexInstruction *, unsigned int& s, int R, int S); |
||
1274 | void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); |
||
1275 | void handleTXF(Value *dst0[4], int R, int L_M); |
||
1276 | void handleTXQ(Value *dst0[4], enum TexQuery); |
||
1277 | void handleLIT(Value *dst0[4]); |
||
1278 | void handleUserClipPlanes(); |
||
1279 | |||
1280 | Symbol *getResourceBase(int r); |
||
// NOTE(review): truncated after `std::vector` - element type and the
// remaining parameters are missing from this listing.
1281 | void getResourceCoords(std::vector |
||
1282 | |||
1283 | void handleLOAD(Value *dst0[4]); |
||
1284 | void handleSTORE(); |
||
1285 | void handleATOM(Value *dst0[4], DataType, uint16_t subOp); |
||
1286 | |||
1287 | void handleINTERP(Value *dst0[4]); |
||
1288 | |||
1289 | Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); |
||
1290 | |||
1291 | void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); |
||
1292 | |||
1293 | Value *buildDot(int dim); |
||
1294 | |||
1295 | class BindArgumentsPass : public Pass { |
||
1296 | public: |
||
1297 | BindArgumentsPass(Converter &conv) : conv(conv) { } |
||
1298 | |||
1299 | private: |
||
1300 | Converter &conv; |
||
1301 | Subroutine *sub; |
||
1302 | |||
1303 | inline const Location *getValueLocation(Subroutine *, Value *); |
||
1304 | |||
// NOTE(review): the template parameter list (it presumably introduces T)
// and the return type are missing here.
1305 | template |
||
1306 | updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *), |
||
1307 | T (Function::*proto)); |
||
1308 | |||
// NOTE(review): same truncation as above - template header and return
// type are missing.
1309 | template |
||
1310 | updatePrototype(BitSet *set, void (Function::*updateSet)(), |
||
1311 | T (Function::*proto)); |
||
1312 | |||
1313 | protected: |
||
1314 | bool visit(Function *); |
||
1315 | bool visit(BasicBlock *bb) { return false; } |
||
1316 | }; |
||
1317 | |||
1318 | private: |
||
1319 | const tgsi::Source *code; |
||
1320 | const struct nv50_ir_prog_info *info; |
||
1321 | |||
1322 | struct { |
||
// NOTE(review): template arguments and the member name are missing.
1323 | std::map |
||
1324 | Subroutine *cur; |
||
1325 | } sub; |
||
1326 | |||
1327 | uint ip; // instruction pointer |
||
1328 | |||
1329 | tgsi::Instruction tgsi; |
||
1330 | |||
1331 | DataType dstTy; |
||
1332 | DataType srcTy; |
||
1333 | |||
1334 | DataArray tData; // TGSI_FILE_TEMPORARY |
||
1335 | DataArray aData; // TGSI_FILE_ADDRESS |
||
1336 | DataArray pData; // TGSI_FILE_PREDICATE |
||
1337 | DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers) |
||
1338 | |||
1339 | Value *zero; |
||
1340 | Value *fragCoord[4]; |
||
1341 | Value *clipVtx[4]; |
||
1342 | |||
1343 | Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP) |
||
1344 | uint8_t vtxBaseValid; |
||
1345 | |||
1346 | Stack condBBs; // fork BB, then else clause BB |
||
1347 | Stack joinBBs; // fork BB, for inserting join ops on ENDIF |
||
1348 | Stack loopBBs; // loop headers |
||
1349 | Stack breakBBs; // end of / after loop |
||
1350 | |||
1351 | Value *viewport; |
||
1352 | }; |
||
1353 | |||
1354 | Symbol * |
||
1355 | Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c) |
||
1356 | { |
||
1357 | const int swz = src.getSwizzle(c); |
||
1358 | |||
1359 | /* TODO: Use Array ID when it's available for the index */ |
||
1360 | return makeSym(src.getFile(), |
||
1361 | src.is2D() ? src.getIndex(1) : 0, |
||
1362 | src.getIndex(0), swz, |
||
1363 | src.getIndex(0) * 16 + swz * 4); |
||
1364 | } |
||
1365 | |||
1366 | Symbol * |
||
1367 | Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c) |
||
1368 | { |
||
1369 | /* TODO: Use Array ID when it's available for the index */ |
||
1370 | return makeSym(dst.getFile(), |
||
1371 | dst.is2D() ? dst.getIndex(1) : 0, |
||
1372 | dst.getIndex(0), c, |
||
1373 | dst.getIndex(0) * 16 + c * 4); |
||
1374 | } |
||
1375 | |||
1376 | Symbol * |
||
1377 | Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) |
||
1378 | { |
||
1379 | Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile)); |
||
1380 | |||
1381 | sym->reg.fileIndex = fileIdx; |
||
1382 | |||
1383 | if (idx >= 0) { |
||
1384 | if (sym->reg.file == FILE_SHADER_INPUT) |
||
1385 | sym->setOffset(info->in[idx].slot[c] * 4); |
||
1386 | else |
||
1387 | if (sym->reg.file == FILE_SHADER_OUTPUT) |
||
1388 | sym->setOffset(info->out[idx].slot[c] * 4); |
||
1389 | else |
||
1390 | if (sym->reg.file == FILE_SYSTEM_VALUE) |
||
1391 | sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c); |
||
1392 | else |
||
1393 | sym->setOffset(address); |
||
1394 | } else { |
||
1395 | sym->setOffset(address); |
||
1396 | } |
||
1397 | return sym; |
||
1398 | } |
||
1399 | |||
1400 | static inline uint8_t |
||
1401 | translateInterpMode(const struct nv50_ir_varying *var, operation& op) |
||
1402 | { |
||
1403 | uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; |
||
1404 | |||
1405 | if (var->flat) |
||
1406 | mode = NV50_IR_INTERP_FLAT; |
||
1407 | else |
||
1408 | if (var->linear) |
||
1409 | mode = NV50_IR_INTERP_LINEAR; |
||
1410 | else |
||
1411 | if (var->sc) |
||
1412 | mode = NV50_IR_INTERP_SC; |
||
1413 | |||
1414 | op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) |
||
1415 | ? OP_PINTERP : OP_LINTERP; |
||
1416 | |||
1417 | if (var->centroid) |
||
1418 | mode |= NV50_IR_INTERP_CENTROID; |
||
1419 | |||
1420 | return mode; |
||
1421 | } |
||
1422 | |||
1423 | Value * |
||
1424 | Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr) |
||
1425 | { |
||
1426 | operation op; |
||
1427 | |||
1428 | // XXX: no way to know interpolation mode if we don't know what's accessed |
||
1429 | const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 : |
||
1430 | src.getIndex(0)], op); |
||
1431 | |||
1432 | Instruction *insn = new_Instruction(func, op, TYPE_F32); |
||
1433 | |||
1434 | insn->setDef(0, getScratch()); |
||
1435 | insn->setSrc(0, srcToSym(src, c)); |
||
1436 | if (op == OP_PINTERP) |
||
1437 | insn->setSrc(1, fragCoord[3]); |
||
1438 | if (ptr) |
||
1439 | insn->setIndirect(0, 0, ptr); |
||
1440 | |||
1441 | insn->setInterpolate(mode); |
||
1442 | |||
1443 | bb->insertTail(insn); |
||
1444 | return insn->getDef(0); |
||
1445 | } |
||
1446 | |||
1447 | Value * |
||
1448 | Converter::applySrcMod(Value *val, int s, int c) |
||
1449 | { |
||
1450 | Modifier m = tgsi.getSrc(s).getMod(c); |
||
1451 | DataType ty = tgsi.inferSrcType(); |
||
1452 | |||
1453 | if (m & Modifier(NV50_IR_MOD_ABS)) |
||
1454 | val = mkOp1v(OP_ABS, ty, getScratch(), val); |
||
1455 | |||
1456 | if (m & Modifier(NV50_IR_MOD_NEG)) |
||
1457 | val = mkOp1v(OP_NEG, ty, getScratch(), val); |
||
1458 | |||
1459 | return val; |
||
1460 | } |
||
1461 | |||
1462 | Value * |
||
1463 | Converter::getVertexBase(int s) |
||
1464 | { |
||
1465 | assert(s < 5); |
||
1466 | if (!(vtxBaseValid & (1 << s))) { |
||
1467 | const int index = tgsi.getSrc(s).getIndex(1); |
||
1468 | Value *rel = NULL; |
||
1469 | if (tgsi.getSrc(s).isIndirect(1)) |
||
1470 | rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL); |
||
1471 | vtxBaseValid |= 1 << s; |
||
1472 | vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), |
||
1473 | mkImm(index), rel); |
||
1474 | } |
||
1475 | return vtxBase[s]; |
||
1476 | } |
||
1477 | |||
1478 | Value * |
||
1479 | Converter::fetchSrc(int s, int c) |
||
1480 | { |
||
1481 | Value *res; |
||
1482 | Value *ptr = NULL, *dimRel = NULL; |
||
1483 | |||
1484 | tgsi::Instruction::SrcRegister src = tgsi.getSrc(s); |
||
1485 | |||
1486 | if (src.isIndirect(0)) |
||
1487 | ptr = fetchSrc(src.getIndirect(0), 0, NULL); |
||
1488 | |||
1489 | if (src.is2D()) { |
||
1490 | switch (src.getFile()) { |
||
1491 | case TGSI_FILE_INPUT: |
||
1492 | dimRel = getVertexBase(s); |
||
1493 | break; |
||
1494 | case TGSI_FILE_CONSTANT: |
||
1495 | // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k] |
||
1496 | if (src.isIndirect(1)) |
||
1497 | dimRel = fetchSrc(src.getIndirect(1), 0, 0); |
||
1498 | break; |
||
1499 | default: |
||
1500 | break; |
||
1501 | } |
||
1502 | } |
||
1503 | |||
1504 | res = fetchSrc(src, c, ptr); |
||
1505 | |||
1506 | if (dimRel) |
||
1507 | res->getInsn()->setIndirect(0, 1, dimRel); |
||
1508 | |||
1509 | return applySrcMod(res, s, c); |
||
1510 | } |
||
1511 | |||
1512 | Converter::DataArray * |
||
1513 | Converter::getArrayForFile(unsigned file, int idx) |
||
1514 | { |
||
1515 | switch (file) { |
||
1516 | case TGSI_FILE_TEMPORARY: |
||
1517 | return &tData; |
||
1518 | case TGSI_FILE_PREDICATE: |
||
1519 | return &pData; |
||
1520 | case TGSI_FILE_ADDRESS: |
||
1521 | return &aData; |
||
1522 | case TGSI_FILE_OUTPUT: |
||
1523 | assert(prog->getType() == Program::TYPE_FRAGMENT); |
||
1524 | return &oData; |
||
1525 | default: |
||
1526 | assert(!"invalid/unhandled TGSI source file"); |
||
1527 | return NULL; |
||
1528 | } |
||
1529 | } |
||
1530 | |||
1531 | Value * |
||
1532 | Converter::shiftAddress(Value *index) |
||
1533 | { |
||
1534 | if (!index) |
||
1535 | return NULL; |
||
1536 | return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4)); |
||
1537 | } |
||
1538 | |||
1539 | Value * |
||
1540 | Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) |
||
1541 | { |
||
1542 | const int idx2d = src.is2D() ? src.getIndex(1) : 0; |
||
1543 | const int idx = src.getIndex(0); |
||
1544 | const int swz = src.getSwizzle(c); |
||
1545 | |||
1546 | switch (src.getFile()) { |
||
1547 | case TGSI_FILE_IMMEDIATE: |
||
1548 | assert(!ptr); |
||
1549 | return loadImm(NULL, info->immd.data[idx * 4 + swz]); |
||
1550 | case TGSI_FILE_CONSTANT: |
||
1551 | return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr)); |
||
1552 | case TGSI_FILE_INPUT: |
||
1553 | if (prog->getType() == Program::TYPE_FRAGMENT) { |
||
1554 | // don't load masked inputs, won't be assigned a slot |
||
1555 | if (!ptr && !(info->in[idx].mask & (1 << swz))) |
||
1556 | return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f); |
||
1557 | if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) |
||
1558 | return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0)); |
||
1559 | return interpolate(src, c, shiftAddress(ptr)); |
||
1560 | } else |
||
1561 | if (prog->getType() == Program::TYPE_GEOMETRY) { |
||
1562 | if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID) |
||
1563 | return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0)); |
||
1564 | // XXX: This is going to be a problem with scalar arrays, i.e. when |
||
1565 | // we cannot assume that the address is given in units of vec4. |
||
1566 | // |
||
1567 | // nv50 and nvc0 need different things here, so let the lowering |
||
1568 | // passes decide what to do with the address |
||
1569 | if (ptr) |
||
1570 | return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); |
||
1571 | } |
||
1572 | return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr)); |
||
1573 | case TGSI_FILE_OUTPUT: |
||
1574 | assert(!"load from output file"); |
||
1575 | return NULL; |
||
1576 | case TGSI_FILE_SYSTEM_VALUE: |
||
1577 | assert(!ptr); |
||
1578 | return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); |
||
1579 | default: |
||
1580 | return getArrayForFile(src.getFile(), idx2d)->load( |
||
1581 | sub.cur->values, idx, swz, shiftAddress(ptr)); |
||
1582 | } |
||
1583 | } |
||
1584 | |||
1585 | Value * |
||
1586 | Converter::acquireDst(int d, int c) |
||
1587 | { |
||
1588 | const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); |
||
1589 | const unsigned f = dst.getFile(); |
||
1590 | const int idx = dst.getIndex(0); |
||
1591 | const int idx2d = dst.is2D() ? dst.getIndex(1) : 0; |
||
1592 | |||
1593 | if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE) |
||
1594 | return NULL; |
||
1595 | |||
1596 | if (dst.isIndirect(0) || |
||
1597 | f == TGSI_FILE_SYSTEM_VALUE || |
||
1598 | (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT)) |
||
1599 | return getScratch(); |
||
1600 | |||
1601 | return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c); |
||
1602 | } |
||
1603 | |||
1604 | void |
||
1605 | Converter::storeDst(int d, int c, Value *val) |
||
1606 | { |
||
1607 | const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); |
||
1608 | |||
1609 | switch (tgsi.getSaturate()) { |
||
1610 | case TGSI_SAT_NONE: |
||
1611 | break; |
||
1612 | case TGSI_SAT_ZERO_ONE: |
||
1613 | mkOp1(OP_SAT, dstTy, val, val); |
||
1614 | break; |
||
1615 | case TGSI_SAT_MINUS_PLUS_ONE: |
||
1616 | mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f)); |
||
1617 | mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f)); |
||
1618 | break; |
||
1619 | default: |
||
1620 | assert(!"invalid saturation mode"); |
||
1621 | break; |
||
1622 | } |
||
1623 | |||
1624 | Value *ptr = NULL; |
||
1625 | if (dst.isIndirect(0)) |
||
1626 | ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL)); |
||
1627 | |||
1628 | if (info->io.genUserClip > 0 && |
||
1629 | dst.getFile() == TGSI_FILE_OUTPUT && |
||
1630 | !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) { |
||
1631 | mkMov(clipVtx[c], val); |
||
1632 | val = clipVtx[c]; |
||
1633 | } |
||
1634 | |||
1635 | storeDst(dst, c, val, ptr); |
||
1636 | } |
||
1637 | |||
1638 | void |
||
1639 | Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, |
||
1640 | Value *val, Value *ptr) |
||
1641 | { |
||
1642 | const unsigned f = dst.getFile(); |
||
1643 | const int idx = dst.getIndex(0); |
||
1644 | const int idx2d = dst.is2D() ? dst.getIndex(1) : 0; |
||
1645 | |||
1646 | if (f == TGSI_FILE_SYSTEM_VALUE) { |
||
1647 | assert(!ptr); |
||
1648 | mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); |
||
1649 | } else |
||
1650 | if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { |
||
1651 | |||
1652 | if (ptr || (info->out[idx].mask & (1 << c))) { |
||
1653 | /* Save the viewport index into a scratch register so that it can be |
||
1654 | exported at EMIT time */ |
||
1655 | if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX && |
||
1656 | viewport != NULL) |
||
1657 | mkOp1(OP_MOV, TYPE_U32, viewport, val); |
||
1658 | else |
||
1659 | mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); |
||
1660 | } |
||
1661 | } else |
||
1662 | if (f == TGSI_FILE_TEMPORARY || |
||
1663 | f == TGSI_FILE_PREDICATE || |
||
1664 | f == TGSI_FILE_ADDRESS || |
||
1665 | f == TGSI_FILE_OUTPUT) { |
||
1666 | getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val); |
||
1667 | } else { |
||
1668 | assert(!"invalid dst file"); |
||
1669 | } |
||
1670 | } |
||
1671 | |||
// Loop header: iterates `chan` over 0..3 and runs the statement that follows
// only for channels that destination `d` of `inst` does not mask out.
// The arguments are parenthesised so that arbitrary expressions can be passed.
// Note that this expands to `for (...) if (...)`, so an `else` placed right
// after the controlled statement would bind to the hidden `if`.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst)   \
   for ((chan) = 0; (chan) < 4; ++(chan))             \
      if (!(inst).getDst(d).isMasked(chan))
||
1675 | |||
1676 | Value * |
||
1677 | Converter::buildDot(int dim) |
||
1678 | { |
||
1679 | assert(dim > 0); |
||
1680 | |||
1681 | Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0); |
||
1682 | Value *dotp = getScratch(); |
||
1683 | |||
1684 | mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); |
||
1685 | |||
1686 | for (int c = 1; c < dim; ++c) { |
||
1687 | src0 = fetchSrc(0, c); |
||
1688 | src1 = fetchSrc(1, c); |
||
1689 | mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp); |
||
1690 | } |
||
1691 | return dotp; |
||
1692 | } |
||
1693 | |||
1694 | void |
||
1695 | Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork) |
||
1696 | { |
||
1697 | FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL); |
||
1698 | join->fixed = 1; |
||
1699 | conv->insertHead(join); |
||
1700 | |||
1701 | fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv); |
||
1702 | fork->insertBefore(fork->getExit(), fork->joinAt); |
||
1703 | } |
||
1704 | |||
1705 | void |
||
1706 | Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S) |
||
1707 | { |
||
1708 | unsigned rIdx = 0, sIdx = 0; |
||
1709 | |||
1710 | if (R >= 0) |
||
1711 | rIdx = tgsi.getSrc(R).getIndex(0); |
||
1712 | if (S >= 0) |
||
1713 | sIdx = tgsi.getSrc(S).getIndex(0); |
||
1714 | |||
1715 | tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx); |
||
1716 | |||
1717 | if (tgsi.getSrc(R).isIndirect(0)) { |
||
1718 | tex->tex.rIndirectSrc = s; |
||
1719 | tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL)); |
||
1720 | } |
||
1721 | if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) { |
||
1722 | tex->tex.sIndirectSrc = s; |
||
1723 | tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL)); |
||
1724 | } |
||
1725 | } |
||
1726 | |||
1727 | void |
||
1728 | Converter::handleTXQ(Value *dst0[4], enum TexQuery query) |
||
1729 | { |
||
1730 | TexInstruction *tex = new_TexInstruction(func, OP_TXQ); |
||
1731 | tex->tex.query = query; |
||
1732 | unsigned int c, d; |
||
1733 | |||
1734 | for (d = 0, c = 0; c < 4; ++c) { |
||
1735 | if (!dst0[c]) |
||
1736 | continue; |
||
1737 | tex->tex.mask |= 1 << c; |
||
1738 | tex->setDef(d++, dst0[c]); |
||
1739 | } |
||
1740 | tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level |
||
1741 | |||
1742 | setTexRS(tex, c, 1, -1); |
||
1743 | |||
1744 | bb->insertTail(tex); |
||
1745 | } |
||
1746 | |||
1747 | void |
||
1748 | Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask) |
||
1749 | { |
||
1750 | Value *proj = fetchSrc(0, 3); |
||
1751 | Instruction *insn = proj->getUniqueInsn(); |
||
1752 | int c; |
||
1753 | |||
1754 | if (insn->op == OP_PINTERP) { |
||
1755 | bb->insertTail(insn = cloneForward(func, insn)); |
||
1756 | insn->op = OP_LINTERP; |
||
1757 | insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode()); |
||
1758 | insn->setSrc(1, NULL); |
||
1759 | proj = insn->getDef(0); |
||
1760 | } |
||
1761 | proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj); |
||
1762 | |||
1763 | for (c = 0; c < 4; ++c) { |
||
1764 | if (!(mask & (1 << c))) |
||
1765 | continue; |
||
1766 | if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP) |
||
1767 | continue; |
||
1768 | mask &= ~(1 << c); |
||
1769 | |||
1770 | bb->insertTail(insn = cloneForward(func, insn)); |
||
1771 | insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode()); |
||
1772 | insn->setSrc(1, proj); |
||
1773 | dst[c] = insn->getDef(0); |
||
1774 | } |
||
1775 | if (!mask) |
||
1776 | return; |
||
1777 | |||
1778 | proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3)); |
||
1779 | |||
1780 | for (c = 0; c < 4; ++c) |
||
1781 | if (mask & (1 << c)) |
||
1782 | dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj); |
||
1783 | } |
||
1784 | |||
1785 | // order of nv50 ir sources: x y z layer lod/bias shadow |
||
1786 | // order of TGSI TEX sources: x y z layer shadow lod/bias |
||
1787 | // lowering will finally set the hw specific order (like array first on nvc0) |
||
1788 | void |
||
1789 | Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) |
||
1790 | { |
||
1791 | Value *val; |
||
1792 | Value *arg[4], *src[8]; |
||
1793 | Value *lod = NULL, *shd = NULL; |
||
1794 | unsigned int s, c, d; |
||
1795 | TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); |
||
1796 | |||
1797 | TexInstruction::Target tgt = tgsi.getTexture(code, R); |
||
1798 | |||
1799 | for (s = 0; s < tgt.getArgCount(); ++s) |
||
1800 | arg[s] = src[s] = fetchSrc(0, s); |
||
1801 | |||
1802 | if (texi->op == OP_TXL || texi->op == OP_TXB) |
||
1803 | lod = fetchSrc(L >> 4, L & 3); |
||
1804 | |||
1805 | if (C == 0x0f) |
||
1806 | C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src |
||
1807 | |||
1808 | if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && |
||
1809 | tgt == TEX_TARGET_CUBE_ARRAY_SHADOW) |
||
1810 | shd = fetchSrc(1, 0); |
||
1811 | else if (tgt.isShadow()) |
||
1812 | shd = fetchSrc(C >> 4, C & 3); |
||
1813 | |||
1814 | if (texi->op == OP_TXD) { |
||
1815 | for (c = 0; c < tgt.getDim(); ++c) { |
||
1816 | texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c)); |
||
1817 | texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c)); |
||
1818 | } |
||
1819 | } |
||
1820 | |||
1821 | // cube textures don't care about projection value, it's divided out |
||
1822 | if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) { |
||
1823 | unsigned int n = tgt.getDim(); |
||
1824 | if (shd) { |
||
1825 | arg[n] = shd; |
||
1826 | ++n; |
||
1827 | assert(tgt.getDim() == tgt.getArgCount()); |
||
1828 | } |
||
1829 | loadProjTexCoords(src, arg, (1 << n) - 1); |
||
1830 | if (shd) |
||
1831 | shd = src[n - 1]; |
||
1832 | } |
||
1833 | |||
1834 | if (tgt.isCube()) { |
||
1835 | for (c = 0; c < 3; ++c) |
||
1836 | src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); |
||
1837 | val = getScratch(); |
||
1838 | mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); |
||
1839 | mkOp2(OP_MAX, TYPE_F32, val, src[2], val); |
||
1840 | mkOp1(OP_RCP, TYPE_F32, val, val); |
||
1841 | for (c = 0; c < 3; ++c) |
||
1842 | src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); |
||
1843 | } |
||
1844 | |||
1845 | for (c = 0, d = 0; c < 4; ++c) { |
||
1846 | if (dst[c]) { |
||
1847 | texi->setDef(d++, dst[c]); |
||
1848 | texi->tex.mask |= 1 << c; |
||
1849 | } else { |
||
1850 | // NOTE: maybe hook up def too, for CSE |
||
1851 | } |
||
1852 | } |
||
1853 | for (s = 0; s < tgt.getArgCount(); ++s) |
||
1854 | texi->setSrc(s, src[s]); |
||
1855 | if (lod) |
||
1856 | texi->setSrc(s++, lod); |
||
1857 | if (shd) |
||
1858 | texi->setSrc(s++, shd); |
||
1859 | |||
1860 | setTexRS(texi, s, R, S); |
||
1861 | |||
1862 | if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ) |
||
1863 | texi->tex.levelZero = true; |
||
1864 | if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow()) |
||
1865 | texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info); |
||
1866 | |||
1867 | texi->tex.useOffsets = tgsi.getNumTexOffsets(); |
||
1868 | for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { |
||
1869 | for (c = 0; c < 3; ++c) { |
||
1870 | texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); |
||
1871 | texi->offset[s][c].setInsn(texi); |
||
1872 | } |
||
1873 | } |
||
1874 | |||
1875 | bb->insertTail(texi); |
||
1876 | } |
||
1877 | |||
1878 | // 1st source: xyz = coordinates, w = lod/sample |
||
1879 | // 2nd source: offset |
||
1880 | void |
||
1881 | Converter::handleTXF(Value *dst[4], int R, int L_M) |
||
1882 | { |
||
1883 | TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); |
||
1884 | int ms; |
||
1885 | unsigned int c, d, s; |
||
1886 | |||
1887 | texi->tex.target = tgsi.getTexture(code, R); |
||
1888 | |||
1889 | ms = texi->tex.target.isMS() ? 1 : 0; |
||
1890 | texi->tex.levelZero = ms; /* MS textures don't have mip-maps */ |
||
1891 | |||
1892 | for (c = 0, d = 0; c < 4; ++c) { |
||
1893 | if (dst[c]) { |
||
1894 | texi->setDef(d++, dst[c]); |
||
1895 | texi->tex.mask |= 1 << c; |
||
1896 | } |
||
1897 | } |
||
1898 | for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c) |
||
1899 | texi->setSrc(c, fetchSrc(0, c)); |
||
1900 | texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms |
||
1901 | |||
1902 | setTexRS(texi, c, R, -1); |
||
1903 | |||
1904 | texi->tex.useOffsets = tgsi.getNumTexOffsets(); |
||
1905 | for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { |
||
1906 | for (c = 0; c < 3; ++c) { |
||
1907 | texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); |
||
1908 | texi->offset[s][c].setInsn(texi); |
||
1909 | } |
||
1910 | } |
||
1911 | |||
1912 | bb->insertTail(texi); |
||
1913 | } |
||
1914 | |||
1915 | void |
||
1916 | Converter::handleLIT(Value *dst0[4]) |
||
1917 | { |
||
1918 | Value *val0 = NULL; |
||
1919 | unsigned int mask = tgsi.getDst(0).getMask(); |
||
1920 | |||
1921 | if (mask & (1 << 0)) |
||
1922 | loadImm(dst0[0], 1.0f); |
||
1923 | |||
1924 | if (mask & (1 << 3)) |
||
1925 | loadImm(dst0[3], 1.0f); |
||
1926 | |||
1927 | if (mask & (3 << 1)) { |
||
1928 | val0 = getScratch(); |
||
1929 | mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero); |
||
1930 | if (mask & (1 << 1)) |
||
1931 | mkMov(dst0[1], val0); |
||
1932 | } |
||
1933 | |||
1934 | if (mask & (1 << 2)) { |
||
1935 | Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3); |
||
1936 | Value *val1 = getScratch(), *val3 = getScratch(); |
||
1937 | |||
1938 | Value *pos128 = loadImm(NULL, +127.999999f); |
||
1939 | Value *neg128 = loadImm(NULL, -127.999999f); |
||
1940 | |||
1941 | mkOp2(OP_MAX, TYPE_F32, val1, src1, zero); |
||
1942 | mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128); |
||
1943 | mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128); |
||
1944 | mkOp2(OP_POW, TYPE_F32, val3, val1, val3); |
||
1945 | |||
1946 | mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0); |
||
1947 | } |
||
1948 | } |
||
1949 | |||
1950 | static inline bool |
||
1951 | isResourceSpecial(const int r) |
||
1952 | { |
||
1953 | return (r == TGSI_RESOURCE_GLOBAL || |
||
1954 | r == TGSI_RESOURCE_LOCAL || |
||
1955 | r == TGSI_RESOURCE_PRIVATE || |
||
1956 | r == TGSI_RESOURCE_INPUT); |
||
1957 | } |
||
1958 | |||
1959 | static inline bool |
||
1960 | isResourceRaw(const tgsi::Source *code, const int r) |
||
1961 | { |
||
1962 | return isResourceSpecial(r) || code->resources[r].raw; |
||
1963 | } |
||
1964 | |||
1965 | static inline nv50_ir::TexTarget |
||
1966 | getResourceTarget(const tgsi::Source *code, int r) |
||
1967 | { |
||
1968 | if (isResourceSpecial(r)) |
||
1969 | return nv50_ir::TEX_TARGET_BUFFER; |
||
1970 | return tgsi::translateTexture(code->resources.at(r).target); |
||
1971 | } |
||
1972 | |||
1973 | Symbol * |
||
1974 | Converter::getResourceBase(const int r) |
||
1975 | { |
||
1976 | Symbol *sym = NULL; |
||
1977 | |||
1978 | switch (r) { |
||
1979 | case TGSI_RESOURCE_GLOBAL: |
||
1980 | sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15); |
||
1981 | break; |
||
1982 | case TGSI_RESOURCE_LOCAL: |
||
1983 | assert(prog->getType() == Program::TYPE_COMPUTE); |
||
1984 | sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32, |
||
1985 | info->prop.cp.sharedOffset); |
||
1986 | break; |
||
1987 | case TGSI_RESOURCE_PRIVATE: |
||
1988 | sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32, |
||
1989 | info->bin.tlsSpace); |
||
1990 | break; |
||
1991 | case TGSI_RESOURCE_INPUT: |
||
1992 | assert(prog->getType() == Program::TYPE_COMPUTE); |
||
1993 | sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32, |
||
1994 | info->prop.cp.inputOffset); |
||
1995 | break; |
||
1996 | default: |
||
1997 | sym = new_Symbol(prog, |
||
1998 | nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot); |
||
1999 | break; |
||
2000 | } |
||
2001 | return sym; |
||
2002 | } |
||
2003 | |||
2004 | void |
||
2005 | Converter::getResourceCoords(std::vector |
||
2006 | { |
||
2007 | const int arg = |
||
2008 | TexInstruction::Target(getResourceTarget(code, r)).getArgCount(); |
||
2009 | |||
2010 | for (int c = 0; c < arg; ++c) |
||
2011 | coords.push_back(fetchSrc(s, c)); |
||
2012 | |||
2013 | // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk |
||
2014 | if (r == TGSI_RESOURCE_LOCAL || |
||
2015 | r == TGSI_RESOURCE_PRIVATE || |
||
2016 | r == TGSI_RESOURCE_INPUT) |
||
2017 | coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS), |
||
2018 | coords[0]); |
||
2019 | } |
||
2020 | |||
// Split a 4-component access mask into at most two runs of consecutive
// components.
//
// comp  in/out: first component of each run; must be zero on entry
// size  in/out: number of components in each run; must be zero on entry
// mask  component mask; only the low 4 bits are meaningful, so it can
//       contain at most two separate runs of set bits
//
// Returns the number of runs written (1 or 2). A single run of exactly
// three components is split in two so that no access is 3 components wide.
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else if (size[n]) {
         // Gap after the first run: the second run starts no earlier than
         // one past the end of the first. The end of the first run is
         // comp[0] + size[0], not just size[0], because the first run may
         // itself start after skipped components (e.g. mask 0b1010).
         n = 1;
         comp[1] = comp[0] + size[0] + 1;
      } else {
         // Still skipping components ahead of the current run.
         comp[n]++;
      }
      mask >>= 1;
   }

   if (size[0] == 3) {
      // Split xyz into xy + z, and yzw into y + zw.
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
||
2045 | |||
2046 | // For raw loads, granularity is 4 byte. |
||
2047 | // Usage of the texture read mask on OP_SULDP is not allowed. |
||
2048 | void |
||
2049 | Converter::handleLOAD(Value *dst0[4]) |
||
2050 | { |
||
2051 | const int r = tgsi.getSrc(0).getIndex(0); |
||
2052 | int c; |
||
2053 | std::vector |
||
2054 | |||
2055 | getResourceCoords(off, r, 1); |
||
2056 | |||
2057 | if (isResourceRaw(code, r)) { |
||
2058 | uint8_t mask = 0; |
||
2059 | uint8_t comp[2] = { 0, 0 }; |
||
2060 | uint8_t size[2] = { 0, 0 }; |
||
2061 | |||
2062 | Symbol *base = getResourceBase(r); |
||
2063 | |||
2064 | // determine the base and size of the at most 2 load ops |
||
2065 | for (c = 0; c < 4; ++c) |
||
2066 | if (!tgsi.getDst(0).isMasked(c)) |
||
2067 | mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X); |
||
2068 | |||
2069 | int n = partitionLoadStore(comp, size, mask); |
||
2070 | |||
2071 | src = off; |
||
2072 | |||
2073 | def.resize(4); // index by component, the ones we need will be non-NULL |
||
2074 | for (c = 0; c < 4; ++c) { |
||
2075 | if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c)) |
||
2076 | def[c] = dst0[c]; |
||
2077 | else |
||
2078 | if (mask & (1 << c)) |
||
2079 | def[c] = getScratch(); |
||
2080 | } |
||
2081 | |||
2082 | const bool useLd = isResourceSpecial(r) || |
||
2083 | (info->io.nv50styleSurfaces && |
||
2084 | code->resources[r].target == TGSI_TEXTURE_BUFFER); |
||
2085 | |||
2086 | for (int i = 0; i < n; ++i) { |
||
2087 | ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]); |
||
2088 | |||
2089 | if (comp[i]) // adjust x component of source address if necessary |
||
2090 | src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), |
||
2091 | off[0], mkImm(comp[i] * 4)); |
||
2092 | else |
||
2093 | src[0] = off[0]; |
||
2094 | |||
2095 | if (useLd) { |
||
2096 | Instruction *ld = |
||
2097 | mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]); |
||
2098 | for (size_t c = 1; c < ldv.size(); ++c) |
||
2099 | ld->setDef(c, ldv[c]); |
||
2100 | } else { |
||
2101 | mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot, |
||
2102 | 0, ldv, src)->dType = typeOfSize(size[i] * 4); |
||
2103 | } |
||
2104 | } |
||
2105 | } else { |
||
2106 | def.resize(4); |
||
2107 | for (c = 0; c < 4; ++c) { |
||
2108 | if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c)) |
||
2109 | def[c] = getScratch(); |
||
2110 | else |
||
2111 | def[c] = dst0[c]; |
||
2112 | } |
||
2113 | |||
2114 | mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0, |
||
2115 | def, off); |
||
2116 | } |
||
2117 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2118 | if (dst0[c] != def[c]) |
||
2119 | mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); |
||
2120 | } |
||
2121 | |||
2122 | // For formatted stores, the write mask on OP_SUSTP can be used. |
||
2123 | // Raw stores have to be split. |
||
2124 | void |
||
2125 | Converter::handleSTORE() |
||
2126 | { |
||
2127 | const int r = tgsi.getDst(0).getIndex(0); |
||
2128 | int c; |
||
2129 | std::vector |
||
2130 | |||
2131 | getResourceCoords(off, r, 0); |
||
2132 | src = off; |
||
2133 | const int s = src.size(); |
||
2134 | |||
2135 | if (isResourceRaw(code, r)) { |
||
2136 | uint8_t comp[2] = { 0, 0 }; |
||
2137 | uint8_t size[2] = { 0, 0 }; |
||
2138 | |||
2139 | int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask()); |
||
2140 | |||
2141 | Symbol *base = getResourceBase(r); |
||
2142 | |||
2143 | const bool useSt = isResourceSpecial(r) || |
||
2144 | (info->io.nv50styleSurfaces && |
||
2145 | code->resources[r].target == TGSI_TEXTURE_BUFFER); |
||
2146 | |||
2147 | for (int i = 0; i < n; ++i) { |
||
2148 | if (comp[i]) // adjust x component of source address if necessary |
||
2149 | src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), |
||
2150 | off[0], mkImm(comp[i] * 4)); |
||
2151 | else |
||
2152 | src[0] = off[0]; |
||
2153 | |||
2154 | const DataType stTy = typeOfSize(size[i] * 4); |
||
2155 | |||
2156 | if (useSt) { |
||
2157 | Instruction *st = |
||
2158 | mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i])); |
||
2159 | for (c = 1; c < size[i]; ++c) |
||
2160 | st->setSrc(1 + c, fetchSrc(1, comp[i] + c)); |
||
2161 | st->setIndirect(0, 0, src[0]); |
||
2162 | } else { |
||
2163 | // attach values to be stored |
||
2164 | src.resize(s + size[i]); |
||
2165 | for (c = 0; c < size[i]; ++c) |
||
2166 | src[s + c] = fetchSrc(1, comp[i] + c); |
||
2167 | mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot, |
||
2168 | 0, dummy, src)->setType(stTy); |
||
2169 | } |
||
2170 | } |
||
2171 | } else { |
||
2172 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2173 | src.push_back(fetchSrc(1, c)); |
||
2174 | |||
2175 | mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0, |
||
2176 | dummy, src)->tex.mask = tgsi.getDst(0).getMask(); |
||
2177 | } |
||
2178 | } |
||
2179 | |||
2180 | // XXX: These only work on resources with the single-component u32/s32 formats. |
||
2181 | // Therefore the result is replicated. This might not be intended by TGSI, but |
||
2182 | // operating on more than 1 component would produce undefined results because |
||
2183 | // they do not exist. |
||
2184 | void |
||
2185 | Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) |
||
2186 | { |
||
2187 | const int r = tgsi.getSrc(0).getIndex(0); |
||
2188 | std::vector |
||
2189 | std::vector |
||
2190 | LValue *dst = getScratch(); |
||
2191 | |||
2192 | getResourceCoords(srcv, r, 1); |
||
2193 | |||
2194 | if (isResourceSpecial(r)) { |
||
2195 | assert(r != TGSI_RESOURCE_INPUT); |
||
2196 | Instruction *insn; |
||
2197 | insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0)); |
||
2198 | insn->subOp = subOp; |
||
2199 | if (subOp == NV50_IR_SUBOP_ATOM_CAS) |
||
2200 | insn->setSrc(2, fetchSrc(3, 0)); |
||
2201 | insn->setIndirect(0, 0, srcv.at(0)); |
||
2202 | } else { |
||
2203 | operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP; |
||
2204 | TexTarget targ = getResourceTarget(code, r); |
||
2205 | int idx = code->resources[r].slot; |
||
2206 | defv.push_back(dst); |
||
2207 | srcv.push_back(fetchSrc(2, 0)); |
||
2208 | if (subOp == NV50_IR_SUBOP_ATOM_CAS) |
||
2209 | srcv.push_back(fetchSrc(3, 0)); |
||
2210 | TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv); |
||
2211 | tex->subOp = subOp; |
||
2212 | tex->tex.mask = 1; |
||
2213 | tex->setType(ty); |
||
2214 | } |
||
2215 | |||
2216 | for (int c = 0; c < 4; ++c) |
||
2217 | if (dst0[c]) |
||
2218 | dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov |
||
2219 | } |
||
2220 | |||
2221 | void |
||
2222 | Converter::handleINTERP(Value *dst[4]) |
||
2223 | { |
||
2224 | // Check whether the input is linear. All other attributes ignored. |
||
2225 | Instruction *insn; |
||
2226 | Value *offset = NULL, *ptr = NULL, *w = NULL; |
||
2227 | bool linear; |
||
2228 | operation op; |
||
2229 | int c, mode; |
||
2230 | |||
2231 | tgsi::Instruction::SrcRegister src = tgsi.getSrc(0); |
||
2232 | assert(src.getFile() == TGSI_FILE_INPUT); |
||
2233 | |||
2234 | if (src.isIndirect(0)) |
||
2235 | ptr = fetchSrc(src.getIndirect(0), 0, NULL); |
||
2236 | |||
2237 | // XXX: no way to know interp mode if we don't know the index |
||
2238 | linear = info->in[ptr ? 0 : src.getIndex(0)].linear; |
||
2239 | if (linear) { |
||
2240 | op = OP_LINTERP; |
||
2241 | mode = NV50_IR_INTERP_LINEAR; |
||
2242 | } else { |
||
2243 | op = OP_PINTERP; |
||
2244 | mode = NV50_IR_INTERP_PERSPECTIVE; |
||
2245 | } |
||
2246 | |||
2247 | switch (tgsi.getOpcode()) { |
||
2248 | case TGSI_OPCODE_INTERP_CENTROID: |
||
2249 | mode |= NV50_IR_INTERP_CENTROID; |
||
2250 | break; |
||
2251 | case TGSI_OPCODE_INTERP_SAMPLE: |
||
2252 | insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0)); |
||
2253 | insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET; |
||
2254 | mode |= NV50_IR_INTERP_OFFSET; |
||
2255 | break; |
||
2256 | case TGSI_OPCODE_INTERP_OFFSET: { |
||
2257 | // The input in src1.xy is float, but we need a single 32-bit value |
||
2258 | // where the upper and lower 16 bits are encoded in S0.12 format. We need |
||
2259 | // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096, |
||
2260 | // and then convert to s32. |
||
2261 | Value *offs[2]; |
||
2262 | for (c = 0; c < 2; c++) { |
||
2263 | offs[c] = fetchSrc(1, c); |
||
2264 | mkOp2(OP_MIN, TYPE_F32, offs[c], offs[c], loadImm(NULL, 0.4375f)); |
||
2265 | mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f)); |
||
2266 | mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f)); |
||
2267 | mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]); |
||
2268 | } |
||
2269 | offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(), |
||
2270 | offs[1], mkImm(0x1010), offs[0]); |
||
2271 | mode |= NV50_IR_INTERP_OFFSET; |
||
2272 | break; |
||
2273 | } |
||
2274 | } |
||
2275 | |||
2276 | if (op == OP_PINTERP) { |
||
2277 | if (offset) { |
||
2278 | w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset); |
||
2279 | mkOp1(OP_RCP, TYPE_F32, w, w); |
||
2280 | } else { |
||
2281 | w = fragCoord[3]; |
||
2282 | } |
||
2283 | } |
||
2284 | |||
2285 | |||
2286 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2287 | insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c)); |
||
2288 | if (op == OP_PINTERP) |
||
2289 | insn->setSrc(1, w); |
||
2290 | if (ptr) |
||
2291 | insn->setIndirect(0, 0, ptr); |
||
2292 | if (offset) |
||
2293 | insn->setSrc(op == OP_PINTERP ? 2 : 1, offset); |
||
2294 | |||
2295 | insn->setInterpolate(mode); |
||
2296 | } |
||
2297 | } |
||
2298 | |||
2299 | Converter::Subroutine * |
||
2300 | Converter::getSubroutine(unsigned ip) |
||
2301 | { |
||
2302 | std::map |
||
2303 | |||
2304 | if (it == sub.map.end()) |
||
2305 | it = sub.map.insert(std::make_pair( |
||
2306 | ip, Subroutine(new Function(prog, "SUB", ip)))).first; |
||
2307 | |||
2308 | return &it->second; |
||
2309 | } |
||
2310 | |||
2311 | Converter::Subroutine * |
||
2312 | Converter::getSubroutine(Function *f) |
||
2313 | { |
||
2314 | unsigned ip = f->getLabel(); |
||
2315 | std::map |
||
2316 | |||
2317 | if (it == sub.map.end()) |
||
2318 | it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; |
||
2319 | |||
2320 | return &it->second; |
||
2321 | } |
||
2322 | |||
2323 | bool |
||
2324 | Converter::isEndOfSubroutine(uint ip) |
||
2325 | { |
||
2326 | assert(ip < code->scan.num_instructions); |
||
2327 | tgsi::Instruction insn(&code->insns[ip]); |
||
2328 | return (insn.getOpcode() == TGSI_OPCODE_END || |
||
2329 | insn.getOpcode() == TGSI_OPCODE_ENDSUB || |
||
2330 | // does END occur at end of main or the very end ? |
||
2331 | insn.getOpcode() == TGSI_OPCODE_BGNSUB); |
||
2332 | } |
||
2333 | |||
2334 | bool |
||
2335 | Converter::handleInstruction(const struct tgsi_full_instruction *insn) |
||
2336 | { |
||
2337 | Instruction *geni; |
||
2338 | |||
2339 | Value *dst0[4], *rDst0[4]; |
||
2340 | Value *src0, *src1, *src2, *src3; |
||
2341 | Value *val0, *val1; |
||
2342 | int c; |
||
2343 | |||
2344 | tgsi = tgsi::Instruction(insn); |
||
2345 | |||
2346 | bool useScratchDst = tgsi.checkDstSrcAliasing(); |
||
2347 | |||
2348 | operation op = tgsi.getOP(); |
||
2349 | dstTy = tgsi.inferDstType(); |
||
2350 | srcTy = tgsi.inferSrcType(); |
||
2351 | |||
2352 | unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; |
||
2353 | |||
2354 | if (tgsi.dstCount()) { |
||
2355 | for (c = 0; c < 4; ++c) { |
||
2356 | rDst0[c] = acquireDst(0, c); |
||
2357 | dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; |
||
2358 | } |
||
2359 | } |
||
2360 | |||
2361 | switch (tgsi.getOpcode()) { |
||
2362 | case TGSI_OPCODE_ADD: |
||
2363 | case TGSI_OPCODE_UADD: |
||
2364 | case TGSI_OPCODE_AND: |
||
2365 | case TGSI_OPCODE_DIV: |
||
2366 | case TGSI_OPCODE_IDIV: |
||
2367 | case TGSI_OPCODE_UDIV: |
||
2368 | case TGSI_OPCODE_MAX: |
||
2369 | case TGSI_OPCODE_MIN: |
||
2370 | case TGSI_OPCODE_IMAX: |
||
2371 | case TGSI_OPCODE_IMIN: |
||
2372 | case TGSI_OPCODE_UMAX: |
||
2373 | case TGSI_OPCODE_UMIN: |
||
2374 | case TGSI_OPCODE_MOD: |
||
2375 | case TGSI_OPCODE_UMOD: |
||
2376 | case TGSI_OPCODE_MUL: |
||
2377 | case TGSI_OPCODE_UMUL: |
||
2378 | case TGSI_OPCODE_IMUL_HI: |
||
2379 | case TGSI_OPCODE_UMUL_HI: |
||
2380 | case TGSI_OPCODE_OR: |
||
2381 | case TGSI_OPCODE_SHL: |
||
2382 | case TGSI_OPCODE_ISHR: |
||
2383 | case TGSI_OPCODE_USHR: |
||
2384 | case TGSI_OPCODE_SUB: |
||
2385 | case TGSI_OPCODE_XOR: |
||
2386 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2387 | src0 = fetchSrc(0, c); |
||
2388 | src1 = fetchSrc(1, c); |
||
2389 | geni = mkOp2(op, dstTy, dst0[c], src0, src1); |
||
2390 | geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); |
||
2391 | } |
||
2392 | break; |
||
2393 | case TGSI_OPCODE_MAD: |
||
2394 | case TGSI_OPCODE_UMAD: |
||
2395 | case TGSI_OPCODE_SAD: |
||
2396 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2397 | src0 = fetchSrc(0, c); |
||
2398 | src1 = fetchSrc(1, c); |
||
2399 | src2 = fetchSrc(2, c); |
||
2400 | mkOp3(op, dstTy, dst0[c], src0, src1, src2); |
||
2401 | } |
||
2402 | break; |
||
2403 | case TGSI_OPCODE_MOV: |
||
2404 | case TGSI_OPCODE_ABS: |
||
2405 | case TGSI_OPCODE_CEIL: |
||
2406 | case TGSI_OPCODE_FLR: |
||
2407 | case TGSI_OPCODE_TRUNC: |
||
2408 | case TGSI_OPCODE_RCP: |
||
2409 | case TGSI_OPCODE_IABS: |
||
2410 | case TGSI_OPCODE_INEG: |
||
2411 | case TGSI_OPCODE_NOT: |
||
2412 | case TGSI_OPCODE_DDX: |
||
2413 | case TGSI_OPCODE_DDY: |
||
2414 | case TGSI_OPCODE_DDX_FINE: |
||
2415 | case TGSI_OPCODE_DDY_FINE: |
||
2416 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2417 | mkOp1(op, dstTy, dst0[c], fetchSrc(0, c)); |
||
2418 | break; |
||
2419 | case TGSI_OPCODE_RSQ: |
||
2420 | src0 = fetchSrc(0, 0); |
||
2421 | val0 = getScratch(); |
||
2422 | mkOp1(OP_ABS, TYPE_F32, val0, src0); |
||
2423 | mkOp1(OP_RSQ, TYPE_F32, val0, val0); |
||
2424 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2425 | mkMov(dst0[c], val0); |
||
2426 | break; |
||
2427 | case TGSI_OPCODE_ARL: |
||
2428 | case TGSI_OPCODE_ARR: |
||
2429 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2430 | const RoundMode rnd = |
||
2431 | tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M; |
||
2432 | src0 = fetchSrc(0, c); |
||
2433 | mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd; |
||
2434 | } |
||
2435 | break; |
||
2436 | case TGSI_OPCODE_UARL: |
||
2437 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2438 | mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); |
||
2439 | break; |
||
2440 | case TGSI_OPCODE_POW: |
||
2441 | val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); |
||
2442 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2443 | mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); |
||
2444 | break; |
||
2445 | case TGSI_OPCODE_EX2: |
||
2446 | case TGSI_OPCODE_LG2: |
||
2447 | val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0); |
||
2448 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2449 | mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); |
||
2450 | break; |
||
2451 | case TGSI_OPCODE_COS: |
||
2452 | case TGSI_OPCODE_SIN: |
||
2453 | val0 = getScratch(); |
||
2454 | if (mask & 7) { |
||
2455 | mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0)); |
||
2456 | mkOp1(op, TYPE_F32, val0, val0); |
||
2457 | for (c = 0; c < 3; ++c) |
||
2458 | if (dst0[c]) |
||
2459 | mkMov(dst0[c], val0); |
||
2460 | } |
||
2461 | if (dst0[3]) { |
||
2462 | mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3)); |
||
2463 | mkOp1(op, TYPE_F32, dst0[3], val0); |
||
2464 | } |
||
2465 | break; |
||
2466 | case TGSI_OPCODE_SCS: |
||
2467 | if (mask & 3) { |
||
2468 | val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0)); |
||
2469 | if (dst0[0]) |
||
2470 | mkOp1(OP_COS, TYPE_F32, dst0[0], val0); |
||
2471 | if (dst0[1]) |
||
2472 | mkOp1(OP_SIN, TYPE_F32, dst0[1], val0); |
||
2473 | } |
||
2474 | if (dst0[2]) |
||
2475 | loadImm(dst0[2], 0.0f); |
||
2476 | if (dst0[3]) |
||
2477 | loadImm(dst0[3], 1.0f); |
||
2478 | break; |
||
2479 | case TGSI_OPCODE_EXP: |
||
2480 | src0 = fetchSrc(0, 0); |
||
2481 | val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0); |
||
2482 | if (dst0[1]) |
||
2483 | mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0); |
||
2484 | if (dst0[0]) |
||
2485 | mkOp1(OP_EX2, TYPE_F32, dst0[0], val0); |
||
2486 | if (dst0[2]) |
||
2487 | mkOp1(OP_EX2, TYPE_F32, dst0[2], src0); |
||
2488 | if (dst0[3]) |
||
2489 | loadImm(dst0[3], 1.0f); |
||
2490 | break; |
||
2491 | case TGSI_OPCODE_LOG: |
||
2492 | src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0)); |
||
2493 | val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0); |
||
2494 | if (dst0[0] || dst0[1]) |
||
2495 | val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0); |
||
2496 | if (dst0[1]) { |
||
2497 | mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); |
||
2498 | mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); |
||
2499 | mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); |
||
2500 | } |
||
2501 | if (dst0[3]) |
||
2502 | loadImm(dst0[3], 1.0f); |
||
2503 | break; |
||
2504 | case TGSI_OPCODE_DP2: |
||
2505 | val0 = buildDot(2); |
||
2506 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2507 | mkMov(dst0[c], val0); |
||
2508 | break; |
||
2509 | case TGSI_OPCODE_DP3: |
||
2510 | val0 = buildDot(3); |
||
2511 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2512 | mkMov(dst0[c], val0); |
||
2513 | break; |
||
2514 | case TGSI_OPCODE_DP4: |
||
2515 | val0 = buildDot(4); |
||
2516 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2517 | mkMov(dst0[c], val0); |
||
2518 | break; |
||
2519 | case TGSI_OPCODE_DPH: |
||
2520 | val0 = buildDot(3); |
||
2521 | src1 = fetchSrc(1, 3); |
||
2522 | mkOp2(OP_ADD, TYPE_F32, val0, val0, src1); |
||
2523 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2524 | mkMov(dst0[c], val0); |
||
2525 | break; |
||
2526 | case TGSI_OPCODE_DST: |
||
2527 | if (dst0[0]) |
||
2528 | loadImm(dst0[0], 1.0f); |
||
2529 | if (dst0[1]) { |
||
2530 | src0 = fetchSrc(0, 1); |
||
2531 | src1 = fetchSrc(1, 1); |
||
2532 | mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); |
||
2533 | } |
||
2534 | if (dst0[2]) |
||
2535 | mkMov(dst0[2], fetchSrc(0, 2)); |
||
2536 | if (dst0[3]) |
||
2537 | mkMov(dst0[3], fetchSrc(1, 3)); |
||
2538 | break; |
||
2539 | case TGSI_OPCODE_LRP: |
||
2540 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2541 | src0 = fetchSrc(0, c); |
||
2542 | src1 = fetchSrc(1, c); |
||
2543 | src2 = fetchSrc(2, c); |
||
2544 | mkOp3(OP_MAD, TYPE_F32, dst0[c], |
||
2545 | mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); |
||
2546 | } |
||
2547 | break; |
||
2548 | case TGSI_OPCODE_LIT: |
||
2549 | handleLIT(dst0); |
||
2550 | break; |
||
2551 | case TGSI_OPCODE_XPD: |
||
2552 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2553 | if (c < 3) { |
||
2554 | val0 = getSSA(); |
||
2555 | src0 = fetchSrc(1, (c + 1) % 3); |
||
2556 | src1 = fetchSrc(0, (c + 2) % 3); |
||
2557 | mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); |
||
2558 | mkOp1(OP_NEG, TYPE_F32, val0, val0); |
||
2559 | |||
2560 | src0 = fetchSrc(0, (c + 1) % 3); |
||
2561 | src1 = fetchSrc(1, (c + 2) % 3); |
||
2562 | mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); |
||
2563 | } else { |
||
2564 | loadImm(dst0[c], 1.0f); |
||
2565 | } |
||
2566 | } |
||
2567 | break; |
||
2568 | case TGSI_OPCODE_ISSG: |
||
2569 | case TGSI_OPCODE_SSG: |
||
2570 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2571 | src0 = fetchSrc(0, c); |
||
2572 | val0 = getScratch(); |
||
2573 | val1 = getScratch(); |
||
2574 | mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero); |
||
2575 | mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero); |
||
2576 | if (srcTy == TYPE_F32) |
||
2577 | mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1); |
||
2578 | else |
||
2579 | mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0); |
||
2580 | } |
||
2581 | break; |
||
2582 | case TGSI_OPCODE_UCMP: |
||
2583 | case TGSI_OPCODE_CMP: |
||
2584 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2585 | src0 = fetchSrc(0, c); |
||
2586 | src1 = fetchSrc(1, c); |
||
2587 | src2 = fetchSrc(2, c); |
||
2588 | if (src1 == src2) |
||
2589 | mkMov(dst0[c], src1); |
||
2590 | else |
||
2591 | mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE, |
||
2592 | srcTy, dst0[c], srcTy, src1, src2, src0); |
||
2593 | } |
||
2594 | break; |
||
2595 | case TGSI_OPCODE_FRC: |
||
2596 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2597 | src0 = fetchSrc(0, c); |
||
2598 | val0 = getScratch(); |
||
2599 | mkOp1(OP_FLOOR, TYPE_F32, val0, src0); |
||
2600 | mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); |
||
2601 | } |
||
2602 | break; |
||
2603 | case TGSI_OPCODE_ROUND: |
||
2604 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2605 | mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c)) |
||
2606 | ->rnd = ROUND_NI; |
||
2607 | break; |
||
2608 | case TGSI_OPCODE_CLAMP: |
||
2609 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2610 | src0 = fetchSrc(0, c); |
||
2611 | src1 = fetchSrc(1, c); |
||
2612 | src2 = fetchSrc(2, c); |
||
2613 | val0 = getScratch(); |
||
2614 | mkOp2(OP_MIN, TYPE_F32, val0, src0, src1); |
||
2615 | mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2); |
||
2616 | } |
||
2617 | break; |
||
2618 | case TGSI_OPCODE_SLT: |
||
2619 | case TGSI_OPCODE_SGE: |
||
2620 | case TGSI_OPCODE_SEQ: |
||
2621 | case TGSI_OPCODE_SGT: |
||
2622 | case TGSI_OPCODE_SLE: |
||
2623 | case TGSI_OPCODE_SNE: |
||
2624 | case TGSI_OPCODE_FSEQ: |
||
2625 | case TGSI_OPCODE_FSGE: |
||
2626 | case TGSI_OPCODE_FSLT: |
||
2627 | case TGSI_OPCODE_FSNE: |
||
2628 | case TGSI_OPCODE_ISGE: |
||
2629 | case TGSI_OPCODE_ISLT: |
||
2630 | case TGSI_OPCODE_USEQ: |
||
2631 | case TGSI_OPCODE_USGE: |
||
2632 | case TGSI_OPCODE_USLT: |
||
2633 | case TGSI_OPCODE_USNE: |
||
2634 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2635 | src0 = fetchSrc(0, c); |
||
2636 | src1 = fetchSrc(1, c); |
||
2637 | mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); |
||
2638 | } |
||
2639 | break; |
||
2640 | case TGSI_OPCODE_KILL_IF: |
||
2641 | val0 = new_LValue(func, FILE_PREDICATE); |
||
2642 | mask = 0; |
||
2643 | for (c = 0; c < 4; ++c) { |
||
2644 | const int s = tgsi.getSrc(0).getSwizzle(c); |
||
2645 | if (mask & (1 << s)) |
||
2646 | continue; |
||
2647 | mask |= 1 << s; |
||
2648 | mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero); |
||
2649 | mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0); |
||
2650 | } |
||
2651 | break; |
||
2652 | case TGSI_OPCODE_KILL: |
||
2653 | mkOp(OP_DISCARD, TYPE_NONE, NULL); |
||
2654 | break; |
||
2655 | case TGSI_OPCODE_TEX: |
||
2656 | case TGSI_OPCODE_TXB: |
||
2657 | case TGSI_OPCODE_TXL: |
||
2658 | case TGSI_OPCODE_TXP: |
||
2659 | case TGSI_OPCODE_LODQ: |
||
2660 | // R S L C Dx Dy |
||
2661 | handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00); |
||
2662 | break; |
||
2663 | case TGSI_OPCODE_TXD: |
||
2664 | handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20); |
||
2665 | break; |
||
2666 | case TGSI_OPCODE_TG4: |
||
2667 | handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00); |
||
2668 | break; |
||
2669 | case TGSI_OPCODE_TEX2: |
||
2670 | handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00); |
||
2671 | break; |
||
2672 | case TGSI_OPCODE_TXB2: |
||
2673 | case TGSI_OPCODE_TXL2: |
||
2674 | handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00); |
||
2675 | break; |
||
2676 | case TGSI_OPCODE_SAMPLE: |
||
2677 | case TGSI_OPCODE_SAMPLE_B: |
||
2678 | case TGSI_OPCODE_SAMPLE_D: |
||
2679 | case TGSI_OPCODE_SAMPLE_L: |
||
2680 | case TGSI_OPCODE_SAMPLE_C: |
||
2681 | case TGSI_OPCODE_SAMPLE_C_LZ: |
||
2682 | handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); |
||
2683 | break; |
||
2684 | case TGSI_OPCODE_TXF: |
||
2685 | handleTXF(dst0, 1, 0x03); |
||
2686 | break; |
||
2687 | case TGSI_OPCODE_SAMPLE_I: |
||
2688 | handleTXF(dst0, 1, 0x03); |
||
2689 | break; |
||
2690 | case TGSI_OPCODE_SAMPLE_I_MS: |
||
2691 | handleTXF(dst0, 1, 0x20); |
||
2692 | break; |
||
2693 | case TGSI_OPCODE_TXQ: |
||
2694 | case TGSI_OPCODE_SVIEWINFO: |
||
2695 | handleTXQ(dst0, TXQ_DIMS); |
||
2696 | break; |
||
2697 | case TGSI_OPCODE_F2I: |
||
2698 | case TGSI_OPCODE_F2U: |
||
2699 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2700 | mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z; |
||
2701 | break; |
||
2702 | case TGSI_OPCODE_I2F: |
||
2703 | case TGSI_OPCODE_U2F: |
||
2704 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2705 | mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); |
||
2706 | break; |
||
2707 | case TGSI_OPCODE_EMIT: |
||
2708 | /* export the saved viewport index */ |
||
2709 | if (viewport != NULL) { |
||
2710 | Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32, |
||
2711 | info->out[info->io.viewportId].slot[0] * 4); |
||
2712 | mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport); |
||
2713 | } |
||
2714 | /* fallthrough */ |
||
2715 | case TGSI_OPCODE_ENDPRIM: |
||
2716 | { |
||
2717 | // get vertex stream (must be immediate) |
||
2718 | unsigned int stream = tgsi.getSrc(0).getValueU32(0, info); |
||
2719 | if (stream && op == OP_RESTART) |
||
2720 | break; |
||
2721 | src0 = mkImm(stream); |
||
2722 | mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; |
||
2723 | break; |
||
2724 | } |
||
2725 | case TGSI_OPCODE_IF: |
||
2726 | case TGSI_OPCODE_UIF: |
||
2727 | { |
||
2728 | BasicBlock *ifBB = new BasicBlock(func); |
||
2729 | |||
2730 | bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); |
||
2731 | condBBs.push(bb); |
||
2732 | joinBBs.push(bb); |
||
2733 | |||
2734 | mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy); |
||
2735 | |||
2736 | setPosition(ifBB, true); |
||
2737 | } |
||
2738 | break; |
||
2739 | case TGSI_OPCODE_ELSE: |
||
2740 | { |
||
2741 | BasicBlock *elseBB = new BasicBlock(func); |
||
2742 | BasicBlock *forkBB = reinterpret_cast |
||
2743 | |||
2744 | forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); |
||
2745 | condBBs.push(bb); |
||
2746 | |||
2747 | forkBB->getExit()->asFlow()->target.bb = elseBB; |
||
2748 | if (!bb->isTerminated()) |
||
2749 | mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL); |
||
2750 | |||
2751 | setPosition(elseBB, true); |
||
2752 | } |
||
2753 | break; |
||
2754 | case TGSI_OPCODE_ENDIF: |
||
2755 | { |
||
2756 | BasicBlock *convBB = new BasicBlock(func); |
||
2757 | BasicBlock *prevBB = reinterpret_cast |
||
2758 | BasicBlock *forkBB = reinterpret_cast |
||
2759 | |||
2760 | if (!bb->isTerminated()) { |
||
2761 | // we only want join if none of the clauses ended with CONT/BREAK/RET |
||
2762 | if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6) |
||
2763 | insertConvergenceOps(convBB, forkBB); |
||
2764 | mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL); |
||
2765 | bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); |
||
2766 | } |
||
2767 | |||
2768 | if (prevBB->getExit()->op == OP_BRA) { |
||
2769 | prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); |
||
2770 | prevBB->getExit()->asFlow()->target.bb = convBB; |
||
2771 | } |
||
2772 | setPosition(convBB, true); |
||
2773 | } |
||
2774 | break; |
||
2775 | case TGSI_OPCODE_BGNLOOP: |
||
2776 | { |
||
2777 | BasicBlock *lbgnBB = new BasicBlock(func); |
||
2778 | BasicBlock *lbrkBB = new BasicBlock(func); |
||
2779 | |||
2780 | loopBBs.push(lbgnBB); |
||
2781 | breakBBs.push(lbrkBB); |
||
2782 | if (loopBBs.getSize() > func->loopNestingBound) |
||
2783 | func->loopNestingBound++; |
||
2784 | |||
2785 | mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL); |
||
2786 | |||
2787 | bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE); |
||
2788 | setPosition(lbgnBB, true); |
||
2789 | mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL); |
||
2790 | } |
||
2791 | break; |
||
2792 | case TGSI_OPCODE_ENDLOOP: |
||
2793 | { |
||
2794 | BasicBlock *loopBB = reinterpret_cast |
||
2795 | |||
2796 | if (!bb->isTerminated()) { |
||
2797 | mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); |
||
2798 | bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); |
||
2799 | } |
||
2800 | setPosition(reinterpret_cast |
||
2801 | } |
||
2802 | break; |
||
2803 | case TGSI_OPCODE_BRK: |
||
2804 | { |
||
2805 | if (bb->isTerminated()) |
||
2806 | break; |
||
2807 | BasicBlock *brkBB = reinterpret_cast |
||
2808 | mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL); |
||
2809 | bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS); |
||
2810 | } |
||
2811 | break; |
||
2812 | case TGSI_OPCODE_CONT: |
||
2813 | { |
||
2814 | if (bb->isTerminated()) |
||
2815 | break; |
||
2816 | BasicBlock *contBB = reinterpret_cast |
||
2817 | mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL); |
||
2818 | contBB->explicitCont = true; |
||
2819 | bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); |
||
2820 | } |
||
2821 | break; |
||
2822 | case TGSI_OPCODE_BGNSUB: |
||
2823 | { |
||
2824 | Subroutine *s = getSubroutine(ip); |
||
2825 | BasicBlock *entry = new BasicBlock(s->f); |
||
2826 | BasicBlock *leave = new BasicBlock(s->f); |
||
2827 | |||
2828 | // multiple entrypoints possible, keep the graph connected |
||
2829 | if (prog->getType() == Program::TYPE_COMPUTE) |
||
2830 | prog->main->call.attach(&s->f->call, Graph::Edge::TREE); |
||
2831 | |||
2832 | sub.cur = s; |
||
2833 | s->f->setEntry(entry); |
||
2834 | s->f->setExit(leave); |
||
2835 | setPosition(entry, true); |
||
2836 | return true; |
||
2837 | } |
||
2838 | case TGSI_OPCODE_ENDSUB: |
||
2839 | { |
||
2840 | sub.cur = getSubroutine(prog->main); |
||
2841 | setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true); |
||
2842 | return true; |
||
2843 | } |
||
2844 | case TGSI_OPCODE_CAL: |
||
2845 | { |
||
2846 | Subroutine *s = getSubroutine(tgsi.getLabel()); |
||
2847 | mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL); |
||
2848 | func->call.attach(&s->f->call, Graph::Edge::TREE); |
||
2849 | return true; |
||
2850 | } |
||
2851 | case TGSI_OPCODE_RET: |
||
2852 | { |
||
2853 | if (bb->isTerminated()) |
||
2854 | return true; |
||
2855 | BasicBlock *leave = BasicBlock::get(func->cfgExit); |
||
2856 | |||
2857 | if (!isEndOfSubroutine(ip + 1)) { |
||
2858 | // insert a PRERET at the entry if this is an early return |
||
2859 | // (only needed for sharing code in the epilogue) |
||
2860 | BasicBlock *pos = getBB(); |
||
2861 | setPosition(BasicBlock::get(func->cfg.getRoot()), false); |
||
2862 | mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1; |
||
2863 | setPosition(pos, true); |
||
2864 | } |
||
2865 | mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1; |
||
2866 | bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS); |
||
2867 | } |
||
2868 | break; |
||
2869 | case TGSI_OPCODE_END: |
||
2870 | { |
||
2871 | // attach and generate epilogue code |
||
2872 | BasicBlock *epilogue = BasicBlock::get(func->cfgExit); |
||
2873 | bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE); |
||
2874 | setPosition(epilogue, true); |
||
2875 | if (prog->getType() == Program::TYPE_FRAGMENT) |
||
2876 | exportOutputs(); |
||
2877 | if (info->io.genUserClip > 0) |
||
2878 | handleUserClipPlanes(); |
||
2879 | mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; |
||
2880 | } |
||
2881 | break; |
||
2882 | case TGSI_OPCODE_SWITCH: |
||
2883 | case TGSI_OPCODE_CASE: |
||
2884 | ERROR("switch/case opcode encountered, should have been lowered\n"); |
||
2885 | abort(); |
||
2886 | break; |
||
2887 | case TGSI_OPCODE_LOAD: |
||
2888 | handleLOAD(dst0); |
||
2889 | break; |
||
2890 | case TGSI_OPCODE_STORE: |
||
2891 | handleSTORE(); |
||
2892 | break; |
||
2893 | case TGSI_OPCODE_BARRIER: |
||
2894 | geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); |
||
2895 | geni->fixed = 1; |
||
2896 | geni->subOp = NV50_IR_SUBOP_BAR_SYNC; |
||
2897 | break; |
||
2898 | case TGSI_OPCODE_MFENCE: |
||
2899 | case TGSI_OPCODE_LFENCE: |
||
2900 | case TGSI_OPCODE_SFENCE: |
||
2901 | geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); |
||
2902 | geni->fixed = 1; |
||
2903 | geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); |
||
2904 | break; |
||
2905 | case TGSI_OPCODE_ATOMUADD: |
||
2906 | case TGSI_OPCODE_ATOMXCHG: |
||
2907 | case TGSI_OPCODE_ATOMCAS: |
||
2908 | case TGSI_OPCODE_ATOMAND: |
||
2909 | case TGSI_OPCODE_ATOMOR: |
||
2910 | case TGSI_OPCODE_ATOMXOR: |
||
2911 | case TGSI_OPCODE_ATOMUMIN: |
||
2912 | case TGSI_OPCODE_ATOMIMIN: |
||
2913 | case TGSI_OPCODE_ATOMUMAX: |
||
2914 | case TGSI_OPCODE_ATOMIMAX: |
||
2915 | handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); |
||
2916 | break; |
||
2917 | case TGSI_OPCODE_IBFE: |
||
2918 | case TGSI_OPCODE_UBFE: |
||
2919 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2920 | src0 = fetchSrc(0, c); |
||
2921 | src1 = fetchSrc(1, c); |
||
2922 | src2 = fetchSrc(2, c); |
||
2923 | mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1); |
||
2924 | mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1); |
||
2925 | } |
||
2926 | break; |
||
2927 | case TGSI_OPCODE_BFI: |
||
2928 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2929 | src0 = fetchSrc(0, c); |
||
2930 | src1 = fetchSrc(1, c); |
||
2931 | src2 = fetchSrc(2, c); |
||
2932 | src3 = fetchSrc(3, c); |
||
2933 | mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2); |
||
2934 | mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0); |
||
2935 | } |
||
2936 | break; |
||
2937 | case TGSI_OPCODE_LSB: |
||
2938 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2939 | src0 = fetchSrc(0, c); |
||
2940 | geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000)); |
||
2941 | geni->subOp = NV50_IR_SUBOP_EXTBF_REV; |
||
2942 | geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0); |
||
2943 | geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; |
||
2944 | } |
||
2945 | break; |
||
2946 | case TGSI_OPCODE_IMSB: |
||
2947 | case TGSI_OPCODE_UMSB: |
||
2948 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2949 | src0 = fetchSrc(0, c); |
||
2950 | mkOp1(OP_BFIND, srcTy, dst0[c], src0); |
||
2951 | } |
||
2952 | break; |
||
2953 | case TGSI_OPCODE_BREV: |
||
2954 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2955 | src0 = fetchSrc(0, c); |
||
2956 | geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); |
||
2957 | geni->subOp = NV50_IR_SUBOP_EXTBF_REV; |
||
2958 | } |
||
2959 | break; |
||
2960 | case TGSI_OPCODE_POPC: |
||
2961 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2962 | src0 = fetchSrc(0, c); |
||
2963 | mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0); |
||
2964 | } |
||
2965 | break; |
||
2966 | case TGSI_OPCODE_INTERP_CENTROID: |
||
2967 | case TGSI_OPCODE_INTERP_SAMPLE: |
||
2968 | case TGSI_OPCODE_INTERP_OFFSET: |
||
2969 | handleINTERP(dst0); |
||
2970 | break; |
||
2971 | case TGSI_OPCODE_D2I: |
||
2972 | case TGSI_OPCODE_D2U: |
||
2973 | case TGSI_OPCODE_D2F: { |
||
2974 | int pos = 0; |
||
2975 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2976 | Value *dreg = getSSA(8); |
||
2977 | src0 = fetchSrc(0, pos); |
||
2978 | src1 = fetchSrc(0, pos + 1); |
||
2979 | mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1); |
||
2980 | mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg); |
||
2981 | pos += 2; |
||
2982 | } |
||
2983 | break; |
||
2984 | } |
||
2985 | case TGSI_OPCODE_I2D: |
||
2986 | case TGSI_OPCODE_U2D: |
||
2987 | case TGSI_OPCODE_F2D: |
||
2988 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2989 | Value *dreg = getSSA(8); |
||
2990 | mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); |
||
2991 | mkSplit(&dst0[c], 4, dreg); |
||
2992 | c++; |
||
2993 | } |
||
2994 | break; |
||
2995 | case TGSI_OPCODE_DABS: |
||
2996 | case TGSI_OPCODE_DNEG: |
||
2997 | case TGSI_OPCODE_DRCP: |
||
2998 | case TGSI_OPCODE_DSQRT: |
||
2999 | case TGSI_OPCODE_DRSQ: |
||
3000 | case TGSI_OPCODE_DTRUNC: |
||
3001 | case TGSI_OPCODE_DCEIL: |
||
3002 | case TGSI_OPCODE_DFLR: |
||
3003 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
3004 | src0 = getSSA(8); |
||
3005 | Value *dst = getSSA(8), *tmp[2]; |
||
3006 | tmp[0] = fetchSrc(0, c); |
||
3007 | tmp[1] = fetchSrc(0, c + 1); |
||
3008 | mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
||
3009 | mkOp1(op, dstTy, dst, src0); |
||
3010 | mkSplit(&dst0[c], 4, dst); |
||
3011 | c++; |
||
3012 | } |
||
3013 | break; |
||
3014 | case TGSI_OPCODE_DFRAC: |
||
3015 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
3016 | src0 = getSSA(8); |
||
3017 | Value *dst = getSSA(8), *tmp[2]; |
||
3018 | tmp[0] = fetchSrc(0, c); |
||
3019 | tmp[1] = fetchSrc(0, c + 1); |
||
3020 | mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
||
3021 | mkOp1(OP_FLOOR, TYPE_F64, dst, src0); |
||
3022 | mkOp2(OP_SUB, TYPE_F64, dst, src0, dst); |
||
3023 | mkSplit(&dst0[c], 4, dst); |
||
3024 | c++; |
||
3025 | } |
||
3026 | break; |
||
3027 | case TGSI_OPCODE_DSLT: |
||
3028 | case TGSI_OPCODE_DSGE: |
||
3029 | case TGSI_OPCODE_DSEQ: |
||
3030 | case TGSI_OPCODE_DSNE: { |
||
3031 | int pos = 0; |
||
3032 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
3033 | Value *tmp[2]; |
||
3034 | |||
3035 | src0 = getSSA(8); |
||
3036 | src1 = getSSA(8); |
||
3037 | tmp[0] = fetchSrc(0, pos); |
||
3038 | tmp[1] = fetchSrc(0, pos + 1); |
||
3039 | mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
||
3040 | tmp[0] = fetchSrc(1, pos); |
||
3041 | tmp[1] = fetchSrc(1, pos + 1); |
||
3042 | mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); |
||
3043 | mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); |
||
3044 | pos += 2; |
||
3045 | } |
||
3046 | break; |
||
3047 | } |
||
3048 | case TGSI_OPCODE_DADD: |
||
3049 | case TGSI_OPCODE_DMUL: |
||
3050 | case TGSI_OPCODE_DMAX: |
||
3051 | case TGSI_OPCODE_DMIN: |
||
3052 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
3053 | src0 = getSSA(8); |
||
3054 | src1 = getSSA(8); |
||
3055 | Value *dst = getSSA(8), *tmp[2]; |
||
3056 | tmp[0] = fetchSrc(0, c); |
||
3057 | tmp[1] = fetchSrc(0, c + 1); |
||
3058 | mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
||
3059 | tmp[0] = fetchSrc(1, c); |
||
3060 | tmp[1] = fetchSrc(1, c + 1); |
||
3061 | mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); |
||
3062 | mkOp2(op, dstTy, dst, src0, src1); |
||
3063 | mkSplit(&dst0[c], 4, dst); |
||
3064 | c++; |
||
3065 | } |
||
3066 | break; |
||
3067 | case TGSI_OPCODE_DMAD: |
||
3068 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
3069 | src0 = getSSA(8); |
||
3070 | src1 = getSSA(8); |
||
3071 | src2 = getSSA(8); |
||
3072 | Value *dst = getSSA(8), *tmp[2]; |
||
3073 | tmp[0] = fetchSrc(0, c); |
||
3074 | tmp[1] = fetchSrc(0, c + 1); |
||
3075 | mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
||
3076 | tmp[0] = fetchSrc(1, c); |
||
3077 | tmp[1] = fetchSrc(1, c + 1); |
||
3078 | mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); |
||
3079 | tmp[0] = fetchSrc(2, c); |
||
3080 | tmp[1] = fetchSrc(2, c + 1); |
||
3081 | mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]); |
||
3082 | mkOp3(op, dstTy, dst, src0, src1, src2); |
||
3083 | mkSplit(&dst0[c], 4, dst); |
||
3084 | c++; |
||
3085 | } |
||
3086 | break; |
||
3087 | case TGSI_OPCODE_DROUND: |
||
3088 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
3089 | src0 = getSSA(8); |
||
3090 | Value *dst = getSSA(8), *tmp[2]; |
||
3091 | tmp[0] = fetchSrc(0, c); |
||
3092 | tmp[1] = fetchSrc(0, c + 1); |
||
3093 | mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
||
3094 | mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0) |
||
3095 | ->rnd = ROUND_NI; |
||
3096 | mkSplit(&dst0[c], 4, dst); |
||
3097 | c++; |
||
3098 | } |
||
3099 | break; |
||
3100 | case TGSI_OPCODE_DSSG: |
||
3101 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
3102 | src0 = getSSA(8); |
||
3103 | Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2]; |
||
3104 | tmp[0] = fetchSrc(0, c); |
||
3105 | tmp[1] = fetchSrc(0, c + 1); |
||
3106 | mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
||
3107 | |||
3108 | val0 = getScratch(); |
||
3109 | val1 = getScratch(); |
||
3110 | // The zero is wrong here since it's only 32-bit, but it works out in |
||
3111 | // the end since it gets replaced with $r63. |
||
3112 | mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero); |
||
3113 | mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero); |
||
3114 | mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1); |
||
3115 | mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32); |
||
3116 | mkSplit(&dst0[c], 4, dst); |
||
3117 | c++; |
||
3118 | } |
||
3119 | break; |
||
3120 | default: |
||
3121 | ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); |
||
3122 | assert(0); |
||
3123 | break; |
||
3124 | } |
||
3125 | |||
3126 | if (tgsi.dstCount()) { |
||
3127 | for (c = 0; c < 4; ++c) { |
||
3128 | if (!dst0[c]) |
||
3129 | continue; |
||
3130 | if (dst0[c] != rDst0[c]) |
||
3131 | mkMov(rDst0[c], dst0[c]); |
||
3132 | storeDst(0, c, rDst0[c]); |
||
3133 | } |
||
3134 | } |
||
3135 | vtxBaseValid = 0; |
||
3136 | |||
3137 | return true; |
||
3138 | } |
||
3139 | |||
3140 | void |
||
3141 | Converter::handleUserClipPlanes() |
||
3142 | { |
||
3143 | Value *res[8]; |
||
3144 | int n, i, c; |
||
3145 | |||
3146 | for (c = 0; c < 4; ++c) { |
||
3147 | for (i = 0; i < info->io.genUserClip; ++i) { |
||
3148 | Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot, |
||
3149 | TYPE_F32, info->io.ucpBase + i * 16 + c * 4); |
||
3150 | Value *ucp = mkLoadv(TYPE_F32, sym, NULL); |
||
3151 | if (c == 0) |
||
3152 | res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); |
||
3153 | else |
||
3154 | mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); |
||
3155 | } |
||
3156 | } |
||
3157 | |||
3158 | const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; |
||
3159 | |||
3160 | for (i = 0; i < info->io.genUserClip; ++i) { |
||
3161 | n = i / 4 + first; |
||
3162 | c = i % 4; |
||
3163 | Symbol *sym = |
||
3164 | mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); |
||
3165 | mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); |
||
3166 | } |
||
3167 | } |
||
3168 | |||
3169 | void |
||
3170 | Converter::exportOutputs() |
||
3171 | { |
||
3172 | for (unsigned int i = 0; i < info->numOutputs; ++i) { |
||
3173 | for (unsigned int c = 0; c < 4; ++c) { |
||
3174 | if (!oData.exists(sub.cur->values, i, c)) |
||
3175 | continue; |
||
3176 | Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, |
||
3177 | info->out[i].slot[c] * 4); |
||
3178 | Value *val = oData.load(sub.cur->values, i, c, NULL); |
||
3179 | if (val) |
||
3180 | mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); |
||
3181 | } |
||
3182 | } |
||
3183 | } |
||
3184 | |||
3185 | Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), |
||
3186 | code(code), |
||
3187 | tgsi(NULL), |
||
3188 | tData(this), aData(this), pData(this), oData(this) |
||
3189 | { |
||
3190 | info = code->info; |
||
3191 | |||
3192 | const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR; |
||
3193 | |||
3194 | const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY); |
||
3195 | const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE); |
||
3196 | const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS); |
||
3197 | const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT); |
||
3198 | |||
3199 | tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0); |
||
3200 | pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); |
||
3201 | aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0); |
||
3202 | oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); |
||
3203 | |||
3204 | zero = mkImm((uint32_t)0); |
||
3205 | |||
3206 | vtxBaseValid = 0; |
||
3207 | } |
||
3208 | |||
3209 | Converter::~Converter() |
||
3210 | { |
||
3211 | } |
||
3212 | |||
3213 | inline const Converter::Location * |
||
3214 | Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v) |
||
3215 | { |
||
3216 | ValueMap::l_iterator it = s->values.l.find(v); |
||
3217 | return it == s->values.l.end() ? NULL : &it->second; |
||
3218 | } |
||
3219 | |||
3220 | template |
||
3221 | Converter::BindArgumentsPass::updateCallArgs( |
||
3222 | Instruction *i, void (Instruction::*setArg)(int, Value *), |
||
3223 | T (Function::*proto)) |
||
3224 | { |
||
3225 | Function *g = i->asFlow()->target.fn; |
||
3226 | Subroutine *subg = conv.getSubroutine(g); |
||
3227 | |||
3228 | for (unsigned a = 0; a < (g->*proto).size(); ++a) { |
||
3229 | Value *v = (g->*proto)[a].get(); |
||
3230 | const Converter::Location &l = *getValueLocation(subg, v); |
||
3231 | Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx); |
||
3232 | |||
3233 | (i->*setArg)(a, array->acquire(sub->values, l.i, l.c)); |
||
3234 | } |
||
3235 | } |
||
3236 | |||
3237 | template |
||
3238 | Converter::BindArgumentsPass::updatePrototype( |
||
3239 | BitSet *set, void (Function::*updateSet)(), T (Function::*proto)) |
||
3240 | { |
||
3241 | (func->*updateSet)(); |
||
3242 | |||
3243 | for (unsigned i = 0; i < set->getSize(); ++i) { |
||
3244 | Value *v = func->getLValue(i); |
||
3245 | const Converter::Location *l = getValueLocation(sub, v); |
||
3246 | |||
3247 | // only include values with a matching TGSI register |
||
3248 | if (set->test(i) && l && !conv.code->locals.count(*l)) |
||
3249 | (func->*proto).push_back(v); |
||
3250 | } |
||
3251 | } |
||
3252 | |||
3253 | bool |
||
3254 | Converter::BindArgumentsPass::visit(Function *f) |
||
3255 | { |
||
3256 | sub = conv.getSubroutine(f); |
||
3257 | |||
3258 | for (ArrayList::Iterator bi = f->allBBlocks.iterator(); |
||
3259 | !bi.end(); bi.next()) { |
||
3260 | for (Instruction *i = BasicBlock::get(bi)->getFirst(); |
||
3261 | i; i = i->next) { |
||
3262 | if (i->op == OP_CALL && !i->asFlow()->builtin) { |
||
3263 | updateCallArgs(i, &Instruction::setSrc, &Function::ins); |
||
3264 | updateCallArgs(i, &Instruction::setDef, &Function::outs); |
||
3265 | } |
||
3266 | } |
||
3267 | } |
||
3268 | |||
3269 | if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE) |
||
3270 | return true; |
||
3271 | updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet, |
||
3272 | &Function::buildLiveSets, &Function::ins); |
||
3273 | updatePrototype(&BasicBlock::get(f->cfgExit)->defSet, |
||
3274 | &Function::buildDefSets, &Function::outs); |
||
3275 | |||
3276 | return true; |
||
3277 | } |
||
3278 | |||
3279 | bool |
||
3280 | Converter::run() |
||
3281 | { |
||
3282 | BasicBlock *entry = new BasicBlock(prog->main); |
||
3283 | BasicBlock *leave = new BasicBlock(prog->main); |
||
3284 | |||
3285 | prog->main->setEntry(entry); |
||
3286 | prog->main->setExit(leave); |
||
3287 | |||
3288 | setPosition(entry, true); |
||
3289 | sub.cur = getSubroutine(prog->main); |
||
3290 | |||
3291 | if (info->io.genUserClip > 0) { |
||
3292 | for (int c = 0; c < 4; ++c) |
||
3293 | clipVtx[c] = getScratch(); |
||
3294 | } |
||
3295 | |||
3296 | if (prog->getType() == Program::TYPE_FRAGMENT) { |
||
3297 | Symbol *sv = mkSysVal(SV_POSITION, 3); |
||
3298 | fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); |
||
3299 | mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); |
||
3300 | } |
||
3301 | |||
3302 | if (info->io.viewportId >= 0) |
||
3303 | viewport = getScratch(); |
||
3304 | else |
||
3305 | viewport = NULL; |
||
3306 | |||
3307 | for (ip = 0; ip < code->scan.num_instructions; ++ip) { |
||
3308 | if (!handleInstruction(&code->insns[ip])) |
||
3309 | return false; |
||
3310 | } |
||
3311 | |||
3312 | if (!BindArgumentsPass(*this).run(prog)) |
||
3313 | return false; |
||
3314 | |||
3315 | return true; |
||
3316 | } |
||
3317 | |||
3318 | } // unnamed namespace |
||
3319 | |||
3320 | namespace nv50_ir { |
||
3321 | |||
3322 | bool |
||
3323 | Program::makeFromTGSI(struct nv50_ir_prog_info *info) |
||
3324 | { |
||
3325 | tgsi::Source src(info); |
||
3326 | if (!src.scanSource()) |
||
3327 | return false; |
||
3328 | tlsSize = info->bin.tlsSpace; |
||
3329 | |||
3330 | Converter builder(this, &src); |
||
3331 | return builder.run(); |
||
3332 | } |
||
3333 | |||
3334 | } // namespace nv50_ir>>>>>>>>>>>><>><>>>>>>>>>>>>>>><>>><>>>><>><>><>><>><>>>>><>>>>>><>>>>><>>>><>>><>><>>><>>>>><>><>><>><>><>>><>=>><>>>>>=>=>=>><>>=>=>=>>=>>>>><>>>>>><> |