Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2011 Christoph Bumiller |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | */ |
||
22 | |||
extern "C" {
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_util.h"
}

#include <set>
#include <vector>

#include "nv50_ir.h"
#include "nv50_ir_util.h"
#include "nv50_ir_build_util.h"
||
34 | |||
35 | namespace tgsi { |
||
36 | |||
37 | class Source; |
||
38 | |||
39 | static nv50_ir::operation translateOpcode(uint opcode); |
||
40 | static nv50_ir::DataFile translateFile(uint file); |
||
41 | static nv50_ir::TexTarget translateTexture(uint texTarg); |
||
42 | static nv50_ir::SVSemantic translateSysVal(uint sysval); |
||
43 | |||
44 | class Instruction |
||
45 | { |
||
46 | public: |
||
47 | Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { } |
||
48 | |||
49 | class SrcRegister |
||
50 | { |
||
51 | public: |
||
52 | SrcRegister(const struct tgsi_full_src_register *src) |
||
53 | : reg(src->Register), |
||
54 | fsr(src) |
||
55 | { } |
||
56 | |||
57 | SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { } |
||
58 | |||
59 | SrcRegister(const struct tgsi_ind_register& ind) |
||
60 | : reg(tgsi_util_get_src_from_ind(&ind)), |
||
61 | fsr(NULL) |
||
62 | { } |
||
63 | |||
64 | struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off) |
||
65 | { |
||
66 | struct tgsi_src_register reg; |
||
67 | memset(®, 0, sizeof(reg)); |
||
68 | reg.Index = off.Index; |
||
69 | reg.File = off.File; |
||
70 | reg.SwizzleX = off.SwizzleX; |
||
71 | reg.SwizzleY = off.SwizzleY; |
||
72 | reg.SwizzleZ = off.SwizzleZ; |
||
73 | return reg; |
||
74 | } |
||
75 | |||
76 | SrcRegister(const struct tgsi_texture_offset& off) : |
||
77 | reg(offsetToSrc(off)), |
||
78 | fsr(NULL) |
||
79 | { } |
||
80 | |||
81 | uint getFile() const { return reg.File; } |
||
82 | |||
83 | bool is2D() const { return reg.Dimension; } |
||
84 | |||
85 | bool isIndirect(int dim) const |
||
86 | { |
||
87 | return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect; |
||
88 | } |
||
89 | |||
90 | int getIndex(int dim) const |
||
91 | { |
||
92 | return (dim && fsr) ? fsr->Dimension.Index : reg.Index; |
||
93 | } |
||
94 | |||
95 | int getSwizzle(int chan) const |
||
96 | { |
||
97 | return tgsi_util_get_src_register_swizzle(®, chan); |
||
98 | } |
||
99 | |||
100 | nv50_ir::Modifier getMod(int chan) const; |
||
101 | |||
102 | SrcRegister getIndirect(int dim) const |
||
103 | { |
||
104 | assert(fsr && isIndirect(dim)); |
||
105 | if (dim) |
||
106 | return SrcRegister(fsr->DimIndirect); |
||
107 | return SrcRegister(fsr->Indirect); |
||
108 | } |
||
109 | |||
110 | uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const |
||
111 | { |
||
112 | assert(reg.File == TGSI_FILE_IMMEDIATE); |
||
113 | assert(!reg.Absolute); |
||
114 | assert(!reg.Negate); |
||
115 | return info->immd.data[reg.Index * 4 + getSwizzle(c)]; |
||
116 | } |
||
117 | |||
118 | private: |
||
119 | const struct tgsi_src_register reg; |
||
120 | const struct tgsi_full_src_register *fsr; |
||
121 | }; |
||
122 | |||
123 | class DstRegister |
||
124 | { |
||
125 | public: |
||
126 | DstRegister(const struct tgsi_full_dst_register *dst) |
||
127 | : reg(dst->Register), |
||
128 | fdr(dst) |
||
129 | { } |
||
130 | |||
131 | DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { } |
||
132 | |||
133 | uint getFile() const { return reg.File; } |
||
134 | |||
135 | bool is2D() const { return reg.Dimension; } |
||
136 | |||
137 | bool isIndirect(int dim) const |
||
138 | { |
||
139 | return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect; |
||
140 | } |
||
141 | |||
142 | int getIndex(int dim) const |
||
143 | { |
||
144 | return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index; |
||
145 | } |
||
146 | |||
147 | unsigned int getMask() const { return reg.WriteMask; } |
||
148 | |||
149 | bool isMasked(int chan) const { return !(getMask() & (1 << chan)); } |
||
150 | |||
151 | SrcRegister getIndirect(int dim) const |
||
152 | { |
||
153 | assert(fdr && isIndirect(dim)); |
||
154 | if (dim) |
||
155 | return SrcRegister(fdr->DimIndirect); |
||
156 | return SrcRegister(fdr->Indirect); |
||
157 | } |
||
158 | |||
159 | private: |
||
160 | const struct tgsi_dst_register reg; |
||
161 | const struct tgsi_full_dst_register *fdr; |
||
162 | }; |
||
163 | |||
164 | inline uint getOpcode() const { return insn->Instruction.Opcode; } |
||
165 | |||
166 | unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; } |
||
167 | unsigned int dstCount() const { return insn->Instruction.NumDstRegs; } |
||
168 | |||
169 | // mask of used components of source s |
||
170 | unsigned int srcMask(unsigned int s) const; |
||
171 | |||
172 | SrcRegister getSrc(unsigned int s) const |
||
173 | { |
||
174 | assert(s < srcCount()); |
||
175 | return SrcRegister(&insn->Src[s]); |
||
176 | } |
||
177 | |||
178 | DstRegister getDst(unsigned int d) const |
||
179 | { |
||
180 | assert(d < dstCount()); |
||
181 | return DstRegister(&insn->Dst[d]); |
||
182 | } |
||
183 | |||
184 | SrcRegister getTexOffset(unsigned int i) const |
||
185 | { |
||
186 | assert(i < TGSI_FULL_MAX_TEX_OFFSETS); |
||
187 | return SrcRegister(insn->TexOffsets[i]); |
||
188 | } |
||
189 | |||
190 | unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; } |
||
191 | |||
192 | bool checkDstSrcAliasing() const; |
||
193 | |||
194 | inline nv50_ir::operation getOP() const { |
||
195 | return translateOpcode(getOpcode()); } |
||
196 | |||
197 | nv50_ir::DataType inferSrcType() const; |
||
198 | nv50_ir::DataType inferDstType() const; |
||
199 | |||
200 | nv50_ir::CondCode getSetCond() const; |
||
201 | |||
202 | nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; |
||
203 | |||
204 | inline uint getLabel() { return insn->Label.Label; } |
||
205 | |||
206 | unsigned getSaturate() const { return insn->Instruction.Saturate; } |
||
207 | |||
208 | void print() const |
||
209 | { |
||
210 | tgsi_dump_instruction(insn, 1); |
||
211 | } |
||
212 | |||
213 | private: |
||
214 | const struct tgsi_full_instruction *insn; |
||
215 | }; |
||
216 | |||
217 | unsigned int Instruction::srcMask(unsigned int s) const |
||
218 | { |
||
219 | unsigned int mask = insn->Dst[0].Register.WriteMask; |
||
220 | |||
221 | switch (insn->Instruction.Opcode) { |
||
222 | case TGSI_OPCODE_COS: |
||
223 | case TGSI_OPCODE_SIN: |
||
224 | return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); |
||
225 | case TGSI_OPCODE_DP2: |
||
226 | return 0x3; |
||
227 | case TGSI_OPCODE_DP3: |
||
228 | return 0x7; |
||
229 | case TGSI_OPCODE_DP4: |
||
230 | case TGSI_OPCODE_DPH: |
||
231 | case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */ |
||
232 | return 0xf; |
||
233 | case TGSI_OPCODE_DST: |
||
234 | return mask & (s ? 0xa : 0x6); |
||
235 | case TGSI_OPCODE_EX2: |
||
236 | case TGSI_OPCODE_EXP: |
||
237 | case TGSI_OPCODE_LG2: |
||
238 | case TGSI_OPCODE_LOG: |
||
239 | case TGSI_OPCODE_POW: |
||
240 | case TGSI_OPCODE_RCP: |
||
241 | case TGSI_OPCODE_RSQ: |
||
242 | case TGSI_OPCODE_SCS: |
||
243 | return 0x1; |
||
244 | case TGSI_OPCODE_IF: |
||
245 | case TGSI_OPCODE_UIF: |
||
246 | return 0x1; |
||
247 | case TGSI_OPCODE_LIT: |
||
248 | return 0xb; |
||
249 | case TGSI_OPCODE_TEX2: |
||
250 | case TGSI_OPCODE_TXB2: |
||
251 | case TGSI_OPCODE_TXL2: |
||
252 | return (s == 0) ? 0xf : 0x3; |
||
253 | case TGSI_OPCODE_TEX: |
||
254 | case TGSI_OPCODE_TXB: |
||
255 | case TGSI_OPCODE_TXD: |
||
256 | case TGSI_OPCODE_TXL: |
||
257 | case TGSI_OPCODE_TXP: |
||
258 | { |
||
259 | const struct tgsi_instruction_texture *tex = &insn->Texture; |
||
260 | |||
261 | assert(insn->Instruction.Texture); |
||
262 | |||
263 | mask = 0x7; |
||
264 | if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && |
||
265 | insn->Instruction.Opcode != TGSI_OPCODE_TXD) |
||
266 | mask |= 0x8; /* bias, lod or proj */ |
||
267 | |||
268 | switch (tex->Texture) { |
||
269 | case TGSI_TEXTURE_1D: |
||
270 | mask &= 0x9; |
||
271 | break; |
||
272 | case TGSI_TEXTURE_SHADOW1D: |
||
273 | mask &= 0xd; |
||
274 | break; |
||
275 | case TGSI_TEXTURE_1D_ARRAY: |
||
276 | case TGSI_TEXTURE_2D: |
||
277 | case TGSI_TEXTURE_RECT: |
||
278 | mask &= 0xb; |
||
279 | break; |
||
280 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
281 | case TGSI_TEXTURE_SHADOW2D_ARRAY: |
||
282 | case TGSI_TEXTURE_SHADOWCUBE: |
||
283 | case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
||
284 | mask |= 0x8; |
||
285 | break; |
||
286 | default: |
||
287 | break; |
||
288 | } |
||
289 | } |
||
290 | return mask; |
||
291 | case TGSI_OPCODE_XPD: |
||
292 | { |
||
293 | unsigned int x = 0; |
||
294 | if (mask & 1) x |= 0x6; |
||
295 | if (mask & 2) x |= 0x5; |
||
296 | if (mask & 4) x |= 0x3; |
||
297 | return x; |
||
298 | } |
||
299 | default: |
||
300 | break; |
||
301 | } |
||
302 | |||
303 | return mask; |
||
304 | } |
||
305 | |||
306 | nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const |
||
307 | { |
||
308 | nv50_ir::Modifier m(0); |
||
309 | |||
310 | if (reg.Absolute) |
||
311 | m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS); |
||
312 | if (reg.Negate) |
||
313 | m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG); |
||
314 | return m; |
||
315 | } |
||
316 | |||
317 | static nv50_ir::DataFile translateFile(uint file) |
||
318 | { |
||
319 | switch (file) { |
||
320 | case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST; |
||
321 | case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT; |
||
322 | case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT; |
||
323 | case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR; |
||
324 | case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS; |
||
325 | case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; |
||
326 | case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; |
||
327 | case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; |
||
328 | case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL; |
||
329 | case TGSI_FILE_SAMPLER: |
||
330 | case TGSI_FILE_NULL: |
||
331 | default: |
||
332 | return nv50_ir::FILE_NULL; |
||
333 | } |
||
334 | } |
||
335 | |||
336 | static nv50_ir::SVSemantic translateSysVal(uint sysval) |
||
337 | { |
||
338 | switch (sysval) { |
||
339 | case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE; |
||
340 | case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE; |
||
341 | case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID; |
||
342 | case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID; |
||
343 | case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID; |
||
344 | case TGSI_SEMANTIC_GRID_SIZE: return nv50_ir::SV_NCTAID; |
||
345 | case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID; |
||
346 | case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID; |
||
347 | case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID; |
||
348 | default: |
||
349 | assert(0); |
||
350 | return nv50_ir::SV_CLOCK; |
||
351 | } |
||
352 | } |
||
353 | |||
354 | #define NV50_IR_TEX_TARG_CASE(a, b) \ |
||
355 | case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b; |
||
356 | |||
357 | static nv50_ir::TexTarget translateTexture(uint tex) |
||
358 | { |
||
359 | switch (tex) { |
||
360 | NV50_IR_TEX_TARG_CASE(1D, 1D); |
||
361 | NV50_IR_TEX_TARG_CASE(2D, 2D); |
||
362 | NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS); |
||
363 | NV50_IR_TEX_TARG_CASE(3D, 3D); |
||
364 | NV50_IR_TEX_TARG_CASE(CUBE, CUBE); |
||
365 | NV50_IR_TEX_TARG_CASE(RECT, RECT); |
||
366 | NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY); |
||
367 | NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY); |
||
368 | NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY); |
||
369 | NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY); |
||
370 | NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW); |
||
371 | NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW); |
||
372 | NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW); |
||
373 | NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW); |
||
374 | NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW); |
||
375 | NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW); |
||
376 | NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW); |
||
377 | NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER); |
||
378 | |||
379 | case TGSI_TEXTURE_UNKNOWN: |
||
380 | default: |
||
381 | assert(!"invalid texture target"); |
||
382 | return nv50_ir::TEX_TARGET_2D; |
||
383 | } |
||
384 | } |
||
385 | |||
386 | nv50_ir::DataType Instruction::inferSrcType() const |
||
387 | { |
||
388 | switch (getOpcode()) { |
||
389 | case TGSI_OPCODE_UIF: |
||
390 | case TGSI_OPCODE_AND: |
||
391 | case TGSI_OPCODE_OR: |
||
392 | case TGSI_OPCODE_XOR: |
||
393 | case TGSI_OPCODE_NOT: |
||
394 | case TGSI_OPCODE_U2F: |
||
395 | case TGSI_OPCODE_UADD: |
||
396 | case TGSI_OPCODE_UDIV: |
||
397 | case TGSI_OPCODE_UMOD: |
||
398 | case TGSI_OPCODE_UMAD: |
||
399 | case TGSI_OPCODE_UMUL: |
||
400 | case TGSI_OPCODE_UMAX: |
||
401 | case TGSI_OPCODE_UMIN: |
||
402 | case TGSI_OPCODE_USEQ: |
||
403 | case TGSI_OPCODE_USGE: |
||
404 | case TGSI_OPCODE_USLT: |
||
405 | case TGSI_OPCODE_USNE: |
||
406 | case TGSI_OPCODE_USHR: |
||
407 | case TGSI_OPCODE_UCMP: |
||
408 | case TGSI_OPCODE_ATOMUADD: |
||
409 | case TGSI_OPCODE_ATOMXCHG: |
||
410 | case TGSI_OPCODE_ATOMCAS: |
||
411 | case TGSI_OPCODE_ATOMAND: |
||
412 | case TGSI_OPCODE_ATOMOR: |
||
413 | case TGSI_OPCODE_ATOMXOR: |
||
414 | case TGSI_OPCODE_ATOMUMIN: |
||
415 | case TGSI_OPCODE_ATOMUMAX: |
||
416 | return nv50_ir::TYPE_U32; |
||
417 | case TGSI_OPCODE_I2F: |
||
418 | case TGSI_OPCODE_IDIV: |
||
419 | case TGSI_OPCODE_IMAX: |
||
420 | case TGSI_OPCODE_IMIN: |
||
421 | case TGSI_OPCODE_IABS: |
||
422 | case TGSI_OPCODE_INEG: |
||
423 | case TGSI_OPCODE_ISGE: |
||
424 | case TGSI_OPCODE_ISHR: |
||
425 | case TGSI_OPCODE_ISLT: |
||
426 | case TGSI_OPCODE_ISSG: |
||
427 | case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version |
||
428 | case TGSI_OPCODE_MOD: |
||
429 | case TGSI_OPCODE_UARL: |
||
430 | case TGSI_OPCODE_ATOMIMIN: |
||
431 | case TGSI_OPCODE_ATOMIMAX: |
||
432 | return nv50_ir::TYPE_S32; |
||
433 | default: |
||
434 | return nv50_ir::TYPE_F32; |
||
435 | } |
||
436 | } |
||
437 | |||
438 | nv50_ir::DataType Instruction::inferDstType() const |
||
439 | { |
||
440 | switch (getOpcode()) { |
||
441 | case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32; |
||
442 | case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32; |
||
443 | case TGSI_OPCODE_I2F: |
||
444 | case TGSI_OPCODE_U2F: |
||
445 | return nv50_ir::TYPE_F32; |
||
446 | default: |
||
447 | return inferSrcType(); |
||
448 | } |
||
449 | } |
||
450 | |||
451 | nv50_ir::CondCode Instruction::getSetCond() const |
||
452 | { |
||
453 | using namespace nv50_ir; |
||
454 | |||
455 | switch (getOpcode()) { |
||
456 | case TGSI_OPCODE_SLT: |
||
457 | case TGSI_OPCODE_ISLT: |
||
458 | case TGSI_OPCODE_USLT: |
||
459 | return CC_LT; |
||
460 | case TGSI_OPCODE_SLE: |
||
461 | return CC_LE; |
||
462 | case TGSI_OPCODE_SGE: |
||
463 | case TGSI_OPCODE_ISGE: |
||
464 | case TGSI_OPCODE_USGE: |
||
465 | return CC_GE; |
||
466 | case TGSI_OPCODE_SGT: |
||
467 | return CC_GT; |
||
468 | case TGSI_OPCODE_SEQ: |
||
469 | case TGSI_OPCODE_USEQ: |
||
470 | return CC_EQ; |
||
471 | case TGSI_OPCODE_SNE: |
||
472 | return CC_NEU; |
||
473 | case TGSI_OPCODE_USNE: |
||
474 | return CC_NE; |
||
475 | case TGSI_OPCODE_SFL: |
||
476 | return CC_NEVER; |
||
477 | case TGSI_OPCODE_STR: |
||
478 | default: |
||
479 | return CC_ALWAYS; |
||
480 | } |
||
481 | } |
||
482 | |||
483 | #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b |
||
484 | |||
485 | static nv50_ir::operation translateOpcode(uint opcode) |
||
486 | { |
||
487 | switch (opcode) { |
||
488 | NV50_IR_OPCODE_CASE(ARL, SHL); |
||
489 | NV50_IR_OPCODE_CASE(MOV, MOV); |
||
490 | |||
491 | NV50_IR_OPCODE_CASE(RCP, RCP); |
||
492 | NV50_IR_OPCODE_CASE(RSQ, RSQ); |
||
493 | |||
494 | NV50_IR_OPCODE_CASE(MUL, MUL); |
||
495 | NV50_IR_OPCODE_CASE(ADD, ADD); |
||
496 | |||
497 | NV50_IR_OPCODE_CASE(MIN, MIN); |
||
498 | NV50_IR_OPCODE_CASE(MAX, MAX); |
||
499 | NV50_IR_OPCODE_CASE(SLT, SET); |
||
500 | NV50_IR_OPCODE_CASE(SGE, SET); |
||
501 | NV50_IR_OPCODE_CASE(MAD, MAD); |
||
502 | NV50_IR_OPCODE_CASE(SUB, SUB); |
||
503 | |||
504 | NV50_IR_OPCODE_CASE(FLR, FLOOR); |
||
505 | NV50_IR_OPCODE_CASE(ROUND, CVT); |
||
506 | NV50_IR_OPCODE_CASE(EX2, EX2); |
||
507 | NV50_IR_OPCODE_CASE(LG2, LG2); |
||
508 | NV50_IR_OPCODE_CASE(POW, POW); |
||
509 | |||
510 | NV50_IR_OPCODE_CASE(ABS, ABS); |
||
511 | |||
512 | NV50_IR_OPCODE_CASE(COS, COS); |
||
513 | NV50_IR_OPCODE_CASE(DDX, DFDX); |
||
514 | NV50_IR_OPCODE_CASE(DDY, DFDY); |
||
515 | NV50_IR_OPCODE_CASE(KILL, DISCARD); |
||
516 | |||
517 | NV50_IR_OPCODE_CASE(SEQ, SET); |
||
518 | NV50_IR_OPCODE_CASE(SFL, SET); |
||
519 | NV50_IR_OPCODE_CASE(SGT, SET); |
||
520 | NV50_IR_OPCODE_CASE(SIN, SIN); |
||
521 | NV50_IR_OPCODE_CASE(SLE, SET); |
||
522 | NV50_IR_OPCODE_CASE(SNE, SET); |
||
523 | NV50_IR_OPCODE_CASE(STR, SET); |
||
524 | NV50_IR_OPCODE_CASE(TEX, TEX); |
||
525 | NV50_IR_OPCODE_CASE(TXD, TXD); |
||
526 | NV50_IR_OPCODE_CASE(TXP, TEX); |
||
527 | |||
528 | NV50_IR_OPCODE_CASE(BRA, BRA); |
||
529 | NV50_IR_OPCODE_CASE(CAL, CALL); |
||
530 | NV50_IR_OPCODE_CASE(RET, RET); |
||
531 | NV50_IR_OPCODE_CASE(CMP, SLCT); |
||
532 | |||
533 | NV50_IR_OPCODE_CASE(TXB, TXB); |
||
534 | |||
535 | NV50_IR_OPCODE_CASE(DIV, DIV); |
||
536 | |||
537 | NV50_IR_OPCODE_CASE(TXL, TXL); |
||
538 | |||
539 | NV50_IR_OPCODE_CASE(CEIL, CEIL); |
||
540 | NV50_IR_OPCODE_CASE(I2F, CVT); |
||
541 | NV50_IR_OPCODE_CASE(NOT, NOT); |
||
542 | NV50_IR_OPCODE_CASE(TRUNC, TRUNC); |
||
543 | NV50_IR_OPCODE_CASE(SHL, SHL); |
||
544 | |||
545 | NV50_IR_OPCODE_CASE(AND, AND); |
||
546 | NV50_IR_OPCODE_CASE(OR, OR); |
||
547 | NV50_IR_OPCODE_CASE(MOD, MOD); |
||
548 | NV50_IR_OPCODE_CASE(XOR, XOR); |
||
549 | NV50_IR_OPCODE_CASE(SAD, SAD); |
||
550 | NV50_IR_OPCODE_CASE(TXF, TXF); |
||
551 | NV50_IR_OPCODE_CASE(TXQ, TXQ); |
||
552 | |||
553 | NV50_IR_OPCODE_CASE(EMIT, EMIT); |
||
554 | NV50_IR_OPCODE_CASE(ENDPRIM, RESTART); |
||
555 | |||
556 | NV50_IR_OPCODE_CASE(KILL_IF, DISCARD); |
||
557 | |||
558 | NV50_IR_OPCODE_CASE(F2I, CVT); |
||
559 | NV50_IR_OPCODE_CASE(IDIV, DIV); |
||
560 | NV50_IR_OPCODE_CASE(IMAX, MAX); |
||
561 | NV50_IR_OPCODE_CASE(IMIN, MIN); |
||
562 | NV50_IR_OPCODE_CASE(IABS, ABS); |
||
563 | NV50_IR_OPCODE_CASE(INEG, NEG); |
||
564 | NV50_IR_OPCODE_CASE(ISGE, SET); |
||
565 | NV50_IR_OPCODE_CASE(ISHR, SHR); |
||
566 | NV50_IR_OPCODE_CASE(ISLT, SET); |
||
567 | NV50_IR_OPCODE_CASE(F2U, CVT); |
||
568 | NV50_IR_OPCODE_CASE(U2F, CVT); |
||
569 | NV50_IR_OPCODE_CASE(UADD, ADD); |
||
570 | NV50_IR_OPCODE_CASE(UDIV, DIV); |
||
571 | NV50_IR_OPCODE_CASE(UMAD, MAD); |
||
572 | NV50_IR_OPCODE_CASE(UMAX, MAX); |
||
573 | NV50_IR_OPCODE_CASE(UMIN, MIN); |
||
574 | NV50_IR_OPCODE_CASE(UMOD, MOD); |
||
575 | NV50_IR_OPCODE_CASE(UMUL, MUL); |
||
576 | NV50_IR_OPCODE_CASE(USEQ, SET); |
||
577 | NV50_IR_OPCODE_CASE(USGE, SET); |
||
578 | NV50_IR_OPCODE_CASE(USHR, SHR); |
||
579 | NV50_IR_OPCODE_CASE(USLT, SET); |
||
580 | NV50_IR_OPCODE_CASE(USNE, SET); |
||
581 | |||
582 | NV50_IR_OPCODE_CASE(SAMPLE, TEX); |
||
583 | NV50_IR_OPCODE_CASE(SAMPLE_B, TXB); |
||
584 | NV50_IR_OPCODE_CASE(SAMPLE_C, TEX); |
||
585 | NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX); |
||
586 | NV50_IR_OPCODE_CASE(SAMPLE_D, TXD); |
||
587 | NV50_IR_OPCODE_CASE(SAMPLE_L, TXL); |
||
588 | NV50_IR_OPCODE_CASE(SAMPLE_I, TXF); |
||
589 | NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF); |
||
590 | NV50_IR_OPCODE_CASE(GATHER4, TXG); |
||
591 | NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ); |
||
592 | |||
593 | NV50_IR_OPCODE_CASE(ATOMUADD, ATOM); |
||
594 | NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM); |
||
595 | NV50_IR_OPCODE_CASE(ATOMCAS, ATOM); |
||
596 | NV50_IR_OPCODE_CASE(ATOMAND, ATOM); |
||
597 | NV50_IR_OPCODE_CASE(ATOMOR, ATOM); |
||
598 | NV50_IR_OPCODE_CASE(ATOMXOR, ATOM); |
||
599 | NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM); |
||
600 | NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM); |
||
601 | NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM); |
||
602 | NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM); |
||
603 | |||
604 | NV50_IR_OPCODE_CASE(TEX2, TEX); |
||
605 | NV50_IR_OPCODE_CASE(TXB2, TXB); |
||
606 | NV50_IR_OPCODE_CASE(TXL2, TXL); |
||
607 | |||
608 | NV50_IR_OPCODE_CASE(END, EXIT); |
||
609 | |||
610 | default: |
||
611 | return nv50_ir::OP_NOP; |
||
612 | } |
||
613 | } |
||
614 | |||
615 | static uint16_t opcodeToSubOp(uint opcode) |
||
616 | { |
||
617 | switch (opcode) { |
||
618 | case TGSI_OPCODE_LFENCE: return NV50_IR_SUBOP_MEMBAR(L, GL); |
||
619 | case TGSI_OPCODE_SFENCE: return NV50_IR_SUBOP_MEMBAR(S, GL); |
||
620 | case TGSI_OPCODE_MFENCE: return NV50_IR_SUBOP_MEMBAR(M, GL); |
||
621 | case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD; |
||
622 | case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH; |
||
623 | case TGSI_OPCODE_ATOMCAS: return NV50_IR_SUBOP_ATOM_CAS; |
||
624 | case TGSI_OPCODE_ATOMAND: return NV50_IR_SUBOP_ATOM_AND; |
||
625 | case TGSI_OPCODE_ATOMOR: return NV50_IR_SUBOP_ATOM_OR; |
||
626 | case TGSI_OPCODE_ATOMXOR: return NV50_IR_SUBOP_ATOM_XOR; |
||
627 | case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN; |
||
628 | case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; |
||
629 | case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; |
||
630 | case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; |
||
631 | default: |
||
632 | return 0; |
||
633 | } |
||
634 | } |
||
635 | |||
636 | bool Instruction::checkDstSrcAliasing() const |
||
637 | { |
||
638 | if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory |
||
639 | return false; |
||
640 | |||
641 | for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) { |
||
642 | if (insn->Src[s].Register.File == TGSI_FILE_NULL) |
||
643 | break; |
||
644 | if (insn->Src[s].Register.File == insn->Dst[0].Register.File && |
||
645 | insn->Src[s].Register.Index == insn->Dst[0].Register.Index) |
||
646 | return true; |
||
647 | } |
||
648 | return false; |
||
649 | } |
||
650 | |||
651 | class Source |
||
652 | { |
||
653 | public: |
||
654 | Source(struct nv50_ir_prog_info *); |
||
655 | ~Source(); |
||
656 | |||
657 | public: |
||
658 | bool scanSource(); |
||
659 | unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; } |
||
660 | |||
661 | public: |
||
662 | struct tgsi_shader_info scan; |
||
663 | struct tgsi_full_instruction *insns; |
||
664 | const struct tgsi_token *tokens; |
||
665 | struct nv50_ir_prog_info *info; |
||
666 | |||
667 | nv50_ir::DynArray tempArrays; |
||
668 | nv50_ir::DynArray immdArrays; |
||
669 | |||
670 | typedef nv50_ir::BuildUtil::Location Location; |
||
671 | // these registers are per-subroutine, cannot be used for parameter passing |
||
672 | std::set |
||
673 | |||
674 | bool mainTempsInLMem; |
||
675 | |||
676 | int clipVertexOutput; |
||
677 | |||
678 | struct TextureView { |
||
679 | uint8_t target; // TGSI_TEXTURE_* |
||
680 | }; |
||
681 | std::vector |
||
682 | |||
683 | struct Resource { |
||
684 | uint8_t target; // TGSI_TEXTURE_* |
||
685 | bool raw; |
||
686 | uint8_t slot; // $surface index |
||
687 | }; |
||
688 | std::vector |
||
689 | |||
690 | private: |
||
691 | int inferSysValDirection(unsigned sn) const; |
||
692 | bool scanDeclaration(const struct tgsi_full_declaration *); |
||
693 | bool scanInstruction(const struct tgsi_full_instruction *); |
||
694 | void scanProperty(const struct tgsi_full_property *); |
||
695 | void scanImmediate(const struct tgsi_full_immediate *); |
||
696 | |||
697 | inline bool isEdgeFlagPassthrough(const Instruction&) const; |
||
698 | }; |
||
699 | |||
700 | Source::Source(struct nv50_ir_prog_info *prog) : info(prog) |
||
701 | { |
||
702 | tokens = (const struct tgsi_token *)info->bin.source; |
||
703 | |||
704 | if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) |
||
705 | tgsi_dump(tokens, 0); |
||
706 | |||
707 | mainTempsInLMem = FALSE; |
||
708 | } |
||
709 | |||
710 | Source::~Source() |
||
711 | { |
||
712 | if (insns) |
||
713 | FREE(insns); |
||
714 | |||
715 | if (info->immd.data) |
||
716 | FREE(info->immd.data); |
||
717 | if (info->immd.type) |
||
718 | FREE(info->immd.type); |
||
719 | } |
||
720 | |||
721 | bool Source::scanSource() |
||
722 | { |
||
723 | unsigned insnCount = 0; |
||
724 | struct tgsi_parse_context parse; |
||
725 | |||
726 | tgsi_scan_shader(tokens, &scan); |
||
727 | |||
728 | insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions * |
||
729 | sizeof(insns[0])); |
||
730 | if (!insns) |
||
731 | return false; |
||
732 | |||
733 | clipVertexOutput = -1; |
||
734 | |||
735 | textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); |
||
736 | resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); |
||
737 | |||
738 | info->immd.bufSize = 0; |
||
739 | |||
740 | info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; |
||
741 | info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; |
||
742 | info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; |
||
743 | |||
744 | if (info->type == PIPE_SHADER_FRAGMENT) { |
||
745 | info->prop.fp.writesDepth = scan.writes_z; |
||
746 | info->prop.fp.usesDiscard = scan.uses_kill; |
||
747 | } else |
||
748 | if (info->type == PIPE_SHADER_GEOMETRY) { |
||
749 | info->prop.gp.instanceCount = 1; // default value |
||
750 | } |
||
751 | |||
752 | info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); |
||
753 | info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); |
||
754 | |||
755 | tgsi_parse_init(&parse, tokens); |
||
756 | while (!tgsi_parse_end_of_tokens(&parse)) { |
||
757 | tgsi_parse_token(&parse); |
||
758 | |||
759 | switch (parse.FullToken.Token.Type) { |
||
760 | case TGSI_TOKEN_TYPE_IMMEDIATE: |
||
761 | scanImmediate(&parse.FullToken.FullImmediate); |
||
762 | break; |
||
763 | case TGSI_TOKEN_TYPE_DECLARATION: |
||
764 | scanDeclaration(&parse.FullToken.FullDeclaration); |
||
765 | break; |
||
766 | case TGSI_TOKEN_TYPE_INSTRUCTION: |
||
767 | insns[insnCount++] = parse.FullToken.FullInstruction; |
||
768 | scanInstruction(&parse.FullToken.FullInstruction); |
||
769 | break; |
||
770 | case TGSI_TOKEN_TYPE_PROPERTY: |
||
771 | scanProperty(&parse.FullToken.FullProperty); |
||
772 | break; |
||
773 | default: |
||
774 | INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type); |
||
775 | break; |
||
776 | } |
||
777 | } |
||
778 | tgsi_parse_free(&parse); |
||
779 | |||
780 | if (mainTempsInLMem) |
||
781 | info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; |
||
782 | |||
783 | if (info->io.genUserClip > 0) { |
||
784 | info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1; |
||
785 | |||
786 | const unsigned int nOut = (info->io.genUserClip + 3) / 4; |
||
787 | |||
788 | for (unsigned int n = 0; n < nOut; ++n) { |
||
789 | unsigned int i = info->numOutputs++; |
||
790 | info->out[i].id = i; |
||
791 | info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; |
||
792 | info->out[i].si = n; |
||
793 | info->out[i].mask = info->io.clipDistanceMask >> (n * 4); |
||
794 | } |
||
795 | } |
||
796 | |||
797 | return info->assignSlots(info) == 0; |
||
798 | } |
||
799 | |||
800 | void Source::scanProperty(const struct tgsi_full_property *prop) |
||
801 | { |
||
802 | switch (prop->Property.PropertyName) { |
||
803 | case TGSI_PROPERTY_GS_OUTPUT_PRIM: |
||
804 | info->prop.gp.outputPrim = prop->u[0].Data; |
||
805 | break; |
||
806 | case TGSI_PROPERTY_GS_INPUT_PRIM: |
||
807 | info->prop.gp.inputPrim = prop->u[0].Data; |
||
808 | break; |
||
809 | case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: |
||
810 | info->prop.gp.maxVertices = prop->u[0].Data; |
||
811 | break; |
||
812 | #if 0 |
||
813 | case TGSI_PROPERTY_GS_INSTANCE_COUNT: |
||
814 | info->prop.gp.instanceCount = prop->u[0].Data; |
||
815 | break; |
||
816 | #endif |
||
817 | case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: |
||
818 | info->prop.fp.separateFragData = TRUE; |
||
819 | break; |
||
820 | case TGSI_PROPERTY_FS_COORD_ORIGIN: |
||
821 | case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: |
||
822 | // we don't care |
||
823 | break; |
||
824 | case TGSI_PROPERTY_VS_PROHIBIT_UCPS: |
||
825 | info->io.genUserClip = -1; |
||
826 | break; |
||
827 | default: |
||
828 | INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); |
||
829 | break; |
||
830 | } |
||
831 | } |
||
832 | |||
833 | void Source::scanImmediate(const struct tgsi_full_immediate *imm) |
||
834 | { |
||
835 | const unsigned n = info->immd.count++; |
||
836 | |||
837 | assert(n < scan.immediate_count); |
||
838 | |||
839 | for (int c = 0; c < 4; ++c) |
||
840 | info->immd.data[n * 4 + c] = imm->u[c].Uint; |
||
841 | |||
842 | info->immd.type[n] = imm->Immediate.DataType; |
||
843 | } |
||
844 | |||
845 | int Source::inferSysValDirection(unsigned sn) const |
||
846 | { |
||
847 | switch (sn) { |
||
848 | case TGSI_SEMANTIC_INSTANCEID: |
||
849 | case TGSI_SEMANTIC_VERTEXID: |
||
850 | return 1; |
||
851 | #if 0 |
||
852 | case TGSI_SEMANTIC_LAYER: |
||
853 | case TGSI_SEMANTIC_VIEWPORTINDEX: |
||
854 | return 0; |
||
855 | #endif |
||
856 | case TGSI_SEMANTIC_PRIMID: |
||
857 | return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0; |
||
858 | default: |
||
859 | return 0; |
||
860 | } |
||
861 | } |
||
862 | |||
863 | bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) |
||
864 | { |
||
865 | unsigned i, c; |
||
866 | unsigned sn = TGSI_SEMANTIC_GENERIC; |
||
867 | unsigned si = 0; |
||
868 | const unsigned first = decl->Range.First, last = decl->Range.Last; |
||
869 | |||
870 | if (decl->Declaration.Semantic) { |
||
871 | sn = decl->Semantic.Name; |
||
872 | si = decl->Semantic.Index; |
||
873 | } |
||
874 | |||
875 | if (decl->Declaration.Local) { |
||
876 | for (i = first; i <= last; ++i) { |
||
877 | for (c = 0; c < 4; ++c) { |
||
878 | locals.insert( |
||
879 | Location(decl->Declaration.File, decl->Dim.Index2D, i, c)); |
||
880 | } |
||
881 | } |
||
882 | } |
||
883 | |||
884 | switch (decl->Declaration.File) { |
||
885 | case TGSI_FILE_INPUT: |
||
886 | if (info->type == PIPE_SHADER_VERTEX) { |
||
887 | // all vertex attributes are equal |
||
888 | for (i = first; i <= last; ++i) { |
||
889 | info->in[i].sn = TGSI_SEMANTIC_GENERIC; |
||
890 | info->in[i].si = i; |
||
891 | } |
||
892 | } else { |
||
893 | for (i = first; i <= last; ++i, ++si) { |
||
894 | info->in[i].id = i; |
||
895 | info->in[i].sn = sn; |
||
896 | info->in[i].si = si; |
||
897 | if (info->type == PIPE_SHADER_FRAGMENT) { |
||
898 | // translate interpolation mode |
||
899 | switch (decl->Interp.Interpolate) { |
||
900 | case TGSI_INTERPOLATE_CONSTANT: |
||
901 | info->in[i].flat = 1; |
||
902 | break; |
||
903 | case TGSI_INTERPOLATE_COLOR: |
||
904 | info->in[i].sc = 1; |
||
905 | break; |
||
906 | case TGSI_INTERPOLATE_LINEAR: |
||
907 | info->in[i].linear = 1; |
||
908 | break; |
||
909 | default: |
||
910 | break; |
||
911 | } |
||
912 | if (decl->Interp.Centroid) |
||
913 | info->in[i].centroid = 1; |
||
914 | } |
||
915 | } |
||
916 | } |
||
917 | break; |
||
918 | case TGSI_FILE_OUTPUT: |
||
919 | for (i = first; i <= last; ++i, ++si) { |
||
920 | switch (sn) { |
||
921 | case TGSI_SEMANTIC_POSITION: |
||
922 | if (info->type == PIPE_SHADER_FRAGMENT) |
||
923 | info->io.fragDepth = i; |
||
924 | else |
||
925 | if (clipVertexOutput < 0) |
||
926 | clipVertexOutput = i; |
||
927 | break; |
||
928 | case TGSI_SEMANTIC_COLOR: |
||
929 | if (info->type == PIPE_SHADER_FRAGMENT) |
||
930 | info->prop.fp.numColourResults++; |
||
931 | break; |
||
932 | case TGSI_SEMANTIC_EDGEFLAG: |
||
933 | info->io.edgeFlagOut = i; |
||
934 | break; |
||
935 | case TGSI_SEMANTIC_CLIPVERTEX: |
||
936 | clipVertexOutput = i; |
||
937 | break; |
||
938 | case TGSI_SEMANTIC_CLIPDIST: |
||
939 | info->io.clipDistanceMask |= |
||
940 | decl->Declaration.UsageMask << (si * 4); |
||
941 | info->io.genUserClip = -1; |
||
942 | break; |
||
943 | default: |
||
944 | break; |
||
945 | } |
||
946 | info->out[i].id = i; |
||
947 | info->out[i].sn = sn; |
||
948 | info->out[i].si = si; |
||
949 | } |
||
950 | break; |
||
951 | case TGSI_FILE_SYSTEM_VALUE: |
||
952 | switch (sn) { |
||
953 | case TGSI_SEMANTIC_INSTANCEID: |
||
954 | info->io.instanceId = first; |
||
955 | break; |
||
956 | case TGSI_SEMANTIC_VERTEXID: |
||
957 | info->io.vertexId = first; |
||
958 | break; |
||
959 | default: |
||
960 | break; |
||
961 | } |
||
962 | for (i = first; i <= last; ++i, ++si) { |
||
963 | info->sv[i].sn = sn; |
||
964 | info->sv[i].si = si; |
||
965 | info->sv[i].input = inferSysValDirection(sn); |
||
966 | } |
||
967 | break; |
||
968 | case TGSI_FILE_RESOURCE: |
||
969 | for (i = first; i <= last; ++i) { |
||
970 | resources[i].target = decl->Resource.Resource; |
||
971 | resources[i].raw = decl->Resource.Raw; |
||
972 | resources[i].slot = i; |
||
973 | } |
||
974 | break; |
||
975 | case TGSI_FILE_SAMPLER_VIEW: |
||
976 | for (i = first; i <= last; ++i) |
||
977 | textureViews[i].target = decl->SamplerView.Resource; |
||
978 | break; |
||
979 | case TGSI_FILE_NULL: |
||
980 | case TGSI_FILE_TEMPORARY: |
||
981 | case TGSI_FILE_ADDRESS: |
||
982 | case TGSI_FILE_CONSTANT: |
||
983 | case TGSI_FILE_IMMEDIATE: |
||
984 | case TGSI_FILE_PREDICATE: |
||
985 | case TGSI_FILE_SAMPLER: |
||
986 | break; |
||
987 | default: |
||
988 | ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); |
||
989 | return false; |
||
990 | } |
||
991 | return true; |
||
992 | } |
||
993 | |||
994 | inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const |
||
995 | { |
||
996 | return insn.getOpcode() == TGSI_OPCODE_MOV && |
||
997 | insn.getDst(0).getIndex(0) == info->io.edgeFlagOut && |
||
998 | insn.getSrc(0).getFile() == TGSI_FILE_INPUT; |
||
999 | } |
||
1000 | |||
1001 | bool Source::scanInstruction(const struct tgsi_full_instruction *inst) |
||
1002 | { |
||
1003 | Instruction insn(inst); |
||
1004 | |||
1005 | if (insn.getOpcode() == TGSI_OPCODE_BARRIER) |
||
1006 | info->numBarriers = 1; |
||
1007 | |||
1008 | if (insn.dstCount()) { |
||
1009 | if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) { |
||
1010 | Instruction::DstRegister dst = insn.getDst(0); |
||
1011 | |||
1012 | if (dst.isIndirect(0)) |
||
1013 | for (unsigned i = 0; i < info->numOutputs; ++i) |
||
1014 | info->out[i].mask = 0xf; |
||
1015 | else |
||
1016 | info->out[dst.getIndex(0)].mask |= dst.getMask(); |
||
1017 | |||
1018 | if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE || |
||
1019 | info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID || |
||
1020 | info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG) |
||
1021 | info->out[dst.getIndex(0)].mask &= 1; |
||
1022 | |||
1023 | if (isEdgeFlagPassthrough(insn)) |
||
1024 | info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); |
||
1025 | } else |
||
1026 | if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { |
||
1027 | if (insn.getDst(0).isIndirect(0)) |
||
1028 | mainTempsInLMem = TRUE; |
||
1029 | } |
||
1030 | } |
||
1031 | |||
1032 | for (unsigned s = 0; s < insn.srcCount(); ++s) { |
||
1033 | Instruction::SrcRegister src = insn.getSrc(s); |
||
1034 | if (src.getFile() == TGSI_FILE_TEMPORARY) { |
||
1035 | if (src.isIndirect(0)) |
||
1036 | mainTempsInLMem = TRUE; |
||
1037 | } else |
||
1038 | if (src.getFile() == TGSI_FILE_RESOURCE) { |
||
1039 | if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL) |
||
1040 | info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? |
||
1041 | 0x1 : 0x2; |
||
1042 | } |
||
1043 | if (src.getFile() != TGSI_FILE_INPUT) |
||
1044 | continue; |
||
1045 | unsigned mask = insn.srcMask(s); |
||
1046 | |||
1047 | if (src.isIndirect(0)) { |
||
1048 | for (unsigned i = 0; i < info->numInputs; ++i) |
||
1049 | info->in[i].mask = 0xf; |
||
1050 | } else { |
||
1051 | const int i = src.getIndex(0); |
||
1052 | for (unsigned c = 0; c < 4; ++c) { |
||
1053 | if (!(mask & (1 << c))) |
||
1054 | continue; |
||
1055 | int k = src.getSwizzle(c); |
||
1056 | if (k <= TGSI_SWIZZLE_W) |
||
1057 | info->in[i].mask |= 1 << k; |
||
1058 | } |
||
1059 | switch (info->in[i].sn) { |
||
1060 | case TGSI_SEMANTIC_PSIZE: |
||
1061 | case TGSI_SEMANTIC_PRIMID: |
||
1062 | case TGSI_SEMANTIC_FOG: |
||
1063 | info->in[i].mask &= 0x1; |
||
1064 | break; |
||
1065 | case TGSI_SEMANTIC_PCOORD: |
||
1066 | info->in[i].mask &= 0x3; |
||
1067 | break; |
||
1068 | default: |
||
1069 | break; |
||
1070 | } |
||
1071 | } |
||
1072 | } |
||
1073 | return true; |
||
1074 | } |
||
1075 | |||
1076 | nv50_ir::TexInstruction::Target |
||
1077 | Instruction::getTexture(const tgsi::Source *code, int s) const |
||
1078 | { |
||
1079 | // XXX: indirect access |
||
1080 | unsigned int r; |
||
1081 | |||
1082 | switch (getSrc(s).getFile()) { |
||
1083 | case TGSI_FILE_RESOURCE: |
||
1084 | r = getSrc(s).getIndex(0); |
||
1085 | return translateTexture(code->resources.at(r).target); |
||
1086 | case TGSI_FILE_SAMPLER_VIEW: |
||
1087 | r = getSrc(s).getIndex(0); |
||
1088 | return translateTexture(code->textureViews.at(r).target); |
||
1089 | default: |
||
1090 | return translateTexture(insn->Texture.Texture); |
||
1091 | } |
||
1092 | } |
||
1093 | |||
1094 | } // namespace tgsi |
||
1095 | |||
1096 | namespace { |
||
1097 | |||
1098 | using namespace nv50_ir; |
||
1099 | |||
1100 | class Converter : public BuildUtil |
||
1101 | { |
||
1102 | public: |
||
1103 | Converter(Program *, const tgsi::Source *); |
||
1104 | ~Converter(); |
||
1105 | |||
1106 | bool run(); |
||
1107 | |||
1108 | private: |
||
1109 | struct Subroutine |
||
1110 | { |
||
1111 | Subroutine(Function *f) : f(f) { } |
||
1112 | Function *f; |
||
1113 | ValueMap values; |
||
1114 | }; |
||
1115 | |||
1116 | Value *getVertexBase(int s); |
||
1117 | DataArray *getArrayForFile(unsigned file, int idx); |
||
1118 | Value *fetchSrc(int s, int c); |
||
1119 | Value *acquireDst(int d, int c); |
||
1120 | void storeDst(int d, int c, Value *); |
||
1121 | |||
1122 | Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr); |
||
1123 | void storeDst(const tgsi::Instruction::DstRegister dst, int c, |
||
1124 | Value *val, Value *ptr); |
||
1125 | |||
1126 | Value *applySrcMod(Value *, int s, int c); |
||
1127 | |||
1128 | Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr); |
||
1129 | Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c); |
||
1130 | Symbol *dstToSym(tgsi::Instruction::DstRegister, int c); |
||
1131 | |||
1132 | bool handleInstruction(const struct tgsi_full_instruction *); |
||
1133 | void exportOutputs(); |
||
1134 | inline Subroutine *getSubroutine(unsigned ip); |
||
1135 | inline Subroutine *getSubroutine(Function *); |
||
1136 | inline bool isEndOfSubroutine(uint ip); |
||
1137 | |||
1138 | void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask); |
||
1139 | |||
1140 | // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto) |
||
1141 | void setTexRS(TexInstruction *, unsigned int& s, int R, int S); |
||
1142 | void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); |
||
1143 | void handleTXF(Value *dst0[4], int R, int L_M); |
||
1144 | void handleTXQ(Value *dst0[4], enum TexQuery); |
||
1145 | void handleLIT(Value *dst0[4]); |
||
1146 | void handleUserClipPlanes(); |
||
1147 | |||
1148 | Symbol *getResourceBase(int r); |
||
1149 | void getResourceCoords(std::vector |
||
1150 | |||
1151 | void handleLOAD(Value *dst0[4]); |
||
1152 | void handleSTORE(); |
||
1153 | void handleATOM(Value *dst0[4], DataType, uint16_t subOp); |
||
1154 | |||
1155 | Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); |
||
1156 | |||
1157 | void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); |
||
1158 | |||
1159 | Value *buildDot(int dim); |
||
1160 | |||
1161 | class BindArgumentsPass : public Pass { |
||
1162 | public: |
||
1163 | BindArgumentsPass(Converter &conv) : conv(conv) { } |
||
1164 | |||
1165 | private: |
||
1166 | Converter &conv; |
||
1167 | Subroutine *sub; |
||
1168 | |||
1169 | inline const Location *getValueLocation(Subroutine *, Value *); |
||
1170 | |||
1171 | template |
||
1172 | updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *), |
||
1173 | T (Function::*proto)); |
||
1174 | |||
1175 | template |
||
1176 | updatePrototype(BitSet *set, void (Function::*updateSet)(), |
||
1177 | T (Function::*proto)); |
||
1178 | |||
1179 | protected: |
||
1180 | bool visit(Function *); |
||
1181 | bool visit(BasicBlock *bb) { return false; } |
||
1182 | }; |
||
1183 | |||
1184 | private: |
||
1185 | const struct tgsi::Source *code; |
||
1186 | const struct nv50_ir_prog_info *info; |
||
1187 | |||
1188 | struct { |
||
1189 | std::map |
||
1190 | Subroutine *cur; |
||
1191 | } sub; |
||
1192 | |||
1193 | uint ip; // instruction pointer |
||
1194 | |||
1195 | tgsi::Instruction tgsi; |
||
1196 | |||
1197 | DataType dstTy; |
||
1198 | DataType srcTy; |
||
1199 | |||
1200 | DataArray tData; // TGSI_FILE_TEMPORARY |
||
1201 | DataArray aData; // TGSI_FILE_ADDRESS |
||
1202 | DataArray pData; // TGSI_FILE_PREDICATE |
||
1203 | DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers) |
||
1204 | |||
1205 | Value *zero; |
||
1206 | Value *fragCoord[4]; |
||
1207 | Value *clipVtx[4]; |
||
1208 | |||
1209 | Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP) |
||
1210 | uint8_t vtxBaseValid; |
||
1211 | |||
1212 | Stack condBBs; // fork BB, then else clause BB |
||
1213 | Stack joinBBs; // fork BB, for inserting join ops on ENDIF |
||
1214 | Stack loopBBs; // loop headers |
||
1215 | Stack breakBBs; // end of / after loop |
||
1216 | }; |
||
1217 | |||
1218 | Symbol * |
||
1219 | Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c) |
||
1220 | { |
||
1221 | const int swz = src.getSwizzle(c); |
||
1222 | |||
1223 | return makeSym(src.getFile(), |
||
1224 | src.is2D() ? src.getIndex(1) : 0, |
||
1225 | src.isIndirect(0) ? -1 : src.getIndex(0), swz, |
||
1226 | src.getIndex(0) * 16 + swz * 4); |
||
1227 | } |
||
1228 | |||
1229 | Symbol * |
||
1230 | Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c) |
||
1231 | { |
||
1232 | return makeSym(dst.getFile(), |
||
1233 | dst.is2D() ? dst.getIndex(1) : 0, |
||
1234 | dst.isIndirect(0) ? -1 : dst.getIndex(0), c, |
||
1235 | dst.getIndex(0) * 16 + c * 4); |
||
1236 | } |
||
1237 | |||
1238 | Symbol * |
||
1239 | Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) |
||
1240 | { |
||
1241 | Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile)); |
||
1242 | |||
1243 | sym->reg.fileIndex = fileIdx; |
||
1244 | |||
1245 | if (idx >= 0) { |
||
1246 | if (sym->reg.file == FILE_SHADER_INPUT) |
||
1247 | sym->setOffset(info->in[idx].slot[c] * 4); |
||
1248 | else |
||
1249 | if (sym->reg.file == FILE_SHADER_OUTPUT) |
||
1250 | sym->setOffset(info->out[idx].slot[c] * 4); |
||
1251 | else |
||
1252 | if (sym->reg.file == FILE_SYSTEM_VALUE) |
||
1253 | sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c); |
||
1254 | else |
||
1255 | sym->setOffset(address); |
||
1256 | } else { |
||
1257 | sym->setOffset(address); |
||
1258 | } |
||
1259 | return sym; |
||
1260 | } |
||
1261 | |||
1262 | static inline uint8_t |
||
1263 | translateInterpMode(const struct nv50_ir_varying *var, operation& op) |
||
1264 | { |
||
1265 | uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; |
||
1266 | |||
1267 | if (var->flat) |
||
1268 | mode = NV50_IR_INTERP_FLAT; |
||
1269 | else |
||
1270 | if (var->linear) |
||
1271 | mode = NV50_IR_INTERP_LINEAR; |
||
1272 | else |
||
1273 | if (var->sc) |
||
1274 | mode = NV50_IR_INTERP_SC; |
||
1275 | |||
1276 | op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) |
||
1277 | ? OP_PINTERP : OP_LINTERP; |
||
1278 | |||
1279 | if (var->centroid) |
||
1280 | mode |= NV50_IR_INTERP_CENTROID; |
||
1281 | |||
1282 | return mode; |
||
1283 | } |
||
1284 | |||
1285 | Value * |
||
1286 | Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr) |
||
1287 | { |
||
1288 | operation op; |
||
1289 | |||
1290 | // XXX: no way to know interpolation mode if we don't know what's accessed |
||
1291 | const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 : |
||
1292 | src.getIndex(0)], op); |
||
1293 | |||
1294 | Instruction *insn = new_Instruction(func, op, TYPE_F32); |
||
1295 | |||
1296 | insn->setDef(0, getScratch()); |
||
1297 | insn->setSrc(0, srcToSym(src, c)); |
||
1298 | if (op == OP_PINTERP) |
||
1299 | insn->setSrc(1, fragCoord[3]); |
||
1300 | if (ptr) |
||
1301 | insn->setIndirect(0, 0, ptr); |
||
1302 | |||
1303 | insn->setInterpolate(mode); |
||
1304 | |||
1305 | bb->insertTail(insn); |
||
1306 | return insn->getDef(0); |
||
1307 | } |
||
1308 | |||
1309 | Value * |
||
1310 | Converter::applySrcMod(Value *val, int s, int c) |
||
1311 | { |
||
1312 | Modifier m = tgsi.getSrc(s).getMod(c); |
||
1313 | DataType ty = tgsi.inferSrcType(); |
||
1314 | |||
1315 | if (m & Modifier(NV50_IR_MOD_ABS)) |
||
1316 | val = mkOp1v(OP_ABS, ty, getScratch(), val); |
||
1317 | |||
1318 | if (m & Modifier(NV50_IR_MOD_NEG)) |
||
1319 | val = mkOp1v(OP_NEG, ty, getScratch(), val); |
||
1320 | |||
1321 | return val; |
||
1322 | } |
||
1323 | |||
1324 | Value * |
||
1325 | Converter::getVertexBase(int s) |
||
1326 | { |
||
1327 | assert(s < 5); |
||
1328 | if (!(vtxBaseValid & (1 << s))) { |
||
1329 | const int index = tgsi.getSrc(s).getIndex(1); |
||
1330 | Value *rel = NULL; |
||
1331 | if (tgsi.getSrc(s).isIndirect(1)) |
||
1332 | rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL); |
||
1333 | vtxBaseValid |= 1 << s; |
||
1334 | vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel); |
||
1335 | } |
||
1336 | return vtxBase[s]; |
||
1337 | } |
||
1338 | |||
1339 | Value * |
||
1340 | Converter::fetchSrc(int s, int c) |
||
1341 | { |
||
1342 | Value *res; |
||
1343 | Value *ptr = NULL, *dimRel = NULL; |
||
1344 | |||
1345 | tgsi::Instruction::SrcRegister src = tgsi.getSrc(s); |
||
1346 | |||
1347 | if (src.isIndirect(0)) |
||
1348 | ptr = fetchSrc(src.getIndirect(0), 0, NULL); |
||
1349 | |||
1350 | if (src.is2D()) { |
||
1351 | switch (src.getFile()) { |
||
1352 | case TGSI_FILE_INPUT: |
||
1353 | dimRel = getVertexBase(s); |
||
1354 | break; |
||
1355 | case TGSI_FILE_CONSTANT: |
||
1356 | // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k] |
||
1357 | if (src.isIndirect(1)) |
||
1358 | dimRel = fetchSrc(src.getIndirect(1), 0, 0); |
||
1359 | break; |
||
1360 | default: |
||
1361 | break; |
||
1362 | } |
||
1363 | } |
||
1364 | |||
1365 | res = fetchSrc(src, c, ptr); |
||
1366 | |||
1367 | if (dimRel) |
||
1368 | res->getInsn()->setIndirect(0, 1, dimRel); |
||
1369 | |||
1370 | return applySrcMod(res, s, c); |
||
1371 | } |
||
1372 | |||
1373 | Converter::DataArray * |
||
1374 | Converter::getArrayForFile(unsigned file, int idx) |
||
1375 | { |
||
1376 | switch (file) { |
||
1377 | case TGSI_FILE_TEMPORARY: |
||
1378 | return &tData; |
||
1379 | case TGSI_FILE_PREDICATE: |
||
1380 | return &pData; |
||
1381 | case TGSI_FILE_ADDRESS: |
||
1382 | return &aData; |
||
1383 | case TGSI_FILE_OUTPUT: |
||
1384 | assert(prog->getType() == Program::TYPE_FRAGMENT); |
||
1385 | return &oData; |
||
1386 | default: |
||
1387 | assert(!"invalid/unhandled TGSI source file"); |
||
1388 | return NULL; |
||
1389 | } |
||
1390 | } |
||
1391 | |||
1392 | Value * |
||
1393 | Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) |
||
1394 | { |
||
1395 | const int idx2d = src.is2D() ? src.getIndex(1) : 0; |
||
1396 | const int idx = src.getIndex(0); |
||
1397 | const int swz = src.getSwizzle(c); |
||
1398 | |||
1399 | switch (src.getFile()) { |
||
1400 | case TGSI_FILE_IMMEDIATE: |
||
1401 | assert(!ptr); |
||
1402 | return loadImm(NULL, info->immd.data[idx * 4 + swz]); |
||
1403 | case TGSI_FILE_CONSTANT: |
||
1404 | return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); |
||
1405 | case TGSI_FILE_INPUT: |
||
1406 | if (prog->getType() == Program::TYPE_FRAGMENT) { |
||
1407 | // don't load masked inputs, won't be assigned a slot |
||
1408 | if (!ptr && !(info->in[idx].mask & (1 << swz))) |
||
1409 | return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f); |
||
1410 | if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) |
||
1411 | return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0)); |
||
1412 | return interpolate(src, c, ptr); |
||
1413 | } |
||
1414 | return mkLoadv(TYPE_U32, srcToSym(src, c), ptr); |
||
1415 | case TGSI_FILE_OUTPUT: |
||
1416 | assert(!"load from output file"); |
||
1417 | return NULL; |
||
1418 | case TGSI_FILE_SYSTEM_VALUE: |
||
1419 | assert(!ptr); |
||
1420 | return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); |
||
1421 | default: |
||
1422 | return getArrayForFile(src.getFile(), idx2d)->load( |
||
1423 | sub.cur->values, idx, swz, ptr); |
||
1424 | } |
||
1425 | } |
||
1426 | |||
1427 | Value * |
||
1428 | Converter::acquireDst(int d, int c) |
||
1429 | { |
||
1430 | const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); |
||
1431 | const unsigned f = dst.getFile(); |
||
1432 | const int idx = dst.getIndex(0); |
||
1433 | const int idx2d = dst.is2D() ? dst.getIndex(1) : 0; |
||
1434 | |||
1435 | if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE) |
||
1436 | return NULL; |
||
1437 | |||
1438 | if (dst.isIndirect(0) || |
||
1439 | f == TGSI_FILE_SYSTEM_VALUE || |
||
1440 | (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT)) |
||
1441 | return getScratch(); |
||
1442 | |||
1443 | return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c); |
||
1444 | } |
||
1445 | |||
1446 | void |
||
1447 | Converter::storeDst(int d, int c, Value *val) |
||
1448 | { |
||
1449 | const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); |
||
1450 | |||
1451 | switch (tgsi.getSaturate()) { |
||
1452 | case TGSI_SAT_NONE: |
||
1453 | break; |
||
1454 | case TGSI_SAT_ZERO_ONE: |
||
1455 | mkOp1(OP_SAT, dstTy, val, val); |
||
1456 | break; |
||
1457 | case TGSI_SAT_MINUS_PLUS_ONE: |
||
1458 | mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f)); |
||
1459 | mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f)); |
||
1460 | break; |
||
1461 | default: |
||
1462 | assert(!"invalid saturation mode"); |
||
1463 | break; |
||
1464 | } |
||
1465 | |||
1466 | Value *ptr = dst.isIndirect(0) ? |
||
1467 | fetchSrc(dst.getIndirect(0), 0, NULL) : NULL; |
||
1468 | |||
1469 | if (info->io.genUserClip > 0 && |
||
1470 | dst.getFile() == TGSI_FILE_OUTPUT && |
||
1471 | !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) { |
||
1472 | mkMov(clipVtx[c], val); |
||
1473 | val = clipVtx[c]; |
||
1474 | } |
||
1475 | |||
1476 | storeDst(dst, c, val, ptr); |
||
1477 | } |
||
1478 | |||
1479 | void |
||
1480 | Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, |
||
1481 | Value *val, Value *ptr) |
||
1482 | { |
||
1483 | const unsigned f = dst.getFile(); |
||
1484 | const int idx = dst.getIndex(0); |
||
1485 | const int idx2d = dst.is2D() ? dst.getIndex(1) : 0; |
||
1486 | |||
1487 | if (f == TGSI_FILE_SYSTEM_VALUE) { |
||
1488 | assert(!ptr); |
||
1489 | mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); |
||
1490 | } else |
||
1491 | if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { |
||
1492 | if (ptr || (info->out[idx].mask & (1 << c))) |
||
1493 | mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); |
||
1494 | } else |
||
1495 | if (f == TGSI_FILE_TEMPORARY || |
||
1496 | f == TGSI_FILE_PREDICATE || |
||
1497 | f == TGSI_FILE_ADDRESS || |
||
1498 | f == TGSI_FILE_OUTPUT) { |
||
1499 | getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val); |
||
1500 | } else { |
||
1501 | assert(!"invalid dst file"); |
||
1502 | } |
||
1503 | } |
||
1504 | |||
// Loop header: runs the following statement once for every channel (0..3)
// of destination @d of @inst that is not masked, with @chan set to the
// channel number. Parameters are parenthesized so that arbitrary
// expressions can be passed safely.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst)  \
   for ((chan) = 0; (chan) < 4; ++(chan))            \
      if (!(inst).getDst(d).isMasked(chan))
||
1508 | |||
1509 | Value * |
||
1510 | Converter::buildDot(int dim) |
||
1511 | { |
||
1512 | assert(dim > 0); |
||
1513 | |||
1514 | Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0); |
||
1515 | Value *dotp = getScratch(); |
||
1516 | |||
1517 | mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); |
||
1518 | |||
1519 | for (int c = 1; c < dim; ++c) { |
||
1520 | src0 = fetchSrc(0, c); |
||
1521 | src1 = fetchSrc(1, c); |
||
1522 | mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp); |
||
1523 | } |
||
1524 | return dotp; |
||
1525 | } |
||
1526 | |||
1527 | void |
||
1528 | Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork) |
||
1529 | { |
||
1530 | FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL); |
||
1531 | join->fixed = 1; |
||
1532 | conv->insertHead(join); |
||
1533 | |||
1534 | fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv); |
||
1535 | fork->insertBefore(fork->getExit(), fork->joinAt); |
||
1536 | } |
||
1537 | |||
1538 | void |
||
1539 | Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S) |
||
1540 | { |
||
1541 | unsigned rIdx = 0, sIdx = 0; |
||
1542 | |||
1543 | if (R >= 0) |
||
1544 | rIdx = tgsi.getSrc(R).getIndex(0); |
||
1545 | if (S >= 0) |
||
1546 | sIdx = tgsi.getSrc(S).getIndex(0); |
||
1547 | |||
1548 | tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx); |
||
1549 | |||
1550 | if (tgsi.getSrc(R).isIndirect(0)) { |
||
1551 | tex->tex.rIndirectSrc = s; |
||
1552 | tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL)); |
||
1553 | } |
||
1554 | if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) { |
||
1555 | tex->tex.sIndirectSrc = s; |
||
1556 | tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL)); |
||
1557 | } |
||
1558 | } |
||
1559 | |||
1560 | void |
||
1561 | Converter::handleTXQ(Value *dst0[4], enum TexQuery query) |
||
1562 | { |
||
1563 | TexInstruction *tex = new_TexInstruction(func, OP_TXQ); |
||
1564 | tex->tex.query = query; |
||
1565 | unsigned int c, d; |
||
1566 | |||
1567 | for (d = 0, c = 0; c < 4; ++c) { |
||
1568 | if (!dst0[c]) |
||
1569 | continue; |
||
1570 | tex->tex.mask |= 1 << c; |
||
1571 | tex->setDef(d++, dst0[c]); |
||
1572 | } |
||
1573 | tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level |
||
1574 | |||
1575 | setTexRS(tex, c, 1, -1); |
||
1576 | |||
1577 | bb->insertTail(tex); |
||
1578 | } |
||
1579 | |||
1580 | void |
||
1581 | Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask) |
||
1582 | { |
||
1583 | Value *proj = fetchSrc(0, 3); |
||
1584 | Instruction *insn = proj->getUniqueInsn(); |
||
1585 | int c; |
||
1586 | |||
1587 | if (insn->op == OP_PINTERP) { |
||
1588 | bb->insertTail(insn = cloneForward(func, insn)); |
||
1589 | insn->op = OP_LINTERP; |
||
1590 | insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode()); |
||
1591 | insn->setSrc(1, NULL); |
||
1592 | proj = insn->getDef(0); |
||
1593 | } |
||
1594 | proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj); |
||
1595 | |||
1596 | for (c = 0; c < 4; ++c) { |
||
1597 | if (!(mask & (1 << c))) |
||
1598 | continue; |
||
1599 | if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP) |
||
1600 | continue; |
||
1601 | mask &= ~(1 << c); |
||
1602 | |||
1603 | bb->insertTail(insn = cloneForward(func, insn)); |
||
1604 | insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode()); |
||
1605 | insn->setSrc(1, proj); |
||
1606 | dst[c] = insn->getDef(0); |
||
1607 | } |
||
1608 | if (!mask) |
||
1609 | return; |
||
1610 | |||
1611 | proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3)); |
||
1612 | |||
1613 | for (c = 0; c < 4; ++c) |
||
1614 | if (mask & (1 << c)) |
||
1615 | dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj); |
||
1616 | } |
||
1617 | |||
1618 | // order of nv50 ir sources: x y z layer lod/bias shadow |
||
1619 | // order of TGSI TEX sources: x y z layer shadow lod/bias |
||
1620 | // lowering will finally set the hw specific order (like array first on nvc0) |
||
1621 | void |
||
1622 | Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) |
||
1623 | { |
||
1624 | Value *val; |
||
1625 | Value *arg[4], *src[8]; |
||
1626 | Value *lod = NULL, *shd = NULL; |
||
1627 | unsigned int s, c, d; |
||
1628 | TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); |
||
1629 | |||
1630 | TexInstruction::Target tgt = tgsi.getTexture(code, R); |
||
1631 | |||
1632 | for (s = 0; s < tgt.getArgCount(); ++s) |
||
1633 | arg[s] = src[s] = fetchSrc(0, s); |
||
1634 | |||
1635 | if (texi->op == OP_TXL || texi->op == OP_TXB) |
||
1636 | lod = fetchSrc(L >> 4, L & 3); |
||
1637 | |||
1638 | if (C == 0x0f) |
||
1639 | C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src |
||
1640 | |||
1641 | if (tgt.isShadow()) |
||
1642 | shd = fetchSrc(C >> 4, C & 3); |
||
1643 | |||
1644 | if (texi->op == OP_TXD) { |
||
1645 | for (c = 0; c < tgt.getDim(); ++c) { |
||
1646 | texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c)); |
||
1647 | texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c)); |
||
1648 | } |
||
1649 | } |
||
1650 | |||
1651 | // cube textures don't care about projection value, it's divided out |
||
1652 | if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) { |
||
1653 | unsigned int n = tgt.getDim(); |
||
1654 | if (shd) { |
||
1655 | arg[n] = shd; |
||
1656 | ++n; |
||
1657 | assert(tgt.getDim() == tgt.getArgCount()); |
||
1658 | } |
||
1659 | loadProjTexCoords(src, arg, (1 << n) - 1); |
||
1660 | if (shd) |
||
1661 | shd = src[n - 1]; |
||
1662 | } |
||
1663 | |||
1664 | if (tgt.isCube()) { |
||
1665 | for (c = 0; c < 3; ++c) |
||
1666 | src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); |
||
1667 | val = getScratch(); |
||
1668 | mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); |
||
1669 | mkOp2(OP_MAX, TYPE_F32, val, src[2], val); |
||
1670 | mkOp1(OP_RCP, TYPE_F32, val, val); |
||
1671 | for (c = 0; c < 3; ++c) |
||
1672 | src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); |
||
1673 | } |
||
1674 | |||
1675 | for (c = 0, d = 0; c < 4; ++c) { |
||
1676 | if (dst[c]) { |
||
1677 | texi->setDef(d++, dst[c]); |
||
1678 | texi->tex.mask |= 1 << c; |
||
1679 | } else { |
||
1680 | // NOTE: maybe hook up def too, for CSE |
||
1681 | } |
||
1682 | } |
||
1683 | for (s = 0; s < tgt.getArgCount(); ++s) |
||
1684 | texi->setSrc(s, src[s]); |
||
1685 | if (lod) |
||
1686 | texi->setSrc(s++, lod); |
||
1687 | if (shd) |
||
1688 | texi->setSrc(s++, shd); |
||
1689 | |||
1690 | setTexRS(texi, s, R, S); |
||
1691 | |||
1692 | if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ) |
||
1693 | texi->tex.levelZero = true; |
||
1694 | |||
1695 | bb->insertTail(texi); |
||
1696 | } |
||
1697 | |||
1698 | // 1st source: xyz = coordinates, w = lod/sample |
||
1699 | // 2nd source: offset |
||
1700 | void |
||
1701 | Converter::handleTXF(Value *dst[4], int R, int L_M) |
||
1702 | { |
||
1703 | TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); |
||
1704 | int ms; |
||
1705 | unsigned int c, d, s; |
||
1706 | |||
1707 | texi->tex.target = tgsi.getTexture(code, R); |
||
1708 | |||
1709 | ms = texi->tex.target.isMS() ? 1 : 0; |
||
1710 | texi->tex.levelZero = ms; /* MS textures don't have mip-maps */ |
||
1711 | |||
1712 | for (c = 0, d = 0; c < 4; ++c) { |
||
1713 | if (dst[c]) { |
||
1714 | texi->setDef(d++, dst[c]); |
||
1715 | texi->tex.mask |= 1 << c; |
||
1716 | } |
||
1717 | } |
||
1718 | for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c) |
||
1719 | texi->setSrc(c, fetchSrc(0, c)); |
||
1720 | texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms |
||
1721 | |||
1722 | setTexRS(texi, c, R, -1); |
||
1723 | |||
1724 | for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { |
||
1725 | for (c = 0; c < 3; ++c) { |
||
1726 | texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info); |
||
1727 | if (texi->tex.offset[s][c]) |
||
1728 | texi->tex.useOffsets = s + 1; |
||
1729 | } |
||
1730 | } |
||
1731 | |||
1732 | bb->insertTail(texi); |
||
1733 | } |
||
1734 | |||
1735 | void |
||
1736 | Converter::handleLIT(Value *dst0[4]) |
||
1737 | { |
||
1738 | Value *val0 = NULL; |
||
1739 | unsigned int mask = tgsi.getDst(0).getMask(); |
||
1740 | |||
1741 | if (mask & (1 << 0)) |
||
1742 | loadImm(dst0[0], 1.0f); |
||
1743 | |||
1744 | if (mask & (1 << 3)) |
||
1745 | loadImm(dst0[3], 1.0f); |
||
1746 | |||
1747 | if (mask & (3 << 1)) { |
||
1748 | val0 = getScratch(); |
||
1749 | mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero); |
||
1750 | if (mask & (1 << 1)) |
||
1751 | mkMov(dst0[1], val0); |
||
1752 | } |
||
1753 | |||
1754 | if (mask & (1 << 2)) { |
||
1755 | Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3); |
||
1756 | Value *val1 = getScratch(), *val3 = getScratch(); |
||
1757 | |||
1758 | Value *pos128 = loadImm(NULL, +127.999999f); |
||
1759 | Value *neg128 = loadImm(NULL, -127.999999f); |
||
1760 | |||
1761 | mkOp2(OP_MAX, TYPE_F32, val1, src1, zero); |
||
1762 | mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128); |
||
1763 | mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128); |
||
1764 | mkOp2(OP_POW, TYPE_F32, val3, val1, val3); |
||
1765 | |||
1766 | mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0); |
||
1767 | } |
||
1768 | } |
||
1769 | |||
1770 | static inline bool |
||
1771 | isResourceSpecial(const int r) |
||
1772 | { |
||
1773 | return (r == TGSI_RESOURCE_GLOBAL || |
||
1774 | r == TGSI_RESOURCE_LOCAL || |
||
1775 | r == TGSI_RESOURCE_PRIVATE || |
||
1776 | r == TGSI_RESOURCE_INPUT); |
||
1777 | } |
||
1778 | |||
1779 | static inline bool |
||
1780 | isResourceRaw(const struct tgsi::Source *code, const int r) |
||
1781 | { |
||
1782 | return isResourceSpecial(r) || code->resources[r].raw; |
||
1783 | } |
||
1784 | |||
1785 | static inline nv50_ir::TexTarget |
||
1786 | getResourceTarget(const struct tgsi::Source *code, int r) |
||
1787 | { |
||
1788 | if (isResourceSpecial(r)) |
||
1789 | return nv50_ir::TEX_TARGET_BUFFER; |
||
1790 | return tgsi::translateTexture(code->resources.at(r).target); |
||
1791 | } |
||
1792 | |||
1793 | Symbol * |
||
1794 | Converter::getResourceBase(const int r) |
||
1795 | { |
||
1796 | Symbol *sym = NULL; |
||
1797 | |||
1798 | switch (r) { |
||
1799 | case TGSI_RESOURCE_GLOBAL: |
||
1800 | sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15); |
||
1801 | break; |
||
1802 | case TGSI_RESOURCE_LOCAL: |
||
1803 | assert(prog->getType() == Program::TYPE_COMPUTE); |
||
1804 | sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32, |
||
1805 | info->prop.cp.sharedOffset); |
||
1806 | break; |
||
1807 | case TGSI_RESOURCE_PRIVATE: |
||
1808 | sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32, |
||
1809 | info->bin.tlsSpace); |
||
1810 | break; |
||
1811 | case TGSI_RESOURCE_INPUT: |
||
1812 | assert(prog->getType() == Program::TYPE_COMPUTE); |
||
1813 | sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32, |
||
1814 | info->prop.cp.inputOffset); |
||
1815 | break; |
||
1816 | default: |
||
1817 | sym = new_Symbol(prog, |
||
1818 | nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot); |
||
1819 | break; |
||
1820 | } |
||
1821 | return sym; |
||
1822 | } |
||
1823 | |||
1824 | void |
||
1825 | Converter::getResourceCoords(std::vector |
||
1826 | { |
||
1827 | const int arg = |
||
1828 | TexInstruction::Target(getResourceTarget(code, r)).getArgCount(); |
||
1829 | |||
1830 | for (int c = 0; c < arg; ++c) |
||
1831 | coords.push_back(fetchSrc(s, c)); |
||
1832 | |||
1833 | // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk |
||
1834 | if (r == TGSI_RESOURCE_LOCAL || |
||
1835 | r == TGSI_RESOURCE_PRIVATE || |
||
1836 | r == TGSI_RESOURCE_INPUT) |
||
1837 | coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS), |
||
1838 | coords[0]); |
||
1839 | } |
||
1840 | |||
// Splits a 4-bit component mask into at most two runs of consecutive
// components, so that a raw load/store can be issued as one or two accesses.
//
// @comp  out: first component of each run (caller must zero-initialise)
// @size  out: number of components in each run (caller must zero-initialise)
// @mask  bit c set means component c is accessed
//
// Returns the number of runs written (0 for an empty mask, otherwise 1 or 2).
// A single run of exactly three components is split in two (2+1, or 1+2 when
// it starts at component 1).
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   // Nothing to access: report zero runs instead of one run of size 0.
   if (!mask)
      return 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else {
         if (size[n])
            // First gap after the first run: the second run starts no earlier
            // than the component following the gap. The base must account for
            // where the first run started, not only for its length.
            comp[n = 1] = comp[0] + size[0] + 1;
         else
            comp[n]++;
      }
      mask >>= 1;
   }
   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
||
1865 | |||
1866 | // For raw loads, granularity is 4 byte. |
||
1867 | // Usage of the texture read mask on OP_SULDP is not allowed. |
||
1868 | void |
||
1869 | Converter::handleLOAD(Value *dst0[4]) |
||
1870 | { |
||
1871 | const int r = tgsi.getSrc(0).getIndex(0); |
||
1872 | int c; |
||
1873 | std::vector |
||
1874 | |||
1875 | getResourceCoords(off, r, 1); |
||
1876 | |||
1877 | if (isResourceRaw(code, r)) { |
||
1878 | uint8_t mask = 0; |
||
1879 | uint8_t comp[2] = { 0, 0 }; |
||
1880 | uint8_t size[2] = { 0, 0 }; |
||
1881 | |||
1882 | Symbol *base = getResourceBase(r); |
||
1883 | |||
1884 | // determine the base and size of the at most 2 load ops |
||
1885 | for (c = 0; c < 4; ++c) |
||
1886 | if (!tgsi.getDst(0).isMasked(c)) |
||
1887 | mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X); |
||
1888 | |||
1889 | int n = partitionLoadStore(comp, size, mask); |
||
1890 | |||
1891 | src = off; |
||
1892 | |||
1893 | def.resize(4); // index by component, the ones we need will be non-NULL |
||
1894 | for (c = 0; c < 4; ++c) { |
||
1895 | if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c)) |
||
1896 | def[c] = dst0[c]; |
||
1897 | else |
||
1898 | if (mask & (1 << c)) |
||
1899 | def[c] = getScratch(); |
||
1900 | } |
||
1901 | |||
1902 | const bool useLd = isResourceSpecial(r) || |
||
1903 | (info->io.nv50styleSurfaces && |
||
1904 | code->resources[r].target == TGSI_TEXTURE_BUFFER); |
||
1905 | |||
1906 | for (int i = 0; i < n; ++i) { |
||
1907 | ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]); |
||
1908 | |||
1909 | if (comp[i]) // adjust x component of source address if necessary |
||
1910 | src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), |
||
1911 | off[0], mkImm(comp[i] * 4)); |
||
1912 | else |
||
1913 | src[0] = off[0]; |
||
1914 | |||
1915 | if (useLd) { |
||
1916 | Instruction *ld = |
||
1917 | mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]); |
||
1918 | for (size_t c = 1; c < ldv.size(); ++c) |
||
1919 | ld->setDef(c, ldv[c]); |
||
1920 | } else { |
||
1921 | mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot, |
||
1922 | 0, ldv, src)->dType = typeOfSize(size[i] * 4); |
||
1923 | } |
||
1924 | } |
||
1925 | } else { |
||
1926 | def.resize(4); |
||
1927 | for (c = 0; c < 4; ++c) { |
||
1928 | if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c)) |
||
1929 | def[c] = getScratch(); |
||
1930 | else |
||
1931 | def[c] = dst0[c]; |
||
1932 | } |
||
1933 | |||
1934 | mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0, |
||
1935 | def, off); |
||
1936 | } |
||
1937 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
1938 | if (dst0[c] != def[c]) |
||
1939 | mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); |
||
1940 | } |
||
1941 | |||
1942 | // For formatted stores, the write mask on OP_SUSTP can be used. |
||
1943 | // Raw stores have to be split. |
||
1944 | void |
||
1945 | Converter::handleSTORE() |
||
1946 | { |
||
1947 | const int r = tgsi.getDst(0).getIndex(0); |
||
1948 | int c; |
||
1949 | std::vector |
||
1950 | |||
1951 | getResourceCoords(off, r, 0); |
||
1952 | src = off; |
||
1953 | const int s = src.size(); |
||
1954 | |||
1955 | if (isResourceRaw(code, r)) { |
||
1956 | uint8_t comp[2] = { 0, 0 }; |
||
1957 | uint8_t size[2] = { 0, 0 }; |
||
1958 | |||
1959 | int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask()); |
||
1960 | |||
1961 | Symbol *base = getResourceBase(r); |
||
1962 | |||
1963 | const bool useSt = isResourceSpecial(r) || |
||
1964 | (info->io.nv50styleSurfaces && |
||
1965 | code->resources[r].target == TGSI_TEXTURE_BUFFER); |
||
1966 | |||
1967 | for (int i = 0; i < n; ++i) { |
||
1968 | if (comp[i]) // adjust x component of source address if necessary |
||
1969 | src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), |
||
1970 | off[0], mkImm(comp[i] * 4)); |
||
1971 | else |
||
1972 | src[0] = off[0]; |
||
1973 | |||
1974 | const DataType stTy = typeOfSize(size[i] * 4); |
||
1975 | |||
1976 | if (useSt) { |
||
1977 | Instruction *st = |
||
1978 | mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i])); |
||
1979 | for (c = 1; c < size[i]; ++c) |
||
1980 | st->setSrc(1 + c, fetchSrc(1, comp[i] + c)); |
||
1981 | st->setIndirect(0, 0, src[0]); |
||
1982 | } else { |
||
1983 | // attach values to be stored |
||
1984 | src.resize(s + size[i]); |
||
1985 | for (c = 0; c < size[i]; ++c) |
||
1986 | src[s + c] = fetchSrc(1, comp[i] + c); |
||
1987 | mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot, |
||
1988 | 0, dummy, src)->setType(stTy); |
||
1989 | } |
||
1990 | } |
||
1991 | } else { |
||
1992 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
1993 | src.push_back(fetchSrc(1, c)); |
||
1994 | |||
1995 | mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0, |
||
1996 | dummy, src)->tex.mask = tgsi.getDst(0).getMask(); |
||
1997 | } |
||
1998 | } |
||
1999 | |||
2000 | // XXX: These only work on resources with the single-component u32/s32 formats. |
||
2001 | // Therefore the result is replicated. This might not be intended by TGSI, but |
||
2002 | // operating on more than 1 component would produce undefined results because |
||
2003 | // they do not exist. |
||
2004 | void |
||
2005 | Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) |
||
2006 | { |
||
2007 | const int r = tgsi.getSrc(0).getIndex(0); |
||
2008 | std::vector |
||
2009 | std::vector |
||
2010 | LValue *dst = getScratch(); |
||
2011 | |||
2012 | getResourceCoords(srcv, r, 1); |
||
2013 | |||
2014 | if (isResourceSpecial(r)) { |
||
2015 | assert(r != TGSI_RESOURCE_INPUT); |
||
2016 | Instruction *insn; |
||
2017 | insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0)); |
||
2018 | insn->subOp = subOp; |
||
2019 | if (subOp == NV50_IR_SUBOP_ATOM_CAS) |
||
2020 | insn->setSrc(2, fetchSrc(3, 0)); |
||
2021 | insn->setIndirect(0, 0, srcv.at(0)); |
||
2022 | } else { |
||
2023 | operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP; |
||
2024 | TexTarget targ = getResourceTarget(code, r); |
||
2025 | int idx = code->resources[r].slot; |
||
2026 | defv.push_back(dst); |
||
2027 | srcv.push_back(fetchSrc(2, 0)); |
||
2028 | if (subOp == NV50_IR_SUBOP_ATOM_CAS) |
||
2029 | srcv.push_back(fetchSrc(3, 0)); |
||
2030 | TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv); |
||
2031 | tex->subOp = subOp; |
||
2032 | tex->tex.mask = 1; |
||
2033 | tex->setType(ty); |
||
2034 | } |
||
2035 | |||
2036 | for (int c = 0; c < 4; ++c) |
||
2037 | if (dst0[c]) |
||
2038 | dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov |
||
2039 | } |
||
2040 | |||
2041 | Converter::Subroutine * |
||
2042 | Converter::getSubroutine(unsigned ip) |
||
2043 | { |
||
2044 | std::map |
||
2045 | |||
2046 | if (it == sub.map.end()) |
||
2047 | it = sub.map.insert(std::make_pair( |
||
2048 | ip, Subroutine(new Function(prog, "SUB", ip)))).first; |
||
2049 | |||
2050 | return &it->second; |
||
2051 | } |
||
2052 | |||
2053 | Converter::Subroutine * |
||
2054 | Converter::getSubroutine(Function *f) |
||
2055 | { |
||
2056 | unsigned ip = f->getLabel(); |
||
2057 | std::map |
||
2058 | |||
2059 | if (it == sub.map.end()) |
||
2060 | it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; |
||
2061 | |||
2062 | return &it->second; |
||
2063 | } |
||
2064 | |||
2065 | bool |
||
2066 | Converter::isEndOfSubroutine(uint ip) |
||
2067 | { |
||
2068 | assert(ip < code->scan.num_instructions); |
||
2069 | tgsi::Instruction insn(&code->insns[ip]); |
||
2070 | return (insn.getOpcode() == TGSI_OPCODE_END || |
||
2071 | insn.getOpcode() == TGSI_OPCODE_ENDSUB || |
||
2072 | // does END occur at end of main or the very end ? |
||
2073 | insn.getOpcode() == TGSI_OPCODE_BGNSUB); |
||
2074 | } |
||
2075 | |||
2076 | bool |
||
2077 | Converter::handleInstruction(const struct tgsi_full_instruction *insn) |
||
2078 | { |
||
2079 | Instruction *geni; |
||
2080 | |||
2081 | Value *dst0[4], *rDst0[4]; |
||
2082 | Value *src0, *src1, *src2; |
||
2083 | Value *val0, *val1; |
||
2084 | int c; |
||
2085 | |||
2086 | tgsi = tgsi::Instruction(insn); |
||
2087 | |||
2088 | bool useScratchDst = tgsi.checkDstSrcAliasing(); |
||
2089 | |||
2090 | operation op = tgsi.getOP(); |
||
2091 | dstTy = tgsi.inferDstType(); |
||
2092 | srcTy = tgsi.inferSrcType(); |
||
2093 | |||
2094 | unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; |
||
2095 | |||
2096 | if (tgsi.dstCount()) { |
||
2097 | for (c = 0; c < 4; ++c) { |
||
2098 | rDst0[c] = acquireDst(0, c); |
||
2099 | dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; |
||
2100 | } |
||
2101 | } |
||
2102 | |||
2103 | switch (tgsi.getOpcode()) { |
||
2104 | case TGSI_OPCODE_ADD: |
||
2105 | case TGSI_OPCODE_UADD: |
||
2106 | case TGSI_OPCODE_AND: |
||
2107 | case TGSI_OPCODE_DIV: |
||
2108 | case TGSI_OPCODE_IDIV: |
||
2109 | case TGSI_OPCODE_UDIV: |
||
2110 | case TGSI_OPCODE_MAX: |
||
2111 | case TGSI_OPCODE_MIN: |
||
2112 | case TGSI_OPCODE_IMAX: |
||
2113 | case TGSI_OPCODE_IMIN: |
||
2114 | case TGSI_OPCODE_UMAX: |
||
2115 | case TGSI_OPCODE_UMIN: |
||
2116 | case TGSI_OPCODE_MOD: |
||
2117 | case TGSI_OPCODE_UMOD: |
||
2118 | case TGSI_OPCODE_MUL: |
||
2119 | case TGSI_OPCODE_UMUL: |
||
2120 | case TGSI_OPCODE_OR: |
||
2121 | case TGSI_OPCODE_POW: |
||
2122 | case TGSI_OPCODE_SHL: |
||
2123 | case TGSI_OPCODE_ISHR: |
||
2124 | case TGSI_OPCODE_USHR: |
||
2125 | case TGSI_OPCODE_SUB: |
||
2126 | case TGSI_OPCODE_XOR: |
||
2127 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2128 | src0 = fetchSrc(0, c); |
||
2129 | src1 = fetchSrc(1, c); |
||
2130 | mkOp2(op, dstTy, dst0[c], src0, src1); |
||
2131 | } |
||
2132 | break; |
||
2133 | case TGSI_OPCODE_MAD: |
||
2134 | case TGSI_OPCODE_UMAD: |
||
2135 | case TGSI_OPCODE_SAD: |
||
2136 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2137 | src0 = fetchSrc(0, c); |
||
2138 | src1 = fetchSrc(1, c); |
||
2139 | src2 = fetchSrc(2, c); |
||
2140 | mkOp3(op, dstTy, dst0[c], src0, src1, src2); |
||
2141 | } |
||
2142 | break; |
||
2143 | case TGSI_OPCODE_MOV: |
||
2144 | case TGSI_OPCODE_ABS: |
||
2145 | case TGSI_OPCODE_CEIL: |
||
2146 | case TGSI_OPCODE_FLR: |
||
2147 | case TGSI_OPCODE_TRUNC: |
||
2148 | case TGSI_OPCODE_RCP: |
||
2149 | case TGSI_OPCODE_IABS: |
||
2150 | case TGSI_OPCODE_INEG: |
||
2151 | case TGSI_OPCODE_NOT: |
||
2152 | case TGSI_OPCODE_DDX: |
||
2153 | case TGSI_OPCODE_DDY: |
||
2154 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2155 | mkOp1(op, dstTy, dst0[c], fetchSrc(0, c)); |
||
2156 | break; |
||
2157 | case TGSI_OPCODE_RSQ: |
||
2158 | src0 = fetchSrc(0, 0); |
||
2159 | val0 = getScratch(); |
||
2160 | mkOp1(OP_ABS, TYPE_F32, val0, src0); |
||
2161 | mkOp1(OP_RSQ, TYPE_F32, val0, val0); |
||
2162 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2163 | mkMov(dst0[c], val0); |
||
2164 | break; |
||
2165 | case TGSI_OPCODE_ARL: |
||
2166 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2167 | src0 = fetchSrc(0, c); |
||
2168 | mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M; |
||
2169 | mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4)); |
||
2170 | } |
||
2171 | break; |
||
2172 | case TGSI_OPCODE_UARL: |
||
2173 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2174 | mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4)); |
||
2175 | break; |
||
2176 | case TGSI_OPCODE_EX2: |
||
2177 | case TGSI_OPCODE_LG2: |
||
2178 | val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0); |
||
2179 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2180 | mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); |
||
2181 | break; |
||
2182 | case TGSI_OPCODE_COS: |
||
2183 | case TGSI_OPCODE_SIN: |
||
2184 | val0 = getScratch(); |
||
2185 | if (mask & 7) { |
||
2186 | mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0)); |
||
2187 | mkOp1(op, TYPE_F32, val0, val0); |
||
2188 | for (c = 0; c < 3; ++c) |
||
2189 | if (dst0[c]) |
||
2190 | mkMov(dst0[c], val0); |
||
2191 | } |
||
2192 | if (dst0[3]) { |
||
2193 | mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3)); |
||
2194 | mkOp1(op, TYPE_F32, dst0[3], val0); |
||
2195 | } |
||
2196 | break; |
||
2197 | case TGSI_OPCODE_SCS: |
||
2198 | if (mask & 3) { |
||
2199 | val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0)); |
||
2200 | if (dst0[0]) |
||
2201 | mkOp1(OP_COS, TYPE_F32, dst0[0], val0); |
||
2202 | if (dst0[1]) |
||
2203 | mkOp1(OP_SIN, TYPE_F32, dst0[1], val0); |
||
2204 | } |
||
2205 | if (dst0[2]) |
||
2206 | loadImm(dst0[2], 0.0f); |
||
2207 | if (dst0[3]) |
||
2208 | loadImm(dst0[3], 1.0f); |
||
2209 | break; |
||
2210 | case TGSI_OPCODE_EXP: |
||
2211 | src0 = fetchSrc(0, 0); |
||
2212 | val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0); |
||
2213 | if (dst0[1]) |
||
2214 | mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0); |
||
2215 | if (dst0[0]) |
||
2216 | mkOp1(OP_EX2, TYPE_F32, dst0[0], val0); |
||
2217 | if (dst0[2]) |
||
2218 | mkOp1(OP_EX2, TYPE_F32, dst0[2], src0); |
||
2219 | if (dst0[3]) |
||
2220 | loadImm(dst0[3], 1.0f); |
||
2221 | break; |
||
2222 | case TGSI_OPCODE_LOG: |
||
2223 | src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0)); |
||
2224 | val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0); |
||
2225 | if (dst0[0] || dst0[1]) |
||
2226 | val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0); |
||
2227 | if (dst0[1]) { |
||
2228 | mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); |
||
2229 | mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); |
||
2230 | mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); |
||
2231 | } |
||
2232 | if (dst0[3]) |
||
2233 | loadImm(dst0[3], 1.0f); |
||
2234 | break; |
||
2235 | case TGSI_OPCODE_DP2: |
||
2236 | val0 = buildDot(2); |
||
2237 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2238 | mkMov(dst0[c], val0); |
||
2239 | break; |
||
2240 | case TGSI_OPCODE_DP3: |
||
2241 | val0 = buildDot(3); |
||
2242 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2243 | mkMov(dst0[c], val0); |
||
2244 | break; |
||
2245 | case TGSI_OPCODE_DP4: |
||
2246 | val0 = buildDot(4); |
||
2247 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2248 | mkMov(dst0[c], val0); |
||
2249 | break; |
||
2250 | case TGSI_OPCODE_DPH: |
||
2251 | val0 = buildDot(3); |
||
2252 | src1 = fetchSrc(1, 3); |
||
2253 | mkOp2(OP_ADD, TYPE_F32, val0, val0, src1); |
||
2254 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2255 | mkMov(dst0[c], val0); |
||
2256 | break; |
||
2257 | case TGSI_OPCODE_DST: |
||
2258 | if (dst0[0]) |
||
2259 | loadImm(dst0[0], 1.0f); |
||
2260 | if (dst0[1]) { |
||
2261 | src0 = fetchSrc(0, 1); |
||
2262 | src1 = fetchSrc(1, 1); |
||
2263 | mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); |
||
2264 | } |
||
2265 | if (dst0[2]) |
||
2266 | mkMov(dst0[2], fetchSrc(0, 2)); |
||
2267 | if (dst0[3]) |
||
2268 | mkMov(dst0[3], fetchSrc(1, 3)); |
||
2269 | break; |
||
2270 | case TGSI_OPCODE_LRP: |
||
2271 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2272 | src0 = fetchSrc(0, c); |
||
2273 | src1 = fetchSrc(1, c); |
||
2274 | src2 = fetchSrc(2, c); |
||
2275 | mkOp3(OP_MAD, TYPE_F32, dst0[c], |
||
2276 | mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); |
||
2277 | } |
||
2278 | break; |
||
2279 | case TGSI_OPCODE_LIT: |
||
2280 | handleLIT(dst0); |
||
2281 | break; |
||
2282 | case TGSI_OPCODE_XPD: |
||
2283 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2284 | if (c < 3) { |
||
2285 | val0 = getSSA(); |
||
2286 | src0 = fetchSrc(1, (c + 1) % 3); |
||
2287 | src1 = fetchSrc(0, (c + 2) % 3); |
||
2288 | mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); |
||
2289 | mkOp1(OP_NEG, TYPE_F32, val0, val0); |
||
2290 | |||
2291 | src0 = fetchSrc(0, (c + 1) % 3); |
||
2292 | src1 = fetchSrc(1, (c + 2) % 3); |
||
2293 | mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); |
||
2294 | } else { |
||
2295 | loadImm(dst0[c], 1.0f); |
||
2296 | } |
||
2297 | } |
||
2298 | break; |
||
2299 | case TGSI_OPCODE_ISSG: |
||
2300 | case TGSI_OPCODE_SSG: |
||
2301 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2302 | src0 = fetchSrc(0, c); |
||
2303 | val0 = getScratch(); |
||
2304 | val1 = getScratch(); |
||
2305 | mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero); |
||
2306 | mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero); |
||
2307 | if (srcTy == TYPE_F32) |
||
2308 | mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1); |
||
2309 | else |
||
2310 | mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0); |
||
2311 | } |
||
2312 | break; |
||
2313 | case TGSI_OPCODE_UCMP: |
||
2314 | case TGSI_OPCODE_CMP: |
||
2315 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2316 | src0 = fetchSrc(0, c); |
||
2317 | src1 = fetchSrc(1, c); |
||
2318 | src2 = fetchSrc(2, c); |
||
2319 | if (src1 == src2) |
||
2320 | mkMov(dst0[c], src1); |
||
2321 | else |
||
2322 | mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE, |
||
2323 | srcTy, dst0[c], src1, src2, src0); |
||
2324 | } |
||
2325 | break; |
||
2326 | case TGSI_OPCODE_FRC: |
||
2327 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2328 | src0 = fetchSrc(0, c); |
||
2329 | val0 = getScratch(); |
||
2330 | mkOp1(OP_FLOOR, TYPE_F32, val0, src0); |
||
2331 | mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); |
||
2332 | } |
||
2333 | break; |
||
2334 | case TGSI_OPCODE_ROUND: |
||
2335 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2336 | mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c)) |
||
2337 | ->rnd = ROUND_NI; |
||
2338 | break; |
||
2339 | case TGSI_OPCODE_CLAMP: |
||
2340 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2341 | src0 = fetchSrc(0, c); |
||
2342 | src1 = fetchSrc(1, c); |
||
2343 | src2 = fetchSrc(2, c); |
||
2344 | val0 = getScratch(); |
||
2345 | mkOp2(OP_MIN, TYPE_F32, val0, src0, src1); |
||
2346 | mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2); |
||
2347 | } |
||
2348 | break; |
||
2349 | case TGSI_OPCODE_SLT: |
||
2350 | case TGSI_OPCODE_SGE: |
||
2351 | case TGSI_OPCODE_SEQ: |
||
2352 | case TGSI_OPCODE_SFL: |
||
2353 | case TGSI_OPCODE_SGT: |
||
2354 | case TGSI_OPCODE_SLE: |
||
2355 | case TGSI_OPCODE_SNE: |
||
2356 | case TGSI_OPCODE_STR: |
||
2357 | case TGSI_OPCODE_ISGE: |
||
2358 | case TGSI_OPCODE_ISLT: |
||
2359 | case TGSI_OPCODE_USEQ: |
||
2360 | case TGSI_OPCODE_USGE: |
||
2361 | case TGSI_OPCODE_USLT: |
||
2362 | case TGSI_OPCODE_USNE: |
||
2363 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
||
2364 | src0 = fetchSrc(0, c); |
||
2365 | src1 = fetchSrc(1, c); |
||
2366 | mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1); |
||
2367 | } |
||
2368 | break; |
||
2369 | case TGSI_OPCODE_KILL_IF: |
||
2370 | val0 = new_LValue(func, FILE_PREDICATE); |
||
2371 | for (c = 0; c < 4; ++c) { |
||
2372 | mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero); |
||
2373 | mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0); |
||
2374 | } |
||
2375 | break; |
||
2376 | case TGSI_OPCODE_KILL: |
||
2377 | mkOp(OP_DISCARD, TYPE_NONE, NULL); |
||
2378 | break; |
||
2379 | case TGSI_OPCODE_TEX: |
||
2380 | case TGSI_OPCODE_TXB: |
||
2381 | case TGSI_OPCODE_TXL: |
||
2382 | case TGSI_OPCODE_TXP: |
||
2383 | // R S L C Dx Dy |
||
2384 | handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00); |
||
2385 | break; |
||
2386 | case TGSI_OPCODE_TXD: |
||
2387 | handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20); |
||
2388 | break; |
||
2389 | case TGSI_OPCODE_TEX2: |
||
2390 | handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00); |
||
2391 | break; |
||
2392 | case TGSI_OPCODE_TXB2: |
||
2393 | case TGSI_OPCODE_TXL2: |
||
2394 | handleTEX(dst0, 2, 2, 0x10, 0x11, 0x00, 0x00); |
||
2395 | break; |
||
2396 | case TGSI_OPCODE_SAMPLE: |
||
2397 | case TGSI_OPCODE_SAMPLE_B: |
||
2398 | case TGSI_OPCODE_SAMPLE_D: |
||
2399 | case TGSI_OPCODE_SAMPLE_L: |
||
2400 | case TGSI_OPCODE_SAMPLE_C: |
||
2401 | case TGSI_OPCODE_SAMPLE_C_LZ: |
||
2402 | handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); |
||
2403 | break; |
||
2404 | case TGSI_OPCODE_TXF: |
||
2405 | handleTXF(dst0, 1, 0x03); |
||
2406 | break; |
||
2407 | case TGSI_OPCODE_SAMPLE_I: |
||
2408 | handleTXF(dst0, 1, 0x03); |
||
2409 | break; |
||
2410 | case TGSI_OPCODE_SAMPLE_I_MS: |
||
2411 | handleTXF(dst0, 1, 0x20); |
||
2412 | break; |
||
2413 | case TGSI_OPCODE_TXQ: |
||
2414 | case TGSI_OPCODE_SVIEWINFO: |
||
2415 | handleTXQ(dst0, TXQ_DIMS); |
||
2416 | break; |
||
2417 | case TGSI_OPCODE_F2I: |
||
2418 | case TGSI_OPCODE_F2U: |
||
2419 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2420 | mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z; |
||
2421 | break; |
||
2422 | case TGSI_OPCODE_I2F: |
||
2423 | case TGSI_OPCODE_U2F: |
||
2424 | FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
||
2425 | mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); |
||
2426 | break; |
||
2427 | case TGSI_OPCODE_EMIT: |
||
2428 | case TGSI_OPCODE_ENDPRIM: |
||
2429 | // get vertex stream if specified (must be immediate) |
||
2430 | src0 = tgsi.srcCount() ? |
||
2431 | mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero; |
||
2432 | mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; |
||
2433 | break; |
||
2434 | case TGSI_OPCODE_IF: |
||
2435 | case TGSI_OPCODE_UIF: |
||
2436 | { |
||
2437 | BasicBlock *ifBB = new BasicBlock(func); |
||
2438 | |||
2439 | bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); |
||
2440 | condBBs.push(bb); |
||
2441 | joinBBs.push(bb); |
||
2442 | |||
2443 | mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy); |
||
2444 | |||
2445 | setPosition(ifBB, true); |
||
2446 | } |
||
2447 | break; |
||
2448 | case TGSI_OPCODE_ELSE: |
||
2449 | { |
||
2450 | BasicBlock *elseBB = new BasicBlock(func); |
||
2451 | BasicBlock *forkBB = reinterpret_cast |
||
2452 | |||
2453 | forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); |
||
2454 | condBBs.push(bb); |
||
2455 | |||
2456 | forkBB->getExit()->asFlow()->target.bb = elseBB; |
||
2457 | if (!bb->isTerminated()) |
||
2458 | mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL); |
||
2459 | |||
2460 | setPosition(elseBB, true); |
||
2461 | } |
||
2462 | break; |
||
2463 | case TGSI_OPCODE_ENDIF: |
||
2464 | { |
||
2465 | BasicBlock *convBB = new BasicBlock(func); |
||
2466 | BasicBlock *prevBB = reinterpret_cast |
||
2467 | BasicBlock *forkBB = reinterpret_cast |
||
2468 | |||
2469 | if (!bb->isTerminated()) { |
||
2470 | // we only want join if none of the clauses ended with CONT/BREAK/RET |
||
2471 | if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6) |
||
2472 | insertConvergenceOps(convBB, forkBB); |
||
2473 | mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL); |
||
2474 | bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); |
||
2475 | } |
||
2476 | |||
2477 | if (prevBB->getExit()->op == OP_BRA) { |
||
2478 | prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); |
||
2479 | prevBB->getExit()->asFlow()->target.bb = convBB; |
||
2480 | } |
||
2481 | setPosition(convBB, true); |
||
2482 | } |
||
2483 | break; |
||
2484 | case TGSI_OPCODE_BGNLOOP: |
||
2485 | { |
||
2486 | BasicBlock *lbgnBB = new BasicBlock(func); |
||
2487 | BasicBlock *lbrkBB = new BasicBlock(func); |
||
2488 | |||
2489 | loopBBs.push(lbgnBB); |
||
2490 | breakBBs.push(lbrkBB); |
||
2491 | if (loopBBs.getSize() > func->loopNestingBound) |
||
2492 | func->loopNestingBound++; |
||
2493 | |||
2494 | mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL); |
||
2495 | |||
2496 | bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE); |
||
2497 | setPosition(lbgnBB, true); |
||
2498 | mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL); |
||
2499 | } |
||
2500 | break; |
||
2501 | case TGSI_OPCODE_ENDLOOP: |
||
2502 | { |
||
2503 | BasicBlock *loopBB = reinterpret_cast |
||
2504 | |||
2505 | if (!bb->isTerminated()) { |
||
2506 | mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); |
||
2507 | bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); |
||
2508 | } |
||
2509 | setPosition(reinterpret_cast |
||
2510 | } |
||
2511 | break; |
||
2512 | case TGSI_OPCODE_BRK: |
||
2513 | { |
||
2514 | if (bb->isTerminated()) |
||
2515 | break; |
||
2516 | BasicBlock *brkBB = reinterpret_cast |
||
2517 | mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL); |
||
2518 | bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS); |
||
2519 | } |
||
2520 | break; |
||
2521 | case TGSI_OPCODE_CONT: |
||
2522 | { |
||
2523 | if (bb->isTerminated()) |
||
2524 | break; |
||
2525 | BasicBlock *contBB = reinterpret_cast |
||
2526 | mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL); |
||
2527 | contBB->explicitCont = true; |
||
2528 | bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); |
||
2529 | } |
||
2530 | break; |
||
2531 | case TGSI_OPCODE_BGNSUB: |
||
2532 | { |
||
2533 | Subroutine *s = getSubroutine(ip); |
||
2534 | BasicBlock *entry = new BasicBlock(s->f); |
||
2535 | BasicBlock *leave = new BasicBlock(s->f); |
||
2536 | |||
2537 | // multiple entrypoints possible, keep the graph connected |
||
2538 | if (prog->getType() == Program::TYPE_COMPUTE) |
||
2539 | prog->main->call.attach(&s->f->call, Graph::Edge::TREE); |
||
2540 | |||
2541 | sub.cur = s; |
||
2542 | s->f->setEntry(entry); |
||
2543 | s->f->setExit(leave); |
||
2544 | setPosition(entry, true); |
||
2545 | return true; |
||
2546 | } |
||
2547 | case TGSI_OPCODE_ENDSUB: |
||
2548 | { |
||
2549 | sub.cur = getSubroutine(prog->main); |
||
2550 | setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true); |
||
2551 | return true; |
||
2552 | } |
||
2553 | case TGSI_OPCODE_CAL: |
||
2554 | { |
||
2555 | Subroutine *s = getSubroutine(tgsi.getLabel()); |
||
2556 | mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL); |
||
2557 | func->call.attach(&s->f->call, Graph::Edge::TREE); |
||
2558 | return true; |
||
2559 | } |
||
2560 | case TGSI_OPCODE_RET: |
||
2561 | { |
||
2562 | if (bb->isTerminated()) |
||
2563 | return true; |
||
2564 | BasicBlock *leave = BasicBlock::get(func->cfgExit); |
||
2565 | |||
2566 | if (!isEndOfSubroutine(ip + 1)) { |
||
2567 | // insert a PRERET at the entry if this is an early return |
||
2568 | // (only needed for sharing code in the epilogue) |
||
2569 | BasicBlock *pos = getBB(); |
||
2570 | setPosition(BasicBlock::get(func->cfg.getRoot()), false); |
||
2571 | mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1; |
||
2572 | setPosition(pos, true); |
||
2573 | } |
||
2574 | mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1; |
||
2575 | bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS); |
||
2576 | } |
||
2577 | break; |
||
2578 | case TGSI_OPCODE_END: |
||
2579 | { |
||
2580 | // attach and generate epilogue code |
||
2581 | BasicBlock *epilogue = BasicBlock::get(func->cfgExit); |
||
2582 | bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE); |
||
2583 | setPosition(epilogue, true); |
||
2584 | if (prog->getType() == Program::TYPE_FRAGMENT) |
||
2585 | exportOutputs(); |
||
2586 | if (info->io.genUserClip > 0) |
||
2587 | handleUserClipPlanes(); |
||
2588 | mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; |
||
2589 | } |
||
2590 | break; |
||
2591 | case TGSI_OPCODE_SWITCH: |
||
2592 | case TGSI_OPCODE_CASE: |
||
2593 | ERROR("switch/case opcode encountered, should have been lowered\n"); |
||
2594 | abort(); |
||
2595 | break; |
||
2596 | case TGSI_OPCODE_LOAD: |
||
2597 | handleLOAD(dst0); |
||
2598 | break; |
||
2599 | case TGSI_OPCODE_STORE: |
||
2600 | handleSTORE(); |
||
2601 | break; |
||
2602 | case TGSI_OPCODE_BARRIER: |
||
2603 | geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); |
||
2604 | geni->fixed = 1; |
||
2605 | geni->subOp = NV50_IR_SUBOP_BAR_SYNC; |
||
2606 | break; |
||
2607 | case TGSI_OPCODE_MFENCE: |
||
2608 | case TGSI_OPCODE_LFENCE: |
||
2609 | case TGSI_OPCODE_SFENCE: |
||
2610 | geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); |
||
2611 | geni->fixed = 1; |
||
2612 | geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); |
||
2613 | break; |
||
2614 | case TGSI_OPCODE_ATOMUADD: |
||
2615 | case TGSI_OPCODE_ATOMXCHG: |
||
2616 | case TGSI_OPCODE_ATOMCAS: |
||
2617 | case TGSI_OPCODE_ATOMAND: |
||
2618 | case TGSI_OPCODE_ATOMOR: |
||
2619 | case TGSI_OPCODE_ATOMXOR: |
||
2620 | case TGSI_OPCODE_ATOMUMIN: |
||
2621 | case TGSI_OPCODE_ATOMIMIN: |
||
2622 | case TGSI_OPCODE_ATOMUMAX: |
||
2623 | case TGSI_OPCODE_ATOMIMAX: |
||
2624 | handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); |
||
2625 | break; |
||
2626 | default: |
||
2627 | ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); |
||
2628 | assert(0); |
||
2629 | break; |
||
2630 | } |
||
2631 | |||
2632 | if (tgsi.dstCount()) { |
||
2633 | for (c = 0; c < 4; ++c) { |
||
2634 | if (!dst0[c]) |
||
2635 | continue; |
||
2636 | if (dst0[c] != rDst0[c]) |
||
2637 | mkMov(rDst0[c], dst0[c]); |
||
2638 | storeDst(0, c, rDst0[c]); |
||
2639 | } |
||
2640 | } |
||
2641 | vtxBaseValid = 0; |
||
2642 | |||
2643 | return true; |
||
2644 | } |
||
2645 | |||
2646 | void |
||
2647 | Converter::handleUserClipPlanes() |
||
2648 | { |
||
2649 | Value *res[8]; |
||
2650 | int n, i, c; |
||
2651 | |||
2652 | for (c = 0; c < 4; ++c) { |
||
2653 | for (i = 0; i < info->io.genUserClip; ++i) { |
||
2654 | Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot, |
||
2655 | TYPE_F32, info->io.ucpBase + i * 16 + c * 4); |
||
2656 | Value *ucp = mkLoadv(TYPE_F32, sym, NULL); |
||
2657 | if (c == 0) |
||
2658 | res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); |
||
2659 | else |
||
2660 | mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); |
||
2661 | } |
||
2662 | } |
||
2663 | |||
2664 | const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; |
||
2665 | |||
2666 | for (i = 0; i < info->io.genUserClip; ++i) { |
||
2667 | n = i / 4 + first; |
||
2668 | c = i % 4; |
||
2669 | Symbol *sym = |
||
2670 | mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); |
||
2671 | mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); |
||
2672 | } |
||
2673 | } |
||
2674 | |||
2675 | void |
||
2676 | Converter::exportOutputs() |
||
2677 | { |
||
2678 | for (unsigned int i = 0; i < info->numOutputs; ++i) { |
||
2679 | for (unsigned int c = 0; c < 4; ++c) { |
||
2680 | if (!oData.exists(sub.cur->values, i, c)) |
||
2681 | continue; |
||
2682 | Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, |
||
2683 | info->out[i].slot[c] * 4); |
||
2684 | Value *val = oData.load(sub.cur->values, i, c, NULL); |
||
2685 | if (val) |
||
2686 | mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); |
||
2687 | } |
||
2688 | } |
||
2689 | } |
||
2690 | |||
2691 | Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), |
||
2692 | code(code), |
||
2693 | tgsi(NULL), |
||
2694 | tData(this), aData(this), pData(this), oData(this) |
||
2695 | { |
||
2696 | info = code->info; |
||
2697 | |||
2698 | const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR; |
||
2699 | |||
2700 | const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY); |
||
2701 | const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE); |
||
2702 | const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS); |
||
2703 | const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT); |
||
2704 | |||
2705 | tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0); |
||
2706 | pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); |
||
2707 | aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0); |
||
2708 | oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); |
||
2709 | |||
2710 | zero = mkImm((uint32_t)0); |
||
2711 | |||
2712 | vtxBaseValid = 0; |
||
2713 | } |
||
2714 | |||
2715 | Converter::~Converter() |
||
2716 | { |
||
2717 | } |
||
2718 | |||
2719 | inline const Converter::Location * |
||
2720 | Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v) |
||
2721 | { |
||
2722 | ValueMap::l_iterator it = s->values.l.find(v); |
||
2723 | return it == s->values.l.end() ? NULL : &it->second; |
||
2724 | } |
||
2725 | |||
2726 | template |
||
2727 | Converter::BindArgumentsPass::updateCallArgs( |
||
2728 | Instruction *i, void (Instruction::*setArg)(int, Value *), |
||
2729 | T (Function::*proto)) |
||
2730 | { |
||
2731 | Function *g = i->asFlow()->target.fn; |
||
2732 | Subroutine *subg = conv.getSubroutine(g); |
||
2733 | |||
2734 | for (unsigned a = 0; a < (g->*proto).size(); ++a) { |
||
2735 | Value *v = (g->*proto)[a].get(); |
||
2736 | const Converter::Location &l = *getValueLocation(subg, v); |
||
2737 | Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx); |
||
2738 | |||
2739 | (i->*setArg)(a, array->acquire(sub->values, l.i, l.c)); |
||
2740 | } |
||
2741 | } |
||
2742 | |||
2743 | template |
||
2744 | Converter::BindArgumentsPass::updatePrototype( |
||
2745 | BitSet *set, void (Function::*updateSet)(), T (Function::*proto)) |
||
2746 | { |
||
2747 | (func->*updateSet)(); |
||
2748 | |||
2749 | for (unsigned i = 0; i < set->getSize(); ++i) { |
||
2750 | Value *v = func->getLValue(i); |
||
2751 | const Converter::Location *l = getValueLocation(sub, v); |
||
2752 | |||
2753 | // only include values with a matching TGSI register |
||
2754 | if (set->test(i) && l && !conv.code->locals.count(*l)) |
||
2755 | (func->*proto).push_back(v); |
||
2756 | } |
||
2757 | } |
||
2758 | |||
2759 | bool |
||
2760 | Converter::BindArgumentsPass::visit(Function *f) |
||
2761 | { |
||
2762 | sub = conv.getSubroutine(f); |
||
2763 | |||
2764 | for (ArrayList::Iterator bi = f->allBBlocks.iterator(); |
||
2765 | !bi.end(); bi.next()) { |
||
2766 | for (Instruction *i = BasicBlock::get(bi)->getFirst(); |
||
2767 | i; i = i->next) { |
||
2768 | if (i->op == OP_CALL && !i->asFlow()->builtin) { |
||
2769 | updateCallArgs(i, &Instruction::setSrc, &Function::ins); |
||
2770 | updateCallArgs(i, &Instruction::setDef, &Function::outs); |
||
2771 | } |
||
2772 | } |
||
2773 | } |
||
2774 | |||
2775 | if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE) |
||
2776 | return true; |
||
2777 | updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet, |
||
2778 | &Function::buildLiveSets, &Function::ins); |
||
2779 | updatePrototype(&BasicBlock::get(f->cfgExit)->defSet, |
||
2780 | &Function::buildDefSets, &Function::outs); |
||
2781 | |||
2782 | return true; |
||
2783 | } |
||
2784 | |||
2785 | bool |
||
2786 | Converter::run() |
||
2787 | { |
||
2788 | BasicBlock *entry = new BasicBlock(prog->main); |
||
2789 | BasicBlock *leave = new BasicBlock(prog->main); |
||
2790 | |||
2791 | prog->main->setEntry(entry); |
||
2792 | prog->main->setExit(leave); |
||
2793 | |||
2794 | setPosition(entry, true); |
||
2795 | sub.cur = getSubroutine(prog->main); |
||
2796 | |||
2797 | if (info->io.genUserClip > 0) { |
||
2798 | for (int c = 0; c < 4; ++c) |
||
2799 | clipVtx[c] = getScratch(); |
||
2800 | } |
||
2801 | |||
2802 | if (prog->getType() == Program::TYPE_FRAGMENT) { |
||
2803 | Symbol *sv = mkSysVal(SV_POSITION, 3); |
||
2804 | fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); |
||
2805 | mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); |
||
2806 | } |
||
2807 | |||
2808 | for (ip = 0; ip < code->scan.num_instructions; ++ip) { |
||
2809 | if (!handleInstruction(&code->insns[ip])) |
||
2810 | return false; |
||
2811 | } |
||
2812 | |||
2813 | if (!BindArgumentsPass(*this).run(prog)) |
||
2814 | return false; |
||
2815 | |||
2816 | return true; |
||
2817 | } |
||
2818 | |||
2819 | } // unnamed namespace |
||
2820 | |||
2821 | namespace nv50_ir { |
||
2822 | |||
2823 | bool |
||
2824 | Program::makeFromTGSI(struct nv50_ir_prog_info *info) |
||
2825 | { |
||
2826 | tgsi::Source src(info); |
||
2827 | if (!src.scanSource()) |
||
2828 | return false; |
||
2829 | tlsSize = info->bin.tlsSpace; |
||
2830 | |||
2831 | Converter builder(this, &src); |
||
2832 | return builder.run(); |
||
2833 | } |
||
2834 | |||
2835 | } // namespace nv50_ir>>>>>>>>>>>>>>>>>>>>>>>><>>><>>>><>><>><>><>><>>>>><>>>><>>>>><>>>><>>><>><>>><>>>>><>><>><>><>><>>><>=>><>>>>>=>=>=>><>>=>=>=>>=>>>>><>>>>>><> |