Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright 2011 Christoph Bumiller |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | */ |
||
22 | |||
23 | #include "codegen/nv50_ir.h" |
||
24 | #include "codegen/nv50_ir_target_nv50.h" |
||
25 | |||
26 | namespace nv50_ir { |
||
27 | |||
// Instruction encoding forms. One of these is passed as the 'enc' argument
// of CodeEmitterNV50::setSrcFileBits(), which uses it to choose between
// form-specific bit positions.
#define NV50_OP_ENC_LONG 0      // passed by emitForm_MAD
#define NV50_OP_ENC_SHORT 1     // passed by emitForm_MUL
#define NV50_OP_ENC_IMM 2       // passed by emitForm_IMM
#define NV50_OP_ENC_LONG_ALT 3  // passed by emitForm_ADD
||
32 | |||
33 | class CodeEmitterNV50 : public CodeEmitter |
||
34 | { |
||
35 | public: |
||
36 | CodeEmitterNV50(const TargetNV50 *); |
||
37 | |||
38 | virtual bool emitInstruction(Instruction *); |
||
39 | |||
40 | virtual uint32_t getMinEncodingSize(const Instruction *) const; |
||
41 | |||
42 | inline void setProgramType(Program::Type pType) { progType = pType; } |
||
43 | |||
44 | virtual void prepareEmission(Function *); |
||
45 | |||
46 | private: |
||
47 | Program::Type progType; |
||
48 | |||
49 | const TargetNV50 *targNV50; |
||
50 | |||
51 | private: |
||
52 | inline void defId(const ValueDef&, const int pos); |
||
53 | inline void srcId(const ValueRef&, const int pos); |
||
54 | inline void srcId(const ValueRef *, const int pos); |
||
55 | |||
56 | inline void srcAddr16(const ValueRef&, bool adj, const int pos); |
||
57 | inline void srcAddr8(const ValueRef&, const int pos); |
||
58 | |||
59 | void emitFlagsRd(const Instruction *); |
||
60 | void emitFlagsWr(const Instruction *); |
||
61 | |||
62 | void emitCondCode(CondCode cc, DataType ty, int pos); |
||
63 | |||
64 | inline void setARegBits(unsigned int); |
||
65 | |||
66 | void setAReg16(const Instruction *, int s); |
||
67 | void setImmediate(const Instruction *, int s); |
||
68 | |||
69 | void setDst(const Value *); |
||
70 | void setDst(const Instruction *, int d); |
||
71 | void setSrcFileBits(const Instruction *, int enc); |
||
72 | void setSrc(const Instruction *, unsigned int s, int slot); |
||
73 | |||
74 | void emitForm_MAD(const Instruction *); |
||
75 | void emitForm_ADD(const Instruction *); |
||
76 | void emitForm_MUL(const Instruction *); |
||
77 | void emitForm_IMM(const Instruction *); |
||
78 | |||
79 | void emitLoadStoreSizeLG(DataType ty, int pos); |
||
80 | void emitLoadStoreSizeCS(DataType ty); |
||
81 | |||
82 | void roundMode_MAD(const Instruction *); |
||
83 | void roundMode_CVT(RoundMode); |
||
84 | |||
85 | void emitMNeg12(const Instruction *); |
||
86 | |||
87 | void emitLOAD(const Instruction *); |
||
88 | void emitSTORE(const Instruction *); |
||
89 | void emitMOV(const Instruction *); |
||
90 | void emitRDSV(const Instruction *); |
||
91 | void emitNOP(); |
||
92 | void emitINTERP(const Instruction *); |
||
93 | void emitPFETCH(const Instruction *); |
||
94 | void emitOUT(const Instruction *); |
||
95 | |||
96 | void emitUADD(const Instruction *); |
||
97 | void emitAADD(const Instruction *); |
||
98 | void emitFADD(const Instruction *); |
||
99 | void emitIMUL(const Instruction *); |
||
100 | void emitFMUL(const Instruction *); |
||
101 | void emitFMAD(const Instruction *); |
||
102 | void emitIMAD(const Instruction *); |
||
103 | void emitISAD(const Instruction *); |
||
104 | |||
105 | void emitMINMAX(const Instruction *); |
||
106 | |||
107 | void emitPreOp(const Instruction *); |
||
108 | void emitSFnOp(const Instruction *, uint8_t subOp); |
||
109 | |||
110 | void emitShift(const Instruction *); |
||
111 | void emitARL(const Instruction *, unsigned int shl); |
||
112 | void emitLogicOp(const Instruction *); |
||
113 | void emitNOT(const Instruction *); |
||
114 | |||
115 | void emitCVT(const Instruction *); |
||
116 | void emitSET(const Instruction *); |
||
117 | |||
118 | void emitTEX(const TexInstruction *); |
||
119 | void emitTXQ(const TexInstruction *); |
||
120 | void emitTEXPREP(const TexInstruction *); |
||
121 | |||
122 | void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp); |
||
123 | |||
124 | void emitFlow(const Instruction *, uint8_t flowOp); |
||
125 | void emitPRERETEmu(const FlowInstruction *); |
||
126 | void emitBAR(const Instruction *); |
||
127 | |||
128 | void emitATOM(const Instruction *); |
||
129 | }; |
||
130 | |||
// Shorthands for the register data (id, offset, ...) of the representative
// value of an operand. Both expand to the same expression; in this file
// SDATA is applied to sources (ValueRef) and DDATA to definitions (ValueDef).
#define SDATA(a) ((a).rep()->reg.data)
#define DDATA(a) ((a).rep()->reg.data)
||
133 | |||
134 | void CodeEmitterNV50::srcId(const ValueRef& src, const int pos) |
||
135 | { |
||
136 | assert(src.get()); |
||
137 | code[pos / 32] |= SDATA(src).id << (pos % 32); |
||
138 | } |
||
139 | |||
140 | void CodeEmitterNV50::srcId(const ValueRef *src, const int pos) |
||
141 | { |
||
142 | assert(src->get()); |
||
143 | code[pos / 32] |= SDATA(*src).id << (pos % 32); |
||
144 | } |
||
145 | |||
146 | void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos) |
||
147 | { |
||
148 | assert(src.get()); |
||
149 | |||
150 | int32_t offset = SDATA(src).offset; |
||
151 | |||
152 | assert(!adj || src.get()->reg.size <= 4); |
||
153 | if (adj) |
||
154 | offset /= src.get()->reg.size; |
||
155 | |||
156 | assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16); |
||
157 | |||
158 | if (offset < 0) |
||
159 | offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff; |
||
160 | |||
161 | code[pos / 32] |= offset << (pos % 32); |
||
162 | } |
||
163 | |||
164 | void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos) |
||
165 | { |
||
166 | assert(src.get()); |
||
167 | |||
168 | uint32_t offset = SDATA(src).offset; |
||
169 | |||
170 | assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3)); |
||
171 | |||
172 | code[pos / 32] |= (offset >> 2) << (pos % 32); |
||
173 | } |
||
174 | |||
175 | void CodeEmitterNV50::defId(const ValueDef& def, const int pos) |
||
176 | { |
||
177 | assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT); |
||
178 | |||
179 | code[pos / 32] |= DDATA(def).id << (pos % 32); |
||
180 | } |
||
181 | |||
182 | void |
||
183 | CodeEmitterNV50::roundMode_MAD(const Instruction *insn) |
||
184 | { |
||
185 | switch (insn->rnd) { |
||
186 | case ROUND_M: code[1] |= 1 << 22; break; |
||
187 | case ROUND_P: code[1] |= 2 << 22; break; |
||
188 | case ROUND_Z: code[1] |= 3 << 22; break; |
||
189 | default: |
||
190 | assert(insn->rnd == ROUND_N); |
||
191 | break; |
||
192 | } |
||
193 | } |
||
194 | |||
195 | void |
||
196 | CodeEmitterNV50::emitMNeg12(const Instruction *i) |
||
197 | { |
||
198 | code[1] |= i->src(0).mod.neg() << 26; |
||
199 | code[1] |= i->src(1).mod.neg() << 27; |
||
200 | } |
||
201 | |||
202 | void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos) |
||
203 | { |
||
204 | uint8_t enc; |
||
205 | |||
206 | assert(pos >= 32 || pos <= 27); |
||
207 | |||
208 | switch (cc) { |
||
209 | case CC_LT: enc = 0x1; break; |
||
210 | case CC_LTU: enc = 0x9; break; |
||
211 | case CC_EQ: enc = 0x2; break; |
||
212 | case CC_EQU: enc = 0xa; break; |
||
213 | case CC_LE: enc = 0x3; break; |
||
214 | case CC_LEU: enc = 0xb; break; |
||
215 | case CC_GT: enc = 0x4; break; |
||
216 | case CC_GTU: enc = 0xc; break; |
||
217 | case CC_NE: enc = 0x5; break; |
||
218 | case CC_NEU: enc = 0xd; break; |
||
219 | case CC_GE: enc = 0x6; break; |
||
220 | case CC_GEU: enc = 0xe; break; |
||
221 | case CC_TR: enc = 0xf; break; |
||
222 | case CC_FL: enc = 0x0; break; |
||
223 | |||
224 | case CC_O: enc = 0x10; break; |
||
225 | case CC_C: enc = 0x11; break; |
||
226 | case CC_A: enc = 0x12; break; |
||
227 | case CC_S: enc = 0x13; break; |
||
228 | case CC_NS: enc = 0x1c; break; |
||
229 | case CC_NA: enc = 0x1d; break; |
||
230 | case CC_NC: enc = 0x1e; break; |
||
231 | case CC_NO: enc = 0x1f; break; |
||
232 | |||
233 | default: |
||
234 | enc = 0; |
||
235 | assert(!"invalid condition code"); |
||
236 | break; |
||
237 | } |
||
238 | if (ty != TYPE_NONE && !isFloatType(ty)) |
||
239 | enc &= ~0x8; // unordered only exists for float types |
||
240 | |||
241 | code[pos / 32] |= enc << (pos % 32); |
||
242 | } |
||
243 | |||
244 | void |
||
245 | CodeEmitterNV50::emitFlagsRd(const Instruction *i) |
||
246 | { |
||
247 | int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc; |
||
248 | |||
249 | assert(!(code[1] & 0x00003f80)); |
||
250 | |||
251 | if (s >= 0) { |
||
252 | assert(i->getSrc(s)->reg.file == FILE_FLAGS); |
||
253 | emitCondCode(i->cc, TYPE_NONE, 32 + 7); |
||
254 | srcId(i->src(s), 32 + 12); |
||
255 | } else { |
||
256 | code[1] |= 0x0780; |
||
257 | } |
||
258 | } |
||
259 | |||
260 | void |
||
261 | CodeEmitterNV50::emitFlagsWr(const Instruction *i) |
||
262 | { |
||
263 | assert(!(code[1] & 0x70)); |
||
264 | |||
265 | int flagsDef = i->flagsDef; |
||
266 | |||
267 | // find flags definition and check that it is the last def |
||
268 | if (flagsDef < 0) { |
||
269 | for (int d = 0; i->defExists(d); ++d) |
||
270 | if (i->def(d).getFile() == FILE_FLAGS) |
||
271 | flagsDef = d; |
||
272 | if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point |
||
273 | WARN("Instruction::flagsDef was not set properly\n"); |
||
274 | } |
||
275 | if (flagsDef == 0 && i->defExists(1)) |
||
276 | WARN("flags def should not be the primary definition\n"); |
||
277 | |||
278 | if (flagsDef >= 0) |
||
279 | code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40; |
||
280 | |||
281 | } |
||
282 | |||
283 | void |
||
284 | CodeEmitterNV50::setARegBits(unsigned int u) |
||
285 | { |
||
286 | code[0] |= (u & 3) << 26; |
||
287 | code[1] |= (u & 4); |
||
288 | } |
||
289 | |||
290 | void |
||
291 | CodeEmitterNV50::setAReg16(const Instruction *i, int s) |
||
292 | { |
||
293 | if (i->srcExists(s)) { |
||
294 | s = i->src(s).indirect[0]; |
||
295 | if (s >= 0) |
||
296 | setARegBits(SDATA(i->src(s)).id + 1); |
||
297 | } |
||
298 | } |
||
299 | |||
300 | void |
||
301 | CodeEmitterNV50::setImmediate(const Instruction *i, int s) |
||
302 | { |
||
303 | const ImmediateValue *imm = i->src(s).get()->asImm(); |
||
304 | assert(imm); |
||
305 | |||
306 | uint32_t u = imm->reg.data.u32; |
||
307 | |||
308 | if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) |
||
309 | u = ~u; |
||
310 | |||
311 | code[1] |= 3; |
||
312 | code[0] |= (u & 0x3f) << 16; |
||
313 | code[1] |= (u >> 6) << 2; |
||
314 | } |
||
315 | |||
316 | void |
||
317 | CodeEmitterNV50::setDst(const Value *dst) |
||
318 | { |
||
319 | const Storage *reg = &dst->join->reg; |
||
320 | |||
321 | assert(reg->file != FILE_ADDRESS); |
||
322 | |||
323 | if (reg->data.id < 0 || reg->file == FILE_FLAGS) { |
||
324 | code[0] |= (127 << 2) | 1; |
||
325 | code[1] |= 8; |
||
326 | } else { |
||
327 | int id; |
||
328 | if (reg->file == FILE_SHADER_OUTPUT) { |
||
329 | code[1] |= 8; |
||
330 | id = reg->data.offset / 4; |
||
331 | } else { |
||
332 | id = reg->data.id; |
||
333 | } |
||
334 | code[0] |= id << 2; |
||
335 | } |
||
336 | } |
||
337 | |||
338 | void |
||
339 | CodeEmitterNV50::setDst(const Instruction *i, int d) |
||
340 | { |
||
341 | if (i->defExists(d)) { |
||
342 | setDst(i->getDef(d)); |
||
343 | } else |
||
344 | if (!d) { |
||
345 | code[0] |= 0x01fc; // bit bucket |
||
346 | code[1] |= 0x0008; |
||
347 | } |
||
348 | } |
||
349 | |||
350 | // 3 * 2 bits: |
||
351 | // 0: r |
||
352 | // 1: a/s |
||
353 | // 2: c |
||
354 | // 3: i |
||
355 | void |
||
356 | CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc) |
||
357 | { |
||
358 | uint8_t mode = 0; |
||
359 | |||
360 | for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) { |
||
361 | switch (i->src(s).getFile()) { |
||
362 | case FILE_GPR: |
||
363 | break; |
||
364 | case FILE_MEMORY_SHARED: |
||
365 | case FILE_SHADER_INPUT: |
||
366 | mode |= 1 << (s * 2); |
||
367 | break; |
||
368 | case FILE_MEMORY_CONST: |
||
369 | mode |= 2 << (s * 2); |
||
370 | break; |
||
371 | case FILE_IMMEDIATE: |
||
372 | mode |= 3 << (s * 2); |
||
373 | break; |
||
374 | default: |
||
375 | ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); |
||
376 | assert(0); |
||
377 | break; |
||
378 | } |
||
379 | } |
||
380 | switch (mode) { |
||
381 | case 0x00: // rrr |
||
382 | break; |
||
383 | case 0x01: // arr/grr |
||
384 | if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { |
||
385 | code[0] |= 0x01800000; |
||
386 | if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT) |
||
387 | code[1] |= 0x00200000; |
||
388 | } else { |
||
389 | if (enc == NV50_OP_ENC_SHORT) |
||
390 | code[0] |= 0x01000000; |
||
391 | else |
||
392 | code[1] |= 0x00200000; |
||
393 | } |
||
394 | break; |
||
395 | case 0x03: // irr |
||
396 | assert(i->op == OP_MOV); |
||
397 | return; |
||
398 | case 0x0c: // rir |
||
399 | break; |
||
400 | case 0x0d: // gir |
||
401 | assert(progType == Program::TYPE_GEOMETRY || |
||
402 | progType == Program::TYPE_COMPUTE); |
||
403 | code[0] |= 0x01000000; |
||
404 | if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { |
||
405 | int reg = i->src(0).getIndirect(0)->rep()->reg.data.id; |
||
406 | assert(reg < 3); |
||
407 | code[0] |= (reg + 1) << 26; |
||
408 | } |
||
409 | break; |
||
410 | case 0x08: // rcr |
||
411 | code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; |
||
412 | code[1] |= (i->getSrc(1)->reg.fileIndex << 22); |
||
413 | break; |
||
414 | case 0x09: // acr/gcr |
||
415 | if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { |
||
416 | code[0] |= 0x01800000; |
||
417 | } else { |
||
418 | code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; |
||
419 | code[1] |= 0x00200000; |
||
420 | } |
||
421 | code[1] |= (i->getSrc(1)->reg.fileIndex << 22); |
||
422 | break; |
||
423 | case 0x20: // rrc |
||
424 | code[0] |= 0x01000000; |
||
425 | code[1] |= (i->getSrc(2)->reg.fileIndex << 22); |
||
426 | break; |
||
427 | case 0x21: // arc |
||
428 | code[0] |= 0x01000000; |
||
429 | code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22); |
||
430 | assert(progType != Program::TYPE_GEOMETRY); |
||
431 | break; |
||
432 | default: |
||
433 | ERROR("not encodable: %x\n", mode); |
||
434 | assert(0); |
||
435 | break; |
||
436 | } |
||
437 | if (progType != Program::TYPE_COMPUTE) |
||
438 | return; |
||
439 | |||
440 | if ((mode & 3) == 1) { |
||
441 | const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14; |
||
442 | |||
443 | switch (i->getSrc(0)->reg.type) { |
||
444 | case TYPE_U8: |
||
445 | break; |
||
446 | case TYPE_U16: |
||
447 | code[0] |= 1 << pos; |
||
448 | break; |
||
449 | case TYPE_S16: |
||
450 | code[0] |= 2 << pos; |
||
451 | break; |
||
452 | default: |
||
453 | code[0] |= 3 << pos; |
||
454 | assert(i->getSrc(0)->reg.size == 4); |
||
455 | break; |
||
456 | } |
||
457 | } |
||
458 | } |
||
459 | |||
460 | void |
||
461 | CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot) |
||
462 | { |
||
463 | if (Target::operationSrcNr[i->op] <= s) |
||
464 | return; |
||
465 | const Storage *reg = &i->src(s).rep()->reg; |
||
466 | |||
467 | unsigned int id = (reg->file == FILE_GPR) ? |
||
468 | reg->data.id : |
||
469 | reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here |
||
470 | |||
471 | switch (slot) { |
||
472 | case 0: code[0] |= id << 9; break; |
||
473 | case 1: code[0] |= id << 16; break; |
||
474 | case 2: code[1] |= id << 14; break; |
||
475 | default: |
||
476 | assert(0); |
||
477 | break; |
||
478 | } |
||
479 | } |
||
480 | |||
481 | // the default form: |
||
482 | // - long instruction |
||
483 | // - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr) |
||
484 | // - address & flags |
||
485 | void |
||
486 | CodeEmitterNV50::emitForm_MAD(const Instruction *i) |
||
487 | { |
||
488 | assert(i->encSize == 8); |
||
489 | code[0] |= 1; |
||
490 | |||
491 | emitFlagsRd(i); |
||
492 | emitFlagsWr(i); |
||
493 | |||
494 | setDst(i, 0); |
||
495 | |||
496 | setSrcFileBits(i, NV50_OP_ENC_LONG); |
||
497 | setSrc(i, 0, 0); |
||
498 | setSrc(i, 1, 1); |
||
499 | setSrc(i, 2, 2); |
||
500 | |||
501 | if (i->getIndirect(0, 0)) { |
||
502 | assert(!i->getIndirect(1, 0)); |
||
503 | setAReg16(i, 0); |
||
504 | } else { |
||
505 | setAReg16(i, 1); |
||
506 | } |
||
507 | } |
||
508 | |||
509 | // like default form, but 2nd source in slot 2, and no 3rd source |
||
510 | void |
||
511 | CodeEmitterNV50::emitForm_ADD(const Instruction *i) |
||
512 | { |
||
513 | assert(i->encSize == 8); |
||
514 | code[0] |= 1; |
||
515 | |||
516 | emitFlagsRd(i); |
||
517 | emitFlagsWr(i); |
||
518 | |||
519 | setDst(i, 0); |
||
520 | |||
521 | setSrcFileBits(i, NV50_OP_ENC_LONG_ALT); |
||
522 | setSrc(i, 0, 0); |
||
523 | setSrc(i, 1, 2); |
||
524 | |||
525 | if (i->getIndirect(0, 0)) { |
||
526 | assert(!i->getIndirect(1, 0)); |
||
527 | setAReg16(i, 0); |
||
528 | } else { |
||
529 | setAReg16(i, 1); |
||
530 | } |
||
531 | } |
||
532 | |||
533 | // default short form (rr, ar, rc, gr) |
||
534 | void |
||
535 | CodeEmitterNV50::emitForm_MUL(const Instruction *i) |
||
536 | { |
||
537 | assert(i->encSize == 4 && !(code[0] & 1)); |
||
538 | assert(i->defExists(0)); |
||
539 | assert(!i->getPredicate()); |
||
540 | |||
541 | setDst(i, 0); |
||
542 | |||
543 | setSrcFileBits(i, NV50_OP_ENC_SHORT); |
||
544 | setSrc(i, 0, 0); |
||
545 | setSrc(i, 1, 1); |
||
546 | } |
||
547 | |||
548 | // usual immediate form |
||
549 | // - 1 to 3 sources where last is immediate (rir, gir) |
||
550 | // - no address or predicate possible |
||
551 | void |
||
552 | CodeEmitterNV50::emitForm_IMM(const Instruction *i) |
||
553 | { |
||
554 | assert(i->encSize == 8); |
||
555 | code[0] |= 1; |
||
556 | |||
557 | assert(i->defExists(0) && i->srcExists(0)); |
||
558 | |||
559 | setDst(i, 0); |
||
560 | |||
561 | setSrcFileBits(i, NV50_OP_ENC_IMM); |
||
562 | if (Target::operationSrcNr[i->op] > 1) { |
||
563 | setSrc(i, 0, 0); |
||
564 | setImmediate(i, 1); |
||
565 | setSrc(i, 2, 1); |
||
566 | } else { |
||
567 | setImmediate(i, 0); |
||
568 | } |
||
569 | } |
||
570 | |||
571 | void |
||
572 | CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos) |
||
573 | { |
||
574 | uint8_t enc; |
||
575 | |||
576 | switch (ty) { |
||
577 | case TYPE_F32: // fall through |
||
578 | case TYPE_S32: // fall through |
||
579 | case TYPE_U32: enc = 0x6; break; |
||
580 | case TYPE_B128: enc = 0x5; break; |
||
581 | case TYPE_F64: // fall through |
||
582 | case TYPE_S64: // fall through |
||
583 | case TYPE_U64: enc = 0x4; break; |
||
584 | case TYPE_S16: enc = 0x3; break; |
||
585 | case TYPE_U16: enc = 0x2; break; |
||
586 | case TYPE_S8: enc = 0x1; break; |
||
587 | case TYPE_U8: enc = 0x0; break; |
||
588 | default: |
||
589 | enc = 0; |
||
590 | assert(!"invalid load/store type"); |
||
591 | break; |
||
592 | } |
||
593 | code[pos / 32] |= enc << (pos % 32); |
||
594 | } |
||
595 | |||
596 | void |
||
597 | CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty) |
||
598 | { |
||
599 | switch (ty) { |
||
600 | case TYPE_U8: break; |
||
601 | case TYPE_U16: code[1] |= 0x4000; break; |
||
602 | case TYPE_S16: code[1] |= 0x8000; break; |
||
603 | case TYPE_F32: |
||
604 | case TYPE_S32: |
||
605 | case TYPE_U32: code[1] |= 0xc000; break; |
||
606 | default: |
||
607 | assert(0); |
||
608 | break; |
||
609 | } |
||
610 | } |
||
611 | |||
612 | void |
||
613 | CodeEmitterNV50::emitLOAD(const Instruction *i) |
||
614 | { |
||
615 | DataFile sf = i->src(0).getFile(); |
||
616 | int32_t offset = i->getSrc(0)->reg.data.offset; |
||
617 | |||
618 | switch (sf) { |
||
619 | case FILE_SHADER_INPUT: |
||
620 | if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) |
||
621 | code[0] = 0x11800001; |
||
622 | else |
||
623 | // use 'mov' where we can |
||
624 | code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001; |
||
625 | code[1] = 0x00200000 | (i->lanes << 14); |
||
626 | if (typeSizeof(i->dType) == 4) |
||
627 | code[1] |= 0x04000000; |
||
628 | break; |
||
629 | case FILE_MEMORY_SHARED: |
||
630 | if (targ->getChipset() >= 0x84) { |
||
631 | assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType))); |
||
632 | code[0] = 0x10000001; |
||
633 | code[1] = 0x40000000; |
||
634 | |||
635 | if (typeSizeof(i->dType) == 4) |
||
636 | code[1] |= 0x04000000; |
||
637 | |||
638 | emitLoadStoreSizeCS(i->sType); |
||
639 | } else { |
||
640 | assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType))); |
||
641 | code[0] = 0x10000001; |
||
642 | code[1] = 0x00200000 | (i->lanes << 14); |
||
643 | emitLoadStoreSizeCS(i->sType); |
||
644 | } |
||
645 | break; |
||
646 | case FILE_MEMORY_CONST: |
||
647 | code[0] = 0x10000001; |
||
648 | code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22); |
||
649 | if (typeSizeof(i->dType) == 4) |
||
650 | code[1] |= 0x04000000; |
||
651 | emitLoadStoreSizeCS(i->sType); |
||
652 | break; |
||
653 | case FILE_MEMORY_LOCAL: |
||
654 | code[0] = 0xd0000001; |
||
655 | code[1] = 0x40000000; |
||
656 | break; |
||
657 | case FILE_MEMORY_GLOBAL: |
||
658 | code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); |
||
659 | code[1] = 0x80000000; |
||
660 | break; |
||
661 | default: |
||
662 | assert(!"invalid load source file"); |
||
663 | break; |
||
664 | } |
||
665 | if (sf == FILE_MEMORY_LOCAL || |
||
666 | sf == FILE_MEMORY_GLOBAL) |
||
667 | emitLoadStoreSizeLG(i->sType, 21 + 32); |
||
668 | |||
669 | setDst(i, 0); |
||
670 | |||
671 | emitFlagsRd(i); |
||
672 | emitFlagsWr(i); |
||
673 | |||
674 | if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { |
||
675 | srcId(*i->src(0).getIndirect(0), 9); |
||
676 | } else { |
||
677 | setAReg16(i, 0); |
||
678 | srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9); |
||
679 | } |
||
680 | } |
||
681 | |||
682 | void |
||
683 | CodeEmitterNV50::emitSTORE(const Instruction *i) |
||
684 | { |
||
685 | DataFile f = i->getSrc(0)->reg.file; |
||
686 | int32_t offset = i->getSrc(0)->reg.data.offset; |
||
687 | |||
688 | switch (f) { |
||
689 | case FILE_SHADER_OUTPUT: |
||
690 | code[0] = 0x00000001 | ((offset >> 2) << 9); |
||
691 | code[1] = 0x80c00000; |
||
692 | srcId(i->src(1), 32 + 14); |
||
693 | break; |
||
694 | case FILE_MEMORY_GLOBAL: |
||
695 | code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); |
||
696 | code[1] = 0xa0000000; |
||
697 | emitLoadStoreSizeLG(i->dType, 21 + 32); |
||
698 | srcId(i->src(1), 2); |
||
699 | break; |
||
700 | case FILE_MEMORY_LOCAL: |
||
701 | code[0] = 0xd0000001; |
||
702 | code[1] = 0x60000000; |
||
703 | emitLoadStoreSizeLG(i->dType, 21 + 32); |
||
704 | srcId(i->src(1), 2); |
||
705 | break; |
||
706 | case FILE_MEMORY_SHARED: |
||
707 | code[0] = 0x00000001; |
||
708 | code[1] = 0xe0000000; |
||
709 | switch (typeSizeof(i->dType)) { |
||
710 | case 1: |
||
711 | code[0] |= offset << 9; |
||
712 | code[1] |= 0x00400000; |
||
713 | break; |
||
714 | case 2: |
||
715 | code[0] |= (offset >> 1) << 9; |
||
716 | break; |
||
717 | case 4: |
||
718 | code[0] |= (offset >> 2) << 9; |
||
719 | code[1] |= 0x04200000; |
||
720 | break; |
||
721 | default: |
||
722 | assert(0); |
||
723 | break; |
||
724 | } |
||
725 | srcId(i->src(1), 32 + 14); |
||
726 | break; |
||
727 | default: |
||
728 | assert(!"invalid store destination file"); |
||
729 | break; |
||
730 | } |
||
731 | |||
732 | if (f == FILE_MEMORY_GLOBAL) |
||
733 | srcId(*i->src(0).getIndirect(0), 9); |
||
734 | else |
||
735 | setAReg16(i, 0); |
||
736 | |||
737 | if (f == FILE_MEMORY_LOCAL) |
||
738 | srcAddr16(i->src(0), false, 9); |
||
739 | |||
740 | emitFlagsRd(i); |
||
741 | } |
||
742 | |||
743 | void |
||
744 | CodeEmitterNV50::emitMOV(const Instruction *i) |
||
745 | { |
||
746 | DataFile sf = i->getSrc(0)->reg.file; |
||
747 | DataFile df = i->getDef(0)->reg.file; |
||
748 | |||
749 | assert(sf == FILE_GPR || df == FILE_GPR); |
||
750 | |||
751 | if (sf == FILE_FLAGS) { |
||
752 | code[0] = 0x00000001; |
||
753 | code[1] = 0x20000000; |
||
754 | defId(i->def(0), 2); |
||
755 | srcId(i->src(0), 12); |
||
756 | emitFlagsRd(i); |
||
757 | } else |
||
758 | if (sf == FILE_ADDRESS) { |
||
759 | code[0] = 0x00000001; |
||
760 | code[1] = 0x40000000; |
||
761 | defId(i->def(0), 2); |
||
762 | setARegBits(SDATA(i->src(0)).id + 1); |
||
763 | emitFlagsRd(i); |
||
764 | } else |
||
765 | if (df == FILE_FLAGS) { |
||
766 | code[0] = 0x00000001; |
||
767 | code[1] = 0xa0000000; |
||
768 | defId(i->def(0), 4); |
||
769 | srcId(i->src(0), 9); |
||
770 | emitFlagsRd(i); |
||
771 | } else |
||
772 | if (sf == FILE_IMMEDIATE) { |
||
773 | code[0] = 0x10008001; |
||
774 | code[1] = 0x00000003; |
||
775 | emitForm_IMM(i); |
||
776 | } else { |
||
777 | if (i->encSize == 4) { |
||
778 | code[0] = 0x10008000; |
||
779 | } else { |
||
780 | code[0] = 0x10000001; |
||
781 | code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; |
||
782 | code[1] |= (i->lanes << 14); |
||
783 | emitFlagsRd(i); |
||
784 | } |
||
785 | defId(i->def(0), 2); |
||
786 | srcId(i->src(0), 9); |
||
787 | } |
||
788 | if (df == FILE_SHADER_OUTPUT) { |
||
789 | assert(i->encSize == 8); |
||
790 | code[1] |= 0x8; |
||
791 | } |
||
792 | } |
||
793 | |||
794 | static inline uint8_t getSRegEncoding(const ValueRef &ref) |
||
795 | { |
||
796 | switch (SDATA(ref).sv.sv) { |
||
797 | case SV_PHYSID: return 0; |
||
798 | case SV_CLOCK: return 1; |
||
799 | case SV_VERTEX_STRIDE: return 3; |
||
800 | // case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index; |
||
801 | case SV_SAMPLE_INDEX: return 8; |
||
802 | default: |
||
803 | assert(!"no sreg for system value"); |
||
804 | return 0; |
||
805 | } |
||
806 | } |
||
807 | |||
808 | void |
||
809 | CodeEmitterNV50::emitRDSV(const Instruction *i) |
||
810 | { |
||
811 | code[0] = 0x00000001; |
||
812 | code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14); |
||
813 | defId(i->def(0), 2); |
||
814 | emitFlagsRd(i); |
||
815 | } |
||
816 | |||
817 | void |
||
818 | CodeEmitterNV50::emitNOP() |
||
819 | { |
||
820 | code[0] = 0xf0000001; |
||
821 | code[1] = 0xe0000000; |
||
822 | } |
||
823 | |||
824 | void |
||
825 | CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp) |
||
826 | { |
||
827 | code[0] = 0xc0000000 | (lane << 16); |
||
828 | code[1] = 0x80000000; |
||
829 | |||
830 | code[0] |= (quOp & 0x03) << 20; |
||
831 | code[1] |= (quOp & 0xfc) << 20; |
||
832 | |||
833 | emitForm_ADD(i); |
||
834 | |||
835 | if (!i->srcExists(1)) |
||
836 | srcId(i->src(0), 32 + 14); |
||
837 | } |
||
838 | |||
839 | /* NOTE: This returns the base address of a vertex inside the primitive. |
||
840 | * src0 is an immediate, the index (not offset) of the vertex |
||
841 | * inside the primitive. XXX: signed or unsigned ? |
||
842 | * src1 (may be NULL) should use whatever units the hardware requires |
||
843 | * (on nv50 this is bytes, so, relative index * 4; signed 16 bit value). |
||
844 | */ |
||
845 | void |
||
846 | CodeEmitterNV50::emitPFETCH(const Instruction *i) |
||
847 | { |
||
848 | const uint32_t prim = i->src(0).get()->reg.data.u32; |
||
849 | assert(prim <= 127); |
||
850 | |||
851 | if (i->def(0).getFile() == FILE_ADDRESS) { |
||
852 | // shl $aX a[] 0 |
||
853 | code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2); |
||
854 | code[1] = 0xc0200000; |
||
855 | code[0] |= prim << 9; |
||
856 | assert(!i->srcExists(1)); |
||
857 | } else |
||
858 | if (i->srcExists(1)) { |
||
859 | // ld b32 $rX a[$aX+base] |
||
860 | code[0] = 0x00000001; |
||
861 | code[1] = 0x04200000 | (0xf << 14); |
||
862 | defId(i->def(0), 2); |
||
863 | code[0] |= prim << 9; |
||
864 | setARegBits(SDATA(i->src(1)).id + 1); |
||
865 | } else { |
||
866 | // mov b32 $rX a[] |
||
867 | code[0] = 0x10000001; |
||
868 | code[1] = 0x04200000 | (0xf << 14); |
||
869 | defId(i->def(0), 2); |
||
870 | code[0] |= prim << 9; |
||
871 | } |
||
872 | emitFlagsRd(i); |
||
873 | } |
||
874 | |||
875 | void |
||
876 | CodeEmitterNV50::emitINTERP(const Instruction *i) |
||
877 | { |
||
878 | code[0] = 0x80000000; |
||
879 | |||
880 | defId(i->def(0), 2); |
||
881 | srcAddr8(i->src(0), 16); |
||
882 | |||
883 | if (i->getInterpMode() == NV50_IR_INTERP_FLAT) { |
||
884 | code[0] |= 1 << 8; |
||
885 | } else { |
||
886 | if (i->op == OP_PINTERP) { |
||
887 | code[0] |= 1 << 25; |
||
888 | srcId(i->src(1), 9); |
||
889 | } |
||
890 | if (i->getSampleMode() == NV50_IR_INTERP_CENTROID) |
||
891 | code[0] |= 1 << 24; |
||
892 | } |
||
893 | |||
894 | if (i->encSize == 8) { |
||
895 | code[1] = |
||
896 | (code[0] & (3 << 24)) >> (24 - 16) | |
||
897 | (code[0] & (1 << 8)) << (18 - 8); |
||
898 | code[0] &= ~0x03000100; |
||
899 | code[0] |= 1; |
||
900 | emitFlagsRd(i); |
||
901 | } |
||
902 | } |
||
903 | |||
904 | void |
||
905 | CodeEmitterNV50::emitMINMAX(const Instruction *i) |
||
906 | { |
||
907 | if (i->dType == TYPE_F64) { |
||
908 | code[0] = 0xe0000000; |
||
909 | code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000; |
||
910 | } else { |
||
911 | code[0] = 0x30000000; |
||
912 | code[1] = 0x80000000; |
||
913 | if (i->op == OP_MIN) |
||
914 | code[1] |= 0x20000000; |
||
915 | |||
916 | switch (i->dType) { |
||
917 | case TYPE_F32: code[0] |= 0x80000000; break; |
||
918 | case TYPE_S32: code[1] |= 0x8c000000; break; |
||
919 | case TYPE_U32: code[1] |= 0x84000000; break; |
||
920 | case TYPE_S16: code[1] |= 0x80000000; break; |
||
921 | case TYPE_U16: break; |
||
922 | default: |
||
923 | assert(0); |
||
924 | break; |
||
925 | } |
||
926 | code[1] |= i->src(0).mod.abs() << 20; |
||
927 | code[1] |= i->src(0).mod.neg() << 26; |
||
928 | code[1] |= i->src(1).mod.abs() << 19; |
||
929 | code[1] |= i->src(1).mod.neg() << 27; |
||
930 | } |
||
931 | emitForm_MAD(i); |
||
932 | } |
||
933 | |||
934 | void |
||
935 | CodeEmitterNV50::emitFMAD(const Instruction *i) |
||
936 | { |
||
937 | const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg(); |
||
938 | const int neg_add = i->src(2).mod.neg(); |
||
939 | |||
940 | code[0] = 0xe0000000; |
||
941 | |||
942 | if (i->src(1).getFile() == FILE_IMMEDIATE) { |
||
943 | code[1] = 0; |
||
944 | emitForm_IMM(i); |
||
945 | code[0] |= neg_mul << 15; |
||
946 | code[0] |= neg_add << 22; |
||
947 | if (i->saturate) |
||
948 | code[0] |= 1 << 8; |
||
949 | } else |
||
950 | if (i->encSize == 4) { |
||
951 | emitForm_MUL(i); |
||
952 | code[0] |= neg_mul << 15; |
||
953 | code[0] |= neg_add << 22; |
||
954 | if (i->saturate) |
||
955 | code[0] |= 1 << 8; |
||
956 | } else { |
||
957 | code[1] = neg_mul << 26; |
||
958 | code[1] |= neg_add << 27; |
||
959 | if (i->saturate) |
||
960 | code[1] |= 1 << 29; |
||
961 | emitForm_MAD(i); |
||
962 | } |
||
963 | } |
||
964 | |||
965 | void |
||
966 | CodeEmitterNV50::emitFADD(const Instruction *i) |
||
967 | { |
||
968 | const int neg0 = i->src(0).mod.neg(); |
||
969 | const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); |
||
970 | |||
971 | code[0] = 0xb0000000; |
||
972 | |||
973 | assert(!(i->src(0).mod | i->src(1).mod).abs()); |
||
974 | |||
975 | if (i->src(1).getFile() == FILE_IMMEDIATE) { |
||
976 | code[1] = 0; |
||
977 | emitForm_IMM(i); |
||
978 | code[0] |= neg0 << 15; |
||
979 | code[0] |= neg1 << 22; |
||
980 | if (i->saturate) |
||
981 | code[0] |= 1 << 8; |
||
982 | } else |
||
983 | if (i->encSize == 8) { |
||
984 | code[1] = 0; |
||
985 | emitForm_ADD(i); |
||
986 | code[1] |= neg0 << 26; |
||
987 | code[1] |= neg1 << 27; |
||
988 | if (i->saturate) |
||
989 | code[1] |= 1 << 29; |
||
990 | } else { |
||
991 | emitForm_MUL(i); |
||
992 | code[0] |= neg0 << 15; |
||
993 | code[0] |= neg1 << 22; |
||
994 | if (i->saturate) |
||
995 | code[0] |= 1 << 8; |
||
996 | } |
||
997 | } |
||
998 | |||
999 | void |
||
1000 | CodeEmitterNV50::emitUADD(const Instruction *i) |
||
1001 | { |
||
1002 | const int neg0 = i->src(0).mod.neg(); |
||
1003 | const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); |
||
1004 | |||
1005 | code[0] = 0x20008000; |
||
1006 | |||
1007 | if (i->src(1).getFile() == FILE_IMMEDIATE) { |
||
1008 | code[1] = 0; |
||
1009 | emitForm_IMM(i); |
||
1010 | } else |
||
1011 | if (i->encSize == 8) { |
||
1012 | code[0] = 0x20000000; |
||
1013 | code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; |
||
1014 | emitForm_ADD(i); |
||
1015 | } else { |
||
1016 | emitForm_MUL(i); |
||
1017 | } |
||
1018 | assert(!(neg0 && neg1)); |
||
1019 | code[0] |= neg0 << 28; |
||
1020 | code[0] |= neg1 << 22; |
||
1021 | |||
1022 | if (i->flagsSrc >= 0) { |
||
1023 | // addc == sub | subr |
||
1024 | assert(!(code[0] & 0x10400000) && !i->getPredicate()); |
||
1025 | code[0] |= 0x10400000; |
||
1026 | srcId(i->src(i->flagsSrc), 32 + 12); |
||
1027 | } |
||
1028 | } |
||
1029 | |||
1030 | void |
||
1031 | CodeEmitterNV50::emitAADD(const Instruction *i) |
||
1032 | { |
||
1033 | const int s = (i->op == OP_MOV) ? 0 : 1; |
||
1034 | |||
1035 | code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9); |
||
1036 | code[1] = 0x20000000; |
||
1037 | |||
1038 | code[0] |= (DDATA(i->def(0)).id + 1) << 2; |
||
1039 | |||
1040 | emitFlagsRd(i); |
||
1041 | |||
1042 | if (s && i->srcExists(0)) |
||
1043 | setARegBits(SDATA(i->src(0)).id + 1); |
||
1044 | } |
||
1045 | |||
1046 | void |
||
1047 | CodeEmitterNV50::emitIMUL(const Instruction *i) |
||
1048 | { |
||
1049 | code[0] = 0x40000000; |
||
1050 | |||
1051 | if (i->encSize == 8) { |
||
1052 | code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000; |
||
1053 | emitForm_MAD(i); |
||
1054 | } else { |
||
1055 | if (i->sType == TYPE_S16) |
||
1056 | code[0] |= 0x8100; |
||
1057 | emitForm_MUL(i); |
||
1058 | } |
||
1059 | } |
||
1060 | |||
1061 | void |
||
1062 | CodeEmitterNV50::emitFMUL(const Instruction *i) |
||
1063 | { |
||
1064 | const int neg = (i->src(0).mod ^ i->src(1).mod).neg(); |
||
1065 | |||
1066 | code[0] = 0xc0000000; |
||
1067 | |||
1068 | if (i->src(1).getFile() == FILE_IMMEDIATE) { |
||
1069 | code[1] = 0; |
||
1070 | emitForm_IMM(i); |
||
1071 | if (neg) |
||
1072 | code[0] |= 0x8000; |
||
1073 | if (i->saturate) |
||
1074 | code[0] |= 1 << 8; |
||
1075 | } else |
||
1076 | if (i->encSize == 8) { |
||
1077 | code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0; |
||
1078 | if (neg) |
||
1079 | code[1] |= 0x08000000; |
||
1080 | if (i->saturate) |
||
1081 | code[1] |= 1 << 20; |
||
1082 | emitForm_MAD(i); |
||
1083 | } else { |
||
1084 | emitForm_MUL(i); |
||
1085 | if (neg) |
||
1086 | code[0] |= 0x8000; |
||
1087 | if (i->saturate) |
||
1088 | code[0] |= 1 << 8; |
||
1089 | } |
||
1090 | } |
||
1091 | |||
1092 | void |
||
1093 | CodeEmitterNV50::emitIMAD(const Instruction *i) |
||
1094 | { |
||
1095 | code[0] = 0x60000000; |
||
1096 | if (isSignedType(i->sType)) |
||
1097 | code[1] = i->saturate ? 0x40000000 : 0x20000000; |
||
1098 | else |
||
1099 | code[1] = 0x00000000; |
||
1100 | |||
1101 | int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg(); |
||
1102 | int neg2 = i->src(2).mod.neg(); |
||
1103 | |||
1104 | assert(!(neg1 & neg2)); |
||
1105 | code[1] |= neg1 << 27; |
||
1106 | code[1] |= neg2 << 26; |
||
1107 | |||
1108 | emitForm_MAD(i); |
||
1109 | |||
1110 | if (i->flagsSrc >= 0) { |
||
1111 | // add with carry from $cX |
||
1112 | assert(!(code[1] & 0x0c000000) && !i->getPredicate()); |
||
1113 | code[1] |= 0xc << 24; |
||
1114 | srcId(i->src(i->flagsSrc), 32 + 12); |
||
1115 | } |
||
1116 | } |
||
1117 | |||
1118 | void |
||
1119 | CodeEmitterNV50::emitISAD(const Instruction *i) |
||
1120 | { |
||
1121 | if (i->encSize == 8) { |
||
1122 | code[0] = 0x50000000; |
||
1123 | switch (i->sType) { |
||
1124 | case TYPE_U32: code[1] = 0x04000000; break; |
||
1125 | case TYPE_S32: code[1] = 0x0c000000; break; |
||
1126 | case TYPE_U16: code[1] = 0x00000000; break; |
||
1127 | case TYPE_S16: code[1] = 0x08000000; break; |
||
1128 | default: |
||
1129 | assert(0); |
||
1130 | break; |
||
1131 | } |
||
1132 | emitForm_MAD(i); |
||
1133 | } else { |
||
1134 | switch (i->sType) { |
||
1135 | case TYPE_U32: code[0] = 0x50008000; break; |
||
1136 | case TYPE_S32: code[0] = 0x50008100; break; |
||
1137 | case TYPE_U16: code[0] = 0x50000000; break; |
||
1138 | case TYPE_S16: code[0] = 0x50000100; break; |
||
1139 | default: |
||
1140 | assert(0); |
||
1141 | break; |
||
1142 | } |
||
1143 | emitForm_MUL(i); |
||
1144 | } |
||
1145 | } |
||
1146 | |||
1147 | void |
||
1148 | CodeEmitterNV50::emitSET(const Instruction *i) |
||
1149 | { |
||
1150 | code[0] = 0x30000000; |
||
1151 | code[1] = 0x60000000; |
||
1152 | |||
1153 | emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14); |
||
1154 | |||
1155 | switch (i->sType) { |
||
1156 | case TYPE_F32: code[0] |= 0x80000000; break; |
||
1157 | case TYPE_S32: code[1] |= 0x0c000000; break; |
||
1158 | case TYPE_U32: code[1] |= 0x04000000; break; |
||
1159 | case TYPE_S16: code[1] |= 0x08000000; break; |
||
1160 | case TYPE_U16: break; |
||
1161 | default: |
||
1162 | assert(0); |
||
1163 | break; |
||
1164 | } |
||
1165 | if (i->src(0).mod.neg()) code[1] |= 0x04000000; |
||
1166 | if (i->src(1).mod.neg()) code[1] |= 0x08000000; |
||
1167 | if (i->src(0).mod.abs()) code[1] |= 0x00100000; |
||
1168 | if (i->src(1).mod.abs()) code[1] |= 0x00080000; |
||
1169 | |||
1170 | emitForm_MAD(i); |
||
1171 | } |
||
1172 | |||
1173 | void |
||
1174 | CodeEmitterNV50::roundMode_CVT(RoundMode rnd) |
||
1175 | { |
||
1176 | switch (rnd) { |
||
1177 | case ROUND_NI: code[1] |= 0x08000000; break; |
||
1178 | case ROUND_M: code[1] |= 0x00020000; break; |
||
1179 | case ROUND_MI: code[1] |= 0x08020000; break; |
||
1180 | case ROUND_P: code[1] |= 0x00040000; break; |
||
1181 | case ROUND_PI: code[1] |= 0x08040000; break; |
||
1182 | case ROUND_Z: code[1] |= 0x00060000; break; |
||
1183 | case ROUND_ZI: code[1] |= 0x08060000; break; |
||
1184 | default: |
||
1185 | assert(rnd == ROUND_N); |
||
1186 | break; |
||
1187 | } |
||
1188 | } |
||
1189 | |||
1190 | void |
||
1191 | CodeEmitterNV50::emitCVT(const Instruction *i) |
||
1192 | { |
||
1193 | const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); |
||
1194 | RoundMode rnd; |
||
1195 | DataType dType; |
||
1196 | |||
1197 | switch (i->op) { |
||
1198 | case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; |
||
1199 | case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; |
||
1200 | case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; |
||
1201 | default: |
||
1202 | rnd = i->rnd; |
||
1203 | break; |
||
1204 | } |
||
1205 | |||
1206 | if (i->op == OP_NEG && i->dType == TYPE_U32) |
||
1207 | dType = TYPE_S32; |
||
1208 | else |
||
1209 | dType = i->dType; |
||
1210 | |||
1211 | code[0] = 0xa0000000; |
||
1212 | |||
1213 | switch (dType) { |
||
1214 | case TYPE_F64: |
||
1215 | switch (i->sType) { |
||
1216 | case TYPE_F64: code[1] = 0xc4404000; break; |
||
1217 | case TYPE_S64: code[1] = 0x44414000; break; |
||
1218 | case TYPE_U64: code[1] = 0x44404000; break; |
||
1219 | case TYPE_F32: code[1] = 0xc4400000; break; |
||
1220 | case TYPE_S32: code[1] = 0x44410000; break; |
||
1221 | case TYPE_U32: code[1] = 0x44400000; break; |
||
1222 | default: |
||
1223 | assert(0); |
||
1224 | break; |
||
1225 | } |
||
1226 | break; |
||
1227 | case TYPE_S64: |
||
1228 | switch (i->sType) { |
||
1229 | case TYPE_F64: code[1] = 0x8c404000; break; |
||
1230 | case TYPE_F32: code[1] = 0x8c400000; break; |
||
1231 | default: |
||
1232 | assert(0); |
||
1233 | break; |
||
1234 | } |
||
1235 | break; |
||
1236 | case TYPE_U64: |
||
1237 | switch (i->sType) { |
||
1238 | case TYPE_F64: code[1] = 0x84404000; break; |
||
1239 | case TYPE_F32: code[1] = 0x84400000; break; |
||
1240 | default: |
||
1241 | assert(0); |
||
1242 | break; |
||
1243 | } |
||
1244 | break; |
||
1245 | case TYPE_F32: |
||
1246 | switch (i->sType) { |
||
1247 | case TYPE_F64: code[1] = 0xc0404000; break; |
||
1248 | case TYPE_S64: code[1] = 0x40414000; break; |
||
1249 | case TYPE_U64: code[1] = 0x40404000; break; |
||
1250 | case TYPE_F32: code[1] = 0xc4004000; break; |
||
1251 | case TYPE_S32: code[1] = 0x44014000; break; |
||
1252 | case TYPE_U32: code[1] = 0x44004000; break; |
||
1253 | case TYPE_F16: code[1] = 0xc4000000; break; |
||
1254 | case TYPE_U16: code[1] = 0x44000000; break; |
||
1255 | default: |
||
1256 | assert(0); |
||
1257 | break; |
||
1258 | } |
||
1259 | break; |
||
1260 | case TYPE_S32: |
||
1261 | switch (i->sType) { |
||
1262 | case TYPE_F64: code[1] = 0x88404000; break; |
||
1263 | case TYPE_F32: code[1] = 0x8c004000; break; |
||
1264 | case TYPE_S32: code[1] = 0x0c014000; break; |
||
1265 | case TYPE_U32: code[1] = 0x0c004000; break; |
||
1266 | case TYPE_F16: code[1] = 0x8c000000; break; |
||
1267 | case TYPE_S16: code[1] = 0x0c010000; break; |
||
1268 | case TYPE_U16: code[1] = 0x0c000000; break; |
||
1269 | case TYPE_S8: code[1] = 0x0c018000; break; |
||
1270 | case TYPE_U8: code[1] = 0x0c008000; break; |
||
1271 | default: |
||
1272 | assert(0); |
||
1273 | break; |
||
1274 | } |
||
1275 | break; |
||
1276 | case TYPE_U32: |
||
1277 | switch (i->sType) { |
||
1278 | case TYPE_F64: code[1] = 0x80404000; break; |
||
1279 | case TYPE_F32: code[1] = 0x84004000; break; |
||
1280 | case TYPE_S32: code[1] = 0x04014000; break; |
||
1281 | case TYPE_U32: code[1] = 0x04004000; break; |
||
1282 | case TYPE_F16: code[1] = 0x84000000; break; |
||
1283 | case TYPE_S16: code[1] = 0x04010000; break; |
||
1284 | case TYPE_U16: code[1] = 0x04000000; break; |
||
1285 | case TYPE_S8: code[1] = 0x04018000; break; |
||
1286 | case TYPE_U8: code[1] = 0x04008000; break; |
||
1287 | default: |
||
1288 | assert(0); |
||
1289 | break; |
||
1290 | } |
||
1291 | break; |
||
1292 | case TYPE_S16: |
||
1293 | case TYPE_U16: |
||
1294 | case TYPE_S8: |
||
1295 | case TYPE_U8: |
||
1296 | default: |
||
1297 | assert(0); |
||
1298 | break; |
||
1299 | } |
||
1300 | if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4) |
||
1301 | code[1] |= 0x00004000; |
||
1302 | |||
1303 | roundMode_CVT(rnd); |
||
1304 | |||
1305 | switch (i->op) { |
||
1306 | case OP_ABS: code[1] |= 1 << 20; break; |
||
1307 | case OP_SAT: code[1] |= 1 << 19; break; |
||
1308 | case OP_NEG: code[1] |= 1 << 29; break; |
||
1309 | default: |
||
1310 | break; |
||
1311 | } |
||
1312 | code[1] ^= i->src(0).mod.neg() << 29; |
||
1313 | code[1] |= i->src(0).mod.abs() << 20; |
||
1314 | if (i->saturate) |
||
1315 | code[1] |= 1 << 19; |
||
1316 | |||
1317 | assert(i->op != OP_ABS || !i->src(0).mod.neg()); |
||
1318 | |||
1319 | emitForm_MAD(i); |
||
1320 | } |
||
1321 | |||
1322 | void |
||
1323 | CodeEmitterNV50::emitPreOp(const Instruction *i) |
||
1324 | { |
||
1325 | code[0] = 0xb0000000; |
||
1326 | code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000; |
||
1327 | |||
1328 | code[1] |= i->src(0).mod.abs() << 20; |
||
1329 | code[1] |= i->src(0).mod.neg() << 26; |
||
1330 | |||
1331 | emitForm_MAD(i); |
||
1332 | } |
||
1333 | |||
1334 | void |
||
1335 | CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp) |
||
1336 | { |
||
1337 | code[0] = 0x90000000; |
||
1338 | |||
1339 | if (i->encSize == 4) { |
||
1340 | assert(i->op == OP_RCP); |
||
1341 | code[0] |= i->src(0).mod.abs() << 15; |
||
1342 | code[0] |= i->src(0).mod.neg() << 22; |
||
1343 | emitForm_MUL(i); |
||
1344 | } else { |
||
1345 | code[1] = subOp << 29; |
||
1346 | code[1] |= i->src(0).mod.abs() << 20; |
||
1347 | code[1] |= i->src(0).mod.neg() << 26; |
||
1348 | emitForm_MAD(i); |
||
1349 | } |
||
1350 | } |
||
1351 | |||
1352 | void |
||
1353 | CodeEmitterNV50::emitNOT(const Instruction *i) |
||
1354 | { |
||
1355 | code[0] = 0xd0000000; |
||
1356 | code[1] = 0x0002c000; |
||
1357 | |||
1358 | switch (i->sType) { |
||
1359 | case TYPE_U32: |
||
1360 | case TYPE_S32: |
||
1361 | code[1] |= 0x04000000; |
||
1362 | break; |
||
1363 | default: |
||
1364 | break; |
||
1365 | } |
||
1366 | emitForm_MAD(i); |
||
1367 | setSrc(i, 0, 1); |
||
1368 | } |
||
1369 | |||
1370 | void |
||
1371 | CodeEmitterNV50::emitLogicOp(const Instruction *i) |
||
1372 | { |
||
1373 | code[0] = 0xd0000000; |
||
1374 | code[1] = 0; |
||
1375 | |||
1376 | if (i->src(1).getFile() == FILE_IMMEDIATE) { |
||
1377 | switch (i->op) { |
||
1378 | case OP_OR: code[0] |= 0x0100; break; |
||
1379 | case OP_XOR: code[0] |= 0x8000; break; |
||
1380 | default: |
||
1381 | assert(i->op == OP_AND); |
||
1382 | break; |
||
1383 | } |
||
1384 | if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) |
||
1385 | code[0] |= 1 << 22; |
||
1386 | |||
1387 | emitForm_IMM(i); |
||
1388 | } else { |
||
1389 | switch (i->op) { |
||
1390 | case OP_AND: code[1] = 0x04000000; break; |
||
1391 | case OP_OR: code[1] = 0x04004000; break; |
||
1392 | case OP_XOR: code[1] = 0x04008000; break; |
||
1393 | default: |
||
1394 | assert(0); |
||
1395 | break; |
||
1396 | } |
||
1397 | if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) |
||
1398 | code[1] |= 1 << 16; |
||
1399 | if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) |
||
1400 | code[1] |= 1 << 17; |
||
1401 | |||
1402 | emitForm_MAD(i); |
||
1403 | } |
||
1404 | } |
||
1405 | |||
1406 | void |
||
1407 | CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl) |
||
1408 | { |
||
1409 | code[0] = 0x00000001 | (shl << 16); |
||
1410 | code[1] = 0xc0000000; |
||
1411 | |||
1412 | code[0] |= (DDATA(i->def(0)).id + 1) << 2; |
||
1413 | |||
1414 | setSrcFileBits(i, NV50_OP_ENC_IMM); |
||
1415 | setSrc(i, 0, 0); |
||
1416 | emitFlagsRd(i); |
||
1417 | } |
||
1418 | |||
1419 | void |
||
1420 | CodeEmitterNV50::emitShift(const Instruction *i) |
||
1421 | { |
||
1422 | if (i->def(0).getFile() == FILE_ADDRESS) { |
||
1423 | assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE); |
||
1424 | emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f); |
||
1425 | } else { |
||
1426 | code[0] = 0x30000001; |
||
1427 | code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000; |
||
1428 | if (i->op == OP_SHR && isSignedType(i->sType)) |
||
1429 | code[1] |= 1 << 27; |
||
1430 | |||
1431 | if (i->src(1).getFile() == FILE_IMMEDIATE) { |
||
1432 | code[1] |= 1 << 20; |
||
1433 | code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16; |
||
1434 | defId(i->def(0), 2); |
||
1435 | srcId(i->src(0), 9); |
||
1436 | emitFlagsRd(i); |
||
1437 | } else { |
||
1438 | emitForm_MAD(i); |
||
1439 | } |
||
1440 | } |
||
1441 | } |
||
1442 | |||
1443 | void |
||
1444 | CodeEmitterNV50::emitOUT(const Instruction *i) |
||
1445 | { |
||
1446 | code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401; |
||
1447 | code[1] = 0xc0000000; |
||
1448 | |||
1449 | emitFlagsRd(i); |
||
1450 | } |
||
1451 | |||
1452 | void |
||
1453 | CodeEmitterNV50::emitTEX(const TexInstruction *i) |
||
1454 | { |
||
1455 | code[0] = 0xf0000001; |
||
1456 | code[1] = 0x00000000; |
||
1457 | |||
1458 | switch (i->op) { |
||
1459 | case OP_TXB: |
||
1460 | code[1] = 0x20000000; |
||
1461 | break; |
||
1462 | case OP_TXL: |
||
1463 | code[1] = 0x40000000; |
||
1464 | break; |
||
1465 | case OP_TXF: |
||
1466 | code[0] |= 0x01000000; |
||
1467 | break; |
||
1468 | case OP_TXG: |
||
1469 | code[0] |= 0x01000000; |
||
1470 | code[1] = 0x80000000; |
||
1471 | break; |
||
1472 | case OP_TXLQ: |
||
1473 | code[1] = 0x60020000; |
||
1474 | break; |
||
1475 | default: |
||
1476 | assert(i->op == OP_TEX); |
||
1477 | break; |
||
1478 | } |
||
1479 | |||
1480 | code[0] |= i->tex.r << 9; |
||
1481 | code[0] |= i->tex.s << 17; |
||
1482 | |||
1483 | int argc = i->tex.target.getArgCount(); |
||
1484 | |||
1485 | if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF) |
||
1486 | argc += 1; |
||
1487 | if (i->tex.target.isShadow()) |
||
1488 | argc += 1; |
||
1489 | assert(argc <= 4); |
||
1490 | |||
1491 | code[0] |= (argc - 1) << 22; |
||
1492 | |||
1493 | if (i->tex.target.isCube()) { |
||
1494 | code[0] |= 0x08000000; |
||
1495 | } else |
||
1496 | if (i->tex.useOffsets) { |
||
1497 | code[1] |= (i->tex.offset[0] & 0xf) << 24; |
||
1498 | code[1] |= (i->tex.offset[1] & 0xf) << 20; |
||
1499 | code[1] |= (i->tex.offset[2] & 0xf) << 16; |
||
1500 | } |
||
1501 | |||
1502 | code[0] |= (i->tex.mask & 0x3) << 25; |
||
1503 | code[1] |= (i->tex.mask & 0xc) << 12; |
||
1504 | |||
1505 | if (i->tex.liveOnly) |
||
1506 | code[1] |= 4; |
||
1507 | |||
1508 | defId(i->def(0), 2); |
||
1509 | |||
1510 | emitFlagsRd(i); |
||
1511 | } |
||
1512 | |||
1513 | void |
||
1514 | CodeEmitterNV50::emitTXQ(const TexInstruction *i) |
||
1515 | { |
||
1516 | assert(i->tex.query == TXQ_DIMS); |
||
1517 | |||
1518 | code[0] = 0xf0000001; |
||
1519 | code[1] = 0x60000000; |
||
1520 | |||
1521 | code[0] |= i->tex.r << 9; |
||
1522 | code[0] |= i->tex.s << 17; |
||
1523 | |||
1524 | code[0] |= (i->tex.mask & 0x3) << 25; |
||
1525 | code[1] |= (i->tex.mask & 0xc) << 12; |
||
1526 | |||
1527 | defId(i->def(0), 2); |
||
1528 | |||
1529 | emitFlagsRd(i); |
||
1530 | } |
||
1531 | |||
1532 | void |
||
1533 | CodeEmitterNV50::emitTEXPREP(const TexInstruction *i) |
||
1534 | { |
||
1535 | code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9); |
||
1536 | code[1] = 0x60010000; |
||
1537 | |||
1538 | code[0] |= (i->tex.mask & 0x3) << 25; |
||
1539 | code[1] |= (i->tex.mask & 0xc) << 12; |
||
1540 | defId(i->def(0), 2); |
||
1541 | |||
1542 | emitFlagsRd(i); |
||
1543 | } |
||
1544 | |||
1545 | void |
||
1546 | CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i) |
||
1547 | { |
||
1548 | uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */ |
||
1549 | |||
1550 | code[0] = 0x10000003; // bra |
||
1551 | code[1] = 0x00000780; // always |
||
1552 | |||
1553 | switch (i->subOp) { |
||
1554 | case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call |
||
1555 | break; |
||
1556 | case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call |
||
1557 | pos += 8; |
||
1558 | break; |
||
1559 | default: |
||
1560 | assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2)); |
||
1561 | code[0] = 0x20000003; // call |
||
1562 | code[1] = 0x00000000; // no predicate |
||
1563 | break; |
||
1564 | } |
||
1565 | addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9); |
||
1566 | addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4); |
||
1567 | } |
||
1568 | |||
1569 | void |
||
1570 | CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp) |
||
1571 | { |
||
1572 | const FlowInstruction *f = i->asFlow(); |
||
1573 | bool hasPred = false; |
||
1574 | bool hasTarg = false; |
||
1575 | |||
1576 | code[0] = 0x00000003 | (flowOp << 28); |
||
1577 | code[1] = 0x00000000; |
||
1578 | |||
1579 | switch (i->op) { |
||
1580 | case OP_BRA: |
||
1581 | hasPred = true; |
||
1582 | hasTarg = true; |
||
1583 | break; |
||
1584 | case OP_BREAK: |
||
1585 | case OP_BRKPT: |
||
1586 | case OP_DISCARD: |
||
1587 | case OP_RET: |
||
1588 | hasPred = true; |
||
1589 | break; |
||
1590 | case OP_CALL: |
||
1591 | case OP_PREBREAK: |
||
1592 | case OP_JOINAT: |
||
1593 | hasTarg = true; |
||
1594 | break; |
||
1595 | case OP_PRERET: |
||
1596 | hasTarg = true; |
||
1597 | if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) { |
||
1598 | emitPRERETEmu(f); |
||
1599 | return; |
||
1600 | } |
||
1601 | break; |
||
1602 | default: |
||
1603 | break; |
||
1604 | } |
||
1605 | |||
1606 | if (hasPred) |
||
1607 | emitFlagsRd(i); |
||
1608 | |||
1609 | if (hasTarg && f) { |
||
1610 | uint32_t pos; |
||
1611 | |||
1612 | if (f->op == OP_CALL) { |
||
1613 | if (f->builtin) { |
||
1614 | pos = targNV50->getBuiltinOffset(f->target.builtin); |
||
1615 | } else { |
||
1616 | pos = f->target.fn->binPos; |
||
1617 | } |
||
1618 | } else { |
||
1619 | pos = f->target.bb->binPos; |
||
1620 | } |
||
1621 | |||
1622 | code[0] |= ((pos >> 2) & 0xffff) << 11; |
||
1623 | code[1] |= ((pos >> 18) & 0x003f) << 14; |
||
1624 | |||
1625 | RelocEntry::Type relocTy; |
||
1626 | |||
1627 | relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE; |
||
1628 | |||
1629 | addReloc(relocTy, 0, pos, 0x07fff800, 9); |
||
1630 | addReloc(relocTy, 1, pos, 0x000fc000, -4); |
||
1631 | } |
||
1632 | } |
||
1633 | |||
1634 | void |
||
1635 | CodeEmitterNV50::emitBAR(const Instruction *i) |
||
1636 | { |
||
1637 | ImmediateValue *barId = i->getSrc(0)->asImm(); |
||
1638 | assert(barId); |
||
1639 | |||
1640 | code[0] = 0x82000003 | (barId->reg.data.u32 << 21); |
||
1641 | code[1] = 0x00004000; |
||
1642 | |||
1643 | if (i->subOp == NV50_IR_SUBOP_BAR_SYNC) |
||
1644 | code[0] |= 1 << 26; |
||
1645 | } |
||
1646 | |||
1647 | void |
||
1648 | CodeEmitterNV50::emitATOM(const Instruction *i) |
||
1649 | { |
||
1650 | uint8_t subOp; |
||
1651 | switch (i->subOp) { |
||
1652 | case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break; |
||
1653 | case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break; |
||
1654 | case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break; |
||
1655 | case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break; |
||
1656 | case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break; |
||
1657 | case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break; |
||
1658 | case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break; |
||
1659 | case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break; |
||
1660 | case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break; |
||
1661 | case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break; |
||
1662 | default: |
||
1663 | assert(!"invalid subop"); |
||
1664 | return; |
||
1665 | } |
||
1666 | code[0] = 0xd0000001; |
||
1667 | code[1] = 0xe0c00000 | (subOp << 2); |
||
1668 | if (isSignedType(i->dType)) |
||
1669 | code[1] |= 1 << 21; |
||
1670 | |||
1671 | // args |
||
1672 | emitFlagsRd(i); |
||
1673 | setDst(i, 0); |
||
1674 | setSrc(i, 1, 1); |
||
1675 | if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) |
||
1676 | setSrc(i, 2, 2); |
||
1677 | |||
1678 | // g[] pointer |
||
1679 | code[0] |= i->getSrc(0)->reg.fileIndex << 23; |
||
1680 | srcId(i->getIndirect(0, 0), 9); |
||
1681 | } |
||
1682 | |||
1683 | bool |
||
1684 | CodeEmitterNV50::emitInstruction(Instruction *insn) |
||
1685 | { |
||
1686 | if (!insn->encSize) { |
||
1687 | ERROR("skipping unencodable instruction: "); insn->print(); |
||
1688 | return false; |
||
1689 | } else |
||
1690 | if (codeSize + insn->encSize > codeSizeLimit) { |
||
1691 | ERROR("code emitter output buffer too small\n"); |
||
1692 | return false; |
||
1693 | } |
||
1694 | |||
1695 | if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) { |
||
1696 | INFO("EMIT: "); insn->print(); |
||
1697 | } |
||
1698 | |||
1699 | switch (insn->op) { |
||
1700 | case OP_MOV: |
||
1701 | emitMOV(insn); |
||
1702 | break; |
||
1703 | case OP_EXIT: |
||
1704 | case OP_NOP: |
||
1705 | case OP_JOIN: |
||
1706 | emitNOP(); |
||
1707 | break; |
||
1708 | case OP_VFETCH: |
||
1709 | case OP_LOAD: |
||
1710 | emitLOAD(insn); |
||
1711 | break; |
||
1712 | case OP_EXPORT: |
||
1713 | case OP_STORE: |
||
1714 | emitSTORE(insn); |
||
1715 | break; |
||
1716 | case OP_PFETCH: |
||
1717 | emitPFETCH(insn); |
||
1718 | break; |
||
1719 | case OP_RDSV: |
||
1720 | emitRDSV(insn); |
||
1721 | break; |
||
1722 | case OP_LINTERP: |
||
1723 | case OP_PINTERP: |
||
1724 | emitINTERP(insn); |
||
1725 | break; |
||
1726 | case OP_ADD: |
||
1727 | case OP_SUB: |
||
1728 | if (isFloatType(insn->dType)) |
||
1729 | emitFADD(insn); |
||
1730 | else if (insn->getDef(0)->reg.file == FILE_ADDRESS) |
||
1731 | emitAADD(insn); |
||
1732 | else |
||
1733 | emitUADD(insn); |
||
1734 | break; |
||
1735 | case OP_MUL: |
||
1736 | if (isFloatType(insn->dType)) |
||
1737 | emitFMUL(insn); |
||
1738 | else |
||
1739 | emitIMUL(insn); |
||
1740 | break; |
||
1741 | case OP_MAD: |
||
1742 | case OP_FMA: |
||
1743 | if (isFloatType(insn->dType)) |
||
1744 | emitFMAD(insn); |
||
1745 | else |
||
1746 | emitIMAD(insn); |
||
1747 | break; |
||
1748 | case OP_SAD: |
||
1749 | emitISAD(insn); |
||
1750 | break; |
||
1751 | case OP_NOT: |
||
1752 | emitNOT(insn); |
||
1753 | break; |
||
1754 | case OP_AND: |
||
1755 | case OP_OR: |
||
1756 | case OP_XOR: |
||
1757 | emitLogicOp(insn); |
||
1758 | break; |
||
1759 | case OP_SHL: |
||
1760 | case OP_SHR: |
||
1761 | emitShift(insn); |
||
1762 | break; |
||
1763 | case OP_SET: |
||
1764 | emitSET(insn); |
||
1765 | break; |
||
1766 | case OP_MIN: |
||
1767 | case OP_MAX: |
||
1768 | emitMINMAX(insn); |
||
1769 | break; |
||
1770 | case OP_CEIL: |
||
1771 | case OP_FLOOR: |
||
1772 | case OP_TRUNC: |
||
1773 | case OP_ABS: |
||
1774 | case OP_NEG: |
||
1775 | case OP_SAT: |
||
1776 | emitCVT(insn); |
||
1777 | break; |
||
1778 | case OP_CVT: |
||
1779 | if (insn->def(0).getFile() == FILE_ADDRESS) |
||
1780 | emitARL(insn, 0); |
||
1781 | else |
||
1782 | if (insn->def(0).getFile() == FILE_FLAGS || |
||
1783 | insn->src(0).getFile() == FILE_FLAGS || |
||
1784 | insn->src(0).getFile() == FILE_ADDRESS) |
||
1785 | emitMOV(insn); |
||
1786 | else |
||
1787 | emitCVT(insn); |
||
1788 | break; |
||
1789 | case OP_RCP: |
||
1790 | emitSFnOp(insn, 0); |
||
1791 | break; |
||
1792 | case OP_RSQ: |
||
1793 | emitSFnOp(insn, 2); |
||
1794 | break; |
||
1795 | case OP_LG2: |
||
1796 | emitSFnOp(insn, 3); |
||
1797 | break; |
||
1798 | case OP_SIN: |
||
1799 | emitSFnOp(insn, 4); |
||
1800 | break; |
||
1801 | case OP_COS: |
||
1802 | emitSFnOp(insn, 5); |
||
1803 | break; |
||
1804 | case OP_EX2: |
||
1805 | emitSFnOp(insn, 6); |
||
1806 | break; |
||
1807 | case OP_PRESIN: |
||
1808 | case OP_PREEX2: |
||
1809 | emitPreOp(insn); |
||
1810 | break; |
||
1811 | case OP_TEX: |
||
1812 | case OP_TXB: |
||
1813 | case OP_TXL: |
||
1814 | case OP_TXF: |
||
1815 | case OP_TXG: |
||
1816 | case OP_TXLQ: |
||
1817 | emitTEX(insn->asTex()); |
||
1818 | break; |
||
1819 | case OP_TXQ: |
||
1820 | emitTXQ(insn->asTex()); |
||
1821 | break; |
||
1822 | case OP_TEXPREP: |
||
1823 | emitTEXPREP(insn->asTex()); |
||
1824 | break; |
||
1825 | case OP_EMIT: |
||
1826 | case OP_RESTART: |
||
1827 | emitOUT(insn); |
||
1828 | break; |
||
1829 | case OP_DISCARD: |
||
1830 | emitFlow(insn, 0x0); |
||
1831 | break; |
||
1832 | case OP_BRA: |
||
1833 | emitFlow(insn, 0x1); |
||
1834 | break; |
||
1835 | case OP_CALL: |
||
1836 | emitFlow(insn, 0x2); |
||
1837 | break; |
||
1838 | case OP_RET: |
||
1839 | emitFlow(insn, 0x3); |
||
1840 | break; |
||
1841 | case OP_PREBREAK: |
||
1842 | emitFlow(insn, 0x4); |
||
1843 | break; |
||
1844 | case OP_BREAK: |
||
1845 | emitFlow(insn, 0x5); |
||
1846 | break; |
||
1847 | case OP_QUADON: |
||
1848 | emitFlow(insn, 0x6); |
||
1849 | break; |
||
1850 | case OP_QUADPOP: |
||
1851 | emitFlow(insn, 0x7); |
||
1852 | break; |
||
1853 | case OP_JOINAT: |
||
1854 | emitFlow(insn, 0xa); |
||
1855 | break; |
||
1856 | case OP_PRERET: |
||
1857 | emitFlow(insn, 0xd); |
||
1858 | break; |
||
1859 | case OP_QUADOP: |
||
1860 | emitQUADOP(insn, insn->lanes, insn->subOp); |
||
1861 | break; |
||
1862 | case OP_DFDX: |
||
1863 | emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99); |
||
1864 | break; |
||
1865 | case OP_DFDY: |
||
1866 | emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5); |
||
1867 | break; |
||
1868 | case OP_ATOM: |
||
1869 | emitATOM(insn); |
||
1870 | break; |
||
1871 | case OP_BAR: |
||
1872 | emitBAR(insn); |
||
1873 | break; |
||
1874 | case OP_PHI: |
||
1875 | case OP_UNION: |
||
1876 | case OP_CONSTRAINT: |
||
1877 | ERROR("operation should have been eliminated\n"); |
||
1878 | return false; |
||
1879 | case OP_EXP: |
||
1880 | case OP_LOG: |
||
1881 | case OP_SQRT: |
||
1882 | case OP_POW: |
||
1883 | case OP_SELP: |
||
1884 | case OP_SLCT: |
||
1885 | case OP_TXD: |
||
1886 | case OP_PRECONT: |
||
1887 | case OP_CONT: |
||
1888 | case OP_POPCNT: |
||
1889 | case OP_INSBF: |
||
1890 | case OP_EXTBF: |
||
1891 | ERROR("operation should have been lowered\n"); |
||
1892 | return false; |
||
1893 | default: |
||
1894 | ERROR("unknown op: %u\n", insn->op); |
||
1895 | return false; |
||
1896 | } |
||
1897 | if (insn->join || insn->op == OP_JOIN) |
||
1898 | code[1] |= 0x2; |
||
1899 | else |
||
1900 | if (insn->exit || insn->op == OP_EXIT) |
||
1901 | code[1] |= 0x1; |
||
1902 | |||
1903 | assert((insn->encSize == 8) == (code[0] & 1)); |
||
1904 | |||
1905 | code += insn->encSize / 4; |
||
1906 | codeSize += insn->encSize; |
||
1907 | return true; |
||
1908 | } |
||
1909 | |||
1910 | uint32_t |
||
1911 | CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const |
||
1912 | { |
||
1913 | const Target::OpInfo &info = targ->getOpInfo(i); |
||
1914 | |||
1915 | if (info.minEncSize > 4) |
||
1916 | return 8; |
||
1917 | |||
1918 | // check constraints on dst and src operands |
||
1919 | for (int d = 0; i->defExists(d); ++d) { |
||
1920 | if (i->def(d).rep()->reg.data.id > 63 || |
||
1921 | i->def(d).rep()->reg.file != FILE_GPR) |
||
1922 | return 8; |
||
1923 | } |
||
1924 | |||
1925 | for (int s = 0; i->srcExists(s); ++s) { |
||
1926 | DataFile sf = i->src(s).getFile(); |
||
1927 | if (sf != FILE_GPR) |
||
1928 | if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT) |
||
1929 | return 8; |
||
1930 | if (i->src(s).rep()->reg.data.id > 63) |
||
1931 | return 8; |
||
1932 | } |
||
1933 | |||
1934 | // check modifiers & rounding |
||
1935 | if (i->join || i->lanes != 0xf || i->exit) |
||
1936 | return 8; |
||
1937 | if (i->op == OP_MUL && i->rnd != ROUND_N) |
||
1938 | return 8; |
||
1939 | |||
1940 | if (i->asTex()) |
||
1941 | return 8; // TODO: short tex encoding |
||
1942 | |||
1943 | // check constraints on short MAD |
||
1944 | if (info.srcNr >= 2 && i->srcExists(2)) { |
||
1945 | if (!i->defExists(0) || !isFloatType(i->dType) || |
||
1946 | i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) |
||
1947 | return 8; |
||
1948 | } |
||
1949 | |||
1950 | return info.minEncSize; |
||
1951 | } |
||
1952 | |||
1953 | // Change the encoding size of an instruction after BBs have been scheduled. |
||
1954 | static void |
||
1955 | makeInstructionLong(Instruction *insn) |
||
1956 | { |
||
1957 | if (insn->encSize == 8) |
||
1958 | return; |
||
1959 | Function *fn = insn->bb->getFunction(); |
||
1960 | int n = 0; |
||
1961 | int adj = 4; |
||
1962 | |||
1963 | for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next); |
||
1964 | |||
1965 | if (n & 1) { |
||
1966 | adj = 8; |
||
1967 | insn->next->encSize = 8; |
||
1968 | } else |
||
1969 | if (insn->prev && insn->prev->encSize == 4) { |
||
1970 | adj = 8; |
||
1971 | insn->prev->encSize = 8; |
||
1972 | } |
||
1973 | insn->encSize = 8; |
||
1974 | |||
1975 | for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) { |
||
1976 | fn->bbArray[i]->binPos += 4; |
||
1977 | } |
||
1978 | fn->binSize += adj; |
||
1979 | insn->bb->binSize += adj; |
||
1980 | } |
||
1981 | |||
1982 | static bool |
||
1983 | trySetExitModifier(Instruction *insn) |
||
1984 | { |
||
1985 | if (insn->op == OP_DISCARD || |
||
1986 | insn->op == OP_QUADON || |
||
1987 | insn->op == OP_QUADPOP) |
||
1988 | return false; |
||
1989 | for (int s = 0; insn->srcExists(s); ++s) |
||
1990 | if (insn->src(s).getFile() == FILE_IMMEDIATE) |
||
1991 | return false; |
||
1992 | if (insn->asFlow()) { |
||
1993 | if (insn->op == OP_CALL) // side effects ! |
||
1994 | return false; |
||
1995 | if (insn->getPredicate()) // cannot do conditional exit (or can we ?) |
||
1996 | return false; |
||
1997 | insn->op = OP_EXIT; |
||
1998 | } |
||
1999 | insn->exit = 1; |
||
2000 | makeInstructionLong(insn); |
||
2001 | return true; |
||
2002 | } |
||
2003 | |||
2004 | static void |
||
2005 | replaceExitWithModifier(Function *func) |
||
2006 | { |
||
2007 | BasicBlock *epilogue = BasicBlock::get(func->cfgExit); |
||
2008 | |||
2009 | if (!epilogue->getExit() || |
||
2010 | epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT |
||
2011 | return; |
||
2012 | |||
2013 | if (epilogue->getEntry()->op != OP_EXIT) { |
||
2014 | Instruction *insn = epilogue->getExit()->prev; |
||
2015 | if (!insn || !trySetExitModifier(insn)) |
||
2016 | return; |
||
2017 | insn->exit = 1; |
||
2018 | } else { |
||
2019 | for (Graph::EdgeIterator ei = func->cfgExit->incident(); |
||
2020 | !ei.end(); ei.next()) { |
||
2021 | BasicBlock *bb = BasicBlock::get(ei.getNode()); |
||
2022 | Instruction *i = bb->getExit(); |
||
2023 | |||
2024 | if (!i || !trySetExitModifier(i)) |
||
2025 | return; |
||
2026 | } |
||
2027 | } |
||
2028 | epilogue->binSize -= 8; |
||
2029 | func->binSize -= 8; |
||
2030 | delete_Instruction(func->getProgram(), epilogue->getExit()); |
||
2031 | } |
||
2032 | |||
2033 | void |
||
2034 | CodeEmitterNV50::prepareEmission(Function *func) |
||
2035 | { |
||
2036 | CodeEmitter::prepareEmission(func); |
||
2037 | |||
2038 | replaceExitWithModifier(func); |
||
2039 | } |
||
2040 | |||
2041 | CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) : |
||
2042 | CodeEmitter(target), targNV50(target) |
||
2043 | { |
||
2044 | targ = target; // specialized |
||
2045 | code = NULL; |
||
2046 | codeSize = codeSizeLimit = 0; |
||
2047 | relocInfo = NULL; |
||
2048 | } |
||
2049 | |||
2050 | CodeEmitter * |
||
2051 | TargetNV50::getCodeEmitter(Program::Type type) |
||
2052 | { |
||
2053 | CodeEmitterNV50 *emit = new CodeEmitterNV50(this); |
||
2054 | emit->setProgramType(type); |
||
2055 | return emit; |
||
2056 | } |
||
2057 | |||
2058 | } // namespace nv50_ir |