Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Mesa 3-D graphics library
3
 *
4
 * Copyright (C) 2012-2013 LunarG, Inc.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included
14
 * in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
 * DEALINGS IN THE SOFTWARE.
23
 *
24
 * Authors:
25
 *    Chia-I Wu 
26
 */
27
 
28
#include "tgsi/tgsi_parse.h"
29
#include "tgsi/tgsi_info.h"
30
#include "tgsi/tgsi_strings.h"
31
#include "util/u_hash_table.h"
32
#include "toy_helpers.h"
33
#include "toy_tgsi.h"
34
 
35
/* map TGSI opcode to GEN opcode 1-to-1 */
36
static const struct {
37
   int opcode;
38
   int num_dst;
39
   int num_src;
40
} aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
41
   [TGSI_OPCODE_ARL]          = { GEN6_OPCODE_RNDD,                1, 1 },
42
   [TGSI_OPCODE_MOV]          = { GEN6_OPCODE_MOV,                 1, 1 },
43
   [TGSI_OPCODE_RCP]          = { TOY_OPCODE_INV,                 1, 1 },
44
   [TGSI_OPCODE_RSQ]          = { TOY_OPCODE_RSQ,                 1, 1 },
45
   [TGSI_OPCODE_MUL]          = { GEN6_OPCODE_MUL,                 1, 2 },
46
   [TGSI_OPCODE_ADD]          = { GEN6_OPCODE_ADD,                 1, 2 },
47
   [TGSI_OPCODE_DP3]          = { GEN6_OPCODE_DP3,                 1, 2 },
48
   [TGSI_OPCODE_DP4]          = { GEN6_OPCODE_DP4,                 1, 2 },
49
   [TGSI_OPCODE_MIN]          = { GEN6_OPCODE_SEL,                 1, 2 },
50
   [TGSI_OPCODE_MAX]          = { GEN6_OPCODE_SEL,                 1, 2 },
51
   /* a later pass will move src[2] to accumulator */
52
   [TGSI_OPCODE_MAD]          = { GEN6_OPCODE_MAC,                 1, 3 },
53
   [TGSI_OPCODE_SUB]          = { GEN6_OPCODE_ADD,                 1, 2 },
54
   [TGSI_OPCODE_SQRT]         = { TOY_OPCODE_SQRT,                1, 1 },
55
   [TGSI_OPCODE_FRC]          = { GEN6_OPCODE_FRC,                 1, 1 },
56
   [TGSI_OPCODE_FLR]          = { GEN6_OPCODE_RNDD,                1, 1 },
57
   [TGSI_OPCODE_ROUND]        = { GEN6_OPCODE_RNDE,                1, 1 },
58
   [TGSI_OPCODE_EX2]          = { TOY_OPCODE_EXP,                 1, 1 },
59
   [TGSI_OPCODE_LG2]          = { TOY_OPCODE_LOG,                 1, 1 },
60
   [TGSI_OPCODE_POW]          = { TOY_OPCODE_POW,                 1, 2 },
61
   [TGSI_OPCODE_ABS]          = { GEN6_OPCODE_MOV,                 1, 1 },
62
   [TGSI_OPCODE_DPH]          = { GEN6_OPCODE_DPH,                 1, 2 },
63
   [TGSI_OPCODE_COS]          = { TOY_OPCODE_COS,                 1, 1 },
64
   [TGSI_OPCODE_KILL]         = { TOY_OPCODE_KIL,                 0, 0 },
65
   [TGSI_OPCODE_SIN]          = { TOY_OPCODE_SIN,                 1, 1 },
66
   [TGSI_OPCODE_ARR]          = { GEN6_OPCODE_RNDZ,                1, 1 },
67
   [TGSI_OPCODE_DP2]          = { GEN6_OPCODE_DP2,                 1, 2 },
68
   [TGSI_OPCODE_IF]           = { GEN6_OPCODE_IF,                  0, 1 },
69
   [TGSI_OPCODE_UIF]          = { GEN6_OPCODE_IF,                  0, 1 },
70
   [TGSI_OPCODE_ELSE]         = { GEN6_OPCODE_ELSE,                0, 0 },
71
   [TGSI_OPCODE_ENDIF]        = { GEN6_OPCODE_ENDIF,               0, 0 },
72
   [TGSI_OPCODE_I2F]          = { GEN6_OPCODE_MOV,                 1, 1 },
73
   [TGSI_OPCODE_NOT]          = { GEN6_OPCODE_NOT,                 1, 1 },
74
   [TGSI_OPCODE_TRUNC]        = { GEN6_OPCODE_RNDZ,                1, 1 },
75
   [TGSI_OPCODE_SHL]          = { GEN6_OPCODE_SHL,                 1, 2 },
76
   [TGSI_OPCODE_AND]          = { GEN6_OPCODE_AND,                 1, 2 },
77
   [TGSI_OPCODE_OR]           = { GEN6_OPCODE_OR,                  1, 2 },
78
   [TGSI_OPCODE_MOD]          = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
79
   [TGSI_OPCODE_XOR]          = { GEN6_OPCODE_XOR,                 1, 2 },
80
   [TGSI_OPCODE_EMIT]         = { TOY_OPCODE_EMIT,                0, 0 },
81
   [TGSI_OPCODE_ENDPRIM]      = { TOY_OPCODE_ENDPRIM,             0, 0 },
82
   [TGSI_OPCODE_NOP]          = { GEN6_OPCODE_NOP,                 0, 0 },
83
   [TGSI_OPCODE_KILL_IF]      = { TOY_OPCODE_KIL,                 0, 1 },
84
   [TGSI_OPCODE_END]          = { GEN6_OPCODE_NOP,                 0, 0 },
85
   [TGSI_OPCODE_F2I]          = { GEN6_OPCODE_MOV,                 1, 1 },
86
   [TGSI_OPCODE_IDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
87
   [TGSI_OPCODE_IMAX]         = { GEN6_OPCODE_SEL,                 1, 2 },
88
   [TGSI_OPCODE_IMIN]         = { GEN6_OPCODE_SEL,                 1, 2 },
89
   [TGSI_OPCODE_INEG]         = { GEN6_OPCODE_MOV,                 1, 1 },
90
   [TGSI_OPCODE_ISHR]         = { GEN6_OPCODE_ASR,                 1, 2 },
91
   [TGSI_OPCODE_F2U]          = { GEN6_OPCODE_MOV,                 1, 1 },
92
   [TGSI_OPCODE_U2F]          = { GEN6_OPCODE_MOV,                 1, 1 },
93
   [TGSI_OPCODE_UADD]         = { GEN6_OPCODE_ADD,                 1, 2 },
94
   [TGSI_OPCODE_UDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
95
   /* a later pass will move src[2] to accumulator */
96
   [TGSI_OPCODE_UMAD]         = { GEN6_OPCODE_MAC,                 1, 3 },
97
   [TGSI_OPCODE_UMAX]         = { GEN6_OPCODE_SEL,                 1, 2 },
98
   [TGSI_OPCODE_UMIN]         = { GEN6_OPCODE_SEL,                 1, 2 },
99
   [TGSI_OPCODE_UMOD]         = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
100
   [TGSI_OPCODE_UMUL]         = { GEN6_OPCODE_MUL,                 1, 2 },
101
   [TGSI_OPCODE_USHR]         = { GEN6_OPCODE_SHR,                 1, 2 },
102
   [TGSI_OPCODE_UARL]         = { GEN6_OPCODE_MOV,                 1, 1 },
103
   [TGSI_OPCODE_IABS]         = { GEN6_OPCODE_MOV,                 1, 1 },
104
};
105
 
106
static void
107
aos_simple(struct toy_compiler *tc,
108
           const struct tgsi_full_instruction *tgsi_inst,
109
           struct toy_dst *dst,
110
           struct toy_src *src)
111
{
112
   struct toy_inst *inst;
113
   int opcode;
114
   int cond_modifier = GEN6_COND_NONE;
115
   int num_dst = tgsi_inst->Instruction.NumDstRegs;
116
   int num_src = tgsi_inst->Instruction.NumSrcRegs;
117
   int i;
118
 
119
   opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
120
   assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst);
121
   assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src);
122
   if (!opcode) {
123
      assert(!"invalid aos_simple() call");
124
      return;
125
   }
126
 
127
   /* no need to emit nop */
128
   if (opcode == GEN6_OPCODE_NOP)
129
      return;
130
 
131
   inst = tc_add(tc);
132
   if (!inst)
133
      return;
134
 
135
   inst->opcode = opcode;
136
 
137
   switch (tgsi_inst->Instruction.Opcode) {
138
   case TGSI_OPCODE_MIN:
139
   case TGSI_OPCODE_IMIN:
140
   case TGSI_OPCODE_UMIN:
141
      cond_modifier = GEN6_COND_L;
142
      break;
143
   case TGSI_OPCODE_MAX:
144
   case TGSI_OPCODE_IMAX:
145
   case TGSI_OPCODE_UMAX:
146
      cond_modifier = GEN6_COND_GE;
147
      break;
148
   case TGSI_OPCODE_SUB:
149
      src[1] = tsrc_negate(src[1]);
150
      break;
151
   case TGSI_OPCODE_ABS:
152
   case TGSI_OPCODE_IABS:
153
      src[0] = tsrc_absolute(src[0]);
154
      break;
155
   case TGSI_OPCODE_IF:
156
      cond_modifier = GEN6_COND_NZ;
157
      num_src = 2;
158
      assert(src[0].type == TOY_TYPE_F);
159
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
160
      src[1] = tsrc_imm_f(0.0f);
161
      break;
162
   case TGSI_OPCODE_UIF:
163
      cond_modifier = GEN6_COND_NZ;
164
      num_src = 2;
165
      assert(src[0].type == TOY_TYPE_UD);
166
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
167
      src[1] = tsrc_imm_d(0);
168
      break;
169
   case TGSI_OPCODE_INEG:
170
      src[0] = tsrc_negate(src[0]);
171
      break;
172
   case TGSI_OPCODE_RCP:
173
   case TGSI_OPCODE_RSQ:
174
   case TGSI_OPCODE_EX2:
175
   case TGSI_OPCODE_LG2:
176
   case TGSI_OPCODE_COS:
177
   case TGSI_OPCODE_SIN:
178
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
179
      break;
180
   case TGSI_OPCODE_POW:
181
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
182
      src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
183
      break;
184
   }
185
 
186
   inst->cond_modifier = cond_modifier;
187
 
188
   if (num_dst) {
189
      assert(num_dst == 1);
190
      inst->dst = dst[0];
191
   }
192
 
193
   assert(num_src <= Elements(inst->src));
194
   for (i = 0; i < num_src; i++)
195
      inst->src[i] = src[i];
196
}
197
 
198
static void
199
aos_set_on_cond(struct toy_compiler *tc,
200
                const struct tgsi_full_instruction *tgsi_inst,
201
                struct toy_dst *dst,
202
                struct toy_src *src)
203
{
204
   struct toy_inst *inst;
205
   int cond;
206
   struct toy_src zero, one;
207
 
208
   switch (tgsi_inst->Instruction.Opcode) {
209
   case TGSI_OPCODE_SLT:
210
   case TGSI_OPCODE_ISLT:
211
   case TGSI_OPCODE_USLT:
212
   case TGSI_OPCODE_FSLT:
213
      cond = GEN6_COND_L;
214
      break;
215
   case TGSI_OPCODE_SGE:
216
   case TGSI_OPCODE_ISGE:
217
   case TGSI_OPCODE_USGE:
218
   case TGSI_OPCODE_FSGE:
219
      cond = GEN6_COND_GE;
220
      break;
221
   case TGSI_OPCODE_SEQ:
222
   case TGSI_OPCODE_USEQ:
223
   case TGSI_OPCODE_FSEQ:
224
      cond = GEN6_COND_Z;
225
      break;
226
   case TGSI_OPCODE_SGT:
227
      cond = GEN6_COND_G;
228
      break;
229
   case TGSI_OPCODE_SLE:
230
      cond = GEN6_COND_LE;
231
      break;
232
   case TGSI_OPCODE_SNE:
233
   case TGSI_OPCODE_USNE:
234
   case TGSI_OPCODE_FSNE:
235
      cond = GEN6_COND_NZ;
236
      break;
237
   default:
238
      assert(!"invalid aos_set_on_cond() call");
239
      return;
240
   }
241
 
242
   /* note that for integer versions, all bits are set */
243
   switch (dst[0].type) {
244
   case TOY_TYPE_F:
245
   default:
246
      zero = tsrc_imm_f(0.0f);
247
      one = tsrc_imm_f(1.0f);
248
      break;
249
   case TOY_TYPE_D:
250
      zero = tsrc_imm_d(0);
251
      one = tsrc_imm_d(-1);
252
      break;
253
   case TOY_TYPE_UD:
254
      zero = tsrc_imm_ud(0);
255
      one = tsrc_imm_ud(~0);
256
      break;
257
   }
258
 
259
   tc_MOV(tc, dst[0], zero);
260
   tc_CMP(tc, tdst_null(), src[0], src[1], cond);
261
   inst = tc_MOV(tc, dst[0], one);
262
   inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
263
}
264
 
265
static void
266
aos_compare(struct toy_compiler *tc,
267
            const struct tgsi_full_instruction *tgsi_inst,
268
            struct toy_dst *dst,
269
            struct toy_src *src)
270
{
271
   struct toy_inst *inst;
272
   struct toy_src zero;
273
 
274
   switch (tgsi_inst->Instruction.Opcode) {
275
   case TGSI_OPCODE_CMP:
276
      zero = tsrc_imm_f(0.0f);
277
      break;
278
   case TGSI_OPCODE_UCMP:
279
      zero = tsrc_imm_ud(0);
280
      break;
281
   default:
282
      assert(!"invalid aos_compare() call");
283
      return;
284
   }
285
 
286
   tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
287
   inst = tc_SEL(tc, dst[0], src[1], src[2], GEN6_COND_NONE);
288
   inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
289
}
290
 
291
static void
292
aos_set_sign(struct toy_compiler *tc,
293
             const struct tgsi_full_instruction *tgsi_inst,
294
             struct toy_dst *dst,
295
             struct toy_src *src)
296
{
297
   struct toy_inst *inst;
298
   struct toy_src zero, one, neg_one;
299
 
300
   switch (tgsi_inst->Instruction.Opcode) {
301
   case TGSI_OPCODE_SSG:
302
      zero = tsrc_imm_f(0.0f);
303
      one = tsrc_imm_f(1.0f);
304
      neg_one = tsrc_imm_f(-1.0f);
305
      break;
306
   case TGSI_OPCODE_ISSG:
307
      zero = tsrc_imm_d(0);
308
      one = tsrc_imm_d(1);
309
      neg_one = tsrc_imm_d(-1);
310
      break;
311
   default:
312
      assert(!"invalid aos_set_sign() call");
313
      return;
314
   }
315
 
316
   tc_MOV(tc, dst[0], zero);
317
 
318
   tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_G);
319
   inst = tc_MOV(tc, dst[0], one);
320
   inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
321
 
322
   tc_CMP(tc, tdst_null(), src[0], zero, GEN6_COND_L);
323
   inst = tc_MOV(tc, dst[0], neg_one);
324
   inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
325
}
326
 
327
static void
328
aos_tex(struct toy_compiler *tc,
329
        const struct tgsi_full_instruction *tgsi_inst,
330
        struct toy_dst *dst,
331
        struct toy_src *src)
332
{
333
   struct toy_inst *inst;
334
   enum toy_opcode opcode;
335
   int i;
336
 
337
   switch (tgsi_inst->Instruction.Opcode) {
338
   case TGSI_OPCODE_TEX:
339
      opcode = TOY_OPCODE_TGSI_TEX;
340
      break;
341
   case TGSI_OPCODE_TXD:
342
      opcode = TOY_OPCODE_TGSI_TXD;
343
      break;
344
   case TGSI_OPCODE_TXP:
345
      opcode = TOY_OPCODE_TGSI_TXP;
346
      break;
347
   case TGSI_OPCODE_TXB:
348
      opcode = TOY_OPCODE_TGSI_TXB;
349
      break;
350
   case TGSI_OPCODE_TXL:
351
      opcode = TOY_OPCODE_TGSI_TXL;
352
      break;
353
   case TGSI_OPCODE_TXF:
354
      opcode = TOY_OPCODE_TGSI_TXF;
355
      break;
356
   case TGSI_OPCODE_TXQ:
357
      opcode = TOY_OPCODE_TGSI_TXQ;
358
      break;
359
   case TGSI_OPCODE_TXQ_LZ:
360
      opcode = TOY_OPCODE_TGSI_TXQ_LZ;
361
      break;
362
   case TGSI_OPCODE_TEX2:
363
      opcode = TOY_OPCODE_TGSI_TEX2;
364
      break;
365
   case TGSI_OPCODE_TXB2:
366
      opcode = TOY_OPCODE_TGSI_TXB2;
367
      break;
368
   case TGSI_OPCODE_TXL2:
369
      opcode = TOY_OPCODE_TGSI_TXL2;
370
      break;
371
   default:
372
      assert(!"unsupported texturing opcode");
373
      return;
374
      break;
375
   }
376
 
377
   assert(tgsi_inst->Instruction.Texture);
378
 
379
   inst = tc_add(tc);
380
   inst->opcode = opcode;
381
   inst->tex.target = tgsi_inst->Texture.Texture;
382
 
383
   assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
384
   assert(tgsi_inst->Instruction.NumDstRegs == 1);
385
 
386
   inst->dst = dst[0];
387
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
388
      inst->src[i] = src[i];
389
 
390
   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++)
391
      tc_fail(tc, "texelFetchOffset unsupported");
392
}
393
 
394
static void
395
aos_sample(struct toy_compiler *tc,
396
           const struct tgsi_full_instruction *tgsi_inst,
397
           struct toy_dst *dst,
398
           struct toy_src *src)
399
{
400
   struct toy_inst *inst;
401
   enum toy_opcode opcode;
402
   int i;
403
 
404
   assert(!"sampling untested");
405
 
406
   switch (tgsi_inst->Instruction.Opcode) {
407
   case TGSI_OPCODE_SAMPLE:
408
      opcode = TOY_OPCODE_TGSI_SAMPLE;
409
      break;
410
   case TGSI_OPCODE_SAMPLE_I:
411
      opcode = TOY_OPCODE_TGSI_SAMPLE_I;
412
      break;
413
   case TGSI_OPCODE_SAMPLE_I_MS:
414
      opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
415
      break;
416
   case TGSI_OPCODE_SAMPLE_B:
417
      opcode = TOY_OPCODE_TGSI_SAMPLE_B;
418
      break;
419
   case TGSI_OPCODE_SAMPLE_C:
420
      opcode = TOY_OPCODE_TGSI_SAMPLE_C;
421
      break;
422
   case TGSI_OPCODE_SAMPLE_C_LZ:
423
      opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
424
      break;
425
   case TGSI_OPCODE_SAMPLE_D:
426
      opcode = TOY_OPCODE_TGSI_SAMPLE_D;
427
      break;
428
   case TGSI_OPCODE_SAMPLE_L:
429
      opcode = TOY_OPCODE_TGSI_SAMPLE_L;
430
      break;
431
   case TGSI_OPCODE_GATHER4:
432
      opcode = TOY_OPCODE_TGSI_GATHER4;
433
      break;
434
   case TGSI_OPCODE_SVIEWINFO:
435
      opcode = TOY_OPCODE_TGSI_SVIEWINFO;
436
      break;
437
   case TGSI_OPCODE_SAMPLE_POS:
438
      opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
439
      break;
440
   case TGSI_OPCODE_SAMPLE_INFO:
441
      opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
442
      break;
443
   default:
444
      assert(!"unsupported sampling opcode");
445
      return;
446
      break;
447
   }
448
 
449
   inst = tc_add(tc);
450
   inst->opcode = opcode;
451
 
452
   assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
453
   assert(tgsi_inst->Instruction.NumDstRegs == 1);
454
 
455
   inst->dst = dst[0];
456
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
457
      inst->src[i] = src[i];
458
}
459
 
460
static void
461
aos_LIT(struct toy_compiler *tc,
462
        const struct tgsi_full_instruction *tgsi_inst,
463
        struct toy_dst *dst,
464
        struct toy_src *src)
465
{
466
   struct toy_inst *inst;
467
 
468
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
469
 
470
   if (!(dst[0].writemask & TOY_WRITEMASK_YZ))
471
      return;
472
 
473
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
474
 
475
   tc_CMP(tc, tdst_null(),
476
         tsrc_swizzle1(src[0], TOY_SWIZZLE_X),
477
         tsrc_imm_f(0.0f),
478
         GEN6_COND_G);
479
 
480
   inst = tc_MOV(tc,
481
         tdst_writemask(dst[0], TOY_WRITEMASK_Y),
482
         tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
483
   inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
484
 
485
   /* clamp W to (-128, 128)? */
486
   inst = tc_POW(tc,
487
         tdst_writemask(dst[0], TOY_WRITEMASK_Z),
488
         tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
489
         tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
490
   inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
491
}
492
 
493
static void
494
aos_EXP(struct toy_compiler *tc,
495
        const struct tgsi_full_instruction *tgsi_inst,
496
        struct toy_dst *dst,
497
        struct toy_src *src)
498
{
499
   struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
500
 
501
   if (dst[0].writemask & TOY_WRITEMASK_X) {
502
      struct toy_dst tmp =
503
         tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
504
 
505
      tc_RNDD(tc, tmp, src0);
506
 
507
      /* construct the floating point number manually */
508
      tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
509
      tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
510
            tsrc_from(tmp), tsrc_imm_d(23));
511
   }
512
 
513
   tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0);
514
   tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
515
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
516
}
517
 
518
static void
519
aos_LOG(struct toy_compiler *tc,
520
        const struct tgsi_full_instruction *tgsi_inst,
521
        struct toy_dst *dst,
522
        struct toy_src *src)
523
{
524
   struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
525
 
526
   if (dst[0].writemask & TOY_WRITEMASK_XY) {
527
      struct toy_dst tmp;
528
 
529
      tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
530
 
531
      /* exponent */
532
      tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23));
533
      tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
534
            tsrc_from(tmp), tsrc_imm_d(-127));
535
 
536
      /* mantissa  */
537
      tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1));
538
      tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
539
            tsrc_from(tmp), tsrc_imm_d(127 << 23));
540
   }
541
 
542
   tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
543
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
544
}
545
 
546
static void
547
aos_DST(struct toy_compiler *tc,
548
        const struct tgsi_full_instruction *tgsi_inst,
549
        struct toy_dst *dst,
550
        struct toy_src *src)
551
{
552
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f));
553
   tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]);
554
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]);
555
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]);
556
}
557
 
558
static void
559
aos_LRP(struct toy_compiler *tc,
560
        const struct tgsi_full_instruction *tgsi_inst,
561
        struct toy_dst *dst,
562
        struct toy_src *src)
563
{
564
   struct toy_dst tmp = tc_alloc_tmp(tc);
565
 
566
   tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f));
567
   tc_MUL(tc, tmp, tsrc_from(tmp), src[2]);
568
   tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp));
569
}
570
 
571
static void
572
aos_DP2A(struct toy_compiler *tc,
573
         const struct tgsi_full_instruction *tgsi_inst,
574
         struct toy_dst *dst,
575
         struct toy_src *src)
576
{
577
   struct toy_dst tmp = tc_alloc_tmp(tc);
578
 
579
   assert(!"DP2A untested");
580
 
581
   tc_DP2(tc, tmp, src[0], src[1]);
582
   tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]);
583
}
584
 
585
static void
586
aos_CLAMP(struct toy_compiler *tc,
587
          const struct tgsi_full_instruction *tgsi_inst,
588
          struct toy_dst *dst,
589
          struct toy_src *src)
590
{
591
   assert(!"CLAMP untested");
592
 
593
   tc_SEL(tc, dst[0], src[0], src[1], GEN6_COND_GE);
594
   tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), GEN6_COND_L);
595
}
596
 
597
static void
598
aos_XPD(struct toy_compiler *tc,
599
        const struct tgsi_full_instruction *tgsi_inst,
600
        struct toy_dst *dst,
601
        struct toy_src *src)
602
{
603
   struct toy_dst tmp = tc_alloc_tmp(tc);
604
 
605
   tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ),
606
         tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
607
                              TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
608
         tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
609
                              TOY_SWIZZLE_X, TOY_SWIZZLE_W));
610
 
611
   tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
612
         tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
613
                              TOY_SWIZZLE_X, TOY_SWIZZLE_W),
614
         tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
615
                              TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
616
         tsrc_negate(tsrc_from(tmp)));
617
 
618
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W),
619
         tsrc_imm_f(1.0f));
620
}
621
 
622
static void
623
aos_PK2H(struct toy_compiler *tc,
624
         const struct tgsi_full_instruction *tgsi_inst,
625
         struct toy_dst *dst,
626
         struct toy_src *src)
627
{
628
   const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
629
   const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
630
   struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
631
 
632
   assert(!"PK2H untested");
633
 
634
   tc_SHL(tc, tmp, h2, tsrc_imm_ud(16));
635
   tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp));
636
}
637
 
638
static void
639
aos_UP2H(struct toy_compiler *tc,
640
         const struct tgsi_full_instruction *tgsi_inst,
641
         struct toy_dst *dst,
642
         struct toy_src *src)
643
{
644
   assert(!"UP2H untested");
645
 
646
   tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ),
647
         tsrc_ud(src[0]), tsrc_imm_ud(0xffff));
648
   tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW),
649
         tsrc_ud(src[0]), tsrc_imm_ud(16));
650
}
651
 
652
static void
653
aos_SCS(struct toy_compiler *tc,
654
        const struct tgsi_full_instruction *tgsi_inst,
655
        struct toy_dst *dst,
656
        struct toy_src *src)
657
{
658
   assert(!"SCS untested");
659
 
660
   tc_add1(tc, TOY_OPCODE_COS,
661
         tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]);
662
 
663
   tc_add1(tc, TOY_OPCODE_SIN,
664
         tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]);
665
 
666
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
667
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
668
}
669
 
670
static void
671
aos_DIV(struct toy_compiler *tc,
672
        const struct tgsi_full_instruction *tgsi_inst,
673
        struct toy_dst *dst,
674
        struct toy_src *src)
675
{
676
   struct toy_dst tmp = tc_alloc_tmp(tc);
677
 
678
   assert(!"DIV untested");
679
 
680
   tc_INV(tc, tmp, src[1]);
681
   tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
682
}
683
 
684
static void
685
aos_BRK(struct toy_compiler *tc,
686
        const struct tgsi_full_instruction *tgsi_inst,
687
        struct toy_dst *dst,
688
        struct toy_src *src)
689
{
690
   tc_add0(tc, GEN6_OPCODE_BREAK);
691
}
692
 
693
static void
694
aos_CEIL(struct toy_compiler *tc,
695
         const struct tgsi_full_instruction *tgsi_inst,
696
         struct toy_dst *dst,
697
         struct toy_src *src)
698
{
699
   struct toy_dst tmp = tc_alloc_tmp(tc);
700
 
701
   tc_RNDD(tc, tmp, tsrc_negate(src[0]));
702
   tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp)));
703
}
704
 
705
static void
706
aos_SAD(struct toy_compiler *tc,
707
        const struct tgsi_full_instruction *tgsi_inst,
708
        struct toy_dst *dst,
709
        struct toy_src *src)
710
{
711
   struct toy_dst tmp = tc_alloc_tmp(tc);
712
 
713
   assert(!"SAD untested");
714
 
715
   tc_ADD(tc, tmp, src[0], tsrc_negate(src[1]));
716
   tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]);
717
}
718
 
719
static void
720
aos_CONT(struct toy_compiler *tc,
721
         const struct tgsi_full_instruction *tgsi_inst,
722
         struct toy_dst *dst,
723
         struct toy_src *src)
724
{
725
   tc_add0(tc, GEN6_OPCODE_CONT);
726
}
727
 
728
static void
729
aos_BGNLOOP(struct toy_compiler *tc,
730
            const struct tgsi_full_instruction *tgsi_inst,
731
            struct toy_dst *dst,
732
            struct toy_src *src)
733
{
734
   struct toy_inst *inst;
735
 
736
   inst = tc_add0(tc, TOY_OPCODE_DO);
737
   /* this is just a marker */
738
   inst->marker = true;
739
}
740
 
741
static void
742
aos_ENDLOOP(struct toy_compiler *tc,
743
            const struct tgsi_full_instruction *tgsi_inst,
744
            struct toy_dst *dst,
745
            struct toy_src *src)
746
{
747
   tc_add0(tc, GEN6_OPCODE_WHILE);
748
}
749
 
750
static void
751
aos_unsupported(struct toy_compiler *tc,
752
                const struct tgsi_full_instruction *tgsi_inst,
753
                struct toy_dst *dst,
754
                struct toy_src *src)
755
{
756
   const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode);
757
 
758
   ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name);
759
 
760
   tc_fail(tc, "unsupported TGSI instruction");
761
}
762
 
763
/*
 * Per-opcode translation handlers, indexed by TGSI opcode.
 *
 * Opcodes without a translation are routed explicitly to
 * aos_unsupported().  Any opcode not listed at all is left as a NULL
 * entry.
 */
static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL]          = aos_simple,
   [TGSI_OPCODE_MOV]          = aos_simple,
   [TGSI_OPCODE_LIT]          = aos_LIT,
   [TGSI_OPCODE_RCP]          = aos_simple,
   [TGSI_OPCODE_RSQ]          = aos_simple,
   [TGSI_OPCODE_EXP]          = aos_EXP,
   [TGSI_OPCODE_LOG]          = aos_LOG,
   [TGSI_OPCODE_MUL]          = aos_simple,
   [TGSI_OPCODE_ADD]          = aos_simple,
   [TGSI_OPCODE_DP3]          = aos_simple,
   [TGSI_OPCODE_DP4]          = aos_simple,
   [TGSI_OPCODE_DST]          = aos_DST,
   [TGSI_OPCODE_MIN]          = aos_simple,
   [TGSI_OPCODE_MAX]          = aos_simple,
   [TGSI_OPCODE_SLT]          = aos_set_on_cond,
   [TGSI_OPCODE_SGE]          = aos_set_on_cond,
   [TGSI_OPCODE_MAD]          = aos_simple,
   [TGSI_OPCODE_SUB]          = aos_simple,
   [TGSI_OPCODE_LRP]          = aos_LRP,
   [TGSI_OPCODE_SQRT]         = aos_simple,
   [TGSI_OPCODE_DP2A]         = aos_DP2A,
   [TGSI_OPCODE_FRC]          = aos_simple,
   [TGSI_OPCODE_CLAMP]        = aos_CLAMP,
   [TGSI_OPCODE_FLR]          = aos_simple,
   [TGSI_OPCODE_ROUND]        = aos_simple,
   [TGSI_OPCODE_EX2]          = aos_simple,
   [TGSI_OPCODE_LG2]          = aos_simple,
   [TGSI_OPCODE_POW]          = aos_simple,
   [TGSI_OPCODE_XPD]          = aos_XPD,
   [TGSI_OPCODE_ABS]          = aos_simple,
   [TGSI_OPCODE_DPH]          = aos_simple,
   [TGSI_OPCODE_COS]          = aos_simple,
   [TGSI_OPCODE_DDX]          = aos_unsupported,
   [TGSI_OPCODE_DDY]          = aos_unsupported,
   [TGSI_OPCODE_KILL]         = aos_simple,
   [TGSI_OPCODE_PK2H]         = aos_PK2H,
   [TGSI_OPCODE_PK2US]        = aos_unsupported,
   [TGSI_OPCODE_PK4B]         = aos_unsupported,
   [TGSI_OPCODE_PK4UB]        = aos_unsupported,
   [TGSI_OPCODE_SEQ]          = aos_set_on_cond,
   [TGSI_OPCODE_SGT]          = aos_set_on_cond,
   [TGSI_OPCODE_SIN]          = aos_simple,
   [TGSI_OPCODE_SLE]          = aos_set_on_cond,
   [TGSI_OPCODE_SNE]          = aos_set_on_cond,
   [TGSI_OPCODE_TEX]          = aos_tex,
   [TGSI_OPCODE_TXD]          = aos_tex,
   [TGSI_OPCODE_TXP]          = aos_tex,
   [TGSI_OPCODE_UP2H]         = aos_UP2H,
   [TGSI_OPCODE_UP2US]        = aos_unsupported,
   [TGSI_OPCODE_UP4B]         = aos_unsupported,
   [TGSI_OPCODE_UP4UB]        = aos_unsupported,
   [TGSI_OPCODE_ARR]          = aos_simple,
   [TGSI_OPCODE_CAL]          = aos_unsupported,
   [TGSI_OPCODE_RET]          = aos_unsupported,
   [TGSI_OPCODE_SSG]          = aos_set_sign,
   [TGSI_OPCODE_CMP]          = aos_compare,
   [TGSI_OPCODE_SCS]          = aos_SCS,
   [TGSI_OPCODE_TXB]          = aos_tex,
   [TGSI_OPCODE_DIV]          = aos_DIV,
   [TGSI_OPCODE_DP2]          = aos_simple,
   [TGSI_OPCODE_TXL]          = aos_tex,
   [TGSI_OPCODE_BRK]          = aos_BRK,
   [TGSI_OPCODE_IF]           = aos_simple,
   [TGSI_OPCODE_UIF]          = aos_simple,
   [TGSI_OPCODE_ELSE]         = aos_simple,
   [TGSI_OPCODE_ENDIF]        = aos_simple,
   [TGSI_OPCODE_PUSHA]        = aos_unsupported,
   [TGSI_OPCODE_POPA]         = aos_unsupported,
   [TGSI_OPCODE_CEIL]         = aos_CEIL,
   [TGSI_OPCODE_I2F]          = aos_simple,
   [TGSI_OPCODE_NOT]          = aos_simple,
   [TGSI_OPCODE_TRUNC]        = aos_simple,
   [TGSI_OPCODE_SHL]          = aos_simple,
   [TGSI_OPCODE_AND]          = aos_simple,
   [TGSI_OPCODE_OR]           = aos_simple,
   [TGSI_OPCODE_MOD]          = aos_simple,
   [TGSI_OPCODE_XOR]          = aos_simple,
   [TGSI_OPCODE_SAD]          = aos_SAD,
   [TGSI_OPCODE_TXF]          = aos_tex,
   [TGSI_OPCODE_TXQ]          = aos_tex,
   [TGSI_OPCODE_CONT]         = aos_CONT,
   [TGSI_OPCODE_EMIT]         = aos_simple,
   [TGSI_OPCODE_ENDPRIM]      = aos_simple,
   [TGSI_OPCODE_BGNLOOP]      = aos_BGNLOOP,
   [TGSI_OPCODE_BGNSUB]       = aos_unsupported,
   [TGSI_OPCODE_ENDLOOP]      = aos_ENDLOOP,
   [TGSI_OPCODE_ENDSUB]       = aos_unsupported,
   [TGSI_OPCODE_TXQ_LZ]       = aos_tex,
   [TGSI_OPCODE_NOP]          = aos_simple,
   [TGSI_OPCODE_FSEQ]         = aos_set_on_cond,
   [TGSI_OPCODE_FSGE]         = aos_set_on_cond,
   [TGSI_OPCODE_FSLT]         = aos_set_on_cond,
   [TGSI_OPCODE_FSNE]         = aos_set_on_cond,
   [TGSI_OPCODE_CALLNZ]       = aos_unsupported,
   [TGSI_OPCODE_BREAKC]       = aos_unsupported,
   [TGSI_OPCODE_KILL_IF]      = aos_simple,
   [TGSI_OPCODE_END]          = aos_simple,
   [TGSI_OPCODE_F2I]          = aos_simple,
   [TGSI_OPCODE_IDIV]         = aos_simple,
   [TGSI_OPCODE_IMAX]         = aos_simple,
   [TGSI_OPCODE_IMIN]         = aos_simple,
   [TGSI_OPCODE_INEG]         = aos_simple,
   [TGSI_OPCODE_ISGE]         = aos_set_on_cond,
   [TGSI_OPCODE_ISHR]         = aos_simple,
   [TGSI_OPCODE_ISLT]         = aos_set_on_cond,
   [TGSI_OPCODE_F2U]          = aos_simple,
   [TGSI_OPCODE_U2F]          = aos_simple,
   [TGSI_OPCODE_UADD]         = aos_simple,
   [TGSI_OPCODE_UDIV]         = aos_simple,
   [TGSI_OPCODE_UMAD]         = aos_simple,
   [TGSI_OPCODE_UMAX]         = aos_simple,
   [TGSI_OPCODE_UMIN]         = aos_simple,
   [TGSI_OPCODE_UMOD]         = aos_simple,
   [TGSI_OPCODE_UMUL]         = aos_simple,
   [TGSI_OPCODE_USEQ]         = aos_set_on_cond,
   [TGSI_OPCODE_USGE]         = aos_set_on_cond,
   [TGSI_OPCODE_USHR]         = aos_simple,
   [TGSI_OPCODE_USLT]         = aos_set_on_cond,
   [TGSI_OPCODE_USNE]         = aos_set_on_cond,
   [TGSI_OPCODE_SWITCH]       = aos_unsupported,
   [TGSI_OPCODE_CASE]         = aos_unsupported,
   [TGSI_OPCODE_DEFAULT]      = aos_unsupported,
   [TGSI_OPCODE_ENDSWITCH]    = aos_unsupported,
   [TGSI_OPCODE_SAMPLE]       = aos_sample,
   [TGSI_OPCODE_SAMPLE_I]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_I_MS]  = aos_sample,
   [TGSI_OPCODE_SAMPLE_B]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_C]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_C_LZ]  = aos_sample,
   [TGSI_OPCODE_SAMPLE_D]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_L]     = aos_sample,
   [TGSI_OPCODE_GATHER4]      = aos_sample,
   [TGSI_OPCODE_SVIEWINFO]    = aos_sample,
   [TGSI_OPCODE_SAMPLE_POS]   = aos_sample,
   [TGSI_OPCODE_SAMPLE_INFO]  = aos_sample,
   [TGSI_OPCODE_UARL]         = aos_simple,
   [TGSI_OPCODE_UCMP]         = aos_compare,
   [TGSI_OPCODE_IABS]         = aos_simple,
   [TGSI_OPCODE_ISSG]         = aos_set_sign,
   [TGSI_OPCODE_LOAD]         = aos_unsupported,
   [TGSI_OPCODE_STORE]        = aos_unsupported,
   [TGSI_OPCODE_MFENCE]       = aos_unsupported,
   [TGSI_OPCODE_LFENCE]       = aos_unsupported,
   [TGSI_OPCODE_SFENCE]       = aos_unsupported,
   [TGSI_OPCODE_BARRIER]      = aos_unsupported,
   [TGSI_OPCODE_ATOMUADD]     = aos_unsupported,
   [TGSI_OPCODE_ATOMXCHG]     = aos_unsupported,
   [TGSI_OPCODE_ATOMCAS]      = aos_unsupported,
   [TGSI_OPCODE_ATOMAND]      = aos_unsupported,
   [TGSI_OPCODE_ATOMOR]       = aos_unsupported,
   [TGSI_OPCODE_ATOMXOR]      = aos_unsupported,
   [TGSI_OPCODE_ATOMUMIN]     = aos_unsupported,
   [TGSI_OPCODE_ATOMUMAX]     = aos_unsupported,
   [TGSI_OPCODE_ATOMIMIN]     = aos_unsupported,
   [TGSI_OPCODE_ATOMIMAX]     = aos_unsupported,
   [TGSI_OPCODE_TEX2]         = aos_tex,
   [TGSI_OPCODE_TXB2]         = aos_tex,
   [TGSI_OPCODE_TXL2]         = aos_tex,
};
923
 
924
static void
925
soa_passthrough(struct toy_compiler *tc,
926
                const struct tgsi_full_instruction *tgsi_inst,
927
                struct toy_dst *dst_,
928
                struct toy_src *src_)
929
{
930
   const toy_tgsi_translate translate =
931
      aos_translate_table[tgsi_inst->Instruction.Opcode];
932
 
933
   translate(tc, tgsi_inst, dst_, src_);
934
}
935
 
936
static void
937
soa_per_channel(struct toy_compiler *tc,
938
                const struct tgsi_full_instruction *tgsi_inst,
939
                struct toy_dst *dst_,
940
                struct toy_src *src_)
941
{
942
   struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4];
943
   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
944
   int i, ch;
945
 
946
   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
947
      tdst_transpose(dst_[i], dst[i]);
948
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
949
      tsrc_transpose(src_[i], src[i]);
950
 
951
   /* emit the same instruction four times for the four channels */
952
   for (ch = 0; ch < 4; ch++) {
953
      struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS];
954
      struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS];
955
 
956
      for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
957
         aos_dst[i] = dst[i][ch];
958
      for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
959
         aos_src[i] = src[i][ch];
960
 
961
      aos_translate_table[tgsi_inst->Instruction.Opcode](tc,
962
            tgsi_inst, aos_dst, aos_src);
963
   }
964
}
965
 
966
static void
967
soa_scalar_replicate(struct toy_compiler *tc,
968
                     const struct tgsi_full_instruction *tgsi_inst,
969
                     struct toy_dst *dst_,
970
                     struct toy_src *src_)
971
{
972
   struct toy_dst dst0[4], tmp;
973
   struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS];
974
   int opcode, i;
975
 
976
   assert(tgsi_inst->Instruction.NumDstRegs == 1);
977
 
978
   tdst_transpose(dst_[0], dst0);
979
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
980
      struct toy_src tmp[4];
981
 
982
      tsrc_transpose(src_[i], tmp);
983
      /* only the X channels */
984
      srcx[i] = tmp[0];
985
   }
986
 
987
   tmp = tc_alloc_tmp(tc);
988
 
989
   opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
990
   assert(opcode);
991
 
992
   switch (tgsi_inst->Instruction.Opcode) {
993
   case TGSI_OPCODE_RCP:
994
   case TGSI_OPCODE_RSQ:
995
   case TGSI_OPCODE_SQRT:
996
   case TGSI_OPCODE_EX2:
997
   case TGSI_OPCODE_LG2:
998
   case TGSI_OPCODE_COS:
999
   case TGSI_OPCODE_SIN:
1000
      tc_add1(tc, opcode, tmp, srcx[0]);
1001
      break;
1002
   case TGSI_OPCODE_POW:
1003
      tc_add2(tc, opcode, tmp, srcx[0], srcx[1]);
1004
      break;
1005
   default:
1006
      assert(!"invalid soa_scalar_replicate() call");
1007
      return;
1008
   }
1009
 
1010
   /* replicate the result */
1011
   for (i = 0; i < 4; i++)
1012
      tc_MOV(tc, dst0[i], tsrc_from(tmp));
1013
}
1014
 
1015
static void
1016
soa_dot_product(struct toy_compiler *tc,
1017
                const struct tgsi_full_instruction *tgsi_inst,
1018
                struct toy_dst *dst_,
1019
                struct toy_src *src_)
1020
{
1021
   struct toy_dst dst0[4], tmp;
1022
   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
1023
   int i;
1024
 
1025
   tdst_transpose(dst_[0], dst0);
1026
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
1027
      tsrc_transpose(src_[i], src[i]);
1028
 
1029
   tmp = tc_alloc_tmp(tc);
1030
 
1031
   switch (tgsi_inst->Instruction.Opcode) {
1032
   case TGSI_OPCODE_DP2:
1033
      tc_MUL(tc, tmp, src[0][1], src[1][1]);
1034
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1035
      break;
1036
   case TGSI_OPCODE_DP2A:
1037
      tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]);
1038
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1039
      break;
1040
   case TGSI_OPCODE_DP3:
1041
      tc_MUL(tc, tmp, src[0][2], src[1][2]);
1042
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1043
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1044
      break;
1045
   case TGSI_OPCODE_DPH:
1046
      tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]);
1047
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1048
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1049
      break;
1050
   case TGSI_OPCODE_DP4:
1051
      tc_MUL(tc, tmp, src[0][3], src[1][3]);
1052
      tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp));
1053
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
1054
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
1055
      break;
1056
   default:
1057
      assert(!"invalid soa_dot_product() call");
1058
      return;
1059
   }
1060
 
1061
   for (i = 0; i < 4; i++)
1062
      tc_MOV(tc, dst0[i], tsrc_from(tmp));
1063
}
1064
 
1065
static void
1066
soa_partial_derivative(struct toy_compiler *tc,
1067
                       const struct tgsi_full_instruction *tgsi_inst,
1068
                       struct toy_dst *dst_,
1069
                       struct toy_src *src_)
1070
{
1071
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX)
1072
      tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]);
1073
   else
1074
      tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]);
1075
}
1076
 
1077
static void
1078
soa_if(struct toy_compiler *tc,
1079
       const struct tgsi_full_instruction *tgsi_inst,
1080
       struct toy_dst *dst_,
1081
       struct toy_src *src_)
1082
{
1083
   struct toy_src src0[4];
1084
 
1085
   assert(tsrc_is_swizzle1(src_[0]));
1086
   tsrc_transpose(src_[0], src0);
1087
 
1088
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF)
1089
      tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_NZ);
1090
   else
1091
      tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), GEN6_COND_NZ);
1092
}
1093
 
1094
static void
1095
soa_LIT(struct toy_compiler *tc,
1096
        const struct tgsi_full_instruction *tgsi_inst,
1097
        struct toy_dst *dst_,
1098
        struct toy_src *src_)
1099
{
1100
   struct toy_inst *inst;
1101
   struct toy_dst dst0[4];
1102
   struct toy_src src0[4];
1103
 
1104
   tdst_transpose(dst_[0], dst0);
1105
   tsrc_transpose(src_[0], src0);
1106
 
1107
   tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1108
   tc_MOV(tc, dst0[1], src0[0]);
1109
   tc_POW(tc, dst0[2], src0[1], src0[3]);
1110
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1111
 
1112
   /*
1113
    * POW is calculated first because math with pred_ctrl is broken here.
1114
    * But, why?
1115
    */
1116
   tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), GEN6_COND_L);
1117
   inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f));
1118
   inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
1119
   inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1120
   inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
1121
}
1122
 
1123
static void
1124
soa_EXP(struct toy_compiler *tc,
1125
        const struct tgsi_full_instruction *tgsi_inst,
1126
        struct toy_dst *dst_,
1127
        struct toy_src *src_)
1128
{
1129
   struct toy_dst dst0[4];
1130
   struct toy_src src0[4];
1131
 
1132
   assert(!"SoA EXP untested");
1133
 
1134
   tdst_transpose(dst_[0], dst0);
1135
   tsrc_transpose(src_[0], src0);
1136
 
1137
   if (!tdst_is_null(dst0[0])) {
1138
      struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1139
 
1140
      tc_RNDD(tc, tmp, src0[0]);
1141
 
1142
      /* construct the floating point number manually */
1143
      tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
1144
      tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23));
1145
   }
1146
 
1147
   tc_FRC(tc, dst0[1], src0[0]);
1148
   tc_EXP(tc, dst0[2], src0[0]);
1149
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1150
}
1151
 
1152
static void
1153
soa_LOG(struct toy_compiler *tc,
1154
        const struct tgsi_full_instruction *tgsi_inst,
1155
        struct toy_dst *dst_,
1156
        struct toy_src *src_)
1157
{
1158
   struct toy_dst dst0[4];
1159
   struct toy_src src0[4];
1160
 
1161
   assert(!"SoA LOG untested");
1162
 
1163
   tdst_transpose(dst_[0], dst0);
1164
   tsrc_transpose(src_[0], src0);
1165
 
1166
   if (dst_[0].writemask & TOY_WRITEMASK_XY) {
1167
      struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
1168
 
1169
      /* exponent */
1170
      tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
1171
      tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));
1172
 
1173
      /* mantissa  */
1174
      tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
1175
      tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23));
1176
   }
1177
 
1178
   tc_LOG(tc, dst0[2], src0[0]);
1179
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1180
}
1181
 
1182
static void
1183
soa_DST(struct toy_compiler *tc,
1184
        const struct tgsi_full_instruction *tgsi_inst,
1185
        struct toy_dst *dst_,
1186
        struct toy_src *src_)
1187
{
1188
   struct toy_dst dst0[4];
1189
   struct toy_src src[2][4];
1190
 
1191
   tdst_transpose(dst_[0], dst0);
1192
   tsrc_transpose(src_[0], src[0]);
1193
   tsrc_transpose(src_[1], src[1]);
1194
 
1195
   tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
1196
   tc_MUL(tc, dst0[1], src[0][1], src[1][1]);
1197
   tc_MOV(tc, dst0[2], src[0][2]);
1198
   tc_MOV(tc, dst0[3], src[1][3]);
1199
}
1200
 
1201
static void
1202
soa_XPD(struct toy_compiler *tc,
1203
        const struct tgsi_full_instruction *tgsi_inst,
1204
        struct toy_dst *dst_,
1205
        struct toy_src *src_)
1206
{
1207
   struct toy_dst dst0[4];
1208
   struct toy_src src[2][4];
1209
 
1210
   tdst_transpose(dst_[0], dst0);
1211
   tsrc_transpose(src_[0], src[0]);
1212
   tsrc_transpose(src_[1], src[1]);
1213
 
1214
   /* dst.x = src0.y * src1.z - src1.y * src0.z */
1215
   tc_MUL(tc, dst0[0], src[0][2], src[1][1]);
1216
   tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0])));
1217
 
1218
   /* dst.y = src0.z * src1.x - src1.z * src0.x */
1219
   tc_MUL(tc, dst0[1], src[0][0], src[1][2]);
1220
   tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1])));
1221
 
1222
   /* dst.z = src0.x * src1.y - src1.x * src0.y */
1223
   tc_MUL(tc, dst0[2], src[0][1], src[1][0]);
1224
   tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2])));
1225
 
1226
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1227
}
1228
 
1229
static void
1230
soa_PK2H(struct toy_compiler *tc,
1231
         const struct tgsi_full_instruction *tgsi_inst,
1232
         struct toy_dst *dst_,
1233
         struct toy_src *src_)
1234
{
1235
   struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
1236
   struct toy_dst dst0[4];
1237
   struct toy_src src0[4];
1238
   int i;
1239
 
1240
   assert(!"SoA PK2H untested");
1241
 
1242
   tdst_transpose(dst_[0], dst0);
1243
   tsrc_transpose(src_[0], src0);
1244
 
1245
   tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16));
1246
   tc_OR(tc, tmp, src0[0], tsrc_from(tmp));
1247
 
1248
   for (i = 0; i < 4; i++)
1249
      tc_MOV(tc, dst0[i], tsrc_from(tmp));
1250
}
1251
 
1252
static void
1253
soa_UP2H(struct toy_compiler *tc,
1254
         const struct tgsi_full_instruction *tgsi_inst,
1255
         struct toy_dst *dst_,
1256
         struct toy_src *src_)
1257
{
1258
   struct toy_dst dst0[4];
1259
   struct toy_src src0[4];
1260
 
1261
   assert(!"SoA UP2H untested");
1262
 
1263
   tdst_transpose(dst_[0], dst0);
1264
   tsrc_transpose(src_[0], src0);
1265
 
1266
   tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff));
1267
   tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16));
1268
   tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff));
1269
   tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16));
1270
 
1271
}
1272
 
1273
static void
1274
soa_SCS(struct toy_compiler *tc,
1275
        const struct tgsi_full_instruction *tgsi_inst,
1276
        struct toy_dst *dst_,
1277
        struct toy_src *src_)
1278
{
1279
   struct toy_dst dst0[4];
1280
   struct toy_src src0[4];
1281
 
1282
   tdst_transpose(dst_[0], dst0);
1283
   tsrc_transpose(src_[0], src0);
1284
 
1285
   tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]);
1286
   tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]);
1287
   tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
1288
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
1289
}
1290
 
1291
static void
1292
soa_unsupported(struct toy_compiler *tc,
1293
                const struct tgsi_full_instruction *tgsi_inst,
1294
                struct toy_dst *dst_,
1295
                struct toy_src *src_)
1296
{
1297
   const struct tgsi_opcode_info *info =
1298
      tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode);
1299
 
1300
   ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
1301
         info->mnemonic);
1302
 
1303
   tc_fail(tc, "unsupported TGSI instruction in SoA form");
1304
}
1305
 
1306
/*
 * SoA translation handlers, indexed by TGSI opcode.  Opcodes without an
 * explicit entry are left zero-initialized.
 */
static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL]          = soa_per_channel,
   [TGSI_OPCODE_MOV]          = soa_per_channel,
   [TGSI_OPCODE_LIT]          = soa_LIT,
   [TGSI_OPCODE_RCP]          = soa_scalar_replicate,
   [TGSI_OPCODE_RSQ]          = soa_scalar_replicate,
   [TGSI_OPCODE_EXP]          = soa_EXP,
   [TGSI_OPCODE_LOG]          = soa_LOG,
   [TGSI_OPCODE_MUL]          = soa_per_channel,
   [TGSI_OPCODE_ADD]          = soa_per_channel,
   [TGSI_OPCODE_DP3]          = soa_dot_product,
   [TGSI_OPCODE_DP4]          = soa_dot_product,
   [TGSI_OPCODE_DST]          = soa_DST,
   [TGSI_OPCODE_MIN]          = soa_per_channel,
   [TGSI_OPCODE_MAX]          = soa_per_channel,
   [TGSI_OPCODE_SLT]          = soa_per_channel,
   [TGSI_OPCODE_SGE]          = soa_per_channel,
   [TGSI_OPCODE_MAD]          = soa_per_channel,
   [TGSI_OPCODE_SUB]          = soa_per_channel,
   [TGSI_OPCODE_LRP]          = soa_per_channel,
   [TGSI_OPCODE_SQRT]         = soa_scalar_replicate,
   [TGSI_OPCODE_DP2A]         = soa_dot_product,
   [TGSI_OPCODE_FRC]          = soa_per_channel,
   [TGSI_OPCODE_CLAMP]        = soa_per_channel,
   [TGSI_OPCODE_FLR]          = soa_per_channel,
   [TGSI_OPCODE_ROUND]        = soa_per_channel,
   [TGSI_OPCODE_EX2]          = soa_scalar_replicate,
   [TGSI_OPCODE_LG2]          = soa_scalar_replicate,
   [TGSI_OPCODE_POW]          = soa_scalar_replicate,
   [TGSI_OPCODE_XPD]          = soa_XPD,
   [TGSI_OPCODE_ABS]          = soa_per_channel,
   [TGSI_OPCODE_DPH]          = soa_dot_product,
   [TGSI_OPCODE_COS]          = soa_scalar_replicate,
   [TGSI_OPCODE_DDX]          = soa_partial_derivative,
   [TGSI_OPCODE_DDY]          = soa_partial_derivative,
   [TGSI_OPCODE_KILL]         = soa_passthrough,
   [TGSI_OPCODE_PK2H]         = soa_PK2H,
   [TGSI_OPCODE_PK2US]        = soa_unsupported,
   [TGSI_OPCODE_PK4B]         = soa_unsupported,
   [TGSI_OPCODE_PK4UB]        = soa_unsupported,
   [TGSI_OPCODE_SEQ]          = soa_per_channel,
   [TGSI_OPCODE_SGT]          = soa_per_channel,
   [TGSI_OPCODE_SIN]          = soa_scalar_replicate,
   [TGSI_OPCODE_SLE]          = soa_per_channel,
   [TGSI_OPCODE_SNE]          = soa_per_channel,
   [TGSI_OPCODE_TEX]          = soa_passthrough,
   [TGSI_OPCODE_TXD]          = soa_passthrough,
   [TGSI_OPCODE_TXP]          = soa_passthrough,
   [TGSI_OPCODE_UP2H]         = soa_UP2H,
   [TGSI_OPCODE_UP2US]        = soa_unsupported,
   [TGSI_OPCODE_UP4B]         = soa_unsupported,
   [TGSI_OPCODE_UP4UB]        = soa_unsupported,
   [TGSI_OPCODE_ARR]          = soa_per_channel,
   [TGSI_OPCODE_CAL]          = soa_unsupported,
   [TGSI_OPCODE_RET]          = soa_unsupported,
   [TGSI_OPCODE_SSG]          = soa_per_channel,
   [TGSI_OPCODE_CMP]          = soa_per_channel,
   [TGSI_OPCODE_SCS]          = soa_SCS,
   [TGSI_OPCODE_TXB]          = soa_passthrough,
   [TGSI_OPCODE_DIV]          = soa_per_channel,
   [TGSI_OPCODE_DP2]          = soa_dot_product,
   [TGSI_OPCODE_TXL]          = soa_passthrough,
   [TGSI_OPCODE_BRK]          = soa_passthrough,
   [TGSI_OPCODE_IF]           = soa_if,
   [TGSI_OPCODE_UIF]          = soa_if,
   [TGSI_OPCODE_ELSE]         = soa_passthrough,
   [TGSI_OPCODE_ENDIF]        = soa_passthrough,
   [TGSI_OPCODE_PUSHA]        = soa_unsupported,
   [TGSI_OPCODE_POPA]         = soa_unsupported,
   [TGSI_OPCODE_CEIL]         = soa_per_channel,
   [TGSI_OPCODE_I2F]          = soa_per_channel,
   [TGSI_OPCODE_NOT]          = soa_per_channel,
   [TGSI_OPCODE_TRUNC]        = soa_per_channel,
   [TGSI_OPCODE_SHL]          = soa_per_channel,
   [TGSI_OPCODE_AND]          = soa_per_channel,
   [TGSI_OPCODE_OR]           = soa_per_channel,
   [TGSI_OPCODE_MOD]          = soa_per_channel,
   [TGSI_OPCODE_XOR]          = soa_per_channel,
   [TGSI_OPCODE_SAD]          = soa_per_channel,
   [TGSI_OPCODE_TXF]          = soa_passthrough,
   [TGSI_OPCODE_TXQ]          = soa_passthrough,
   [TGSI_OPCODE_CONT]         = soa_passthrough,
   [TGSI_OPCODE_EMIT]         = soa_unsupported,
   [TGSI_OPCODE_ENDPRIM]      = soa_unsupported,
   [TGSI_OPCODE_BGNLOOP]      = soa_passthrough,
   [TGSI_OPCODE_BGNSUB]       = soa_unsupported,
   [TGSI_OPCODE_ENDLOOP]      = soa_passthrough,
   [TGSI_OPCODE_ENDSUB]       = soa_unsupported,
   [TGSI_OPCODE_TXQ_LZ]       = soa_passthrough,
   [TGSI_OPCODE_NOP]          = soa_passthrough,
   [TGSI_OPCODE_FSEQ]         = soa_per_channel,
   [TGSI_OPCODE_FSGE]         = soa_per_channel,
   [TGSI_OPCODE_FSLT]         = soa_per_channel,
   [TGSI_OPCODE_FSNE]         = soa_per_channel,
   [TGSI_OPCODE_CALLNZ]       = soa_unsupported,
   [TGSI_OPCODE_BREAKC]       = soa_unsupported,
   [TGSI_OPCODE_KILL_IF]      = soa_passthrough,
   [TGSI_OPCODE_END]          = soa_passthrough,
   [TGSI_OPCODE_F2I]          = soa_per_channel,
   [TGSI_OPCODE_IDIV]         = soa_per_channel,
   [TGSI_OPCODE_IMAX]         = soa_per_channel,
   [TGSI_OPCODE_IMIN]         = soa_per_channel,
   [TGSI_OPCODE_INEG]         = soa_per_channel,
   [TGSI_OPCODE_ISGE]         = soa_per_channel,
   [TGSI_OPCODE_ISHR]         = soa_per_channel,
   [TGSI_OPCODE_ISLT]         = soa_per_channel,
   [TGSI_OPCODE_F2U]          = soa_per_channel,
   [TGSI_OPCODE_U2F]          = soa_per_channel,
   [TGSI_OPCODE_UADD]         = soa_per_channel,
   [TGSI_OPCODE_UDIV]         = soa_per_channel,
   [TGSI_OPCODE_UMAD]         = soa_per_channel,
   [TGSI_OPCODE_UMAX]         = soa_per_channel,
   [TGSI_OPCODE_UMIN]         = soa_per_channel,
   [TGSI_OPCODE_UMOD]         = soa_per_channel,
   [TGSI_OPCODE_UMUL]         = soa_per_channel,
   [TGSI_OPCODE_USEQ]         = soa_per_channel,
   [TGSI_OPCODE_USGE]         = soa_per_channel,
   [TGSI_OPCODE_USHR]         = soa_per_channel,
   [TGSI_OPCODE_USLT]         = soa_per_channel,
   [TGSI_OPCODE_USNE]         = soa_per_channel,
   [TGSI_OPCODE_SWITCH]       = soa_unsupported,
   [TGSI_OPCODE_CASE]         = soa_unsupported,
   [TGSI_OPCODE_DEFAULT]      = soa_unsupported,
   [TGSI_OPCODE_ENDSWITCH]    = soa_unsupported,
   [TGSI_OPCODE_SAMPLE]       = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_I]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_I_MS]  = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_B]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_C]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_C_LZ]  = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_D]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_L]     = soa_passthrough,
   [TGSI_OPCODE_GATHER4]      = soa_passthrough,
   [TGSI_OPCODE_SVIEWINFO]    = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_POS]   = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_INFO]  = soa_passthrough,
   [TGSI_OPCODE_UARL]         = soa_per_channel,
   [TGSI_OPCODE_UCMP]         = soa_per_channel,
   [TGSI_OPCODE_IABS]         = soa_per_channel,
   [TGSI_OPCODE_ISSG]         = soa_per_channel,
   [TGSI_OPCODE_LOAD]         = soa_unsupported,
   [TGSI_OPCODE_STORE]        = soa_unsupported,
   [TGSI_OPCODE_MFENCE]       = soa_unsupported,
   [TGSI_OPCODE_LFENCE]       = soa_unsupported,
   [TGSI_OPCODE_SFENCE]       = soa_unsupported,
   [TGSI_OPCODE_BARRIER]      = soa_unsupported,
   [TGSI_OPCODE_ATOMUADD]     = soa_unsupported,
   [TGSI_OPCODE_ATOMXCHG]     = soa_unsupported,
   [TGSI_OPCODE_ATOMCAS]      = soa_unsupported,
   [TGSI_OPCODE_ATOMAND]      = soa_unsupported,
   [TGSI_OPCODE_ATOMOR]       = soa_unsupported,
   [TGSI_OPCODE_ATOMXOR]      = soa_unsupported,
   [TGSI_OPCODE_ATOMUMIN]     = soa_unsupported,
   [TGSI_OPCODE_ATOMUMAX]     = soa_unsupported,
   [TGSI_OPCODE_ATOMIMIN]     = soa_unsupported,
   [TGSI_OPCODE_ATOMIMAX]     = soa_uns­upported,
   [TGSI_OPCODE_TEX2]         = soa_passthrough,
   [TGSI_OPCODE_TXB2]         = soa_passthrough,
   [TGSI_OPCODE_TXL2]         = soa_passthrough,
};
1466
 
1467
static bool
1468
ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
1469
{
1470
   return (d->Register.Indirect ||
1471
         (d->Register.Dimension && d->Dimension.Indirect));
1472
}
1473
 
1474
static int
1475
ra_dst_index(const struct tgsi_full_dst_register *d)
1476
{
1477
   assert(!d->Register.Indirect);
1478
   return d->Register.Index;
1479
}
1480
 
1481
static int
1482
ra_dst_dimension(const struct tgsi_full_dst_register *d)
1483
{
1484
   if (d->Register.Dimension) {
1485
      assert(!d->Dimension.Indirect);
1486
      return d->Dimension.Index;
1487
   }
1488
   else {
1489
      return 0;
1490
   }
1491
}
1492
 
1493
static bool
1494
ra_is_src_indirect(const struct tgsi_full_src_register *s)
1495
{
1496
   return (s->Register.Indirect ||
1497
         (s->Register.Dimension && s->Dimension.Indirect));
1498
}
1499
 
1500
static int
1501
ra_src_index(const struct tgsi_full_src_register *s)
1502
{
1503
   assert(!s->Register.Indirect);
1504
   return s->Register.Index;
1505
}
1506
 
1507
static int
1508
ra_src_dimension(const struct tgsi_full_src_register *s)
1509
{
1510
   if (s->Register.Dimension) {
1511
      assert(!s->Dimension.Indirect);
1512
      return s->Dimension.Index;
1513
   }
1514
   else {
1515
      return 0;
1516
   }
1517
}
1518
 
1519
/**
1520
 * Infer the type of either the sources or the destination.
1521
 */
1522
static enum toy_type
1523
ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
1524
{
1525
   enum tgsi_opcode_type type;
1526
 
1527
   if (is_dst)
1528
      type = tgsi_opcode_infer_dst_type(tgsi_opcode);
1529
   else
1530
      type = tgsi_opcode_infer_src_type(tgsi_opcode);
1531
 
1532
   switch (type) {
1533
   case TGSI_TYPE_UNSIGNED:
1534
      return TOY_TYPE_UD;
1535
   case TGSI_TYPE_SIGNED:
1536
      return TOY_TYPE_D;
1537
   case TGSI_TYPE_FLOAT:
1538
      return TOY_TYPE_F;
1539
   case TGSI_TYPE_UNTYPED:
1540
   case TGSI_TYPE_VOID:
1541
   case TGSI_TYPE_DOUBLE:
1542
   default:
1543
      assert(!"unsupported TGSI type");
1544
      return TOY_TYPE_UD;
1545
   }
1546
}
1547
 
1548
/**
1549
 * Return the type of an operand of the specified instruction.
1550
 */
1551
static enum toy_type
1552
ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
1553
            int operand, bool is_dst)
1554
{
1555
   enum toy_type type;
1556
   enum tgsi_file_type file;
1557
 
1558
   /* we need to look at both src and dst for MOV */
1559
   /* XXX it should not be this complex */
1560
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
1561
      const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
1562
      const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;
1563
 
1564
      if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) {
1565
         type = TOY_TYPE_D;
1566
      }
1567
      else if (src_file == TGSI_FILE_IMMEDIATE &&
1568
               !tgsi_inst->Src[0].Register.Indirect) {
1569
         const int src_idx = tgsi_inst->Src[0].Register.Index;
1570
         type = tgsi->imm_data.types[src_idx];
1571
      }
1572
      else {
1573
         /* this is the best we can do */
1574
         type = TOY_TYPE_F;
1575
      }
1576
 
1577
      return type;
1578
   }
1579
   else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
1580
      if (!is_dst && operand == 0)
1581
         type = TOY_TYPE_UD;
1582
      else
1583
         type = TOY_TYPE_F;
1584
 
1585
      return type;
1586
   }
1587
 
1588
   type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst);
1589
 
1590
   /* fix the type */
1591
   file = (is_dst) ?
1592
      tgsi_inst->Dst[operand].Register.File :
1593
      tgsi_inst->Src[operand].Register.File;
1594
   switch (file) {
1595
   case TGSI_FILE_SAMPLER:
1596
   case TGSI_FILE_RESOURCE:
1597
   case TGSI_FILE_SAMPLER_VIEW:
1598
      type = TOY_TYPE_D;
1599
      break;
1600
   case TGSI_FILE_ADDRESS:
1601
      assert(type == TOY_TYPE_D);
1602
      break;
1603
   default:
1604
      break;
1605
   }
1606
 
1607
   return type;
1608
}
1609
 
1610
/**
1611
 * Allocate a VRF register.
1612
 */
1613
static int
1614
ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
1615
{
1616
   const int count = (tgsi->aos) ? 1 : 4;
1617
   return tc_alloc_vrf(tgsi->tc, count);
1618
}
1619
 
1620
/**
1621
 * Construct the key for VRF mapping look-up.
1622
 */
1623
static void *
1624
ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
1625
{
1626
   intptr_t key;
1627
 
1628
   /* this is ugly... */
1629
   assert(file  < 1 << 4);
1630
   assert(dim   < 1 << 12);
1631
   assert(index < 1 << 16);
1632
   key = (file << 28) | (dim << 16) | index;
1633
 
1634
   return intptr_to_pointer(key);
1635
}
1636
 
1637
/**
1638
 * Map a TGSI register to a VRF register.
1639
 */
1640
static int
1641
ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
1642
           int dim, int index, bool *is_new)
1643
{
1644
   void *key, *val;
1645
   intptr_t vrf;
1646
 
1647
   key = ra_get_map_key(file, dim, index);
1648
 
1649
   /*
1650
    * because we allocate vrf from 1 and on, val is never NULL as long as the
1651
    * key exists
1652
    */
1653
   val = util_hash_table_get(tgsi->reg_mapping, key);
1654
   if (val) {
1655
      vrf = pointer_to_intptr(val);
1656
 
1657
      if (is_new)
1658
         *is_new = false;
1659
   }
1660
   else {
1661
      vrf = (intptr_t) ra_alloc_reg(tgsi, file);
1662
 
1663
      /* add to the mapping */
1664
      val = intptr_to_pointer(vrf);
1665
      util_hash_table_set(tgsi->reg_mapping, key, val);
1666
 
1667
      if (is_new)
1668
         *is_new = true;
1669
   }
1670
 
1671
   return (int) vrf;
1672
}
1673
 
1674
/**
1675
 * Return true if the destination aliases any of the sources.
1676
 */
1677
static bool
1678
ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
1679
{
1680
   const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1681
   int i;
1682
 
1683
   /* we need a scratch register for indirect dst anyway */
1684
   if (ra_dst_is_indirect(d))
1685
      return true;
1686
 
1687
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1688
      const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
1689
 
1690
      if (s->Register.File != d->Register.File)
1691
         continue;
1692
 
1693
      /*
1694
       * we can go on to check dimension and index respectively, but
1695
       * keep it simple for now
1696
       */
1697
      if (ra_is_src_indirect(s))
1698
         return true;
1699
      if (ra_src_dimension(s) == ra_dst_dimension(d) &&
1700
          ra_src_index(s) == ra_dst_index(d))
1701
         return true;
1702
   }
1703
 
1704
   return false;
1705
}
1706
 
1707
/**
1708
 * Return the toy register for a TGSI destination operand.
1709
 */
1710
static struct toy_dst
1711
ra_get_dst(struct toy_tgsi *tgsi,
1712
           const struct tgsi_full_instruction *tgsi_inst, int dst_index,
1713
           bool *is_scratch)
1714
{
1715
   const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
1716
   bool need_vrf = false;
1717
   struct toy_dst dst;
1718
 
1719
   switch (d->Register.File) {
1720
   case TGSI_FILE_NULL:
1721
      dst = tdst_null();
1722
      break;
1723
   case TGSI_FILE_OUTPUT:
1724
   case TGSI_FILE_TEMPORARY:
1725
   case TGSI_FILE_ADDRESS:
1726
   case TGSI_FILE_PREDICATE:
1727
      need_vrf = true;
1728
      break;
1729
   default:
1730
      assert(!"unhandled dst file");
1731
      dst = tdst_null();
1732
      break;
1733
   }
1734
 
1735
   if (need_vrf) {
1736
      /* XXX we do not always need a scratch given the conditions... */
1737
      const bool need_scratch =
1738
         (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
1739
          tgsi_inst->Instruction.Saturate);
1740
      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
1741
      int vrf;
1742
 
1743
      if (need_scratch) {
1744
         vrf = ra_alloc_reg(tgsi, d->Register.File);
1745
      }
1746
      else {
1747
         vrf = ra_map_reg(tgsi, d->Register.File,
1748
               ra_dst_dimension(d), ra_dst_index(d), NULL);
1749
      }
1750
 
1751
      if (is_scratch)
1752
         *is_scratch = need_scratch;
1753
 
1754
      dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1755
            false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
1756
   }
1757
 
1758
   return dst;
1759
}
1760
 
1761
static struct toy_src
1762
ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
1763
                   enum toy_type type, int vrf)
1764
{
1765
   return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
1766
                    false, 0,
1767
                    s->Register.SwizzleX, s->Register.SwizzleY,
1768
                    s->Register.SwizzleZ, s->Register.SwizzleW,
1769
                    s->Register.Absolute, s->Register.Negate,
1770
                    vrf * TOY_REG_WIDTH);
1771
}
1772
 
1773
static int
1774
init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
1775
              enum tgsi_file_type file, int index,
1776
              const struct tgsi_ind_register *indirect,
1777
              const struct tgsi_dimension *dimension,
1778
              const struct tgsi_ind_register *dim_indirect)
1779
{
1780
   struct toy_src src;
1781
   int num_src = 0;
1782
 
1783
   /* src[0]: TGSI file */
1784
   inst->src[num_src++] = tsrc_imm_d(file);
1785
 
1786
   /* src[1]: TGSI dimension */
1787
   inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);
1788
 
1789
   /* src[2]: TGSI dimension indirection */
1790
   if (dim_indirect) {
1791
      const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
1792
            dim_indirect->Index, NULL);
1793
 
1794
      src = tsrc(TOY_FILE_VRF, vrf, 0);
1795
      src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1796
   }
1797
   else {
1798
      src = tsrc_imm_d(0);
1799
   }
1800
 
1801
   inst->src[num_src++] = src;
1802
 
1803
   /* src[3]: TGSI index */
1804
   inst->src[num_src++] = tsrc_imm_d(index);
1805
 
1806
   /* src[4]: TGSI index indirection */
1807
   if (indirect) {
1808
      const int vrf = ra_map_reg(tgsi, indirect->File, 0,
1809
            indirect->Index, NULL);
1810
 
1811
      src = tsrc(TOY_FILE_VRF, vrf, 0);
1812
      src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
1813
   }
1814
   else {
1815
      src = tsrc_imm_d(0);
1816
   }
1817
 
1818
   inst->src[num_src++] = src;
1819
 
1820
   return num_src;
1821
}
1822
 
1823
static struct toy_src
1824
ra_get_src_indirect(struct toy_tgsi *tgsi,
1825
                    const struct tgsi_full_instruction *tgsi_inst,
1826
                    int src_index)
1827
{
1828
   const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
1829
   bool need_vrf = false, is_resource = false;
1830
   struct toy_src src;
1831
 
1832
   switch (s->Register.File) {
1833
   case TGSI_FILE_NULL:
1834
      src = tsrc_null();
1835
      break;
1836
   case TGSI_FILE_SAMPLER:
1837
   case TGSI_FILE_RESOURCE:
1838
   case TGSI_FILE_SAMPLER_VIEW:
1839
      is_resource = true;
1840
      /* fall through */
1841
   case TGSI_FILE_CONSTANT:
1842
   case TGSI_FILE_INPUT:
1843
   case TGSI_FILE_SYSTEM_VALUE:
1844
   case TGSI_FILE_TEMPORARY:
1845
   case TGSI_FILE_ADDRESS:
1846
   case TGSI_FILE_IMMEDIATE:
1847
   case TGSI_FILE_PREDICATE:
1848
      need_vrf = true;
1849
      break;
1850
   default:
1851
      assert(!"unhandled src file");
1852
      src = tsrc_null();
1853
      break;
1854
   }
1855
 
1856
   if (need_vrf) {
1857
      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
1858
      int vrf;
1859
 
1860
      if (is_resource) {
1861
         assert(!s->Register.Dimension);
1862
         assert(s->Register.Indirect);
1863
 
1864
         vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
1865
      }
1866
      else {
1867
         vrf = ra_alloc_reg(tgsi, s->Register.File);
1868
      }
1869
 
1870
      src = ra_get_src_for_vrf(s, type, vrf);
1871
 
1872
      /* emit indirect fetch */
1873
      if (!is_resource) {
1874
         struct toy_inst *inst;
1875
 
1876
         inst = tc_add(tgsi->tc);
1877
         inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
1878
         inst->dst = tdst_from(src);
1879
         inst->dst.writemask = TOY_WRITEMASK_XYZW;
1880
 
1881
         init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
1882
               (s->Register.Indirect) ? &s->Indirect : NULL,
1883
               (s->Register.Dimension) ? &s->Dimension : NULL,
1884
               (s->Dimension.Indirect) ? &s->DimIndirect : NULL);
1885
      }
1886
   }
1887
 
1888
   return src;
1889
}
1890
 
1891
/**
1892
 * Return the toy register for a TGSI source operand.
1893
 */
1894
static struct toy_src
1895
ra_get_src(struct toy_tgsi *tgsi,
1896
           const struct tgsi_full_instruction *tgsi_inst,
1897
           int src_index)
1898
{
1899
   const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
1900
   bool need_vrf = false;
1901
   struct toy_src src;
1902
 
1903
   if (ra_is_src_indirect(s))
1904
      return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
1905
 
1906
   switch (s->Register.File) {
1907
   case TGSI_FILE_NULL:
1908
      src = tsrc_null();
1909
      break;
1910
   case TGSI_FILE_CONSTANT:
1911
   case TGSI_FILE_INPUT:
1912
   case TGSI_FILE_SYSTEM_VALUE:
1913
      need_vrf = true;
1914
      break;
1915
   case TGSI_FILE_TEMPORARY:
1916
   case TGSI_FILE_ADDRESS:
1917
   case TGSI_FILE_PREDICATE:
1918
      need_vrf = true;
1919
      break;
1920
   case TGSI_FILE_SAMPLER:
1921
   case TGSI_FILE_RESOURCE:
1922
   case TGSI_FILE_SAMPLER_VIEW:
1923
      assert(!s->Register.Dimension);
1924
      src = tsrc_imm_d(s->Register.Index);
1925
      break;
1926
   case TGSI_FILE_IMMEDIATE:
1927
      {
1928
         const uint32_t *imm;
1929
         enum toy_type imm_type;
1930
         bool is_scalar;
1931
 
1932
         imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
1933
 
1934
         is_scalar =
1935
            (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] &&
1936
             imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] &&
1937
             imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]);
1938
 
1939
         if (is_scalar) {
1940
            const enum toy_type type =
1941
               ra_get_type(tgsi, tgsi_inst, src_index, false);
1942
 
1943
            /* ignore imm_type */
1944
            src = tsrc_imm_ud(imm[s->Register.SwizzleX]);
1945
            src.type = type;
1946
            src.absolute = s->Register.Absolute;
1947
            src.negate = s->Register.Negate;
1948
         }
1949
         else {
1950
            need_vrf = true;
1951
         }
1952
      }
1953
      break;
1954
   default:
1955
      assert(!"unhandled src file");
1956
      src = tsrc_null();
1957
      break;
1958
   }
1959
 
1960
   if (need_vrf) {
1961
      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
1962
      bool is_new;
1963
      int vrf;
1964
 
1965
      vrf = ra_map_reg(tgsi, s->Register.File,
1966
            ra_src_dimension(s), ra_src_index(s), &is_new);
1967
 
1968
      src = ra_get_src_for_vrf(s, type, vrf);
1969
 
1970
      if (is_new) {
1971
         switch (s->Register.File) {
1972
         case TGSI_FILE_TEMPORARY:
1973
         case TGSI_FILE_ADDRESS:
1974
         case TGSI_FILE_PREDICATE:
1975
            {
1976
               struct toy_dst dst = tdst_from(src);
1977
               dst.writemask = TOY_WRITEMASK_XYZW;
1978
 
1979
               /* always initialize registers before use */
1980
               if (tgsi->aos) {
1981
                  tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type));
1982
               }
1983
               else {
1984
                  struct toy_dst tdst[4];
1985
                  int i;
1986
 
1987
                  tdst_transpose(dst, tdst);
1988
 
1989
                  for (i = 0; i < 4; i++) {
1990
                     tc_MOV(tgsi->tc, tdst[i],
1991
                           tsrc_type(tsrc_imm_d(0), type));
1992
                  }
1993
               }
1994
            }
1995
            break;
1996
         default:
1997
            break;
1998
         }
1999
      }
2000
 
2001
   }
2002
 
2003
   return src;
2004
}
2005
 
2006
static void
2007
parse_instruction(struct toy_tgsi *tgsi,
2008
                  const struct tgsi_full_instruction *tgsi_inst)
2009
{
2010
   struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
2011
   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
2012
   bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS];
2013
   toy_tgsi_translate translate;
2014
   int i;
2015
 
2016
   /* convert TGSI registers to toy registers */
2017
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
2018
      src[i] = ra_get_src(tgsi, tgsi_inst, i);
2019
   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
2020
      dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);
2021
 
2022
   /* translate the instruction */
2023
   translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
2024
   if (!translate) {
2025
      if (tgsi->translate_table == soa_translate_table)
2026
         soa_unsupported(tgsi->tc, tgsi_inst, dst, src);
2027
      else
2028
         aos_unsupported(tgsi->tc, tgsi_inst, dst, src);
2029
   }
2030
   translate(tgsi->tc, tgsi_inst, dst, src);
2031
 
2032
   /* write the result to the real destinations if needed */
2033
   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2034
      const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2035
 
2036
      if (!dst_is_scratch[i])
2037
         continue;
2038
 
2039
      if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE)
2040
         tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled");
2041
 
2042
      tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
2043
 
2044
      /* emit indirect store */
2045
      if (ra_dst_is_indirect(d)) {
2046
         struct toy_inst *inst;
2047
 
2048
         inst = tc_add(tgsi->tc);
2049
         inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
2050
         inst->dst = dst[i];
2051
 
2052
         init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
2053
               (d->Register.Indirect) ? &d->Indirect : NULL,
2054
               (d->Register.Dimension) ? &d->Dimension : NULL,
2055
               (d->Dimension.Indirect) ? &d->DimIndirect : NULL);
2056
      }
2057
      else {
2058
         const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
2059
         struct toy_dst real_dst;
2060
         int vrf;
2061
 
2062
         vrf = ra_map_reg(tgsi, d->Register.File,
2063
               ra_dst_dimension(d), ra_dst_index(d), NULL);
2064
         real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
2065
               false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
2066
 
2067
         if (tgsi->aos) {
2068
            tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
2069
         }
2070
         else {
2071
            struct toy_dst tdst[4];
2072
            struct toy_src tsrc[4];
2073
            int j;
2074
 
2075
            tdst_transpose(real_dst, tdst);
2076
            tsrc_transpose(tsrc_from(dst[i]), tsrc);
2077
 
2078
            for (j = 0; j < 4; j++)
2079
               tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
2080
         }
2081
      }
2082
 
2083
      tgsi->tc->templ.saturate = false;
2084
   }
2085
 
2086
   switch (tgsi_inst->Instruction.Opcode) {
2087
   case TGSI_OPCODE_KILL_IF:
2088
   case TGSI_OPCODE_KILL:
2089
      tgsi->uses_kill = true;
2090
      break;
2091
   }
2092
 
2093
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
2094
      const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];
2095
      if (s->Register.File == TGSI_FILE_CONSTANT && s->Register.Indirect)
2096
         tgsi->const_indirect = true;
2097
   }
2098
 
2099
   /* remember channels written */
2100
   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
2101
      const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
2102
 
2103
      if (d->Register.File != TGSI_FILE_OUTPUT)
2104
         continue;
2105
      for (i = 0; i < tgsi->num_outputs; i++) {
2106
         if (tgsi->outputs[i].index == d->Register.Index) {
2107
            tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask;
2108
            break;
2109
         }
2110
      }
2111
   }
2112
}
2113
 
2114
static void
2115
decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2116
{
2117
   static const struct tgsi_declaration_interp default_interp = {
2118
      TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
2119
   };
2120
   const struct tgsi_declaration_interp *interp =
2121
      (decl->Declaration.Interpolate) ? &decl->Interp: &default_interp;
2122
   int index;
2123
 
2124
   if (decl->Range.Last >= Elements(tgsi->inputs)) {
2125
      assert(!"invalid IN");
2126
      return;
2127
   }
2128
 
2129
   for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2130
      const int slot = tgsi->num_inputs++;
2131
 
2132
      tgsi->inputs[slot].index = index;
2133
      tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
2134
      if (decl->Declaration.Semantic) {
2135
         tgsi->inputs[slot].semantic_name = decl->Semantic.Name;
2136
         tgsi->inputs[slot].semantic_index = decl->Semantic.Index;
2137
      }
2138
      else {
2139
         tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2140
         tgsi->inputs[slot].semantic_index = index;
2141
      }
2142
      tgsi->inputs[slot].interp = interp->Interpolate;
2143
      tgsi->inputs[slot].centroid = interp->Location == TGSI_INTERPOLATE_LOC_CENTROID;
2144
   }
2145
}
2146
 
2147
static void
2148
decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2149
{
2150
   int index;
2151
 
2152
   if (decl->Range.Last >= Elements(tgsi->outputs)) {
2153
      assert(!"invalid OUT");
2154
      return;
2155
   }
2156
 
2157
   assert(decl->Declaration.Semantic);
2158
 
2159
   for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2160
      const int slot = tgsi->num_outputs++;
2161
 
2162
      tgsi->outputs[slot].index = index;
2163
      tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
2164
      tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
2165
      tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
2166
      tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
2167
   }
2168
}
2169
 
2170
static void
2171
decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
2172
{
2173
   int index;
2174
 
2175
   if (decl->Range.Last >= Elements(tgsi->system_values)) {
2176
      assert(!"invalid SV");
2177
      return;
2178
   }
2179
 
2180
   for (index = decl->Range.First; index <= decl->Range.Last; index++) {
2181
      const int slot = tgsi->num_system_values++;
2182
 
2183
      tgsi->system_values[slot].index = index;
2184
      if (decl->Declaration.Semantic) {
2185
         tgsi->system_values[slot].semantic_name = decl->Semantic.Name;
2186
         tgsi->system_values[slot].semantic_index = decl->Semantic.Index;
2187
      }
2188
      else {
2189
         tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
2190
         tgsi->system_values[slot].semantic_index = index;
2191
      }
2192
   }
2193
}
2194
 
2195
/**
2196
 * Emit an instruction to fetch the value of a TGSI register.
2197
 */
2198
static void
2199
fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
2200
{
2201
   struct toy_dst dst;
2202
   int vrf;
2203
   enum toy_opcode opcode;
2204
   enum toy_type type = TOY_TYPE_F;
2205
 
2206
   switch (file) {
2207
   case TGSI_FILE_INPUT:
2208
      opcode = TOY_OPCODE_TGSI_IN;
2209
      break;
2210
   case TGSI_FILE_CONSTANT:
2211
      opcode = TOY_OPCODE_TGSI_CONST;
2212
      break;
2213
   case TGSI_FILE_SYSTEM_VALUE:
2214
      opcode = TOY_OPCODE_TGSI_SV;
2215
      break;
2216
   case TGSI_FILE_IMMEDIATE:
2217
      opcode = TOY_OPCODE_TGSI_IMM;
2218
      toy_tgsi_get_imm(tgsi, idx, &type);
2219
      break;
2220
   default:
2221
      /* no need to fetch */
2222
      return;
2223
      break;
2224
   }
2225
 
2226
   vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
2227
   dst = tdst(TOY_FILE_VRF, vrf, 0);
2228
   dst = tdst_type(dst, type);
2229
 
2230
   tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
2231
}
2232
 
2233
static void
2234
parse_declaration(struct toy_tgsi *tgsi,
2235
                  const struct tgsi_full_declaration *decl)
2236
{
2237
   int i;
2238
 
2239
   switch (decl->Declaration.File) {
2240
   case TGSI_FILE_INPUT:
2241
      decl_add_in(tgsi, decl);
2242
      break;
2243
   case TGSI_FILE_OUTPUT:
2244
      decl_add_out(tgsi, decl);
2245
      break;
2246
   case TGSI_FILE_SYSTEM_VALUE:
2247
      decl_add_sv(tgsi, decl);
2248
      break;
2249
   case TGSI_FILE_IMMEDIATE:
2250
      /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
2251
      assert(!"unexpected immediate declaration");
2252
      break;
2253
   case TGSI_FILE_CONSTANT:
2254
      if (tgsi->const_count <= decl->Range.Last)
2255
         tgsi->const_count = decl->Range.Last + 1;
2256
      break;
2257
   case TGSI_FILE_NULL:
2258
   case TGSI_FILE_TEMPORARY:
2259
   case TGSI_FILE_SAMPLER:
2260
   case TGSI_FILE_PREDICATE:
2261
   case TGSI_FILE_ADDRESS:
2262
   case TGSI_FILE_RESOURCE:
2263
   case TGSI_FILE_SAMPLER_VIEW:
2264
      /* nothing to do */
2265
      break;
2266
   default:
2267
      assert(!"unhandled TGSI file");
2268
      break;
2269
   }
2270
 
2271
   /* fetch the registers now */
2272
   for (i = decl->Range.First; i <= decl->Range.Last; i++) {
2273
      const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
2274
      fetch_source(tgsi, decl->Declaration.File, dim, i);
2275
   }
2276
}
2277
 
2278
static int
2279
add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
2280
{
2281
   /* reallocate the buffer if necessary */
2282
   if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
2283
      const int cur_size = tgsi->imm_data.size;
2284
      int new_size;
2285
      enum toy_type *new_types;
2286
      uint32_t (*new_buf)[4];
2287
 
2288
      new_size = (cur_size) ? cur_size << 1 : 16;
2289
      while (new_size <= tgsi->imm_data.cur)
2290
         new_size <<= 1;
2291
 
2292
      new_buf = REALLOC(tgsi->imm_data.buf,
2293
            cur_size * sizeof(new_buf[0]),
2294
            new_size * sizeof(new_buf[0]));
2295
      new_types = REALLOC(tgsi->imm_data.types,
2296
            cur_size * sizeof(new_types[0]),
2297
            new_size * sizeof(new_types[0]));
2298
      if (!new_buf || !new_types) {
2299
         FREE(new_buf);
2300
         FREE(new_types);
2301
         return -1;
2302
      }
2303
 
2304
      tgsi->imm_data.buf = new_buf;
2305
      tgsi->imm_data.types = new_types;
2306
      tgsi->imm_data.size = new_size;
2307
   }
2308
 
2309
   tgsi->imm_data.types[tgsi->imm_data.cur] = type;
2310
   memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
2311
         buf, sizeof(tgsi->imm_data.buf[0]));
2312
 
2313
   return tgsi->imm_data.cur++;
2314
}
2315
 
2316
static void
2317
parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
2318
{
2319
   enum toy_type type;
2320
   uint32_t imm_buf[4];
2321
   int idx;
2322
 
2323
   switch (imm->Immediate.DataType) {
2324
   case TGSI_IMM_FLOAT32:
2325
      type = TOY_TYPE_F;
2326
      imm_buf[0] = fui(imm->u[0].Float);
2327
      imm_buf[1] = fui(imm->u[1].Float);
2328
      imm_buf[2] = fui(imm->u[2].Float);
2329
      imm_buf[3] = fui(imm->u[3].Float);
2330
      break;
2331
   case TGSI_IMM_INT32:
2332
      type = TOY_TYPE_D;
2333
      imm_buf[0] = (uint32_t) imm->u[0].Int;
2334
      imm_buf[1] = (uint32_t) imm->u[1].Int;
2335
      imm_buf[2] = (uint32_t) imm->u[2].Int;
2336
      imm_buf[3] = (uint32_t) imm->u[3].Int;
2337
      break;
2338
   case TGSI_IMM_UINT32:
2339
      type = TOY_TYPE_UD;
2340
      imm_buf[0] = imm->u[0].Uint;
2341
      imm_buf[1] = imm->u[1].Uint;
2342
      imm_buf[2] = imm->u[2].Uint;
2343
      imm_buf[3] = imm->u[3].Uint;
2344
      break;
2345
   default:
2346
      assert(!"unhandled TGSI imm type");
2347
      type = TOY_TYPE_F;
2348
      memset(imm_buf, 0, sizeof(imm_buf));
2349
      break;
2350
   }
2351
 
2352
   idx = add_imm(tgsi, type, imm_buf);
2353
   if (idx >= 0)
2354
      fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
2355
   else
2356
      tc_fail(tgsi->tc, "failed to add TGSI imm");
2357
}
2358
 
2359
static void
2360
parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
2361
{
2362
   switch (prop->Property.PropertyName) {
2363
   case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
2364
      tgsi->props.vs_prohibit_ucps = prop->u[0].Data;
2365
      break;
2366
   case TGSI_PROPERTY_FS_COORD_ORIGIN:
2367
      tgsi->props.fs_coord_origin = prop->u[0].Data;
2368
      break;
2369
   case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
2370
      tgsi->props.fs_coord_pixel_center = prop->u[0].Data;
2371
      break;
2372
   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2373
      tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data;
2374
      break;
2375
   case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
2376
      tgsi->props.fs_depth_layout = prop->u[0].Data;
2377
      break;
2378
   case TGSI_PROPERTY_GS_INPUT_PRIM:
2379
      tgsi->props.gs_input_prim = prop->u[0].Data;
2380
      break;
2381
   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2382
      tgsi->props.gs_output_prim = prop->u[0].Data;
2383
      break;
2384
   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2385
      tgsi->props.gs_max_output_vertices = prop->u[0].Data;
2386
      break;
2387
   default:
2388
      assert(!"unhandled TGSI property");
2389
      break;
2390
   }
2391
}
2392
 
2393
static void
2394
parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
2395
{
2396
   switch (token->Token.Type) {
2397
   case TGSI_TOKEN_TYPE_DECLARATION:
2398
      parse_declaration(tgsi, &token->FullDeclaration);
2399
      break;
2400
   case TGSI_TOKEN_TYPE_IMMEDIATE:
2401
      parse_immediate(tgsi, &token->FullImmediate);
2402
      break;
2403
   case TGSI_TOKEN_TYPE_INSTRUCTION:
2404
      parse_instruction(tgsi, &token->FullInstruction);
2405
      break;
2406
   case TGSI_TOKEN_TYPE_PROPERTY:
2407
      parse_property(tgsi, &token->FullProperty);
2408
      break;
2409
   default:
2410
      assert(!"unhandled TGSI token type");
2411
      break;
2412
   }
2413
}
2414
 
2415
static enum pipe_error
2416
dump_reg_mapping(void *key, void *val, void *data)
2417
{
2418
   int tgsi_file, tgsi_dim, tgsi_index;
2419
   uint32_t sig, vrf;
2420
 
2421
   sig = (uint32_t) pointer_to_intptr(key);
2422
   vrf = (uint32_t) pointer_to_intptr(val);
2423
 
2424
   /* see ra_get_map_key() */
2425
   tgsi_file =  (sig >> 28) & 0xf;
2426
   tgsi_dim =   (sig >> 16) & 0xfff;
2427
   tgsi_index = (sig >> 0)  & 0xffff;
2428
 
2429
   if (tgsi_dim) {
2430
      ilo_printf("  v%d:\t%s[%d][%d]\n", vrf,
2431
                 tgsi_file_name(tgsi_file), tgsi_dim, tgsi_index);
2432
   }
2433
   else {
2434
      ilo_printf("  v%d:\t%s[%d]\n", vrf,
2435
                 tgsi_file_name(tgsi_file), tgsi_index);
2436
   }
2437
 
2438
   return PIPE_OK;
2439
}
2440
 
2441
/**
2442
 * Dump the TGSI translator, currently only the register mapping.
2443
 */
2444
void
2445
toy_tgsi_dump(const struct toy_tgsi *tgsi)
2446
{
2447
   util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
2448
}
2449
 
2450
/**
2451
 * Clean up the TGSI translator.
2452
 */
2453
void
2454
toy_tgsi_cleanup(struct toy_tgsi *tgsi)
2455
{
2456
   FREE(tgsi->imm_data.buf);
2457
   FREE(tgsi->imm_data.types);
2458
 
2459
   util_hash_table_destroy(tgsi->reg_mapping);
2460
}
2461
 
2462
/**
 * Hash function of the register mapping.  The key is an integer stored in a
 * pointer, and its value is used as the hash.
 */
static unsigned
reg_mapping_hash(void *key)
{
   const unsigned hash = (unsigned) pointer_to_intptr(key);

   return hash;
}
2467
 
2468
/**
 * Key comparison function of the register mapping.  Return 0 when the two
 * keys are identical, and 1 otherwise.
 */
static int
reg_mapping_compare(void *key1, void *key2)
{
   if (key1 == key2)
      return 0;

   return 1;
}
2473
 
2474
/**
2475
 * Initialize the TGSI translator.
2476
 */
2477
static bool
2478
init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
2479
{
2480
   memset(tgsi, 0, sizeof(*tgsi));
2481
 
2482
   tgsi->tc = tc;
2483
   tgsi->aos = aos;
2484
   tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table;
2485
 
2486
   /* create a mapping of TGSI registers to VRF reigsters */
2487
   tgsi->reg_mapping =
2488
      util_hash_table_create(reg_mapping_hash, reg_mapping_compare);
2489
 
2490
   return (tgsi->reg_mapping != NULL);
2491
}
2492
 
2493
/**
2494
 * Translate TGSI tokens into toy instructions.
2495
 */
2496
void
2497
toy_compiler_translate_tgsi(struct toy_compiler *tc,
2498
                            const struct tgsi_token *tokens, bool aos,
2499
                            struct toy_tgsi *tgsi)
2500
{
2501
   struct tgsi_parse_context parse;
2502
 
2503
   if (!init_tgsi(tgsi, tc, aos)) {
2504
      tc_fail(tc, "failed to initialize TGSI translator");
2505
      return;
2506
   }
2507
 
2508
   tgsi_parse_init(&parse, tokens);
2509
   while (!tgsi_parse_end_of_tokens(&parse)) {
2510
      tgsi_parse_token(&parse);
2511
      parse_token(tgsi, &parse.FullToken);
2512
   }
2513
   tgsi_parse_free(&parse);
2514
}
2515
 
2516
/**
2517
 * Map the TGSI register to VRF register.
2518
 */
2519
int
2520
toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
2521
                 enum tgsi_file_type file, int dimension, int index)
2522
{
2523
   void *key, *val;
2524
 
2525
   key = ra_get_map_key(file, dimension, index);
2526
 
2527
   val = util_hash_table_get(tgsi->reg_mapping, key);
2528
 
2529
   return (val) ? pointer_to_intptr(val) : -1;
2530
}