Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5563 serge 1
/**************************************************************************
2
 *
3
 * Copyright 2010 VMware, Inc.
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sub license, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 **************************************************************************/
27
 
28
/**
29
 * @file
30
 * TGSI to LLVM IR translation -- AoS.
31
 *
32
 * FIXME:
33
 * - No control flow support: the existing control flow code should be factored
34
 * out of the SoA code into a common module and shared.
35
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36
 *
37
 * @author Jose Fonseca 
38
 */
39
 
40
#include "pipe/p_config.h"
41
#include "pipe/p_shader_tokens.h"
42
#include "util/u_debug.h"
43
#include "util/u_math.h"
44
#include "util/u_memory.h"
45
#include "tgsi/tgsi_dump.h"
46
#include "tgsi/tgsi_info.h"
47
#include "tgsi/tgsi_parse.h"
48
#include "tgsi/tgsi_util.h"
49
#include "tgsi/tgsi_scan.h"
50
#include "lp_bld_type.h"
51
#include "lp_bld_const.h"
52
#include "lp_bld_arit.h"
53
#include "lp_bld_logic.h"
54
#include "lp_bld_swizzle.h"
55
#include "lp_bld_flow.h"
56
#include "lp_bld_quad.h"
57
#include "lp_bld_tgsi.h"
58
#include "lp_bld_debug.h"
59
#include "lp_bld_sample.h"
60
 
61
 
62
/**
63
 * Wrapper around lp_build_swizzle_aos which translates swizzles to another
64
 * ordering.
65
 */
66
static LLVMValueRef
67
swizzle_aos(struct lp_build_tgsi_context *bld_base,
68
            LLVMValueRef a,
69
            unsigned swizzle_x,
70
            unsigned swizzle_y,
71
            unsigned swizzle_z,
72
            unsigned swizzle_w)
73
{
74
   unsigned char swizzles[4];
75
   struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base);
76
 
77
   assert(swizzle_x < 4);
78
   assert(swizzle_y < 4);
79
   assert(swizzle_z < 4);
80
   assert(swizzle_w < 4);
81
 
82
   swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x];
83
   swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y];
84
   swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z];
85
   swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w];
86
 
87
   return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles);
88
}
89
 
90
 
91
static LLVMValueRef
92
swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld,
93
                   LLVMValueRef a,
94
                   unsigned chan)
95
{
96
   chan = bld->swizzles[chan];
97
   return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4);
98
}
99
 
100
 
101
static LLVMValueRef
102
emit_fetch_constant(
103
   struct lp_build_tgsi_context * bld_base,
104
   const struct tgsi_full_src_register * reg,
105
   enum tgsi_opcode_type stype,
106
   unsigned swizzle)
107
{
108
   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
109
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
110
   struct lp_type type = bld_base->base.type;
111
   LLVMValueRef res;
112
   unsigned chan;
113
 
114
   assert(!reg->Register.Indirect);
115
 
116
   /*
117
    * Get the constants components
118
    */
119
 
120
   res = bld->bld_base.base.undef;
121
   for (chan = 0; chan < 4; ++chan) {
122
      LLVMValueRef index;
123
      LLVMValueRef scalar_ptr;
124
      LLVMValueRef scalar;
125
      LLVMValueRef swizzle;
126
 
127
      index = lp_build_const_int32(bld->bld_base.base.gallivm,
128
                                   reg->Register.Index * 4 + chan);
129
 
130
      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");
131
 
132
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
133
 
134
      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);
135
 
136
      /*
137
       * NOTE: constants array is always assumed to be RGBA
138
       */
139
 
140
      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
141
                                     bld->swizzles[chan]);
142
 
143
      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
144
   }
145
 
146
   /*
147
    * Broadcast the first quaternion to all others.
148
    *
149
    * XXX: could be factored into a reusable function.
150
    */
151
 
152
   if (type.length > 4) {
153
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
154
      unsigned i;
155
 
156
      for (chan = 0; chan < 4; ++chan) {
157
         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
158
      }
159
 
160
      for (i = 4; i < type.length; ++i) {
161
         shuffles[i] = shuffles[i % 4];
162
      }
163
 
164
      res = LLVMBuildShuffleVector(builder,
165
                                   res, bld->bld_base.base.undef,
166
                                   LLVMConstVector(shuffles, type.length),
167
                                   "");
168
   }
169
   return res;
170
}
171
 
172
static LLVMValueRef
173
emit_fetch_immediate(
174
   struct lp_build_tgsi_context * bld_base,
175
   const struct tgsi_full_src_register * reg,
176
   enum tgsi_opcode_type stype,
177
   unsigned swizzle)
178
{
179
   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
180
   LLVMValueRef res = bld->immediates[reg->Register.Index];
181
   assert(res);
182
   return res;
183
}
184
 
185
static LLVMValueRef
186
emit_fetch_input(
187
   struct lp_build_tgsi_context * bld_base,
188
   const struct tgsi_full_src_register * reg,
189
   enum tgsi_opcode_type stype,
190
   unsigned swizzle)
191
{
192
   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
193
   LLVMValueRef res = bld->inputs[reg->Register.Index];
194
   assert(!reg->Register.Indirect);
195
   assert(res);
196
   return res;
197
}
198
 
199
static LLVMValueRef
200
emit_fetch_temporary(
201
   struct lp_build_tgsi_context * bld_base,
202
   const struct tgsi_full_src_register * reg,
203
   enum tgsi_opcode_type stype,
204
   unsigned swizzle)
205
{
206
   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
207
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
208
   LLVMValueRef temp_ptr = bld->temps[reg->Register.Index];
209
   LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, "");
210
   assert(!reg->Register.Indirect);
211
   if (!res)
212
      return bld->bld_base.base.undef;
213
 
214
   return res;
215
}
216
 
217
/**
218
 * Register store.
219
 */
220
void
221
lp_emit_store_aos(
222
   struct lp_build_tgsi_aos_context *bld,
223
   const struct tgsi_full_instruction *inst,
224
   unsigned index,
225
   LLVMValueRef value)
226
{
227
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
228
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
229
   LLVMValueRef mask = NULL;
230
   LLVMValueRef ptr;
231
 
232
   /*
233
    * Saturate the value
234
    */
235
 
236
   switch (inst->Instruction.Saturate) {
237
   case TGSI_SAT_NONE:
238
      break;
239
 
240
   case TGSI_SAT_ZERO_ONE:
241
      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
242
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
243
      break;
244
 
245
   case TGSI_SAT_MINUS_PLUS_ONE:
246
      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
247
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
248
      break;
249
 
250
   default:
251
      assert(0);
252
   }
253
 
254
   /*
255
    * Translate the register file
256
    */
257
 
258
   assert(!reg->Register.Indirect);
259
 
260
   switch (reg->Register.File) {
261
   case TGSI_FILE_OUTPUT:
262
      ptr = bld->outputs[reg->Register.Index];
263
      break;
264
 
265
   case TGSI_FILE_TEMPORARY:
266
      ptr = bld->temps[reg->Register.Index];
267
      break;
268
 
269
   case TGSI_FILE_ADDRESS:
270
      ptr = bld->addr[reg->Indirect.Index];
271
      break;
272
 
273
   case TGSI_FILE_PREDICATE:
274
      ptr = bld->preds[reg->Register.Index];
275
      break;
276
 
277
   default:
278
      assert(0);
279
      return;
280
   }
281
 
282
   if (!ptr)
283
      return;
284
   /*
285
    * Predicate
286
    */
287
 
288
   if (inst->Instruction.Predicate) {
289
      LLVMValueRef pred;
290
 
291
      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);
292
 
293
      pred = LLVMBuildLoad(builder,
294
                           bld->preds[inst->Predicate.Index], "");
295
 
296
      /*
297
       * Convert the value to an integer mask.
298
       */
299
      pred = lp_build_compare(bld->bld_base.base.gallivm,
300
                               bld->bld_base.base.type,
301
                               PIPE_FUNC_NOTEQUAL,
302
                               pred,
303
                               bld->bld_base.base.zero);
304
 
305
      if (inst->Predicate.Negate) {
306
         pred = LLVMBuildNot(builder, pred, "");
307
      }
308
 
309
      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
310
                         inst->Predicate.SwizzleX,
311
                         inst->Predicate.SwizzleY,
312
                         inst->Predicate.SwizzleZ,
313
                         inst->Predicate.SwizzleW);
314
 
315
      if (mask) {
316
         mask = LLVMBuildAnd(builder, mask, pred, "");
317
      } else {
318
         mask = pred;
319
      }
320
   }
321
 
322
   /*
323
    * Writemask
324
    */
325
 
326
   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
327
      LLVMValueRef writemask;
328
 
329
      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
330
                                                   bld->bld_base.base.type,
331
                                                   reg->Register.WriteMask,
332
                                                   TGSI_NUM_CHANNELS,
333
                                                   bld->swizzles);
334
 
335
      if (mask) {
336
         mask = LLVMBuildAnd(builder, mask, writemask, "");
337
      } else {
338
         mask = writemask;
339
      }
340
   }
341
 
342
   if (mask) {
343
      LLVMValueRef orig_value;
344
 
345
      orig_value = LLVMBuildLoad(builder, ptr, "");
346
      value = lp_build_select(&bld->bld_base.base,
347
                              mask, value, orig_value);
348
   }
349
 
350
   LLVMBuildStore(builder, value, ptr);
351
}
352
 
353
 
354
/**
355
 * High-level instruction translators.
356
 */
357
 
358
static LLVMValueRef
359
emit_tex(struct lp_build_tgsi_aos_context *bld,
360
         const struct tgsi_full_instruction *inst,
361
         enum lp_build_tex_modifier modifier)
362
{
363
   unsigned target;
364
   unsigned unit;
365
   LLVMValueRef coords;
366
   struct lp_derivatives derivs = { {NULL}, {NULL} };
367
 
368
   if (!bld->sampler) {
369
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
370
      return bld->bld_base.base.undef;
371
   }
372
 
373
   target = inst->Texture.Texture;
374
 
375
   coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL);
376
 
377
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
378
      /* probably not going to work */
379
      derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL);
380
      derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL);
381
      unit = inst->Src[3].Register.Index;
382
   }
383
   else {
384
      unit = inst->Src[1].Register.Index;
385
   }
386
   return bld->sampler->emit_fetch_texel(bld->sampler,
387
                                         &bld->bld_base.base,
388
                                         target, unit,
389
                                         coords, derivs,
390
                                         modifier);
391
}
392
 
393
 
394
void
395
lp_emit_declaration_aos(
396
   struct lp_build_tgsi_aos_context *bld,
397
   const struct tgsi_full_declaration *decl)
398
{
399
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
400
   LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type);
401
 
402
   unsigned first = decl->Range.First;
403
   unsigned last = decl->Range.Last;
404
   unsigned idx;
405
 
406
   for (idx = first; idx <= last; ++idx) {
407
      switch (decl->Declaration.File) {
408
      case TGSI_FILE_TEMPORARY:
409
         assert(idx < LP_MAX_TGSI_TEMPS);
410
         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
411
            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
412
            bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm,
413
                                                     vec_type, array_size, "");
414
         } else {
415
            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
416
         }
417
         break;
418
 
419
      case TGSI_FILE_OUTPUT:
420
         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
421
         break;
422
 
423
      case TGSI_FILE_ADDRESS:
424
         assert(idx < LP_MAX_TGSI_ADDRS);
425
         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
426
         break;
427
 
428
      case TGSI_FILE_PREDICATE:
429
         assert(idx < LP_MAX_TGSI_PREDS);
430
         bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
431
         break;
432
 
433
      default:
434
         /* don't need to declare other vars */
435
         break;
436
      }
437
   }
438
}
439
 
440
 
441
/**
442
 * Emit LLVM for one TGSI instruction.
443
 * \param return TRUE for success, FALSE otherwise
444
 */
445
boolean
446
lp_emit_instruction_aos(
447
   struct lp_build_tgsi_aos_context *bld,
448
   const struct tgsi_full_instruction *inst,
449
   const struct tgsi_opcode_info *info,
450
   int *pc)
451
{
452
   LLVMValueRef src0, src1, src2;
453
   LLVMValueRef tmp0, tmp1;
454
   LLVMValueRef dst0 = NULL;
455
 
456
   /*
457
    * Stores and write masks are handled in a general fashion after the long
458
    * instruction opcode switch statement.
459
    *
460
    * Although not stricitly necessary, we avoid generating instructions for
461
    * channels which won't be stored, in cases where's that easy. For some
462
    * complex instructions, like texture sampling, it is more convenient to
463
    * assume a full writemask and then let LLVM optimization passes eliminate
464
    * redundant code.
465
    */
466
 
467
   (*pc)++;
468
 
469
   assert(info->num_dst <= 1);
470
   if (info->num_dst) {
471
      dst0 = bld->bld_base.base.undef;
472
   }
473
 
474
   switch (inst->Instruction.Opcode) {
475
   case TGSI_OPCODE_ARL:
476
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
477
      dst0 = lp_build_floor(&bld->bld_base.base, src0);
478
      break;
479
 
480
   case TGSI_OPCODE_MOV:
481
      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
482
      break;
483
 
484
   case TGSI_OPCODE_LIT:
485
      return FALSE;
486
 
487
   case TGSI_OPCODE_RCP:
488
   /* TGSI_OPCODE_RECIP */
489
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
490
      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
491
      break;
492
 
493
   case TGSI_OPCODE_RSQ:
494
   /* TGSI_OPCODE_RECIPSQRT */
495
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
496
      tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
497
      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
498
      break;
499
 
500
   case TGSI_OPCODE_EXP:
501
      return FALSE;
502
 
503
   case TGSI_OPCODE_LOG:
504
      return FALSE;
505
 
506
   case TGSI_OPCODE_MUL:
507
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
508
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
509
      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
510
      break;
511
 
512
   case TGSI_OPCODE_ADD:
513
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
514
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
515
      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
516
      break;
517
 
518
   case TGSI_OPCODE_DP3:
519
   /* TGSI_OPCODE_DOT3 */
520
      return FALSE;
521
 
522
   case TGSI_OPCODE_DP4:
523
   /* TGSI_OPCODE_DOT4 */
524
      return FALSE;
525
 
526
   case TGSI_OPCODE_DST:
527
      return FALSE;
528
 
529
   case TGSI_OPCODE_MIN:
530
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
531
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
532
      dst0 = lp_build_min(&bld->bld_base.base, src0, src1);
533
      break;
534
 
535
   case TGSI_OPCODE_MAX:
536
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
537
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
538
      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
539
      break;
540
 
541
   case TGSI_OPCODE_SLT:
542
   /* TGSI_OPCODE_SETLT */
543
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
544
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
545
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
546
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
547
      break;
548
 
549
   case TGSI_OPCODE_SGE:
550
   /* TGSI_OPCODE_SETGE */
551
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
552
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
553
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
554
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
555
      break;
556
 
557
   case TGSI_OPCODE_MAD:
558
   /* TGSI_OPCODE_MADD */
559
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
560
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
561
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
562
      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
563
      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
564
      break;
565
 
566
   case TGSI_OPCODE_SUB:
567
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
568
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
569
      dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
570
      break;
571
 
572
   case TGSI_OPCODE_LRP:
573
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
574
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
575
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
576
      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
577
      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
578
      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
579
      break;
580
 
581
   case TGSI_OPCODE_CND:
582
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
583
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
584
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
585
      tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
586
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
587
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
588
      break;
589
 
590
   case TGSI_OPCODE_DP2A:
591
      return FALSE;
592
 
593
   case TGSI_OPCODE_FRC:
594
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
595
      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
596
      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
597
      break;
598
 
599
   case TGSI_OPCODE_CLAMP:
600
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
601
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
602
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
603
      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
604
      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
605
      break;
606
 
607
   case TGSI_OPCODE_FLR:
608
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
609
      dst0 = lp_build_floor(&bld->bld_base.base, src0);
610
      break;
611
 
612
   case TGSI_OPCODE_ROUND:
613
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
614
      dst0 = lp_build_round(&bld->bld_base.base, src0);
615
      break;
616
 
617
   case TGSI_OPCODE_EX2:
618
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
619
      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
620
      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
621
      break;
622
 
623
   case TGSI_OPCODE_LG2:
624
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
625
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
626
      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
627
      break;
628
 
629
   case TGSI_OPCODE_POW:
630
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
631
      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
632
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
633
      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
634
      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
635
      break;
636
 
637
   case TGSI_OPCODE_XPD:
638
      return FALSE;
639
 
640
   case TGSI_OPCODE_RCC:
641
      /* deprecated? */
642
      assert(0);
643
      return FALSE;
644
 
645
   case TGSI_OPCODE_DPH:
646
      return FALSE;
647
 
648
   case TGSI_OPCODE_COS:
649
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
650
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
651
      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
652
      break;
653
 
654
   case TGSI_OPCODE_DDX:
655
      return FALSE;
656
 
657
   case TGSI_OPCODE_DDY:
658
      return FALSE;
659
 
660
   case TGSI_OPCODE_KILL:
661
      return FALSE;
662
 
663
   case TGSI_OPCODE_KILL_IF:
664
      return FALSE;
665
 
666
   case TGSI_OPCODE_PK2H:
667
      return FALSE;
668
      break;
669
 
670
   case TGSI_OPCODE_PK2US:
671
      return FALSE;
672
      break;
673
 
674
   case TGSI_OPCODE_PK4B:
675
      return FALSE;
676
      break;
677
 
678
   case TGSI_OPCODE_PK4UB:
679
      return FALSE;
680
 
681
   case TGSI_OPCODE_RFL:
682
      return FALSE;
683
 
684
   case TGSI_OPCODE_SEQ:
685
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
686
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
687
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
688
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
689
      break;
690
 
691
   case TGSI_OPCODE_SFL:
692
      dst0 = bld->bld_base.base.zero;
693
      break;
694
 
695
   case TGSI_OPCODE_SGT:
696
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
697
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
698
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
699
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
700
      break;
701
 
702
   case TGSI_OPCODE_SIN:
703
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
704
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
705
      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
706
      break;
707
 
708
   case TGSI_OPCODE_SLE:
709
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
710
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
711
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
712
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
713
      break;
714
 
715
   case TGSI_OPCODE_SNE:
716
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
717
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
718
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
719
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
720
      break;
721
 
722
   case TGSI_OPCODE_STR:
723
      dst0 = bld->bld_base.base.one;
724
      break;
725
 
726
   case TGSI_OPCODE_TEX:
727
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
728
      break;
729
 
730
   case TGSI_OPCODE_TXD:
731
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
732
      break;
733
 
734
   case TGSI_OPCODE_UP2H:
735
      /* deprecated */
736
      assert (0);
737
      return FALSE;
738
      break;
739
 
740
   case TGSI_OPCODE_UP2US:
741
      /* deprecated */
742
      assert(0);
743
      return FALSE;
744
      break;
745
 
746
   case TGSI_OPCODE_UP4B:
747
      /* deprecated */
748
      assert(0);
749
      return FALSE;
750
      break;
751
 
752
   case TGSI_OPCODE_UP4UB:
753
      /* deprecated */
754
      assert(0);
755
      return FALSE;
756
      break;
757
 
758
   case TGSI_OPCODE_X2D:
759
      /* deprecated? */
760
      assert(0);
761
      return FALSE;
762
      break;
763
 
764
   case TGSI_OPCODE_ARA:
765
      /* deprecated */
766
      assert(0);
767
      return FALSE;
768
      break;
769
 
770
   case TGSI_OPCODE_ARR:
771
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
772
      dst0 = lp_build_round(&bld->bld_base.base, src0);
773
      break;
774
 
775
   case TGSI_OPCODE_BRA:
776
      /* deprecated */
777
      assert(0);
778
      return FALSE;
779
      break;
780
 
781
   case TGSI_OPCODE_CAL:
782
      return FALSE;
783
 
784
   case TGSI_OPCODE_RET:
785
      return FALSE;
786
 
787
   case TGSI_OPCODE_END:
788
      *pc = -1;
789
      break;
790
 
791
   case TGSI_OPCODE_SSG:
792
   /* TGSI_OPCODE_SGN */
793
      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
794
      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
795
      break;
796
 
797
   case TGSI_OPCODE_CMP:
798
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
799
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
800
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
801
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
802
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
803
      break;
804
 
805
   case TGSI_OPCODE_SCS:
806
      return FALSE;
807
 
808
   case TGSI_OPCODE_TXB:
809
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
810
      break;
811
 
812
   case TGSI_OPCODE_NRM:
813
      /* fall-through */
814
   case TGSI_OPCODE_NRM4:
815
      return FALSE;
816
 
817
   case TGSI_OPCODE_DIV:
818
      /* deprecated */
819
      assert(0);
820
      return FALSE;
821
      break;
822
 
823
   case TGSI_OPCODE_DP2:
824
      return FALSE;
825
 
826
   case TGSI_OPCODE_TXL:
827
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
828
      break;
829
 
830
   case TGSI_OPCODE_TXP:
831
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
832
      break;
833
 
834
   case TGSI_OPCODE_BRK:
835
      return FALSE;
836
 
837
   case TGSI_OPCODE_IF:
838
   case TGSI_OPCODE_UIF:
839
      return FALSE;
840
 
841
   case TGSI_OPCODE_BGNLOOP:
842
      return FALSE;
843
 
844
   case TGSI_OPCODE_BGNSUB:
845
      return FALSE;
846
 
847
   case TGSI_OPCODE_ELSE:
848
      return FALSE;
849
 
850
   case TGSI_OPCODE_ENDIF:
851
      return FALSE;
852
 
853
   case TGSI_OPCODE_ENDLOOP:
854
      return FALSE;
855
 
856
   case TGSI_OPCODE_ENDSUB:
857
      return FALSE;
858
 
859
   case TGSI_OPCODE_PUSHA:
860
      /* deprecated? */
861
      assert(0);
862
      return FALSE;
863
      break;
864
 
865
   case TGSI_OPCODE_POPA:
866
      /* deprecated? */
867
      assert(0);
868
      return FALSE;
869
      break;
870
 
871
   case TGSI_OPCODE_CEIL:
872
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
873
      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
874
      break;
875
 
876
   case TGSI_OPCODE_I2F:
877
      /* deprecated? */
878
      assert(0);
879
      return FALSE;
880
      break;
881
 
882
   case TGSI_OPCODE_NOT:
883
      /* deprecated? */
884
      assert(0);
885
      return FALSE;
886
      break;
887
 
888
   case TGSI_OPCODE_TRUNC:
889
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
890
      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
891
      break;
892
 
893
   case TGSI_OPCODE_SHL:
894
      /* deprecated? */
895
      assert(0);
896
      return FALSE;
897
      break;
898
 
899
   case TGSI_OPCODE_ISHR:
900
      /* deprecated? */
901
      assert(0);
902
      return FALSE;
903
      break;
904
 
905
   case TGSI_OPCODE_AND:
906
      /* deprecated? */
907
      assert(0);
908
      return FALSE;
909
      break;
910
 
911
   case TGSI_OPCODE_OR:
912
      /* deprecated? */
913
      assert(0);
914
      return FALSE;
915
      break;
916
 
917
   case TGSI_OPCODE_MOD:
918
      /* deprecated? */
919
      assert(0);
920
      return FALSE;
921
      break;
922
 
923
   case TGSI_OPCODE_XOR:
924
      /* deprecated? */
925
      assert(0);
926
      return FALSE;
927
      break;
928
 
929
   case TGSI_OPCODE_SAD:
930
      /* deprecated? */
931
      assert(0);
932
      return FALSE;
933
      break;
934
 
935
   case TGSI_OPCODE_TXF:
936
      /* deprecated? */
937
      assert(0);
938
      return FALSE;
939
      break;
940
 
941
   case TGSI_OPCODE_TXQ:
942
      /* deprecated? */
943
      assert(0);
944
      return FALSE;
945
      break;
946
 
947
   case TGSI_OPCODE_CONT:
948
      return FALSE;
949
 
950
   case TGSI_OPCODE_EMIT:
951
      return FALSE;
952
      break;
953
 
954
   case TGSI_OPCODE_ENDPRIM:
955
      return FALSE;
956
      break;
957
 
958
   case TGSI_OPCODE_NOP:
959
      break;
960
 
961
   default:
962
      return FALSE;
963
   }
964
 
965
   if (info->num_dst) {
966
      lp_emit_store_aos(bld, inst, 0, dst0);
967
   }
968
 
969
   return TRUE;
970
}
971
 
972
 
973
void
974
lp_build_tgsi_aos(struct gallivm_state *gallivm,
975
                  const struct tgsi_token *tokens,
976
                  struct lp_type type,
977
                  const unsigned char swizzles[4],
978
                  LLVMValueRef consts_ptr,
979
                  const LLVMValueRef *inputs,
980
                  LLVMValueRef *outputs,
981
                  struct lp_build_sampler_aos *sampler,
982
                  const struct tgsi_shader_info *info)
983
{
984
   struct lp_build_tgsi_aos_context bld;
985
   struct tgsi_parse_context parse;
986
   uint num_immediates = 0;
987
   unsigned chan;
988
   int pc = 0;
989
 
990
   /* Setup build context */
991
   memset(&bld, 0, sizeof bld);
992
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
993
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
994
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
995
   lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type));
996
 
997
   for (chan = 0; chan < 4; ++chan) {
998
      bld.swizzles[chan] = swizzles[chan];
999
      bld.inv_swizzles[swizzles[chan]] = chan;
1000
   }
1001
 
1002
   bld.inputs = inputs;
1003
   bld.outputs = outputs;
1004
   bld.consts_ptr = consts_ptr;
1005
   bld.sampler = sampler;
1006
   bld.indirect_files = info->indirect_files;
1007
   bld.bld_base.emit_swizzle = swizzle_aos;
1008
   bld.bld_base.info = info;
1009
 
1010
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
1011
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
1012
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
1013
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
1014
 
1015
   /* Set opcode actions */
1016
   lp_set_default_actions_cpu(&bld.bld_base);
1017
 
1018
   if (!lp_bld_tgsi_list_init(&bld.bld_base)) {
1019
      return;
1020
   }
1021
 
1022
   tgsi_parse_init(&parse, tokens);
1023
 
1024
   while (!tgsi_parse_end_of_tokens(&parse)) {
1025
      tgsi_parse_token(&parse);
1026
 
1027
      switch(parse.FullToken.Token.Type) {
1028
      case TGSI_TOKEN_TYPE_DECLARATION:
1029
         /* Inputs already interpolated */
1030
         lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration);
1031
         break;
1032
 
1033
      case TGSI_TOKEN_TYPE_INSTRUCTION:
1034
         /* save expanded instruction */
1035
         lp_bld_tgsi_add_instruction(&bld.bld_base,
1036
                                     &parse.FullToken.FullInstruction);
1037
         break;
1038
 
1039
      case TGSI_TOKEN_TYPE_IMMEDIATE:
1040
         /* simply copy the immediate values into the next immediates[] slot */
1041
         {
1042
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1043
            float imm[4];
1044
            assert(size <= 4);
1045
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
1046
            for (chan = 0; chan < 4; ++chan) {
1047
               imm[chan] = 0.0f;
1048
            }
1049
            for (chan = 0; chan < size; ++chan) {
1050
               unsigned swizzle = bld.swizzles[chan];
1051
               imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float;
1052
            }
1053
            bld.immediates[num_immediates] =
1054
                     lp_build_const_aos(gallivm, type,
1055
                                        imm[0], imm[1], imm[2], imm[3],
1056
                                        NULL);
1057
            num_immediates++;
1058
         }
1059
         break;
1060
 
1061
      case TGSI_TOKEN_TYPE_PROPERTY:
1062
         break;
1063
 
1064
      default:
1065
         assert(0);
1066
      }
1067
   }
1068
 
1069
   while (pc != -1) {
1070
      struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc;
1071
      const struct tgsi_opcode_info *opcode_info =
1072
         tgsi_get_opcode_info(instr->Instruction.Opcode);
1073
      if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc))
1074
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
1075
                       opcode_info->mnemonic);
1076
   }
1077
 
1078
   if (0) {
1079
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
1080
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
1081
      debug_printf("11111111111111111111111111111 \n");
1082
      tgsi_dump(tokens, 0);
1083
      lp_debug_dump_value(function);
1084
      debug_printf("2222222222222222222222222222 \n");
1085
   }
1086
   tgsi_parse_free(&parse);
1087
   FREE(bld.bld_base.instructions);
1088
 
1089
   if (0) {
1090
      LLVMModuleRef module = LLVMGetGlobalParent(
1091
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
1092
      LLVMDumpModule(module);
1093
   }
1094
 
1095
}
1096