Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/**************************************************************************
2
 *
3
 * Copyright 2009 VMware, Inc.
4
 * Copyright 2007-2008 VMware, Inc.
5
 * All Rights Reserved.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the
9
 * "Software"), to deal in the Software without restriction, including
10
 * without limitation the rights to use, copy, modify, merge, publish,
11
 * distribute, sub license, and/or sell copies of the Software, and to
12
 * permit persons to whom the Software is furnished to do so, subject to
13
 * the following conditions:
14
 *
15
 * The above copyright notice and this permission notice (including the
16
 * next paragraph) shall be included in all copies or substantial portions
17
 * of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 *
27
 **************************************************************************/
28
 
29
/**
30
 * @file
31
 * TGSI to LLVM IR translation -- SoA.
32
 *
33
 * @author Jose Fonseca 
34
 *
35
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36
 * Brian Paul, and others.
37
 */
38
 
39
#include "pipe/p_config.h"
40
#include "pipe/p_shader_tokens.h"
41
#include "util/u_debug.h"
42
#include "util/u_math.h"
43
#include "util/u_memory.h"
44
#include "tgsi/tgsi_dump.h"
45
#include "tgsi/tgsi_exec.h"
46
#include "tgsi/tgsi_info.h"
47
#include "tgsi/tgsi_parse.h"
48
#include "tgsi/tgsi_util.h"
49
#include "tgsi/tgsi_scan.h"
50
#include "tgsi/tgsi_strings.h"
51
#include "lp_bld_tgsi_action.h"
52
#include "lp_bld_type.h"
53
#include "lp_bld_const.h"
54
#include "lp_bld_arit.h"
55
#include "lp_bld_bitarit.h"
56
#include "lp_bld_gather.h"
57
#include "lp_bld_init.h"
58
#include "lp_bld_logic.h"
59
#include "lp_bld_swizzle.h"
60
#include "lp_bld_flow.h"
61
#include "lp_bld_quad.h"
62
#include "lp_bld_tgsi.h"
63
#include "lp_bld_limits.h"
64
#include "lp_bld_debug.h"
65
#include "lp_bld_printf.h"
66
#include "lp_bld_sample.h"
67
#include "lp_bld_struct.h"
68
 
69
/* SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function */
#define LP_MAX_NUM_FUNCS 33

/* NOTE(review): no comment in original — presumably enables debug dumping of
 * geometry-shader emits elsewhere in this file; confirm before relying on it. */
#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
 * instruction.
 *
 * TODO:
 * - take execution masks in consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0
84
 
85
 
86
/*
 * Emit code to print a register value.
 *
 * Builds LLVM IR (via lp_build_print_value) that, at shader run time,
 * prints "    FILE[index].chan = <value>" for one register channel.
 * chan must be 0..3 (indexes into "xyzw").
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   /* e.g. "    TEMP[3].x = " */
   util_snprintf(buf, sizeof buf, "    %s[%u].%c = ",
                 tgsi_file_name(file),
                 index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}
104
 
105
/*
106
 * Return the context for the current function.
107
 * (always 'main', if shader doesn't do any function calls)
108
 */
109
static INLINE struct function_ctx *
110
func_ctx(struct lp_exec_mask *mask)
111
{
112
   assert(mask->function_stack_size > 0);
113
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
114
   return &mask->function_stack[mask->function_stack_size - 1];
115
}
116
 
117
/*
118
 * Returns true if we're in a loop.
119
 * It's global, meaning that it returns true even if there's
120
 * no loop inside the current function, but we were inside
121
 * a loop inside another function, from which this one was called.
122
 */
123
static INLINE boolean
124
mask_has_loop(struct lp_exec_mask *mask)
125
{
126
   int i;
127
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
128
      const struct function_ctx *ctx = &mask->function_stack[i];
129
      if (ctx->loop_stack_size > 0)
130
         return TRUE;
131
   }
132
   return FALSE;
133
}
134
 
135
/*
136
 * Returns true if we're inside a switch statement.
137
 * It's global, meaning that it returns true even if there's
138
 * no switch in the current function, but we were inside
139
 * a switch inside another function, from which this one was called.
140
 */
141
static INLINE boolean
142
mask_has_switch(struct lp_exec_mask *mask)
143
{
144
   int i;
145
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
146
      const struct function_ctx *ctx = &mask->function_stack[i];
147
      if (ctx->switch_stack_size > 0)
148
         return TRUE;
149
   }
150
   return FALSE;
151
}
152
 
153
/*
154
 * Returns true if we're inside a conditional.
155
 * It's global, meaning that it returns true even if there's
156
 * no conditional in the current function, but we were inside
157
 * a conditional inside another function, from which this one was called.
158
 */
159
static INLINE boolean
160
mask_has_cond(struct lp_exec_mask *mask)
161
{
162
   int i;
163
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
164
      const struct function_ctx *ctx = &mask->function_stack[i];
165
      if (ctx->cond_stack_size > 0)
166
         return TRUE;
167
   }
168
   return FALSE;
169
}
170
 
171
 
172
/*
 * Initialize a function context at the specified index.
 *
 * Resets the per-function cond/loop/switch stacks and allocates the
 * loop-iteration limiter variable used to abort runaway TGSI loops.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx =  &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      /* main() inherits the current return mask */
      ctx->ret_mask = mask->ret_mask;
   }

   /* per-function counter; decremented at ENDLOOP, loop exits when it hits 0 */
   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}
197
 
198
/*
 * Initialize the execution-mask state: all partial masks fully enabled
 * (all lanes executing) and a single function frame (main) on the stack.
 * The function stack is heap-allocated; release with lp_exec_mask_fini().
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   /* all-ones == every lane active in every partial mask */
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}
215
 
216
/* Release the function stack allocated by lp_exec_mask_init(). */
static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}
221
 
222
/*
 * Recompute mask->exec_mask as the AND of all currently relevant partial
 * masks (cond, loop break/continue, switch, return), and set has_mask to
 * indicate whether any masking is in effect at all.
 * Must be called after any partial mask changes.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   /* ret mask matters inside a subroutine, or after a RET seen in main */
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
265
 
266
/*
 * Enter an IF: push the current cond mask and AND the condition into it.
 * val must be an integer vector of mask->int_vec_type.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      /* beyond max nesting: only track depth so pushes and pops stay balanced */
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      /* outermost IF in main: cond mask must still be the all-ones constant */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}
287
 
288
/*
 * ELSE: invert the current condition relative to the enclosing scope's
 * mask (the one saved on the cond stack by the matching IF).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   /* past max nesting the stack holds nothing to invert against */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   /* lanes active in the ELSE = NOT(then-mask) AND enclosing mask */
   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}
310
 
311
/* ENDIF: restore the cond mask saved by the matching IF. */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   /* entries beyond max nesting were never pushed; nothing to restore */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}
321
 
322
/*
 * BGNLOOP: push loop state (header block, cont/break masks, break var),
 * allocate a new break variable, start a new basic block for the loop
 * body and position the builder there.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      /* beyond max nesting: only track depth so BGNLOOP/ENDLOOP stay balanced */
      ++ctx->loop_stack_size;
      return;
   }

   /* break targets of loops and switches share one stack, indexed by
    * combined nesting depth */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   /* break mask lives in memory so it survives across loop iterations */
   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}
354
 
355
/*
 * BRK: disable the currently-active lanes in either the loop break mask
 * or the switch mask, depending on what the innermost break target is.
 * For switch breaks, peeks at the next instruction to detect breaks that
 * are unconditional for the whole case block.
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* clear currently executing lanes from the break mask */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* break out of a switch: look at the instruction after the break */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* jump back to where the deferred default was recorded */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* whole case block ends here: kill the switch mask outright */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
404
 
405
/*
 * BREAKC (conditional break): disable lanes for which both the exec mask
 * and the given condition are set, in the loop break mask or switch mask
 * depending on the innermost break target.
 */
static void lp_exec_break_condition(struct lp_exec_mask *mask,
                                    LLVMValueRef cond)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   /* lanes that actually take the break: active AND condition true */
   LLVMValueRef cond_mask = LLVMBuildAnd(builder,
                                         mask->exec_mask,
                                         cond, "cond_mask");
   cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      cond_mask, "breakc_full");
   }
   else {
      mask->switch_mask = LLVMBuildAnd(builder,
                                       mask->switch_mask,
                                       cond_mask, "breakc_switch");
   }

   lp_exec_mask_update(mask);
}
428
 
429
static void lp_exec_continue(struct lp_exec_mask *mask)
430
{
431
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
432
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
433
                                         mask->exec_mask,
434
                                         "");
435
 
436
   mask->cont_mask = LLVMBuildAnd(builder,
437
                                  mask->cont_mask,
438
                                  exec_mask, "");
439
 
440
   lp_exec_mask_update(mask);
441
}
442
 
443
 
444
/*
 * ENDLOOP: emit the loop back-edge. The loop repeats while any lane is
 * still active AND the iteration limiter has not run out; otherwise fall
 * through to a fresh "endloop" block and pop the saved loop state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* one scalar integer wide enough to hold the whole exec-mask vector,
    * so "any lane active" is a single compare against zero */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      /* matching BGNLOOP only bumped the counter; undo that and bail */
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* pop saved loop state (masks, blocks, break var, break type) */
   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
523
 
524
/*
 * SWITCH: push the current switch state and start a new switch on
 * switchval. The switch mask starts all-zero; CASE statements enable
 * lanes whose value matches.
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      /* beyond max nesting: only track depth so SWITCH/ENDSWITCH balance */
      ctx->switch_stack_size++;
      return;
   }

   /* break targets of loops and switches share one stack, indexed by
    * combined nesting depth */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   /* no lanes execute until a matching CASE (or the default) enables them */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
554
 
555
/*
 * ENDSWITCH: if a default statement was deferred (recorded in switch_pc
 * but not yet executed), jump back and execute it now with the default
 * mask; otherwise pop the saved switch state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      /* matching SWITCH only bumped the counter; undo and bail */
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default runs on lanes not claimed by any case */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* just returned from executing the deferred default */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* pop saved switch state */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
606
 
607
/*
 * CASE: enable lanes whose switch value equals caseval (in addition to
 * lanes already enabled, i.e. fallthrough), and record them in the
 * accumulated default mask so default later gets the complement.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      /* remember which lanes matched some case, for the default */
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      /* keep lanes already running (fallthrough from previous case) */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
631
 
632
/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   /* tracks nesting so CASE/ENDSWITCH of inner switches are ignored */
   unsigned curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != -1 && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            /* another case at our level: default is not last */
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            /* reached our own ENDSWITCH: default was the last statement */
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}
682
 
683
/*
 * DEFAULT: handle the default statement of a switch, including the messy
 * cases where it is not the last statement and/or has fallthrough in/out.
 * When default is last, just enable all lanes no case claimed; otherwise
 * record the pc so ENDSWITCH can come back and execute it (deferred).
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* lanes not matched by any case, plus lanes falling through */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
747
 
748
 
749
/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 * An optional extra predicate 'pred' (may be NULL) is ANDed in; if no
 * masking applies at all, a plain unconditional store is emitted.
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef res, dst;

      /* masked store as load + per-lane select + store */
      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, pred, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
784
 
785
/*
 * CAL: push a new function frame saving the return pc and ret mask, then
 * redirect the interpreter pc to the subroutine entry.
 * Silently ignores calls past the maximum stack depth.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}
799
 
800
/*
 * RET: an unconditional top-level return from main terminates translation
 * (pc = -1); otherwise the currently active lanes are removed from the
 * ret mask so they skip the rest of the function.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
834
 
835
/*
 * BGNSUB: intentionally a no-op — the frame bookkeeping is done by
 * lp_exec_mask_call() at the call site, not at the subroutine entry.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
838
 
839
/*
 * ENDSUB: pop the current function frame, restore the caller's pc and
 * ret mask. Must not be reached for main (stack size must be > 1).
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}
854
 
855
 
856
static LLVMValueRef
857
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
858
             unsigned file,
859
             unsigned index,
860
             unsigned chan)
861
{
862
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
863
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
864
   LLVMValueRef var_of_array;
865
 
866
   switch (file) {
867
   case TGSI_FILE_TEMPORARY:
868
      array_of_vars = bld->temps;
869
      var_of_array = bld->temps_array;
870
      break;
871
   case TGSI_FILE_OUTPUT:
872
      array_of_vars = bld->outputs;
873
      var_of_array = bld->outputs_array;
874
      break;
875
   default:
876
      assert(0);
877
      return NULL;
878
   }
879
 
880
   assert(chan < 4);
881
 
882
   if (bld->indirect_files & (1 << file)) {
883
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
884
      return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
885
   }
886
   else {
887
      assert(index <= bld->bld_base.info->file_max[file]);
888
      return array_of_vars[index][chan];
889
   }
890
}
891
 
892
 
893
/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned chan)
{
   /* Thin wrapper over get_file_ptr() for the TEMP file. */
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}
906
 
907
/**
 * Return pointer to a output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
               unsigned index,
               unsigned chan)
{
   /* Thin wrapper over get_file_ptr() for the OUTPUT file. */
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}
920
 
921
/*
922
 * If we have indirect addressing in outputs copy our alloca array
923
 * to the outputs slots specified by the caller to make sure
924
 * our outputs are delivered consistently via the same interface.
925
 */
926
static void
927
gather_outputs(struct lp_build_tgsi_soa_context * bld)
928
{
929
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
930
      unsigned index, chan;
931
      assert(bld->bld_base.info->num_outputs <=
932
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
933
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
934
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
935
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
936
         }
937
      }
938
   }
939
}
940
 
941
/**
942
 * Gather vector.
943
 * XXX the lp_build_gather() function should be capable of doing this
944
 * with a little work.
945
 */
946
static LLVMValueRef
947
build_gather(struct lp_build_tgsi_context *bld_base,
948
             LLVMValueRef base_ptr,
949
             LLVMValueRef indexes,
950
             LLVMValueRef overflow_mask)
951
{
952
   struct gallivm_state *gallivm = bld_base->base.gallivm;
953
   LLVMBuilderRef builder = gallivm->builder;
954
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
955
   struct lp_build_context *bld = &bld_base->base;
956
   LLVMValueRef res = bld->undef;
957
   unsigned i;
958
 
959
   /*
960
    * overflow_mask is a vector telling us which channels
961
    * in the vector overflowed. We use the overflow behavior for
962
    * constant buffers which is defined as:
963
    * Out of bounds access to constant buffer returns 0 in all
964
    * components. Out of bounds behavior is always with respect
965
    * to the size of the buffer bound at that slot.
966
    */
967
 
968
   if (overflow_mask) {
969
      /*
970
       * We avoid per-element control flow here (also due to llvm going crazy,
971
       * though I suspect it's better anyway since overflow is likely rare).
972
       * Note that since we still fetch from buffers even if num_elements was
973
       * zero (in this case we'll fetch from index zero) the jit func callers
974
       * MUST provide valid fake constant buffers of size 4x32 (the values do
975
       * not matter), otherwise we'd still need (not per element though)
976
       * control flow.
977
       */
978
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
979
   }
980
 
981
   /*
982
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
983
    */
984
   for (i = 0; i < bld->type.length; i++) {
985
      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
986
      LLVMValueRef index = LLVMBuildExtractElement(builder,
987
                                                   indexes, ii, "");
988
      LLVMValueRef scalar_ptr, scalar;
989
 
990
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
991
                                &index, 1, "gather_ptr");
992
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
993
 
994
      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
995
   }
996
 
997
   if (overflow_mask) {
998
      res = lp_build_select(bld, overflow_mask, bld->zero, res);
999
   }
1000
 
1001
   return res;
1002
}
1003
 
1004
 
1005
/**
1006
 * Scatter/store vector.
1007
 */
1008
static void
1009
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
1010
                  LLVMValueRef base_ptr,
1011
                  LLVMValueRef indexes,
1012
                  LLVMValueRef values,
1013
                  struct lp_exec_mask *mask,
1014
                  LLVMValueRef pred)
1015
{
1016
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1017
   LLVMBuilderRef builder = gallivm->builder;
1018
   unsigned i;
1019
 
1020
   /* Mix the predicate and execution mask */
1021
   if (mask->has_mask) {
1022
      if (pred) {
1023
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
1024
      }
1025
      else {
1026
         pred = mask->exec_mask;
1027
      }
1028
   }
1029
 
1030
   /*
1031
    * Loop over elements of index_vec, store scalar value.
1032
    */
1033
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
1034
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
1035
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
1036
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
1037
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
1038
      LLVMValueRef scalar_pred = pred ?
1039
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
1040
 
1041
      if (0)
1042
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
1043
                         ii, val, index, scalar_ptr);
1044
 
1045
      if (scalar_pred) {
1046
         LLVMValueRef real_val, dst_val;
1047
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
1048
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
1049
         LLVMBuildStore(builder, real_val, scalar_ptr);
1050
      }
1051
      else {
1052
         LLVMBuildStore(builder, val, scalar_ptr);
1053
      }
1054
   }
1055
}
1056
 
1057
 
1058
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* NOTE(review): stale comment below — the code honors the register's
    * swizzle, not just the X component; presumably predates swizzle
    * support. */
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   /* Splat the static base register index to a vector. */
   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   /* index = base + relative offset, per lane */
   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      /* Clamp each lane to the file's declared maximum register index so
       * out-of-bounds indirect accesses cannot read/write past the file. */
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type,
                                         bld->bld_base.info->file_max[reg_file]);

      /* Unsigned min relies on the type being unsigned. */
      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
1124
 
1125
static struct lp_build_context *
1126
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
1127
	       enum tgsi_opcode_type stype)
1128
{
1129
   struct lp_build_context *bld_fetch;
1130
 
1131
   switch (stype) {
1132
   case TGSI_TYPE_FLOAT:
1133
   case TGSI_TYPE_UNTYPED:
1134
      bld_fetch = &bld_base->base;
1135
      break;
1136
   case TGSI_TYPE_UNSIGNED:
1137
      bld_fetch = &bld_base->uint_bld;
1138
      break;
1139
   case TGSI_TYPE_SIGNED:
1140
      bld_fetch = &bld_base->int_bld;
1141
      break;
1142
   case TGSI_TYPE_VOID:
1143
   case TGSI_TYPE_DOUBLE:
1144
   default:
1145
      assert(0);
1146
      bld_fetch = NULL;
1147
      break;
1148
   }
1149
   return bld_fetch;
1150
}
1151
 
1152
/**
 * Build the vector of element offsets used to gather/scatter a register
 * file stored as a flat SoA array:
 *   offset[lane] = (indirect_index[lane] * 4 + chan_index) * vector_length
 *                  (+ lane, if need_perelement_offset)
 *
 * \param indirect_index          per-lane register indexes
 * \param chan_index              which channel (0..3) within the register
 * \param need_perelement_offset  add {0,1,2,...} so each lane addresses its
 *                                own slot inside the stored vector; FALSE
 *                                when all lanes hold identical values
 *                                (e.g. immediates).
 */
static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      int i;
     /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
1184
 
1185
static LLVMValueRef
1186
emit_fetch_constant(
1187
   struct lp_build_tgsi_context * bld_base,
1188
   const struct tgsi_full_src_register * reg,
1189
   enum tgsi_opcode_type stype,
1190
   unsigned swizzle)
1191
{
1192
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1193
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1194
   LLVMBuilderRef builder = gallivm->builder;
1195
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
1196
   unsigned dimension = 0;
1197
   LLVMValueRef consts_ptr;
1198
   LLVMValueRef num_consts;
1199
   LLVMValueRef res;
1200
 
1201
   /* XXX: Handle fetching xyzw components as a vector */
1202
   assert(swizzle != ~0);
1203
 
1204
   if (reg->Register.Dimension) {
1205
      assert(!reg->Dimension.Indirect);
1206
      dimension = reg->Dimension.Index;
1207
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
1208
   }
1209
 
1210
   consts_ptr = bld->consts[dimension];
1211
   num_consts = bld->consts_sizes[dimension];
1212
 
1213
   if (reg->Register.Indirect) {
1214
      LLVMValueRef indirect_index;
1215
      LLVMValueRef swizzle_vec =
1216
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
1217
      LLVMValueRef index_vec;  /* index into the const buffer */
1218
      LLVMValueRef overflow_mask;
1219
 
1220
      indirect_index = get_indirect_index(bld,
1221
                                          reg->Register.File,
1222
                                          reg->Register.Index,
1223
                                          ®->Indirect);
1224
 
1225
      /* All fetches are from the same constant buffer, so
1226
       * we need to propagate the size to a vector to do a
1227
       * vector comparison */
1228
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1229
      /* Construct a boolean vector telling us which channels
1230
       * overflow the bound constant buffer */
1231
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1232
                                       indirect_index, num_consts);
1233
 
1234
      /* index_vec = indirect_index * 4 + swizzle */
1235
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
1236
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
1237
 
1238
      /* Gather values from the constant buffer */
1239
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask);
1240
   }
1241
   else {
1242
      LLVMValueRef index;  /* index into the const buffer */
1243
      LLVMValueRef scalar, scalar_ptr;
1244
 
1245
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1246
 
1247
      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
1248
                                &index, 1, "");
1249
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
1250
      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
1251
   }
1252
 
1253
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1254
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1255
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1256
   }
1257
 
1258
   return res;
1259
}
1260
 
1261
static LLVMValueRef
1262
emit_fetch_immediate(
1263
   struct lp_build_tgsi_context * bld_base,
1264
   const struct tgsi_full_src_register * reg,
1265
   enum tgsi_opcode_type stype,
1266
   unsigned swizzle)
1267
{
1268
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1269
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1270
   LLVMBuilderRef builder = gallivm->builder;
1271
   LLVMValueRef res = NULL;
1272
 
1273
   if (bld->use_immediates_array || reg->Register.Indirect) {
1274
      LLVMValueRef imms_array;
1275
      LLVMTypeRef fptr_type;
1276
 
1277
      /* cast imms_array pointer to float* */
1278
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1279
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
1280
 
1281
      if (reg->Register.Indirect) {
1282
         LLVMValueRef indirect_index;
1283
         LLVMValueRef index_vec;  /* index into the immediate register array */
1284
 
1285
         indirect_index = get_indirect_index(bld,
1286
                                             reg->Register.File,
1287
                                             reg->Register.Index,
1288
                                             ®->Indirect);
1289
         /*
1290
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
1291
          * immediates are stored as full vectors (FIXME??? - might be better
1292
          * to store them the same as constants) but all elements are the same
1293
          * in any case.
1294
          */
1295
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1296
                                           indirect_index,
1297
                                           swizzle,
1298
                                           FALSE);
1299
 
1300
         /* Gather values from the immediate register array */
1301
         res = build_gather(bld_base, imms_array, index_vec, NULL);
1302
      } else {
1303
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
1304
                                        reg->Register.Index * 4 + swizzle);
1305
         LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
1306
                                                bld->imms_array, &lindex, 1, "");
1307
         res = LLVMBuildLoad(builder, imms_ptr, "");
1308
      }
1309
   }
1310
   else {
1311
      res = bld->immediates[reg->Register.Index][swizzle];
1312
   }
1313
 
1314
   if (stype == TGSI_TYPE_UNSIGNED) {
1315
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1316
   } else if (stype == TGSI_TYPE_SIGNED) {
1317
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1318
   }
1319
   return res;
1320
}
1321
 
1322
static LLVMValueRef
1323
emit_fetch_input(
1324
   struct lp_build_tgsi_context * bld_base,
1325
   const struct tgsi_full_src_register * reg,
1326
   enum tgsi_opcode_type stype,
1327
   unsigned swizzle)
1328
{
1329
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1330
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1331
   LLVMBuilderRef builder = gallivm->builder;
1332
   LLVMValueRef res;
1333
 
1334
   if (reg->Register.Indirect) {
1335
      LLVMValueRef indirect_index;
1336
      LLVMValueRef index_vec;  /* index into the input reg array */
1337
      LLVMValueRef inputs_array;
1338
      LLVMTypeRef fptr_type;
1339
 
1340
      indirect_index = get_indirect_index(bld,
1341
                                          reg->Register.File,
1342
                                          reg->Register.Index,
1343
                                          ®->Indirect);
1344
 
1345
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1346
                                        indirect_index,
1347
                                        swizzle,
1348
                                        TRUE);
1349
 
1350
      /* cast inputs_array pointer to float* */
1351
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1352
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1353
 
1354
      /* Gather values from the input register array */
1355
      res = build_gather(bld_base, inputs_array, index_vec, NULL);
1356
   } else {
1357
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1358
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
1359
                                        reg->Register.Index * 4 + swizzle);
1360
         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
1361
                                                bld->inputs_array, &lindex, 1, "");
1362
         res = LLVMBuildLoad(builder, input_ptr, "");
1363
      }
1364
      else {
1365
         res = bld->inputs[reg->Register.Index][swizzle];
1366
      }
1367
   }
1368
 
1369
   assert(res);
1370
 
1371
   if (stype == TGSI_TYPE_UNSIGNED) {
1372
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1373
   } else if (stype == TGSI_TYPE_SIGNED) {
1374
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1375
   }
1376
 
1377
   return res;
1378
}
1379
 
1380
 
1381
static LLVMValueRef
1382
emit_fetch_gs_input(
1383
   struct lp_build_tgsi_context * bld_base,
1384
   const struct tgsi_full_src_register * reg,
1385
   enum tgsi_opcode_type stype,
1386
   unsigned swizzle)
1387
{
1388
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1389
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1390
   const struct tgsi_shader_info *info = bld->bld_base.info;
1391
   LLVMBuilderRef builder = gallivm->builder;
1392
   LLVMValueRef attrib_index = NULL;
1393
   LLVMValueRef vertex_index = NULL;
1394
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1395
   LLVMValueRef res;
1396
 
1397
   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1398
      /* This is really a system value not a regular input */
1399
      assert(!reg->Register.Indirect);
1400
      assert(!reg->Dimension.Indirect);
1401
      res = bld->system_values.prim_id;
1402
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1403
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1404
      }
1405
      return res;
1406
   }
1407
 
1408
   if (reg->Register.Indirect) {
1409
      attrib_index = get_indirect_index(bld,
1410
                                        reg->Register.File,
1411
                                        reg->Register.Index,
1412
                                        ®->Indirect);
1413
   } else {
1414
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1415
   }
1416
 
1417
   if (reg->Dimension.Indirect) {
1418
      vertex_index = get_indirect_index(bld,
1419
                                        reg->Register.File,
1420
                                        reg->Dimension.Index,
1421
                                        ®->DimIndirect);
1422
   } else {
1423
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1424
   }
1425
 
1426
   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
1427
                                    reg->Dimension.Indirect,
1428
                                    vertex_index,
1429
                                    reg->Register.Indirect,
1430
                                    attrib_index,
1431
                                    swizzle_index);
1432
 
1433
   assert(res);
1434
 
1435
   if (stype == TGSI_TYPE_UNSIGNED) {
1436
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1437
   } else if (stype == TGSI_TYPE_SIGNED) {
1438
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1439
   }
1440
 
1441
   return res;
1442
}
1443
 
1444
static LLVMValueRef
1445
emit_fetch_temporary(
1446
   struct lp_build_tgsi_context * bld_base,
1447
   const struct tgsi_full_src_register * reg,
1448
   enum tgsi_opcode_type stype,
1449
   unsigned swizzle)
1450
{
1451
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1452
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1453
   LLVMBuilderRef builder = gallivm->builder;
1454
   LLVMValueRef res;
1455
 
1456
   if (reg->Register.Indirect) {
1457
      LLVMValueRef indirect_index;
1458
      LLVMValueRef index_vec;  /* index into the temp reg array */
1459
      LLVMValueRef temps_array;
1460
      LLVMTypeRef fptr_type;
1461
 
1462
      indirect_index = get_indirect_index(bld,
1463
                                          reg->Register.File,
1464
                                          reg->Register.Index,
1465
                                          ®->Indirect);
1466
 
1467
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1468
                                        indirect_index,
1469
                                        swizzle,
1470
                                        TRUE);
1471
 
1472
      /* cast temps_array pointer to float* */
1473
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1474
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1475
 
1476
      /* Gather values from the temporary register array */
1477
      res = build_gather(bld_base, temps_array, index_vec, NULL);
1478
   }
1479
   else {
1480
      LLVMValueRef temp_ptr;
1481
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1482
      res = LLVMBuildLoad(builder, temp_ptr, "");
1483
   }
1484
 
1485
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
1486
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1487
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1488
   }
1489
 
1490
   return res;
1491
}
1492
 
1493
static LLVMValueRef
1494
emit_fetch_system_value(
1495
   struct lp_build_tgsi_context * bld_base,
1496
   const struct tgsi_full_src_register * reg,
1497
   enum tgsi_opcode_type stype,
1498
   unsigned swizzle)
1499
{
1500
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1501
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1502
   const struct tgsi_shader_info *info = bld->bld_base.info;
1503
   LLVMBuilderRef builder = gallivm->builder;
1504
   LLVMValueRef res;
1505
   enum tgsi_opcode_type atype; // Actual type of the value
1506
 
1507
   assert(!reg->Register.Indirect);
1508
 
1509
   switch (info->system_value_semantic_name[reg->Register.Index]) {
1510
   case TGSI_SEMANTIC_INSTANCEID:
1511
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1512
      atype = TGSI_TYPE_UNSIGNED;
1513
      break;
1514
 
1515
   case TGSI_SEMANTIC_VERTEXID:
1516
      res = bld->system_values.vertex_id;
1517
      atype = TGSI_TYPE_UNSIGNED;
1518
      break;
1519
 
1520
   case TGSI_SEMANTIC_VERTEXID_NOBASE:
1521
      res = bld->system_values.vertex_id_nobase;
1522
      atype = TGSI_TYPE_UNSIGNED;
1523
      break;
1524
 
1525
   case TGSI_SEMANTIC_BASEVERTEX:
1526
      res = bld->system_values.basevertex;
1527
      atype = TGSI_TYPE_UNSIGNED;
1528
      break;
1529
 
1530
   case TGSI_SEMANTIC_PRIMID:
1531
      res = bld->system_values.prim_id;
1532
      atype = TGSI_TYPE_UNSIGNED;
1533
      break;
1534
 
1535
   default:
1536
      assert(!"unexpected semantic in emit_fetch_system_value");
1537
      res = bld_base->base.zero;
1538
      atype = TGSI_TYPE_FLOAT;
1539
      break;
1540
   }
1541
 
1542
   if (atype != stype) {
1543
      if (stype == TGSI_TYPE_FLOAT) {
1544
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1545
      } else if (stype == TGSI_TYPE_UNSIGNED) {
1546
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1547
      } else if (stype == TGSI_TYPE_SIGNED) {
1548
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1549
      }
1550
   }
1551
 
1552
   return res;
1553
}
1554
 
1555
/**
1556
 * Register fetch with derivatives.
1557
 */
1558
static void
1559
emit_fetch_deriv(
1560
   struct lp_build_tgsi_soa_context *bld,
1561
   LLVMValueRef src,
1562
   LLVMValueRef *res,
1563
   LLVMValueRef *ddx,
1564
   LLVMValueRef *ddy)
1565
{
1566
   if(res)
1567
      *res = src;
1568
 
1569
   /* TODO: use interpolation coeffs for inputs */
1570
 
1571
   if(ddx)
1572
      *ddx = lp_build_ddx(&bld->bld_base.base, src);
1573
 
1574
   if(ddy)
1575
      *ddy = lp_build_ddy(&bld->bld_base.base, src);
1576
}
1577
 
1578
 
1579
/**
 * Predicate.
 *
 * Build a per-channel predicate mask for a predicated instruction.
 * When the instruction has no predicate, all four entries of 'pred'
 * are set to NULL.  Otherwise each channel's predicate is the selected
 * (possibly swizzled, possibly negated) predicate-register channel
 * converted to an integer mask (value != 0).
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   /* Cache of already-converted predicate channels, keyed by swizzle. */
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* Unpredicated: no mask on any channel. */
      TGSI_FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   TGSI_FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->bld_base.base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
1645
 
1646
 
1647
/**
1648
 * Register store.
1649
 */
1650
static void
1651
emit_store_chan(
1652
   struct lp_build_tgsi_context *bld_base,
1653
   const struct tgsi_full_instruction *inst,
1654
   unsigned index,
1655
   unsigned chan_index,
1656
   LLVMValueRef pred,
1657
   LLVMValueRef value)
1658
{
1659
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1660
   struct gallivm_state *gallivm = bld_base->base.gallivm;
1661
   LLVMBuilderRef builder = gallivm->builder;
1662
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1663
   struct lp_build_context *float_bld = &bld_base->base;
1664
   struct lp_build_context *int_bld = &bld_base->int_bld;
1665
   LLVMValueRef indirect_index = NULL;
1666
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
1667
 
1668
   /*
1669
    * Apply saturation.
1670
    *
1671
    * It is always assumed to be float.
1672
    */
1673
   switch( inst->Instruction.Saturate ) {
1674
   case TGSI_SAT_NONE:
1675
      break;
1676
 
1677
   case TGSI_SAT_ZERO_ONE:
1678
      assert(dtype == TGSI_TYPE_FLOAT ||
1679
             dtype == TGSI_TYPE_UNTYPED);
1680
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1681
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1682
      break;
1683
 
1684
   case TGSI_SAT_MINUS_PLUS_ONE:
1685
      assert(dtype == TGSI_TYPE_FLOAT ||
1686
             dtype == TGSI_TYPE_UNTYPED);
1687
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1688
      /* This will give -1.0 for NaN which is probably not what we want. */
1689
      value = lp_build_max_ext(float_bld, value,
1690
                               lp_build_const_vec(gallivm, float_bld->type, -1.0),
1691
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1692
      value = lp_build_min(float_bld, value, float_bld->one);
1693
      break;
1694
 
1695
   default:
1696
      assert(0);
1697
   }
1698
 
1699
   if (reg->Register.Indirect) {
1700
      indirect_index = get_indirect_index(bld,
1701
                                          reg->Register.File,
1702
                                          reg->Register.Index,
1703
                                          ®->Indirect);
1704
   } else {
1705
      assert(reg->Register.Index <=
1706
                             bld_base->info->file_max[reg->Register.File]);
1707
   }
1708
 
1709
   if (DEBUG_EXECUTION) {
1710
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1711
   }
1712
 
1713
   switch( reg->Register.File ) {
1714
   case TGSI_FILE_OUTPUT:
1715
      /* Outputs are always stored as floats */
1716
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1717
 
1718
      if (reg->Register.Indirect) {
1719
         LLVMValueRef index_vec;  /* indexes into the output registers */
1720
         LLVMValueRef outputs_array;
1721
         LLVMTypeRef fptr_type;
1722
 
1723
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1724
                                           indirect_index,
1725
                                           chan_index,
1726
                                           TRUE);
1727
 
1728
         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1729
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1730
 
1731
         /* Scatter store values into output registers */
1732
         emit_mask_scatter(bld, outputs_array, index_vec, value,
1733
                           &bld->exec_mask, pred);
1734
      }
1735
      else {
1736
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1737
                                                  chan_index);
1738
         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
1739
      }
1740
      break;
1741
 
1742
   case TGSI_FILE_TEMPORARY:
1743
      /* Temporaries are always stored as floats */
1744
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1745
 
1746
      if (reg->Register.Indirect) {
1747
         LLVMValueRef index_vec;  /* indexes into the temp registers */
1748
         LLVMValueRef temps_array;
1749
         LLVMTypeRef fptr_type;
1750
 
1751
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1752
                                           indirect_index,
1753
                                           chan_index,
1754
                                           TRUE);
1755
 
1756
         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1757
         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1758
 
1759
         /* Scatter store values into temp registers */
1760
         emit_mask_scatter(bld, temps_array, index_vec, value,
1761
                           &bld->exec_mask, pred);
1762
      }
1763
      else {
1764
         LLVMValueRef temp_ptr;
1765
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1766
         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
1767
      }
1768
      break;
1769
 
1770
   case TGSI_FILE_ADDRESS:
1771
      assert(dtype == TGSI_TYPE_SIGNED);
1772
      assert(LLVMTypeOf(value) == int_bld->vec_type);
1773
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1774
      lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
1775
                         bld->addr[reg->Register.Index][chan_index]);
1776
      break;
1777
 
1778
   case TGSI_FILE_PREDICATE:
1779
      assert(LLVMTypeOf(value) == float_bld->vec_type);
1780
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1781
      lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
1782
                         bld->preds[reg->Register.Index][chan_index]);
1783
      break;
1784
 
1785
   default:
1786
      assert( 0 );
1787
   }
1788
 
1789
   (void)dtype;
1790
}
1791
 
1792
/*
1793
 * Called at the beginning of the translation of each TGSI instruction, to
1794
 * emit some debug code.
1795
 */
1796
static void
1797
emit_debug(
1798
   struct lp_build_tgsi_context * bld_base,
1799
   const struct tgsi_full_instruction * inst,
1800
   const struct tgsi_opcode_info * info)
1801
 
1802
{
1803
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1804
 
1805
   if (DEBUG_EXECUTION) {
1806
      /*
1807
       * Dump the TGSI instruction.
1808
       */
1809
 
1810
      struct gallivm_state *gallivm = bld_base->base.gallivm;
1811
      char buf[512];
1812
      buf[0] = '$';
1813
      buf[1] = ' ';
1814
      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1815
      lp_build_printf(gallivm, buf);
1816
 
1817
      /* Dump the execution mask.
1818
       */
1819
      if (bld->exec_mask.has_mask) {
1820
         lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
1821
      }
1822
   }
1823
}
1824
 
1825
static void
1826
emit_store(
1827
   struct lp_build_tgsi_context * bld_base,
1828
   const struct tgsi_full_instruction * inst,
1829
   const struct tgsi_opcode_info * info,
1830
   LLVMValueRef dst[4])
1831
 
1832
{
1833
   unsigned chan_index;
1834
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1835
 
1836
   if(info->num_dst) {
1837
      LLVMValueRef pred[TGSI_NUM_CHANNELS];
1838
 
1839
      emit_fetch_predicate( bld, inst, pred );
1840
 
1841
      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1842
         emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
1843
      }
1844
   }
1845
}
1846
 
1847
static unsigned
1848
tgsi_to_pipe_tex_target(unsigned tgsi_target)
1849
{
1850
   switch (tgsi_target) {
1851
   case TGSI_TEXTURE_BUFFER:
1852
      return PIPE_BUFFER;
1853
   case TGSI_TEXTURE_1D:
1854
   case TGSI_TEXTURE_SHADOW1D:
1855
      return PIPE_TEXTURE_1D;
1856
   case TGSI_TEXTURE_2D:
1857
   case TGSI_TEXTURE_SHADOW2D:
1858
   case TGSI_TEXTURE_2D_MSAA:
1859
      return PIPE_TEXTURE_2D;
1860
   case TGSI_TEXTURE_3D:
1861
      return PIPE_TEXTURE_3D;
1862
   case TGSI_TEXTURE_CUBE:
1863
   case TGSI_TEXTURE_SHADOWCUBE:
1864
      return PIPE_TEXTURE_CUBE;
1865
   case TGSI_TEXTURE_RECT:
1866
   case TGSI_TEXTURE_SHADOWRECT:
1867
      return PIPE_TEXTURE_RECT;
1868
   case TGSI_TEXTURE_1D_ARRAY:
1869
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1870
      return PIPE_TEXTURE_1D_ARRAY;
1871
   case TGSI_TEXTURE_2D_ARRAY:
1872
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1873
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
1874
      return PIPE_TEXTURE_2D_ARRAY;
1875
   case TGSI_TEXTURE_CUBE_ARRAY:
1876
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1877
      return PIPE_TEXTURE_CUBE_ARRAY;
1878
   default:
1879
      assert(0);
1880
      return PIPE_BUFFER;
1881
   }
1882
}
1883
 
1884
 
1885
static enum lp_sampler_lod_property
1886
lp_build_lod_property(
1887
   struct lp_build_tgsi_context *bld_base,
1888
   const struct tgsi_full_instruction *inst,
1889
   unsigned src_op)
1890
{
1891
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
1892
   enum lp_sampler_lod_property lod_property;
1893
 
1894
   /*
1895
    * Not much we can do here. We could try catching inputs declared
1896
    * with constant interpolation but not sure it's worth it - since for
1897
    * TEX opcodes as well as FETCH/LD the lod comes from same reg as
1898
    * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
1899
    * like the constant/immediate recognition below.
1900
    * What seems to be of more value would be to recognize temps holding
1901
    * broadcasted scalars but no way we can do it.
1902
    * Tried asking llvm but without any success (using LLVMIsConstant
1903
    * even though this isn't exactly what we'd need), even as simple as
1904
    * IMM[0] UINT32 (0,-1,0,0)
1905
    * MOV TEMP[0] IMM[0].yyyy
1906
    * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
1907
    * doesn't work.
1908
    * This means there's ZERO chance this will ever catch a scalar lod
1909
    * with traditional tex opcodes as well as texel fetches, since the lod
1910
    * comes from the same reg as coords (except some test shaders using
1911
    * constant coords maybe).
1912
    * There's at least hope for sample opcodes as well as size queries.
1913
    */
1914
   if (reg->Register.File == TGSI_FILE_CONSTANT ||
1915
       reg->Register.File == TGSI_FILE_IMMEDIATE) {
1916
      lod_property = LP_SAMPLER_LOD_SCALAR;
1917
   }
1918
   else if (bld_base->info->processor == TGSI_PROCESSOR_FRAGMENT) {
1919
      if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
1920
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1921
      }
1922
      else {
1923
         lod_property = LP_SAMPLER_LOD_PER_QUAD;
1924
      }
1925
   }
1926
   else {
1927
      /* never use scalar (per-quad) lod the results are just too wrong. */
1928
      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
1929
   }
1930
   return lod_property;
1931
}
1932
 
1933
 
1934
/**
 * High-level instruction translators.
 */

1938
static void
1939
emit_tex( struct lp_build_tgsi_soa_context *bld,
1940
          const struct tgsi_full_instruction *inst,
1941
          enum lp_build_tex_modifier modifier,
1942
          LLVMValueRef *texel,
1943
          unsigned sampler_reg,
1944
          enum lp_sampler_op_type sampler_op)
1945
{
1946
   unsigned unit = inst->Src[sampler_reg].Register.Index;
1947
   LLVMValueRef oow = NULL;
1948
   LLVMValueRef lod = NULL;
1949
   LLVMValueRef coords[5];
1950
   LLVMValueRef offsets[3] = { NULL };
1951
   struct lp_derivatives derivs;
1952
   struct lp_sampler_params params;
1953
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
1954
   unsigned num_derivs, num_offsets, i;
1955
   unsigned shadow_coord = 0;
1956
   unsigned layer_coord = 0;
1957
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
1958
 
1959
   memset(¶ms, 0, sizeof(params));
1960
 
1961
   if (!bld->sampler) {
1962
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
1963
      for (i = 0; i < 4; i++) {
1964
         texel[i] = bld->bld_base.base.undef;
1965
      }
1966
      return;
1967
   }
1968
 
1969
   switch (inst->Texture.Texture) {
1970
   case TGSI_TEXTURE_1D_ARRAY:
1971
      layer_coord = 1;
1972
      /* fallthrough */
1973
   case TGSI_TEXTURE_1D:
1974
      num_offsets = 1;
1975
      num_derivs = 1;
1976
      break;
1977
   case TGSI_TEXTURE_2D_ARRAY:
1978
      layer_coord = 2;
1979
      /* fallthrough */
1980
   case TGSI_TEXTURE_2D:
1981
   case TGSI_TEXTURE_RECT:
1982
      num_offsets = 2;
1983
      num_derivs = 2;
1984
      break;
1985
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1986
      layer_coord = 1;
1987
      /* fallthrough */
1988
   case TGSI_TEXTURE_SHADOW1D:
1989
      shadow_coord = 2;
1990
      num_offsets = 1;
1991
      num_derivs = 1;
1992
      break;
1993
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1994
      layer_coord = 2;
1995
      shadow_coord = 3;
1996
      num_offsets = 2;
1997
      num_derivs = 2;
1998
      break;
1999
   case TGSI_TEXTURE_SHADOW2D:
2000
   case TGSI_TEXTURE_SHADOWRECT:
2001
      shadow_coord = 2;
2002
      num_offsets = 2;
2003
      num_derivs = 2;
2004
      break;
2005
   case TGSI_TEXTURE_CUBE:
2006
      num_offsets = 2;
2007
      num_derivs = 3;
2008
      break;
2009
   case TGSI_TEXTURE_3D:
2010
      num_offsets = 3;
2011
      num_derivs = 3;
2012
      break;
2013
   case TGSI_TEXTURE_SHADOWCUBE:
2014
      shadow_coord = 3;
2015
      num_offsets = 2;
2016
      num_derivs = 3;
2017
      break;
2018
   case TGSI_TEXTURE_CUBE_ARRAY:
2019
      num_offsets = 2;
2020
      num_derivs = 3;
2021
      layer_coord = 3;
2022
      break;
2023
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2024
      num_offsets = 2;
2025
      num_derivs = 3;
2026
      layer_coord = 3;
2027
      shadow_coord = 4; /* shadow coord special different reg */
2028
      break;
2029
   case TGSI_TEXTURE_2D_MSAA:
2030
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2031
   default:
2032
      assert(0);
2033
      return;
2034
   }
2035
 
2036
   /* Note lod and especially projected are illegal in a LOT of cases */
2037
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2038
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2039
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2040
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2041
         /* note that shadow cube array with bias/explicit lod does not exist */
2042
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2043
      }
2044
      else {
2045
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2046
      }
2047
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2048
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2049
      }
2050
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2051
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2052
      }
2053
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2054
   }
2055
 
2056
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2057
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2058
      oow = lp_build_rcp(&bld->bld_base.base, oow);
2059
   }
2060
 
2061
   for (i = 0; i < num_derivs; i++) {
2062
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2063
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2064
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2065
   }
2066
   for (i = num_derivs; i < 5; i++) {
2067
      coords[i] = bld->bld_base.base.undef;
2068
   }
2069
 
2070
   /* Layer coord always goes into 3rd slot, except for cube map arrays */
2071
   if (layer_coord) {
2072
      if (layer_coord == 3) {
2073
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2074
      }
2075
      else {
2076
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2077
      }
2078
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2079
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2080
   }
2081
   /* Shadow coord occupies always 5th slot. */
2082
   if (shadow_coord) {
2083
      sample_key |= LP_SAMPLER_SHADOW;
2084
      if (shadow_coord == 4) {
2085
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2086
      }
2087
      else {
2088
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2089
      }
2090
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2091
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2092
   }
2093
 
2094
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2095
      unsigned dim;
2096
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2097
      for (dim = 0; dim < num_derivs; ++dim) {
2098
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2099
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2100
      }
2101
      params.derivs = &derivs;
2102
      /*
2103
       * could also check all src regs if constant but I doubt such
2104
       * cases exist in practice.
2105
       */
2106
      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
2107
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2108
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2109
         }
2110
         else {
2111
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
2112
         }
2113
      }
2114
      else {
2115
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2116
      }
2117
   }
2118
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2119
 
2120
   /* we don't handle the 4 offset version of tg4 */
2121
   if (inst->Texture.NumOffsets == 1) {
2122
      unsigned dim;
2123
      sample_key |= LP_SAMPLER_OFFSETS;
2124
      for (dim = 0; dim < num_offsets; dim++) {
2125
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2126
      }
2127
   }
2128
 
2129
   params.type = bld->bld_base.base.type;
2130
   params.sample_key = sample_key;
2131
   params.texture_index = unit;
2132
   params.sampler_index = unit;
2133
   params.context_ptr = bld->context_ptr;
2134
   params.coords = coords;
2135
   params.offsets = offsets;
2136
   params.lod = lod;
2137
   params.texel = texel;
2138
 
2139
   bld->sampler->emit_tex_sample(bld->sampler,
2140
                                 bld->bld_base.base.gallivm,
2141
                                 ¶ms);
2142
}
2143
 
2144
static void
2145
emit_sample(struct lp_build_tgsi_soa_context *bld,
2146
            const struct tgsi_full_instruction *inst,
2147
            enum lp_build_tex_modifier modifier,
2148
            boolean compare,
2149
            LLVMValueRef *texel)
2150
{
2151
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2152
   unsigned texture_unit, sampler_unit;
2153
   LLVMValueRef lod = NULL;
2154
   LLVMValueRef coords[5];
2155
   LLVMValueRef offsets[3] = { NULL };
2156
   struct lp_derivatives derivs;
2157
   struct lp_sampler_params params;
2158
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2159
 
2160
   unsigned num_offsets, num_derivs, i;
2161
   unsigned layer_coord = 0;
2162
   unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;
2163
 
2164
   memset(¶ms, 0, sizeof(params));
2165
 
2166
   if (!bld->sampler) {
2167
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2168
      for (i = 0; i < 4; i++) {
2169
         texel[i] = bld->bld_base.base.undef;
2170
      }
2171
      return;
2172
   }
2173
 
2174
   /*
2175
    * unlike old-style tex opcodes the texture/sampler indices
2176
    * always come from src1 and src2 respectively.
2177
    */
2178
   texture_unit = inst->Src[1].Register.Index;
2179
   sampler_unit = inst->Src[2].Register.Index;
2180
 
2181
   /*
2182
    * Note inst->Texture.Texture will contain the number of offsets,
2183
    * however the target information is NOT there and comes from the
2184
    * declared sampler views instead.
2185
    */
2186
   switch (bld->sv[texture_unit].Resource) {
2187
   case TGSI_TEXTURE_1D:
2188
      num_offsets = 1;
2189
      num_derivs = 1;
2190
      break;
2191
   case TGSI_TEXTURE_1D_ARRAY:
2192
      layer_coord = 1;
2193
      num_offsets = 1;
2194
      num_derivs = 1;
2195
      break;
2196
   case TGSI_TEXTURE_2D:
2197
   case TGSI_TEXTURE_RECT:
2198
      num_offsets = 2;
2199
      num_derivs = 2;
2200
      break;
2201
   case TGSI_TEXTURE_2D_ARRAY:
2202
      layer_coord = 2;
2203
      num_offsets = 2;
2204
      num_derivs = 2;
2205
      break;
2206
   case TGSI_TEXTURE_CUBE:
2207
      num_offsets = 2;
2208
      num_derivs = 3;
2209
      break;
2210
   case TGSI_TEXTURE_3D:
2211
      num_offsets = 3;
2212
      num_derivs = 3;
2213
      break;
2214
   case TGSI_TEXTURE_CUBE_ARRAY:
2215
      layer_coord = 3;
2216
      num_offsets = 2;
2217
      num_derivs = 3;
2218
      break;
2219
   default:
2220
      assert(0);
2221
      return;
2222
   }
2223
 
2224
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2225
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2226
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2227
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2228
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2229
      }
2230
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2231
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2232
      }
2233
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2234
   }
2235
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2236
      /* XXX might be better to explicitly pass the level zero information */
2237
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2238
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2239
   }
2240
 
2241
   for (i = 0; i < num_derivs; i++) {
2242
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2243
   }
2244
   for (i = num_derivs; i < 5; i++) {
2245
      coords[i] = bld->bld_base.base.undef;
2246
   }
2247
 
2248
   /* Layer coord always goes into 3rd slot, except for cube map arrays */
2249
   if (layer_coord) {
2250
      if (layer_coord == 3)
2251
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2252
      else
2253
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2254
   }
2255
   /* Shadow coord occupies always 5th slot. */
2256
   if (compare) {
2257
      sample_key |= LP_SAMPLER_SHADOW;
2258
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2259
   }
2260
 
2261
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2262
      unsigned dim;
2263
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2264
      for (dim = 0; dim < num_derivs; ++dim) {
2265
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2266
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2267
      }
2268
      params.derivs = &derivs;
2269
      /*
2270
       * could also check all src regs if constant but I doubt such
2271
       * cases exist in practice.
2272
       */
2273
      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
2274
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
2275
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2276
         }
2277
         else {
2278
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
2279
         }
2280
      }
2281
      else {
2282
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2283
      }
2284
   }
2285
 
2286
   /* some advanced gather instructions (txgo) would require 4 offsets */
2287
   if (inst->Texture.NumOffsets == 1) {
2288
      unsigned dim;
2289
      sample_key |= LP_SAMPLER_OFFSETS;
2290
      for (dim = 0; dim < num_offsets; dim++) {
2291
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2292
      }
2293
   }
2294
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2295
 
2296
   params.type = bld->bld_base.base.type;
2297
   params.sample_key = sample_key;
2298
   params.texture_index = texture_unit;
2299
   params.sampler_index = sampler_unit;
2300
   params.context_ptr = bld->context_ptr;
2301
   params.coords = coords;
2302
   params.offsets = offsets;
2303
   params.lod = lod;
2304
   params.texel = texel;
2305
 
2306
   bld->sampler->emit_tex_sample(bld->sampler,
2307
                                 bld->bld_base.base.gallivm,
2308
                                 ¶ms);
2309
 
2310
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
2311
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
2312
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
2313
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
2314
      unsigned char swizzles[4];
2315
      swizzles[0] = inst->Src[1].Register.SwizzleX;
2316
      swizzles[1] = inst->Src[1].Register.SwizzleY;
2317
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
2318
      swizzles[3] = inst->Src[1].Register.SwizzleW;
2319
 
2320
      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2321
   }
2322
}
2323
 
2324
static void
2325
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2326
                   const struct tgsi_full_instruction *inst,
2327
                   LLVMValueRef *texel,
2328
                   boolean is_samplei)
2329
{
2330
   unsigned unit, target;
2331
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2332
   LLVMValueRef explicit_lod = NULL;
2333
   LLVMValueRef coords[5];
2334
   LLVMValueRef offsets[3] = { NULL };
2335
   struct lp_sampler_params params;
2336
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2337
   unsigned dims, i;
2338
   unsigned layer_coord = 0;
2339
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2340
 
2341
   memset(¶ms, 0, sizeof(params));
2342
 
2343
   if (!bld->sampler) {
2344
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2345
      for (i = 0; i < 4; i++) {
2346
         texel[i] = coord_undef;
2347
      }
2348
      return;
2349
   }
2350
 
2351
   unit = inst->Src[1].Register.Index;
2352
 
2353
   if (is_samplei) {
2354
      target = bld->sv[unit].Resource;
2355
   }
2356
   else {
2357
      target = inst->Texture.Texture;
2358
   }
2359
 
2360
   switch (target) {
2361
   case TGSI_TEXTURE_1D:
2362
   case TGSI_TEXTURE_BUFFER:
2363
      dims = 1;
2364
      break;
2365
   case TGSI_TEXTURE_1D_ARRAY:
2366
      layer_coord = 1;
2367
      dims = 1;
2368
      break;
2369
   case TGSI_TEXTURE_2D:
2370
   case TGSI_TEXTURE_RECT:
2371
   case TGSI_TEXTURE_2D_MSAA:
2372
      dims = 2;
2373
      break;
2374
   case TGSI_TEXTURE_2D_ARRAY:
2375
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
2376
      layer_coord = 2;
2377
      dims = 2;
2378
      break;
2379
   case TGSI_TEXTURE_3D:
2380
      dims = 3;
2381
      break;
2382
   default:
2383
      assert(0);
2384
      return;
2385
   }
2386
 
2387
   /* always have lod except for buffers and msaa targets ? */
2388
   if (target != TGSI_TEXTURE_BUFFER &&
2389
       target != TGSI_TEXTURE_2D_MSAA &&
2390
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
2391
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2392
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2393
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2394
   }
2395
   /* XXX: for real msaa support, the w component would be the sample index. */
2396
 
2397
   for (i = 0; i < dims; i++) {
2398
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2399
   }
2400
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2401
   for (i = dims; i < 5; i++) {
2402
      coords[i] = coord_undef;
2403
   }
2404
   if (layer_coord)
2405
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2406
 
2407
   if (inst->Texture.NumOffsets == 1) {
2408
      unsigned dim;
2409
      sample_key |= LP_SAMPLER_OFFSETS;
2410
      for (dim = 0; dim < dims; dim++) {
2411
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2412
      }
2413
   }
2414
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2415
 
2416
   params.type = bld->bld_base.base.type;
2417
   params.sample_key = sample_key;
2418
   params.texture_index = unit;
2419
   params.sampler_index = unit;
2420
   params.context_ptr = bld->context_ptr;
2421
   params.coords = coords;
2422
   params.offsets = offsets;
2423
   params.derivs = NULL;
2424
   params.lod = explicit_lod;
2425
   params.texel = texel;
2426
 
2427
   bld->sampler->emit_tex_sample(bld->sampler,
2428
                                 bld->bld_base.base.gallivm,
2429
                                 ¶ms);
2430
 
2431
   if (is_samplei &&
2432
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
2433
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
2434
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
2435
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
2436
      unsigned char swizzles[4];
2437
      swizzles[0] = inst->Src[1].Register.SwizzleX;
2438
      swizzles[1] = inst->Src[1].Register.SwizzleY;
2439
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
2440
      swizzles[3] = inst->Src[1].Register.SwizzleW;
2441
 
2442
      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2443
   }
2444
}
2445
 
2446
static void
2447
emit_size_query( struct lp_build_tgsi_soa_context *bld,
2448
                 const struct tgsi_full_instruction *inst,
2449
                 LLVMValueRef *sizes_out,
2450
                 boolean is_sviewinfo)
2451
{
2452
   LLVMValueRef explicit_lod;
2453
   enum lp_sampler_lod_property lod_property;
2454
   unsigned has_lod;
2455
   unsigned i;
2456
   unsigned unit = inst->Src[1].Register.Index;
2457
   unsigned target, pipe_target;
2458
 
2459
   if (is_sviewinfo) {
2460
      target = bld->sv[unit].Resource;
2461
   }
2462
   else {
2463
      target = inst->Texture.Texture;
2464
   }
2465
   switch (target) {
2466
   case TGSI_TEXTURE_BUFFER:
2467
   case TGSI_TEXTURE_RECT:
2468
   case TGSI_TEXTURE_SHADOWRECT:
2469
      has_lod = 0;
2470
      break;
2471
   default:
2472
      has_lod = 1;
2473
      break;
2474
   }
2475
 
2476
   if (!bld->sampler) {
2477
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2478
      for (i = 0; i < 4; i++)
2479
         sizes_out[i] = bld->bld_base.int_bld.undef;
2480
      return;
2481
   }
2482
 
2483
   if (has_lod) {
2484
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2485
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2486
   }
2487
   else {
2488
      explicit_lod = NULL;
2489
      lod_property = LP_SAMPLER_LOD_SCALAR;
2490
   }
2491
 
2492
 
2493
   pipe_target = tgsi_to_pipe_tex_target(target);
2494
 
2495
   bld->sampler->emit_size_query(bld->sampler,
2496
                                 bld->bld_base.base.gallivm,
2497
                                 bld->bld_base.int_bld.type,
2498
                                 unit, pipe_target,
2499
                                 bld->context_ptr,
2500
                                 TRUE,
2501
                                 lod_property,
2502
                                 explicit_lod,
2503
                                 sizes_out);
2504
}
2505
 
2506
static boolean
2507
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2508
                   int pc)
2509
{
2510
   int i;
2511
 
2512
   for (i = 0; i < 5; i++) {
2513
      unsigned opcode;
2514
 
2515
      if (pc + i >= bld->bld_base.info->num_instructions)
2516
         return TRUE;
2517
 
2518
      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2519
 
2520
      if (opcode == TGSI_OPCODE_END)
2521
         return TRUE;
2522
 
2523
      if (opcode == TGSI_OPCODE_TEX ||
2524
         opcode == TGSI_OPCODE_TXP ||
2525
         opcode == TGSI_OPCODE_TXD ||
2526
         opcode == TGSI_OPCODE_TXB ||
2527
         opcode == TGSI_OPCODE_TXL ||
2528
         opcode == TGSI_OPCODE_TXF ||
2529
         opcode == TGSI_OPCODE_TXQ ||
2530
         opcode == TGSI_OPCODE_TEX2 ||
2531
         opcode == TGSI_OPCODE_TXB2 ||
2532
         opcode == TGSI_OPCODE_TXL2 ||
2533
         opcode == TGSI_OPCODE_SAMPLE ||
2534
         opcode == TGSI_OPCODE_SAMPLE_B ||
2535
         opcode == TGSI_OPCODE_SAMPLE_C ||
2536
         opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2537
         opcode == TGSI_OPCODE_SAMPLE_D ||
2538
         opcode == TGSI_OPCODE_SAMPLE_I ||
2539
         opcode == TGSI_OPCODE_SAMPLE_L ||
2540
         opcode == TGSI_OPCODE_SVIEWINFO ||
2541
         opcode == TGSI_OPCODE_CAL ||
2542
         opcode == TGSI_OPCODE_CALLNZ ||
2543
         opcode == TGSI_OPCODE_IF ||
2544
         opcode == TGSI_OPCODE_UIF ||
2545
         opcode == TGSI_OPCODE_BGNLOOP ||
2546
         opcode == TGSI_OPCODE_SWITCH)
2547
         return FALSE;
2548
   }
2549
 
2550
   return TRUE;
2551
}
2552
 
2553
 
2554
 
2555
/**
 * Kill fragment if any of the src register values are negative.
 *
 * Each distinct (unswizzled) source channel is fetched once, compared
 * against zero, and the per-channel "survives" masks are ANDed together
 * before being folded into the shader's overall live mask.
 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if (bld->exec_mask.has_mask) {
      /* Lanes outside the current exec mask must stay alive in the overall
       * mask, so OR in the exec mask's complement. */
      LLVMValueRef invmask;
      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }

   lp_build_mask_update(bld->mask, mask);
   /* Skip the early-out mask check if the shader is about to end anyway. */
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}
2612
 
2613
 
2614
/**
2615
 * Unconditional fragment kill.
2616
 * The only predication is the execution mask which will apply if
2617
 * we're inside a loop or conditional.
2618
 */
2619
static void
2620
emit_kill(struct lp_build_tgsi_soa_context *bld,
2621
          int pc)
2622
{
2623
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2624
   LLVMValueRef mask;
2625
 
2626
   /* For those channels which are "alive", disable fragment shader
2627
    * execution.
2628
    */
2629
   if (bld->exec_mask.has_mask) {
2630
      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2631
   }
2632
   else {
2633
      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2634
      mask = zero;
2635
   }
2636
 
2637
   lp_build_mask_update(bld->mask, mask);
2638
 
2639
   if (!near_end_of_shader(bld, pc))
2640
      lp_build_mask_check(bld->mask);
2641
}
2642
 
2643
 
2644
/**
2645
 * Emit code which will dump the value of all the temporary registers
2646
 * to stdout.
2647
 */
2648
static void
2649
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2650
               unsigned file)
2651
{
2652
   const struct tgsi_shader_info *info = bld->bld_base.info;
2653
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2654
   LLVMBuilderRef builder = gallivm->builder;
2655
   LLVMValueRef reg_ptr;
2656
   int index;
2657
   int max_index = info->file_max[file];
2658
 
2659
   /*
2660
    * Some register files, particularly constants, can be very large,
2661
    * and dumping everything could make this unusably slow.
2662
    */
2663
   max_index = MIN2(max_index, 32);
2664
 
2665
   for (index = 0; index <= max_index; index++) {
2666
      LLVMValueRef res;
2667
      unsigned mask;
2668
      int chan;
2669
 
2670
      if (index < 8 * sizeof(unsigned) &&
2671
          (info->file_mask[file] & (1 << index)) == 0)  {
2672
         /* This was not declared.*/
2673
         continue;
2674
      }
2675
 
2676
      if (file == TGSI_FILE_INPUT) {
2677
         mask = info->input_usage_mask[index];
2678
      } else {
2679
         mask = TGSI_WRITEMASK_XYZW;
2680
      }
2681
 
2682
      for (chan = 0; chan < 4; chan++) {
2683
         if ((mask & (1 << chan)) == 0) {
2684
            /* This channel is not used.*/
2685
            continue;
2686
         }
2687
 
2688
         if (file == TGSI_FILE_CONSTANT) {
2689
            struct tgsi_full_src_register reg;
2690
            memset(®, 0, sizeof reg);
2691
            reg.Register.File = file;
2692
            reg.Register.Index = index;
2693
            reg.Register.SwizzleX = 0;
2694
            reg.Register.SwizzleY = 1;
2695
            reg.Register.SwizzleZ = 2;
2696
            reg.Register.SwizzleW = 3;
2697
 
2698
            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, ®, TGSI_TYPE_FLOAT, chan);
2699
            if (!res) {
2700
               continue;
2701
            }
2702
         } else if (file == TGSI_FILE_INPUT) {
2703
            res = bld->inputs[index][chan];
2704
            if (!res) {
2705
               continue;
2706
            }
2707
         } else if (file == TGSI_FILE_TEMPORARY) {
2708
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2709
            assert(reg_ptr);
2710
            res = LLVMBuildLoad(builder, reg_ptr, "");
2711
         } else if (file == TGSI_FILE_OUTPUT) {
2712
            reg_ptr = lp_get_output_ptr(bld, index, chan);
2713
            assert(reg_ptr);
2714
            res = LLVMBuildLoad(builder, reg_ptr, "");
2715
         } else {
2716
            assert(0);
2717
            continue;
2718
         }
2719
 
2720
         emit_dump_reg(gallivm, file, index, chan, res);
2721
      }
2722
   }
2723
}
2724
 
2725
 
2726
 
2727
/**
 * Process a TGSI declaration: allocate per-register storage (or record
 * metadata) for the declared range [decl->Range.First, decl->Range.Last].
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* Per-register allocas only when temps are not indirectly addressed
       * (indirect temps are stored elsewhere as one big array). */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      /* Same indirect-addressing exception as temporaries. */
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_PREDICATE:
      assert(last < LP_MAX_TGSI_PREDS);
      for (idx = first; idx <= last; ++idx) {
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                 "predicate");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned idx2D = decl->Dim.Index2D;
      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
      /* Cache the per-buffer base pointer and size once at declaration
       * time. */
      bld->consts[idx2D] =
         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
      bld->consts_sizes[idx2D] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
   }
      break;

   default:
      /* don't need to declare other vars */
      break;
   }
}
2821
 
2822
 
2823
void lp_emit_immediate_soa(
2824
   struct lp_build_tgsi_context *bld_base,
2825
   const struct tgsi_full_immediate *imm)
2826
{
2827
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2828
   struct gallivm_state * gallivm = bld_base->base.gallivm;
2829
   LLVMValueRef imms[4];
2830
   unsigned i;
2831
   const uint size = imm->Immediate.NrTokens - 1;
2832
   assert(size <= 4);
2833
   switch (imm->Immediate.DataType) {
2834
   case TGSI_IMM_FLOAT32:
2835
      for( i = 0; i < size; ++i )
2836
         imms[i] =
2837
               lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
2838
 
2839
      break;
2840
   case TGSI_IMM_UINT32:
2841
      for( i = 0; i < size; ++i ) {
2842
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
2843
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2844
      }
2845
 
2846
      break;
2847
   case TGSI_IMM_INT32:
2848
      for( i = 0; i < size; ++i ) {
2849
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
2850
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
2851
      }
2852
 
2853
      break;
2854
   }
2855
   for( i = size; i < 4; ++i )
2856
      imms[i] = bld_base->base.undef;
2857
 
2858
   if (bld->use_immediates_array) {
2859
      unsigned index = bld->num_immediates;
2860
      struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2861
      LLVMBuilderRef builder = gallivm->builder;
2862
 
2863
      assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
2864
      for (i = 0; i < 4; ++i ) {
2865
         LLVMValueRef lindex = lp_build_const_int32(
2866
                  bld->bld_base.base.gallivm, index * 4 + i);
2867
         LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2868
                                             bld->imms_array, &lindex, 1, "");
2869
         LLVMBuildStore(builder, imms[i], imm_ptr);
2870
      }
2871
   } else {
2872
      /* simply copy the immediate values into the next immediates[] slot */
2873
      unsigned i;
2874
      const uint size = imm->Immediate.NrTokens - 1;
2875
      assert(size <= 4);
2876
      assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
2877
 
2878
      for(i = 0; i < 4; ++i )
2879
         bld->immediates[bld->num_immediates][i] = imms[i];
2880
 
2881
      if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
2882
         unsigned index = bld->num_immediates;
2883
         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2884
         LLVMBuilderRef builder = gallivm->builder;
2885
         for (i = 0; i < 4; ++i ) {
2886
            LLVMValueRef lindex = lp_build_const_int32(
2887
                     bld->bld_base.base.gallivm, index * 4 + i);
2888
            LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
2889
                                                bld->imms_array, &lindex, 1, "");
2890
            LLVMBuildStore(builder,
2891
                           bld->immediates[index][i],
2892
                           imm_ptr);
2893
         }
2894
      }
2895
   }
2896
 
2897
   bld->num_immediates++;
2898
}
2899
 
2900
static void
2901
ddx_emit(
2902
   const struct lp_build_tgsi_action * action,
2903
   struct lp_build_tgsi_context * bld_base,
2904
   struct lp_build_emit_data * emit_data)
2905
{
2906
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2907
 
2908
   emit_fetch_deriv(bld, emit_data->args[0], NULL,
2909
                    &emit_data->output[emit_data->chan], NULL);
2910
}
2911
 
2912
static void
2913
ddy_emit(
2914
   const struct lp_build_tgsi_action * action,
2915
   struct lp_build_tgsi_context * bld_base,
2916
   struct lp_build_emit_data * emit_data)
2917
{
2918
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2919
 
2920
   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
2921
                    &emit_data->output[emit_data->chan]);
2922
}
2923
 
2924
static void
2925
kill_emit(
2926
   const struct lp_build_tgsi_action * action,
2927
   struct lp_build_tgsi_context * bld_base,
2928
   struct lp_build_emit_data * emit_data)
2929
{
2930
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2931
 
2932
   emit_kill(bld, bld_base->pc - 1);
2933
}
2934
 
2935
static void
2936
kill_if_emit(
2937
   const struct lp_build_tgsi_action * action,
2938
   struct lp_build_tgsi_context * bld_base,
2939
   struct lp_build_emit_data * emit_data)
2940
{
2941
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2942
 
2943
   emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
2944
}
2945
 
2946
static void
2947
tex_emit(
2948
   const struct lp_build_tgsi_action * action,
2949
   struct lp_build_tgsi_context * bld_base,
2950
   struct lp_build_emit_data * emit_data)
2951
{
2952
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2953
 
2954
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2955
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2956
}
2957
 
2958
static void
2959
tex2_emit(
2960
   const struct lp_build_tgsi_action * action,
2961
   struct lp_build_tgsi_context * bld_base,
2962
   struct lp_build_emit_data * emit_data)
2963
{
2964
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2965
 
2966
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
2967
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2968
}
2969
 
2970
static void
2971
txb_emit(
2972
   const struct lp_build_tgsi_action * action,
2973
   struct lp_build_tgsi_context * bld_base,
2974
   struct lp_build_emit_data * emit_data)
2975
{
2976
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2977
 
2978
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2979
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
2980
}
2981
 
2982
static void
2983
txb2_emit(
2984
   const struct lp_build_tgsi_action * action,
2985
   struct lp_build_tgsi_context * bld_base,
2986
   struct lp_build_emit_data * emit_data)
2987
{
2988
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
2989
 
2990
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
2991
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
2992
}
2993
 
2994
static void
2995
txd_emit(
2996
   const struct lp_build_tgsi_action * action,
2997
   struct lp_build_tgsi_context * bld_base,
2998
   struct lp_build_emit_data * emit_data)
2999
{
3000
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3001
 
3002
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3003
            emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3004
}
3005
 
3006
static void
3007
txl_emit(
3008
   const struct lp_build_tgsi_action * action,
3009
   struct lp_build_tgsi_context * bld_base,
3010
   struct lp_build_emit_data * emit_data)
3011
{
3012
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3013
 
3014
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3015
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3016
}
3017
 
3018
static void
3019
txl2_emit(
3020
   const struct lp_build_tgsi_action * action,
3021
   struct lp_build_tgsi_context * bld_base,
3022
   struct lp_build_emit_data * emit_data)
3023
{
3024
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3025
 
3026
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3027
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3028
}
3029
 
3030
static void
3031
txp_emit(
3032
   const struct lp_build_tgsi_action * action,
3033
   struct lp_build_tgsi_context * bld_base,
3034
   struct lp_build_emit_data * emit_data)
3035
{
3036
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3037
 
3038
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3039
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3040
}
3041
 
3042
static void
3043
tg4_emit(
3044
   const struct lp_build_tgsi_action * action,
3045
   struct lp_build_tgsi_context * bld_base,
3046
   struct lp_build_emit_data * emit_data)
3047
{
3048
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3049
 
3050
   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3051
            emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3052
}
3053
 
3054
static void
3055
txq_emit(
3056
   const struct lp_build_tgsi_action * action,
3057
   struct lp_build_tgsi_context * bld_base,
3058
   struct lp_build_emit_data * emit_data)
3059
{
3060
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3061
 
3062
   emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
3063
}
3064
 
3065
static void
3066
txf_emit(
3067
   const struct lp_build_tgsi_action * action,
3068
   struct lp_build_tgsi_context * bld_base,
3069
   struct lp_build_emit_data * emit_data)
3070
{
3071
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3072
 
3073
   emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
3074
}
3075
 
3076
static void
3077
sample_i_emit(
3078
   const struct lp_build_tgsi_action * action,
3079
   struct lp_build_tgsi_context * bld_base,
3080
   struct lp_build_emit_data * emit_data)
3081
{
3082
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3083
 
3084
   emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
3085
}
3086
 
3087
static void
3088
sample_emit(
3089
   const struct lp_build_tgsi_action * action,
3090
   struct lp_build_tgsi_context * bld_base,
3091
   struct lp_build_emit_data * emit_data)
3092
{
3093
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3094
 
3095
   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3096
               FALSE, emit_data->output);
3097
}
3098
 
3099
static void
3100
sample_b_emit(
3101
   const struct lp_build_tgsi_action * action,
3102
   struct lp_build_tgsi_context * bld_base,
3103
   struct lp_build_emit_data * emit_data)
3104
{
3105
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3106
 
3107
   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3108
               FALSE, emit_data->output);
3109
}
3110
 
3111
static void
3112
sample_c_emit(
3113
   const struct lp_build_tgsi_action * action,
3114
   struct lp_build_tgsi_context * bld_base,
3115
   struct lp_build_emit_data * emit_data)
3116
{
3117
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3118
 
3119
   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3120
               TRUE, emit_data->output);
3121
}
3122
 
3123
static void
3124
sample_c_lz_emit(
3125
   const struct lp_build_tgsi_action * action,
3126
   struct lp_build_tgsi_context * bld_base,
3127
   struct lp_build_emit_data * emit_data)
3128
{
3129
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3130
 
3131
   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3132
               TRUE, emit_data->output);
3133
}
3134
 
3135
static void
3136
sample_d_emit(
3137
   const struct lp_build_tgsi_action * action,
3138
   struct lp_build_tgsi_context * bld_base,
3139
   struct lp_build_emit_data * emit_data)
3140
{
3141
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3142
 
3143
   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3144
               FALSE, emit_data->output);
3145
}
3146
 
3147
static void
3148
sample_l_emit(
3149
   const struct lp_build_tgsi_action * action,
3150
   struct lp_build_tgsi_context * bld_base,
3151
   struct lp_build_emit_data * emit_data)
3152
{
3153
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3154
 
3155
   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3156
               FALSE, emit_data->output);
3157
}
3158
 
3159
static void
3160
sviewinfo_emit(
3161
   const struct lp_build_tgsi_action * action,
3162
   struct lp_build_tgsi_context * bld_base,
3163
   struct lp_build_emit_data * emit_data)
3164
{
3165
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3166
 
3167
   emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
3168
}
3169
 
3170
static LLVMValueRef
3171
mask_vec(struct lp_build_tgsi_context *bld_base)
3172
{
3173
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3174
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3175
   struct lp_exec_mask *exec_mask = &bld->exec_mask;
3176
 
3177
   if (!exec_mask->has_mask) {
3178
      return lp_build_mask_value(bld->mask);
3179
   }
3180
   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
3181
                       exec_mask->exec_mask, "");
3182
}
3183
 
3184
static void
3185
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3186
                          LLVMValueRef ptr,
3187
                          LLVMValueRef mask)
3188
{
3189
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3190
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3191
 
3192
   current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3193
 
3194
   LLVMBuildStore(builder, current_vec, ptr);
3195
}
3196
 
3197
static void
3198
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3199
                             LLVMValueRef ptr,
3200
                             LLVMValueRef mask)
3201
{
3202
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3203
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
3204
 
3205
   current_vec = lp_build_select(&bld_base->uint_bld,
3206
                                 mask,
3207
                                 bld_base->uint_bld.zero,
3208
                                 current_vec);
3209
 
3210
   LLVMBuildStore(builder, current_vec, ptr);
3211
}
3212
 
3213
static LLVMValueRef
3214
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3215
                                  LLVMValueRef current_mask_vec,
3216
                                  LLVMValueRef total_emitted_vertices_vec)
3217
{
3218
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3219
   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3220
   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3221
                                        total_emitted_vertices_vec,
3222
                                        bld->max_output_vertices_vec);
3223
 
3224
   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3225
}
3226
 
3227
/**
 * GS EMIT: gather the current output registers and hand one vertex to the
 * geometry shader interface.
 *
 * The lane mask is first clamped so lanes that already reached the declared
 * maximum output-vertex count neither emit nor bump their counters.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* Active mask lanes are ~0 (-1); subtracting the mask increments the
       * per-primitive and total vertex counters for exactly those lanes. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
3260
 
3261
 
3262
/**
 * GS ENDPRIM (masked): close the primitive under construction in the lanes
 * selected by \p mask, but only where vertices have actually been emitted.
 * For those lanes the primitive counter is bumped and the per-primitive
 * vertex counter is reset to zero.
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* Bump the primitive counter and reset the per-primitive vertex
       * counter in the selected lanes. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
3314
 
3315
static void
3316
end_primitive(
3317
   const struct lp_build_tgsi_action * action,
3318
   struct lp_build_tgsi_context * bld_base,
3319
   struct lp_build_emit_data * emit_data)
3320
{
3321
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3322
 
3323
   if (bld->gs_iface->end_primitive) {
3324
      LLVMValueRef mask = mask_vec(bld_base);
3325
      end_primitive_masked(bld_base, mask);
3326
   }
3327
}
3328
 
3329
static void
3330
cal_emit(
3331
   const struct lp_build_tgsi_action * action,
3332
   struct lp_build_tgsi_context * bld_base,
3333
   struct lp_build_emit_data * emit_data)
3334
{
3335
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3336
 
3337
   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
3338
                     &bld_base->pc);
3339
}
3340
 
3341
static void
3342
ret_emit(
3343
   const struct lp_build_tgsi_action * action,
3344
   struct lp_build_tgsi_context * bld_base,
3345
   struct lp_build_emit_data * emit_data)
3346
{
3347
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3348
 
3349
   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
3350
}
3351
 
3352
static void
3353
brk_emit(
3354
   const struct lp_build_tgsi_action * action,
3355
   struct lp_build_tgsi_context * bld_base,
3356
   struct lp_build_emit_data * emit_data)
3357
{
3358
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3359
 
3360
   lp_exec_break(&bld->exec_mask, bld_base);
3361
}
3362
 
3363
static void
3364
breakc_emit(
3365
   const struct lp_build_tgsi_action * action,
3366
   struct lp_build_tgsi_context * bld_base,
3367
   struct lp_build_emit_data * emit_data)
3368
{
3369
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3370
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3371
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
3372
   LLVMValueRef unsigned_cond =
3373
      LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
3374
   LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3375
                                    unsigned_cond,
3376
                                    uint_bld->zero);
3377
 
3378
   lp_exec_break_condition(&bld->exec_mask, cond);
3379
}
3380
 
3381
static void
3382
if_emit(
3383
   const struct lp_build_tgsi_action * action,
3384
   struct lp_build_tgsi_context * bld_base,
3385
   struct lp_build_emit_data * emit_data)
3386
{
3387
   LLVMValueRef tmp;
3388
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3389
 
3390
   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
3391
                      emit_data->args[0], bld->bld_base.base.zero);
3392
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3393
}
3394
 
3395
static void
3396
uif_emit(
3397
   const struct lp_build_tgsi_action * action,
3398
   struct lp_build_tgsi_context * bld_base,
3399
   struct lp_build_emit_data * emit_data)
3400
{
3401
   LLVMValueRef tmp;
3402
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3403
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
3404
 
3405
   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
3406
                      emit_data->args[0], uint_bld->zero);
3407
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
3408
}
3409
 
3410
static void
3411
case_emit(
3412
   const struct lp_build_tgsi_action * action,
3413
   struct lp_build_tgsi_context * bld_base,
3414
   struct lp_build_emit_data * emit_data)
3415
{
3416
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3417
 
3418
   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
3419
}
3420
 
3421
static void
3422
default_emit(
3423
   const struct lp_build_tgsi_action * action,
3424
   struct lp_build_tgsi_context * bld_base,
3425
   struct lp_build_emit_data * emit_data)
3426
{
3427
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3428
 
3429
   lp_exec_default(&bld->exec_mask, bld_base);
3430
}
3431
 
3432
static void
3433
switch_emit(
3434
   const struct lp_build_tgsi_action * action,
3435
   struct lp_build_tgsi_context * bld_base,
3436
   struct lp_build_emit_data * emit_data)
3437
{
3438
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3439
 
3440
   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
3441
}
3442
 
3443
static void
3444
endswitch_emit(
3445
   const struct lp_build_tgsi_action * action,
3446
   struct lp_build_tgsi_context * bld_base,
3447
   struct lp_build_emit_data * emit_data)
3448
{
3449
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3450
 
3451
   lp_exec_endswitch(&bld->exec_mask, bld_base);
3452
}
3453
 
3454
static void
3455
bgnloop_emit(
3456
   const struct lp_build_tgsi_action * action,
3457
   struct lp_build_tgsi_context * bld_base,
3458
   struct lp_build_emit_data * emit_data)
3459
{
3460
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3461
 
3462
   lp_exec_bgnloop(&bld->exec_mask);
3463
}
3464
 
3465
static void
3466
bgnsub_emit(
3467
   const struct lp_build_tgsi_action * action,
3468
   struct lp_build_tgsi_context * bld_base,
3469
   struct lp_build_emit_data * emit_data)
3470
{
3471
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3472
 
3473
   lp_exec_mask_bgnsub(&bld->exec_mask);
3474
}
3475
 
3476
static void
3477
else_emit(
3478
   const struct lp_build_tgsi_action * action,
3479
   struct lp_build_tgsi_context * bld_base,
3480
   struct lp_build_emit_data * emit_data)
3481
{
3482
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3483
 
3484
   lp_exec_mask_cond_invert(&bld->exec_mask);
3485
}
3486
 
3487
static void
3488
endif_emit(
3489
   const struct lp_build_tgsi_action * action,
3490
   struct lp_build_tgsi_context * bld_base,
3491
   struct lp_build_emit_data * emit_data)
3492
{
3493
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3494
 
3495
   lp_exec_mask_cond_pop(&bld->exec_mask);
3496
}
3497
 
3498
static void
3499
endloop_emit(
3500
   const struct lp_build_tgsi_action * action,
3501
   struct lp_build_tgsi_context * bld_base,
3502
   struct lp_build_emit_data * emit_data)
3503
{
3504
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3505
 
3506
   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
3507
}
3508
 
3509
static void
3510
endsub_emit(
3511
   const struct lp_build_tgsi_action * action,
3512
   struct lp_build_tgsi_context * bld_base,
3513
   struct lp_build_emit_data * emit_data)
3514
{
3515
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3516
 
3517
   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
3518
}
3519
 
3520
static void
3521
cont_emit(
3522
   const struct lp_build_tgsi_action * action,
3523
   struct lp_build_tgsi_context * bld_base,
3524
   struct lp_build_emit_data * emit_data)
3525
{
3526
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3527
 
3528
   lp_exec_continue(&bld->exec_mask);
3529
}
3530
 
3531
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
3532
{
3533
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3534
   struct gallivm_state * gallivm = bld_base->base.gallivm;
3535
 
3536
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
3537
      LLVMValueRef array_size =
3538
         lp_build_const_int32(gallivm,
3539
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
3540
      bld->temps_array = lp_build_array_alloca(gallivm,
3541
                                              bld_base->base.vec_type, array_size,
3542
                                              "temp_array");
3543
   }
3544
 
3545
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
3546
      LLVMValueRef array_size =
3547
         lp_build_const_int32(gallivm,
3548
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
3549
      bld->outputs_array = lp_build_array_alloca(gallivm,
3550
                                                bld_base->base.vec_type, array_size,
3551
                                                "output_array");
3552
   }
3553
 
3554
   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3555
      LLVMValueRef array_size =
3556
         lp_build_const_int32(gallivm,
3557
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
3558
      bld->imms_array = lp_build_array_alloca(gallivm,
3559
                                              bld_base->base.vec_type, array_size,
3560
                                              "imms_array");
3561
   }
3562
 
3563
   /* If we have indirect addressing in inputs we need to copy them into
3564
    * our alloca array to be able to iterate over them */
3565
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
3566
      unsigned index, chan;
3567
      LLVMTypeRef vec_type = bld_base->base.vec_type;
3568
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
3569
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
3570
      bld->inputs_array = lp_build_array_alloca(gallivm,
3571
                                               vec_type, array_size,
3572
                                               "input_array");
3573
 
3574
      assert(bld_base->info->num_inputs
3575
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
3576
 
3577
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
3578
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
3579
            LLVMValueRef lindex =
3580
               lp_build_const_int32(gallivm, index * 4 + chan);
3581
            LLVMValueRef input_ptr =
3582
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
3583
                            &lindex, 1, "");
3584
            LLVMValueRef value = bld->inputs[index][chan];
3585
            if (value)
3586
               LLVMBuildStore(gallivm->builder, value, input_ptr);
3587
         }
3588
      }
3589
   }
3590
 
3591
   if (bld->gs_iface) {
3592
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
3593
      bld->emitted_prims_vec_ptr =
3594
         lp_build_alloca(gallivm,
3595
                         uint_bld->vec_type,
3596
                         "emitted_prims_ptr");
3597
      bld->emitted_vertices_vec_ptr =
3598
         lp_build_alloca(gallivm,
3599
                         uint_bld->vec_type,
3600
                         "emitted_vertices_ptr");
3601
      bld->total_emitted_vertices_vec_ptr =
3602
         lp_build_alloca(gallivm,
3603
                         uint_bld->vec_type,
3604
                         "total_emitted_vertices_ptr");
3605
 
3606
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
3607
                     bld->emitted_prims_vec_ptr);
3608
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
3609
                     bld->emitted_vertices_vec_ptr);
3610
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
3611
                     bld->total_emitted_vertices_vec_ptr);
3612
   }
3613
 
3614
   if (DEBUG_EXECUTION) {
3615
      lp_build_printf(gallivm, "\n");
3616
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
3617
      if (!bld->gs_iface)
3618
         emit_dump_file(bld, TGSI_FILE_INPUT);
3619
   }
3620
}
3621
 
3622
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
3623
{
3624
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3625
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3626
 
3627
   if (DEBUG_EXECUTION) {
3628
      /* for debugging */
3629
      if (0) {
3630
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
3631
      }
3632
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
3633
      lp_build_printf(bld_base->base.gallivm, "\n");
3634
   }
3635
 
3636
   /* If we have indirect addressing in outputs we need to copy our alloca array
3637
    * to the outputs slots specified by the caller */
3638
   if (bld->gs_iface) {
3639
      LLVMValueRef total_emitted_vertices_vec;
3640
      LLVMValueRef emitted_prims_vec;
3641
      /* implicit end_primitives, needed in case there are any unflushed
3642
         vertices in the cache. Note must not call end_primitive here
3643
         since the exec_mask is not valid at this point. */
3644
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
3645
 
3646
      total_emitted_vertices_vec =
3647
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
3648
      emitted_prims_vec =
3649
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
3650
 
3651
      bld->gs_iface->gs_epilogue(bld->gs_iface,
3652
                                 &bld->bld_base,
3653
                                 total_emitted_vertices_vec,
3654
                                 emitted_prims_vec);
3655
   } else {
3656
      gather_outputs(bld);
3657
   }
3658
}
3659
 
3660
void
3661
lp_build_tgsi_soa(struct gallivm_state *gallivm,
3662
                  const struct tgsi_token *tokens,
3663
                  struct lp_type type,
3664
                  struct lp_build_mask_context *mask,
3665
                  LLVMValueRef consts_ptr,
3666
                  LLVMValueRef const_sizes_ptr,
3667
                  const struct lp_bld_tgsi_system_values *system_values,
3668
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
3669
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
3670
                  LLVMValueRef context_ptr,
3671
                  struct lp_build_sampler_soa *sampler,
3672
                  const struct tgsi_shader_info *info,
3673
                  const struct lp_build_tgsi_gs_iface *gs_iface)
3674
{
3675
   struct lp_build_tgsi_soa_context bld;
3676
 
3677
   struct lp_type res_type;
3678
 
3679
   assert(type.length <= LP_MAX_VECTOR_LENGTH);
3680
   memset(&res_type, 0, sizeof res_type);
3681
   res_type.width = type.width;
3682
   res_type.length = type.length;
3683
   res_type.sign = 1;
3684
 
3685
   /* Setup build context */
3686
   memset(&bld, 0, sizeof bld);
3687
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
3688
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
3689
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
3690
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
3691
   bld.mask = mask;
3692
   bld.inputs = inputs;
3693
   bld.outputs = outputs;
3694
   bld.consts_ptr = consts_ptr;
3695
   bld.const_sizes_ptr = const_sizes_ptr;
3696
   bld.sampler = sampler;
3697
   bld.bld_base.info = info;
3698
   bld.indirect_files = info->indirect_files;
3699
   bld.context_ptr = context_ptr;
3700
 
3701
   /*
3702
    * If the number of temporaries is rather large then we just
3703
    * allocate them as an array right from the start and treat
3704
    * like indirect temporaries.
3705
    */
3706
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
3707
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
3708
   }
3709
   /*
3710
    * For performance reason immediates are always backed in a static
3711
    * array, but if their number is too great, we have to use just
3712
    * a dynamically allocated array.
3713
    */
3714
   bld.use_immediates_array =
3715
         (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
3716
   if (bld.use_immediates_array) {
3717
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
3718
   }
3719
 
3720
 
3721
   bld.bld_base.soa = TRUE;
3722
   bld.bld_base.emit_debug = emit_debug;
3723
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
3724
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
3725
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
3726
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
3727
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
3728
   bld.bld_base.emit_store = emit_store;
3729
 
3730
   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
3731
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;
3732
 
3733
   bld.bld_base.emit_prologue = emit_prologue;
3734
   bld.bld_base.emit_epilogue = emit_epilogue;
3735
 
3736
   /* Set opcode actions */
3737
   lp_set_default_actions_cpu(&bld.bld_base);
3738
 
3739
   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
3740
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
3741
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
3742
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
3743
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
3744
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
3745
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
3746
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
3747
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
3748
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
3749
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
3750
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
3751
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
3752
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
3753
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
3754
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
3755
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
3756
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
3757
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
3758
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
3759
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
3760
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
3761
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
3762
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
3763
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
3764
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
3765
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
3766
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
3767
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
3768
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
3769
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
3770
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
3771
   /* DX10 sampling ops */
3772
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
3773
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
3774
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
3775
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
3776
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
3777
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
3778
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
3779
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
3780
 
3781
   if (gs_iface) {
3782
      /* There's no specific value for this because it should always
3783
       * be set, but apps using ext_geometry_shader4 quite often
3784
       * were forgetting so we're using MAX_VERTEX_VARYING from
3785
       * that spec even though we could debug_assert if it's not
3786
       * set, but that's a lot uglier. */
3787
      uint max_output_vertices;
3788
 
3789
      /* inputs are always indirect with gs */
3790
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
3791
      bld.gs_iface = gs_iface;
3792
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
3793
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
3794
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
3795
 
3796
      max_output_vertices =
3797
            info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
3798
      if (!max_output_vertices)
3799
         max_output_vertices = 32;
3800
 
3801
      bld.max_output_vertices_vec =
3802
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3803
                                max_output_vertices);
3804
   }
3805
 
3806
   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3807
 
3808
   bld.system_values = *system_values;
3809
 
3810
   lp_build_tgsi_llvm(&bld.bld_base, tokens);
3811
 
3812
   if (0) {
3813
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
3814
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
3815
      debug_printf("11111111111111111111111111111 \n");
3816
      tgsi_dump(tokens, 0);
3817
      lp_debug_dump_value(function);
3818
      debug_printf("2222222222222222222222222222 \n");
3819
   }
3820
 
3821
   if (0) {
3822
      LLVMModuleRef module = LLVMGetGlobalParent(
3823
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
3824
      LLVMDumpModule(module);
3825
 
3826
   }
3827
   lp_exec_mask_fini(&bld.exec_mask);
3828
}