Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
3
 * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
4
 * Copyright © 2010 Intel Corporation
5
 * Copyright © 2011 Bryan Cain
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the "Software"),
9
 * to deal in the Software without restriction, including without limitation
10
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11
 * and/or sell copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the next
15
 * paragraph) shall be included in all copies or substantial portions of the
16
 * Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24
 * DEALINGS IN THE SOFTWARE.
25
 */
26
 
27
/**
28
 * \file glsl_to_tgsi.cpp
29
 *
30
 * Translate GLSL IR to TGSI.
31
 */
32
 
33
#include <stdio.h>
34
#include "main/compiler.h"
35
#include "ir.h"
36
#include "ir_visitor.h"
37
#include "ir_expression_flattening.h"
38
#include "glsl_types.h"
39
#include "glsl_parser_extras.h"
40
#include "../glsl/program.h"
41
#include "ir_optimization.h"
42
#include "ast.h"
43
 
44
#include "main/mtypes.h"
45
#include "main/shaderobj.h"
46
#include "program/hash_table.h"
47
 
48
extern "C" {
49
#include "main/shaderapi.h"
50
#include "main/uniforms.h"
51
#include "program/prog_instruction.h"
52
#include "program/prog_optimize.h"
53
#include "program/prog_print.h"
54
#include "program/program.h"
55
#include "program/prog_parameter.h"
56
#include "program/sampler.h"
57
 
58
#include "pipe/p_compiler.h"
59
#include "pipe/p_context.h"
60
#include "pipe/p_screen.h"
61
#include "pipe/p_shader_tokens.h"
62
#include "pipe/p_state.h"
63
#include "util/u_math.h"
64
#include "tgsi/tgsi_ureg.h"
65
#include "tgsi/tgsi_info.h"
66
#include "st_context.h"
67
#include "st_program.h"
68
#include "st_glsl_to_tgsi.h"
69
#include "st_mesa_to_tgsi.h"
70
}
71
 
72
/* Pseudo register file used for immediates; it aliases PROGRAM_FILE_MAX. */
#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX

/* Bitmask with one bit set per constant-like register file. */
#define PROGRAM_ANY_CONST \
   ((1 << PROGRAM_LOCAL_PARAM) | \
    (1 << PROGRAM_ENV_PARAM)   | \
    (1 << PROGRAM_STATE_VAR)   | \
    (1 << PROGRAM_CONSTANT)    | \
    (1 << PROGRAM_UNIFORM))

/**
 * Maximum number of temporary registers.
 *
 * Arrays of this size must not live on the stack: they would overflow it on
 * Windows and likely on Mac OS X as well.
 */
#define MAX_TEMPS         4096

/**
 * Maximum number of arrays.
 */
#define MAX_ARRAYS        256

/* Will be 4 for GLSL 4.00. */
#define MAX_GLSL_TEXTURE_OFFSET 1

/* Forward declarations: the two register classes convert to each other. */
class st_src_reg;
class st_dst_reg;

/* Defined further down; needed by the st_src_reg constructors. */
static int swizzle_for_size(int size);
99
 
100
/**
101
 * This struct is a corresponding struct to TGSI ureg_src.
102
 */
103
class st_src_reg {
104
public:
105
   st_src_reg(gl_register_file file, int index, const glsl_type *type)
106
   {
107
      this->file = file;
108
      this->index = index;
109
      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
110
         this->swizzle = swizzle_for_size(type->vector_elements);
111
      else
112
         this->swizzle = SWIZZLE_XYZW;
113
      this->negate = 0;
114
      this->index2D = 0;
115
      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
116
      this->reladdr = NULL;
117
   }
118
 
119
   st_src_reg(gl_register_file file, int index, int type)
120
   {
121
      this->type = type;
122
      this->file = file;
123
      this->index = index;
124
      this->index2D = 0;
125
      this->swizzle = SWIZZLE_XYZW;
126
      this->negate = 0;
127
      this->reladdr = NULL;
128
   }
129
 
130
   st_src_reg(gl_register_file file, int index, int type, int index2D)
131
   {
132
      this->type = type;
133
      this->file = file;
134
      this->index = index;
135
      this->index2D = index2D;
136
      this->swizzle = SWIZZLE_XYZW;
137
      this->negate = 0;
138
      this->reladdr = NULL;
139
   }
140
 
141
   st_src_reg()
142
   {
143
      this->type = GLSL_TYPE_ERROR;
144
      this->file = PROGRAM_UNDEFINED;
145
      this->index = 0;
146
      this->index2D = 0;
147
      this->swizzle = 0;
148
      this->negate = 0;
149
      this->reladdr = NULL;
150
   }
151
 
152
   explicit st_src_reg(st_dst_reg reg);
153
 
154
   gl_register_file file; /**< PROGRAM_* from Mesa */
155
   int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
156
   int index2D;
157
   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
158
   int negate; /**< NEGATE_XYZW mask from mesa */
159
   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
160
   /** Register index should be offset by the integer in this reg. */
161
   st_src_reg *reladdr;
162
};
163
 
164
class st_dst_reg {
165
public:
166
   st_dst_reg(gl_register_file file, int writemask, int type)
167
   {
168
      this->file = file;
169
      this->index = 0;
170
      this->writemask = writemask;
171
      this->cond_mask = COND_TR;
172
      this->reladdr = NULL;
173
      this->type = type;
174
   }
175
 
176
   st_dst_reg()
177
   {
178
      this->type = GLSL_TYPE_ERROR;
179
      this->file = PROGRAM_UNDEFINED;
180
      this->index = 0;
181
      this->writemask = 0;
182
      this->cond_mask = COND_TR;
183
      this->reladdr = NULL;
184
   }
185
 
186
   explicit st_dst_reg(st_src_reg reg);
187
 
188
   gl_register_file file; /**< PROGRAM_* from Mesa */
189
   int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
190
   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
191
   GLuint cond_mask:4;
192
   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
193
   /** Register index should be offset by the integer in this reg. */
194
   st_src_reg *reladdr;
195
};
196
 
197
/**
 * Reinterpret a destination register as a source.
 *
 * File, index, type and relative address are carried over.  The source reads
 * all four channels unswizzled and un-negated, and index2D is 0.
 */
st_src_reg::st_src_reg(st_dst_reg reg)
   : file(reg.file),
     index(reg.index),
     index2D(0),
     swizzle(SWIZZLE_XYZW),
     negate(0),
     type(reg.type),
     reladdr(reg.reladdr)
{
}
207
 
208
/**
 * Reinterpret a source register as a destination.
 *
 * File, index, type and relative address are carried over.  The destination
 * writes all four channels with condition mask COND_TR.
 */
st_dst_reg::st_dst_reg(st_src_reg reg)
   : file(reg.file),
     index(reg.index),
     writemask(WRITEMASK_XYZW),
     cond_mask(COND_TR),
     type(reg.type),
     reladdr(reg.reladdr)
{
}
217
 
218
class glsl_to_tgsi_instruction : public exec_node {
219
public:
220
   /* Callers of this ralloc-based new need not call delete. It's
221
    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
222
   static void* operator new(size_t size, void *ctx)
223
   {
224
      void *node;
225
 
226
      node = rzalloc_size(ctx, size);
227
      assert(node != NULL);
228
 
229
      return node;
230
   }
231
 
232
   unsigned op;
233
   st_dst_reg dst;
234
   st_src_reg src[3];
235
   /** Pointer to the ir source this tree came from for debugging */
236
   ir_instruction *ir;
237
   GLboolean cond_update;
238
   bool saturate;
239
   int sampler; /**< sampler index */
240
   int tex_target; /**< One of TEXTURE_*_INDEX */
241
   GLboolean tex_shadow;
242
   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
243
   unsigned tex_offset_num_offset;
244
   int dead_mask; /**< Used in dead code elimination */
245
 
246
   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
247
};
248
 
249
class variable_storage : public exec_node {
250
public:
251
   variable_storage(ir_variable *var, gl_register_file file, int index)
252
      : file(file), index(index), var(var)
253
   {
254
      /* empty */
255
   }
256
 
257
   gl_register_file file;
258
   int index;
259
   ir_variable *var; /* variable that maps to this, if any */
260
};
261
 
262
class immediate_storage : public exec_node {
263
public:
264
   immediate_storage(gl_constant_value *values, int size, int type)
265
   {
266
      memcpy(this->values, values, size * sizeof(gl_constant_value));
267
      this->size = size;
268
      this->type = type;
269
   }
270
 
271
   gl_constant_value values[4];
272
   int size; /**< Number of components (1-4) */
273
   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
274
};
275
 
276
class function_entry : public exec_node {
277
public:
278
   ir_function_signature *sig;
279
 
280
   /**
281
    * identifier of this function signature used by the program.
282
    *
283
    * At the point that TGSI instructions for function calls are
284
    * generated, we don't know the address of the first instruction of
285
    * the function body.  So we make the BranchTarget that is called a
286
    * small integer and rewrite them during set_branchtargets().
287
    */
288
   int sig_id;
289
 
290
   /**
291
    * Pointer to first instruction of the function body.
292
    *
293
    * Set during function body emits after main() is processed.
294
    */
295
   glsl_to_tgsi_instruction *bgn_inst;
296
 
297
   /**
298
    * Index of the first instruction of the function body in actual TGSI.
299
    *
300
    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
301
    */
302
   int inst;
303
 
304
   /** Storage for the return value. */
305
   st_src_reg return_reg;
306
};
307
 
308
struct glsl_to_tgsi_visitor : public ir_visitor {
309
public:
310
   glsl_to_tgsi_visitor();
311
   ~glsl_to_tgsi_visitor();
312
 
313
   function_entry *current_function;
314
 
315
   struct gl_context *ctx;
316
   struct gl_program *prog;
317
   struct gl_shader_program *shader_program;
318
   struct gl_shader_compiler_options *options;
319
 
320
   int next_temp;
321
 
322
   unsigned array_sizes[MAX_ARRAYS];
323
   unsigned next_array;
324
 
325
   int num_address_regs;
326
   int samplers_used;
327
   bool indirect_addr_consts;
328
 
329
   int glsl_version;
330
   bool native_integers;
331
   bool have_sqrt;
332
 
333
   variable_storage *find_variable_storage(ir_variable *var);
334
 
335
   int add_constant(gl_register_file file, gl_constant_value values[4],
336
                    int size, int datatype, GLuint *swizzle_out);
337
 
338
   function_entry *get_function_signature(ir_function_signature *sig);
339
 
340
   st_src_reg get_temp(const glsl_type *type);
341
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
342
 
343
   st_src_reg st_src_reg_for_float(float val);
344
   st_src_reg st_src_reg_for_int(int val);
345
   st_src_reg st_src_reg_for_type(int type, int val);
346
 
347
   /**
348
    * \name Visit methods
349
    *
350
    * As typical for the visitor pattern, there must be one \c visit method for
351
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
352
    * the hierarchy should not have \c visit methods.
353
    */
354
   /*@{*/
355
   virtual void visit(ir_variable *);
356
   virtual void visit(ir_loop *);
357
   virtual void visit(ir_loop_jump *);
358
   virtual void visit(ir_function_signature *);
359
   virtual void visit(ir_function *);
360
   virtual void visit(ir_expression *);
361
   virtual void visit(ir_swizzle *);
362
   virtual void visit(ir_dereference_variable  *);
363
   virtual void visit(ir_dereference_array *);
364
   virtual void visit(ir_dereference_record *);
365
   virtual void visit(ir_assignment *);
366
   virtual void visit(ir_constant *);
367
   virtual void visit(ir_call *);
368
   virtual void visit(ir_return *);
369
   virtual void visit(ir_discard *);
370
   virtual void visit(ir_texture *);
371
   virtual void visit(ir_if *);
372
   /*@}*/
373
 
374
   st_src_reg result;
375
 
376
   /** List of variable_storage */
377
   exec_list variables;
378
 
379
   /** List of immediate_storage */
380
   exec_list immediates;
381
   unsigned num_immediates;
382
 
383
   /** List of function_entry */
384
   exec_list function_signatures;
385
   int next_signature_id;
386
 
387
   /** List of glsl_to_tgsi_instruction */
388
   exec_list instructions;
389
 
390
   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
391
 
392
   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
393
        		        st_dst_reg dst, st_src_reg src0);
394
 
395
   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
396
        		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
397
 
398
   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
399
        		        st_dst_reg dst,
400
        		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
401
 
402
   unsigned get_opcode(ir_instruction *ir, unsigned op,
403
                    st_dst_reg dst,
404
                    st_src_reg src0, st_src_reg src1);
405
 
406
   /**
407
    * Emit the correct dot-product instruction for the type of arguments
408
    */
409
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
410
                                     st_dst_reg dst,
411
                                     st_src_reg src0,
412
                                     st_src_reg src1,
413
                                     unsigned elements);
414
 
415
   void emit_scalar(ir_instruction *ir, unsigned op,
416
        	    st_dst_reg dst, st_src_reg src0);
417
 
418
   void emit_scalar(ir_instruction *ir, unsigned op,
419
        	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
420
 
421
   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
422
 
423
   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
424
 
425
   void emit_scs(ir_instruction *ir, unsigned op,
426
        	 st_dst_reg dst, const st_src_reg &src);
427
 
428
   bool try_emit_mad(ir_expression *ir,
429
              int mul_operand);
430
   bool try_emit_mad_for_and_not(ir_expression *ir,
431
              int mul_operand);
432
   bool try_emit_sat(ir_expression *ir);
433
 
434
   void emit_swz(ir_expression *ir);
435
 
436
   bool process_move_condition(ir_rvalue *ir);
437
 
438
   void simplify_cmp(void);
439
 
440
   void rename_temp_register(int index, int new_index);
441
   int get_first_temp_read(int index);
442
   int get_first_temp_write(int index);
443
   int get_last_temp_read(int index);
444
   int get_last_temp_write(int index);
445
 
446
   void copy_propagate(void);
447
   void eliminate_dead_code(void);
448
   int eliminate_dead_code_advanced(void);
449
   void merge_registers(void);
450
   void renumber_registers(void);
451
 
452
   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
453
                       st_dst_reg *l, st_src_reg *r);
454
 
455
   void *mem_ctx;
456
};
457
 
458
static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
459
 
460
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
461
 
462
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
463
 
464
static void
465
fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
466
 
467
static void
468
fail_link(struct gl_shader_program *prog, const char *fmt, ...)
469
{
470
   va_list args;
471
   va_start(args, fmt);
472
   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
473
   va_end(args);
474
 
475
   prog->LinkStatus = GL_FALSE;
476
}
477
 
478
static int
479
swizzle_for_size(int size)
480
{
481
   int size_swizzles[4] = {
482
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
483
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
484
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
485
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
486
   };
487
 
488
   assert((size >= 1) && (size <= 4));
489
   return size_swizzles[size - 1];
490
}
491
 
492
static bool
493
is_tex_instruction(unsigned opcode)
494
{
495
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
496
   return info->is_tex;
497
}
498
 
499
static unsigned
500
num_inst_dst_regs(unsigned opcode)
501
{
502
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
503
   return info->num_dst;
504
}
505
 
506
static unsigned
507
num_inst_src_regs(unsigned opcode)
508
{
509
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
510
   return info->is_tex ? info->num_src - 1 : info->num_src;
511
}
512
 
513
glsl_to_tgsi_instruction *
514
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
515
        		 st_dst_reg dst,
516
        		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
517
{
518
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
519
   int num_reladdr = 0, i;
520
 
521
   op = get_opcode(ir, op, dst, src0, src1);
522
 
523
   /* If we have to do relative addressing, we want to load the ARL
524
    * reg directly for one of the regs, and preload the other reladdr
525
    * sources into temps.
526
    */
527
   num_reladdr += dst.reladdr != NULL;
528
   num_reladdr += src0.reladdr != NULL;
529
   num_reladdr += src1.reladdr != NULL;
530
   num_reladdr += src2.reladdr != NULL;
531
 
532
   reladdr_to_temp(ir, &src2, &num_reladdr);
533
   reladdr_to_temp(ir, &src1, &num_reladdr);
534
   reladdr_to_temp(ir, &src0, &num_reladdr);
535
 
536
   if (dst.reladdr) {
537
      emit_arl(ir, address_reg, *dst.reladdr);
538
      num_reladdr--;
539
   }
540
   assert(num_reladdr == 0);
541
 
542
   inst->op = op;
543
   inst->dst = dst;
544
   inst->src[0] = src0;
545
   inst->src[1] = src1;
546
   inst->src[2] = src2;
547
   inst->ir = ir;
548
   inst->dead_mask = 0;
549
 
550
   inst->function = NULL;
551
 
552
   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
553
      this->num_address_regs = 1;
554
 
555
   /* Update indirect addressing status used by TGSI */
556
   if (dst.reladdr) {
557
      switch(dst.file) {
558
      case PROGRAM_LOCAL_PARAM:
559
      case PROGRAM_ENV_PARAM:
560
      case PROGRAM_STATE_VAR:
561
      case PROGRAM_CONSTANT:
562
      case PROGRAM_UNIFORM:
563
         this->indirect_addr_consts = true;
564
         break;
565
      case PROGRAM_IMMEDIATE:
566
         assert(!"immediates should not have indirect addressing");
567
         break;
568
      default:
569
         break;
570
      }
571
   }
572
   else {
573
      for (i=0; i<3; i++) {
574
         if(inst->src[i].reladdr) {
575
            switch(inst->src[i].file) {
576
            case PROGRAM_LOCAL_PARAM:
577
            case PROGRAM_ENV_PARAM:
578
            case PROGRAM_STATE_VAR:
579
            case PROGRAM_CONSTANT:
580
            case PROGRAM_UNIFORM:
581
               this->indirect_addr_consts = true;
582
               break;
583
            case PROGRAM_IMMEDIATE:
584
               assert(!"immediates should not have indirect addressing");
585
               break;
586
            default:
587
               break;
588
            }
589
         }
590
      }
591
   }
592
 
593
   this->instructions.push_tail(inst);
594
 
595
   if (native_integers)
596
      try_emit_float_set(ir, op, dst);
597
 
598
   return inst;
599
}
600
 
601
 
602
/** Two-source form: forwards to the three-source emit() with src2 undefined. */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                           st_dst_reg dst, st_src_reg src0, st_src_reg src1)
{
   glsl_to_tgsi_instruction *const inst =
      emit(ir, op, dst, src0, src1, undef_src);

   return inst;
}
608
 
609
/**
 * One-source form: forwards to the three-source emit() with src1 and src2
 * undefined.  The destination must write at least one channel.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                           st_dst_reg dst, st_src_reg src0)
{
   assert(dst.writemask != 0);

   glsl_to_tgsi_instruction *const inst =
      emit(ir, op, dst, src0, undef_src, undef_src);

   return inst;
}
616
 
617
/** Operand-less form: forwards to the three-source emit() with every operand undefined. */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
{
   glsl_to_tgsi_instruction *const inst =
      emit(ir, op, undef_dst, undef_src, undef_src, undef_src);

   return inst;
}
622
 
623
 /**
624
 * Emits the code to convert the result of float SET instructions to integers.
625
 */
626
void
627
glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
628
        		 st_dst_reg dst)
629
{
630
   if ((op == TGSI_OPCODE_SEQ ||
631
        op == TGSI_OPCODE_SNE ||
632
        op == TGSI_OPCODE_SGE ||
633
        op == TGSI_OPCODE_SLT))
634
   {
635
      st_src_reg src = st_src_reg(dst);
636
      src.negate = ~src.negate;
637
      dst.type = GLSL_TYPE_FLOAT;
638
      emit(ir, TGSI_OPCODE_F2I, dst, src);
639
   }
640
}
641
 
642
/**
643
 * Determines whether to use an integer, unsigned integer, or float opcode
644
 * based on the operands and input opcode, then emits the result.
645
 */
646
unsigned
647
glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
648
        		 st_dst_reg dst,
649
        		 st_src_reg src0, st_src_reg src1)
650
{
651
   int type = GLSL_TYPE_FLOAT;
652
 
653
   assert(src0.type != GLSL_TYPE_ARRAY);
654
   assert(src0.type != GLSL_TYPE_STRUCT);
655
   assert(src1.type != GLSL_TYPE_ARRAY);
656
   assert(src1.type != GLSL_TYPE_STRUCT);
657
 
658
   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
659
      type = GLSL_TYPE_FLOAT;
660
   else if (native_integers)
661
      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
662
 
663
#define case4(c, f, i, u) \
664
   case TGSI_OPCODE_##c: \
665
      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
666
      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
667
      else op = TGSI_OPCODE_##f; \
668
      break;
669
#define case3(f, i, u)  case4(f, f, i, u)
670
#define case2fi(f, i)   case4(f, f, i, i)
671
#define case2iu(i, u)   case4(i, LAST, i, u)
672
 
673
   switch(op) {
674
      case2fi(ADD, UADD);
675
      case2fi(MUL, UMUL);
676
      case2fi(MAD, UMAD);
677
      case3(DIV, IDIV, UDIV);
678
      case3(MAX, IMAX, UMAX);
679
      case3(MIN, IMIN, UMIN);
680
      case2iu(MOD, UMOD);
681
 
682
      case2fi(SEQ, USEQ);
683
      case2fi(SNE, USNE);
684
      case3(SGE, ISGE, USGE);
685
      case3(SLT, ISLT, USLT);
686
 
687
      case2iu(ISHR, USHR);
688
 
689
      case2fi(SSG, ISSG);
690
      case3(ABS, IABS, IABS);
691
 
692
      default: break;
693
   }
694
 
695
   assert(op != TGSI_OPCODE_LAST);
696
   return op;
697
}
698
 
699
/**
 * Emit the dot-product instruction matching the operand width.
 *
 * \param ir        IR instruction being processed
 * \param dst       Destination register
 * \param src0      First operand
 * \param src1      Second operand
 * \param elements  Number of components in the operands; must be 2, 3 or 4,
 *                  selecting DP2, DP3 or DP4 respectively.
 * \return the emitted instruction.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
                              st_dst_reg dst, st_src_reg src0, st_src_reg src1,
                              unsigned elements)
{
   static const unsigned dot_opcodes[] = {
      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
   };

   /* 'elements' is unsigned, so 0 or 1 would wrap around in the subtraction
    * below and index far outside the three-entry table.
    */
   assert(elements >= 2 && elements <= 4);

   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
}
710
 
711
/**
712
 * Emits TGSI scalar opcodes to produce unique answers across channels.
713
 *
714
 * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
715
 * channel determines the result across all channels.  So to do a vec4
716
 * of this operation, we want to emit a scalar per source channel used
717
 * to produce dest channels.
718
 */
719
void
720
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
721
        		        st_dst_reg dst,
722
        			st_src_reg orig_src0, st_src_reg orig_src1)
723
{
724
   int i, j;
725
   int done_mask = ~dst.writemask;
726
 
727
   /* TGSI RCP is a scalar operation splatting results to all channels,
728
    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
729
    * dst channels.
730
    */
731
   for (i = 0; i < 4; i++) {
732
      GLuint this_mask = (1 << i);
733
      glsl_to_tgsi_instruction *inst;
734
      st_src_reg src0 = orig_src0;
735
      st_src_reg src1 = orig_src1;
736
 
737
      if (done_mask & this_mask)
738
         continue;
739
 
740
      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
741
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
742
      for (j = i + 1; j < 4; j++) {
743
         /* If there is another enabled component in the destination that is
744
          * derived from the same inputs, generate its value on this pass as
745
          * well.
746
          */
747
         if (!(done_mask & (1 << j)) &&
748
             GET_SWZ(src0.swizzle, j) == src0_swiz &&
749
             GET_SWZ(src1.swizzle, j) == src1_swiz) {
750
            this_mask |= (1 << j);
751
         }
752
      }
753
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
754
        			   src0_swiz, src0_swiz);
755
      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
756
        			  src1_swiz, src1_swiz);
757
 
758
      inst = emit(ir, op, dst, src0, src1);
759
      inst->dst.writemask = this_mask;
760
      done_mask |= this_mask;
761
   }
762
}
763
 
764
void
765
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
766
        		        st_dst_reg dst, st_src_reg src0)
767
{
768
   st_src_reg undef = undef_src;
769
 
770
   undef.swizzle = SWIZZLE_XXXX;
771
 
772
   emit_scalar(ir, op, dst, src0, undef);
773
}
774
 
775
void
776
glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
777
        		        st_dst_reg dst, st_src_reg src0)
778
{
779
   int op = TGSI_OPCODE_ARL;
780
 
781
   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
782
      op = TGSI_OPCODE_UARL;
783
 
784
   emit(NULL, op, dst, src0);
785
}
786
 
787
/**
788
 * Emit an TGSI_OPCODE_SCS instruction
789
 *
790
 * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
791
 * Instead of splatting its result across all four components of the
792
 * destination, it writes one value to the \c x component and another value to
793
 * the \c y component.
794
 *
795
 * \param ir        IR instruction being processed
796
 * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
797
 *                  on which value is desired.
798
 * \param dst       Destination register
799
 * \param src       Source register
800
 */
801
void
802
glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
803
        		     st_dst_reg dst,
804
        		     const st_src_reg &src)
805
{
806
   /* Vertex programs cannot use the SCS opcode.
807
    */
808
   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
809
      emit_scalar(ir, op, dst, src);
810
      return;
811
   }
812
 
813
   const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
814
   const unsigned scs_mask = (1U << component);
815
   int done_mask = ~dst.writemask;
816
   st_src_reg tmp;
817
 
818
   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
819
 
820
   /* If there are compnents in the destination that differ from the component
821
    * that will be written by the SCS instrution, we'll need a temporary.
822
    */
823
   if (scs_mask != unsigned(dst.writemask)) {
824
      tmp = get_temp(glsl_type::vec4_type);
825
   }
826
 
827
   for (unsigned i = 0; i < 4; i++) {
828
      unsigned this_mask = (1U << i);
829
      st_src_reg src0 = src;
830
 
831
      if ((done_mask & this_mask) != 0)
832
         continue;
833
 
834
      /* The source swizzle specified which component of the source generates
835
       * sine / cosine for the current component in the destination.  The SCS
836
       * instruction requires that this value be swizzle to the X component.
837
       * Replace the current swizzle with a swizzle that puts the source in
838
       * the X component.
839
       */
840
      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
841
 
842
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
843
        			   src0_swiz, src0_swiz);
844
      for (unsigned j = i + 1; j < 4; j++) {
845
         /* If there is another enabled component in the destination that is
846
          * derived from the same inputs, generate its value on this pass as
847
          * well.
848
          */
849
         if (!(done_mask & (1 << j)) &&
850
             GET_SWZ(src0.swizzle, j) == src0_swiz) {
851
            this_mask |= (1 << j);
852
         }
853
      }
854
 
855
      if (this_mask != scs_mask) {
856
         glsl_to_tgsi_instruction *inst;
857
         st_dst_reg tmp_dst = st_dst_reg(tmp);
858
 
859
         /* Emit the SCS instruction.
860
          */
861
         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
862
         inst->dst.writemask = scs_mask;
863
 
864
         /* Move the result of the SCS instruction to the desired location in
865
          * the destination.
866
          */
867
         tmp.swizzle = MAKE_SWIZZLE4(component, component,
868
        			     component, component);
869
         inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
870
         inst->dst.writemask = this_mask;
871
      } else {
872
         /* Emit the SCS instruction to write directly to the destination.
873
          */
874
         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
875
         inst->dst.writemask = scs_mask;
876
      }
877
 
878
      done_mask |= this_mask;
879
   }
880
}
881
 
882
int
883
glsl_to_tgsi_visitor::add_constant(gl_register_file file,
884
        		     gl_constant_value values[4], int size, int datatype,
885
        		     GLuint *swizzle_out)
886
{
887
   if (file == PROGRAM_CONSTANT) {
888
      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
889
                                              size, datatype, swizzle_out);
890
   } else {
891
      int index = 0;
892
      immediate_storage *entry;
893
      assert(file == PROGRAM_IMMEDIATE);
894
 
895
      /* Search immediate storage to see if we already have an identical
896
       * immediate that we can use instead of adding a duplicate entry.
897
       */
898
      foreach_iter(exec_list_iterator, iter, this->immediates) {
899
         entry = (immediate_storage *)iter.get();
900
 
901
         if (entry->size == size &&
902
             entry->type == datatype &&
903
             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
904
             return index;
905
         }
906
         index++;
907
      }
908
 
909
      /* Add this immediate to the list. */
910
      entry = new(mem_ctx) immediate_storage(values, size, datatype);
911
      this->immediates.push_tail(entry);
912
      this->num_immediates++;
913
      return index;
914
   }
915
}
916
 
917
/** Return a source register referring to a one-component float immediate holding \c val. */
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
{
   union gl_constant_value imm;
   imm.f = val;

   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
   src.index = add_constant(src.file, &imm, 1, GL_FLOAT, &src.swizzle);

   return src;
}
928
 
929
/**
 * Return a source register referring to a one-component int immediate
 * holding \c val.  Only valid when native integers are enabled.
 */
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
{
   assert(native_integers);

   union gl_constant_value imm;
   imm.i = val;

   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
   src.index = add_constant(src.file, &imm, 1, GL_INT, &src.swizzle);

   return src;
}
942
 
943
/**
 * Return an immediate holding \c val in the representation suited to
 * \c type.
 *
 * An int immediate is produced only when native integers are enabled and
 * \c type is not GLSL_TYPE_FLOAT.  In every other case \c val is converted
 * to float.
 */
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
{
   if (native_integers && type != GLSL_TYPE_FLOAT)
      return st_src_reg_for_int(val);

   return st_src_reg_for_float(val);
}
952
 
953
static int
954
type_size(const struct glsl_type *type)
955
{
956
   unsigned int i;
957
   int size;
958
 
959
   switch (type->base_type) {
960
   case GLSL_TYPE_UINT:
961
   case GLSL_TYPE_INT:
962
   case GLSL_TYPE_FLOAT:
963
   case GLSL_TYPE_BOOL:
964
      if (type->is_matrix()) {
965
         return type->matrix_columns;
966
      } else {
967
         /* Regardless of size of vector, it gets a vec4. This is bad
968
          * packing for things like floats, but otherwise arrays become a
969
          * mess.  Hopefully a later pass over the code can pack scalars
970
          * down if appropriate.
971
          */
972
         return 1;
973
      }
974
   case GLSL_TYPE_ARRAY:
975
      assert(type->length > 0);
976
      return type_size(type->fields.array) * type->length;
977
   case GLSL_TYPE_STRUCT:
978
      size = 0;
979
      for (i = 0; i < type->length; i++) {
980
         size += type_size(type->fields.structure[i].type);
981
      }
982
      return size;
983
   case GLSL_TYPE_SAMPLER:
984
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
985
       * at link time.
986
       */
987
      return 1;
988
   case GLSL_TYPE_INTERFACE:
989
   case GLSL_TYPE_VOID:
990
   case GLSL_TYPE_ERROR:
991
      assert(!"Invalid type in type_size");
992
      break;
993
   }
994
   return 0;
995
}
996
 
997
/**
998
 * In the initial pass of codegen, we assign temporary numbers to
999
 * intermediate results.  (not SSA -- variable assignments will reuse
1000
 * storage).
1001
 */
1002
st_src_reg
1003
glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
1004
{
1005
   st_src_reg src;
1006
 
1007
   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
1008
   src.reladdr = NULL;
1009
   src.negate = 0;
1010
 
1011
   if (!options->EmitNoIndirectTemp &&
1012
       (type->is_array() || type->is_matrix())) {
1013
 
1014
      src.file = PROGRAM_ARRAY;
1015
      src.index = next_array << 16 | 0x8000;
1016
      array_sizes[next_array] = type_size(type);
1017
      ++next_array;
1018
 
1019
   } else {
1020
      src.file = PROGRAM_TEMPORARY;
1021
      src.index = next_temp;
1022
      next_temp += type_size(type);
1023
   }
1024
 
1025
   if (type->is_array() || type->is_record()) {
1026
      src.swizzle = SWIZZLE_NOOP;
1027
   } else {
1028
      src.swizzle = swizzle_for_size(type->vector_elements);
1029
   }
1030
 
1031
   return src;
1032
}
1033
 
1034
/**
 * Look up the storage record previously created for \p var.
 *
 * Performs a linear scan of this->variables and returns the first entry
 * whose variable pointer matches, or NULL when none does.
 */
variable_storage *
glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
{
   foreach_iter(exec_list_iterator, iter, this->variables) {
      variable_storage *const candidate = (variable_storage *)iter.get();

      if (candidate->var == var)
         return candidate;
   }

   return NULL;
}
1049
 
1050
void
1051
glsl_to_tgsi_visitor::visit(ir_variable *ir)
1052
{
1053
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
1054
      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1055
 
1056
      fp->OriginUpperLeft = ir->origin_upper_left;
1057
      fp->PixelCenterInteger = ir->pixel_center_integer;
1058
   }
1059
 
1060
   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1061
      unsigned int i;
1062
      const ir_state_slot *const slots = ir->state_slots;
1063
      assert(ir->state_slots != NULL);
1064
 
1065
      /* Check if this statevar's setup in the STATE file exactly
1066
       * matches how we'll want to reference it as a
1067
       * struct/array/whatever.  If not, then we need to move it into
1068
       * temporary storage and hope that it'll get copy-propagated
1069
       * out.
1070
       */
1071
      for (i = 0; i < ir->num_state_slots; i++) {
1072
         if (slots[i].swizzle != SWIZZLE_XYZW) {
1073
            break;
1074
         }
1075
      }
1076
 
1077
      variable_storage *storage;
1078
      st_dst_reg dst;
1079
      if (i == ir->num_state_slots) {
1080
         /* We'll set the index later. */
1081
         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1082
         this->variables.push_tail(storage);
1083
 
1084
         dst = undef_dst;
1085
      } else {
1086
         /* The variable_storage constructor allocates slots based on the size
1087
          * of the type.  However, this had better match the number of state
1088
          * elements that we're going to copy into the new temporary.
1089
          */
1090
         assert((int) ir->num_state_slots == type_size(ir->type));
1091
 
1092
         dst = st_dst_reg(get_temp(ir->type));
1093
 
1094
         storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
1095
 
1096
         this->variables.push_tail(storage);
1097
      }
1098
 
1099
 
1100
      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
1101
         int index = _mesa_add_state_reference(this->prog->Parameters,
1102
        				       (gl_state_index *)slots[i].tokens);
1103
 
1104
         if (storage->file == PROGRAM_STATE_VAR) {
1105
            if (storage->index == -1) {
1106
               storage->index = index;
1107
            } else {
1108
               assert(index == storage->index + (int)i);
1109
            }
1110
         } else {
1111
         	/* We use GLSL_TYPE_FLOAT here regardless of the actual type of
1112
         	 * the data being moved since MOV does not care about the type of
1113
         	 * data it is moving, and we don't want to declare registers with
1114
         	 * array or struct types.
1115
         	 */
1116
            st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
1117
            src.swizzle = slots[i].swizzle;
1118
            emit(ir, TGSI_OPCODE_MOV, dst, src);
1119
            /* even a float takes up a whole vec4 reg in a struct/array. */
1120
            dst.index++;
1121
         }
1122
      }
1123
 
1124
      if (storage->file == PROGRAM_TEMPORARY &&
1125
          dst.index != storage->index + (int) ir->num_state_slots) {
1126
         fail_link(this->shader_program,
1127
        	   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
1128
        	   ir->name, dst.index - storage->index,
1129
        	   type_size(ir->type));
1130
      }
1131
   }
1132
}
1133
 
1134
void
1135
glsl_to_tgsi_visitor::visit(ir_loop *ir)
1136
{
1137
   ir_dereference_variable *counter = NULL;
1138
 
1139
   if (ir->counter != NULL)
1140
      counter = new(ir) ir_dereference_variable(ir->counter);
1141
 
1142
   if (ir->from != NULL) {
1143
      assert(ir->counter != NULL);
1144
 
1145
      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
1146
 
1147
      a->accept(this);
1148
      delete a;
1149
   }
1150
 
1151
   emit(NULL, TGSI_OPCODE_BGNLOOP);
1152
 
1153
   if (ir->to) {
1154
      ir_expression *e =
1155
         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
1156
        		       counter, ir->to);
1157
      ir_if *if_stmt =  new(ir) ir_if(e);
1158
 
1159
      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
1160
 
1161
      if_stmt->then_instructions.push_tail(brk);
1162
 
1163
      if_stmt->accept(this);
1164
 
1165
      delete if_stmt;
1166
      delete e;
1167
      delete brk;
1168
   }
1169
 
1170
   visit_exec_list(&ir->body_instructions, this);
1171
 
1172
   if (ir->increment) {
1173
      ir_expression *e =
1174
         new(ir) ir_expression(ir_binop_add, counter->type,
1175
        		       counter, ir->increment);
1176
 
1177
      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
1178
 
1179
      a->accept(this);
1180
      delete a;
1181
      delete e;
1182
   }
1183
 
1184
   emit(NULL, TGSI_OPCODE_ENDLOOP);
1185
}
1186
 
1187
void
1188
glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1189
{
1190
   switch (ir->mode) {
1191
   case ir_loop_jump::jump_break:
1192
      emit(NULL, TGSI_OPCODE_BRK);
1193
      break;
1194
   case ir_loop_jump::jump_continue:
1195
      emit(NULL, TGSI_OPCODE_CONT);
1196
      break;
1197
   }
1198
}
1199
 
1200
 
1201
void
1202
glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1203
{
1204
   assert(0);
1205
   (void)ir;
1206
}
1207
 
1208
void
1209
glsl_to_tgsi_visitor::visit(ir_function *ir)
1210
{
1211
   /* Ignore function bodies other than main() -- we shouldn't see calls to
1212
    * them since they should all be inlined before we get to glsl_to_tgsi.
1213
    */
1214
   if (strcmp(ir->name, "main") == 0) {
1215
      const ir_function_signature *sig;
1216
      exec_list empty;
1217
 
1218
      sig = ir->matching_signature(&empty);
1219
 
1220
      assert(sig);
1221
 
1222
      foreach_iter(exec_list_iterator, iter, sig->body) {
1223
         ir_instruction *ir = (ir_instruction *)iter.get();
1224
 
1225
         ir->accept(this);
1226
      }
1227
   }
1228
}
1229
 
1230
bool
1231
glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1232
{
1233
   int nonmul_operand = 1 - mul_operand;
1234
   st_src_reg a, b, c;
1235
   st_dst_reg result_dst;
1236
 
1237
   ir_expression *expr = ir->operands[mul_operand]->as_expression();
1238
   if (!expr || expr->operation != ir_binop_mul)
1239
      return false;
1240
 
1241
   expr->operands[0]->accept(this);
1242
   a = this->result;
1243
   expr->operands[1]->accept(this);
1244
   b = this->result;
1245
   ir->operands[nonmul_operand]->accept(this);
1246
   c = this->result;
1247
 
1248
   this->result = get_temp(ir->type);
1249
   result_dst = st_dst_reg(this->result);
1250
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1251
   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1252
 
1253
   return true;
1254
}
1255
 
1256
/**
1257
 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1258
 *
1259
 * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
1260
 * implemented using multiplication, and logical-or is implemented using
1261
 * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
1262
 * As result, the logical expression (a & !b) can be rewritten as:
1263
 *
1264
 *     - a * !b
1265
 *     - a * (1 - b)
1266
 *     - (a * 1) - (a * b)
1267
 *     - a + -(a * b)
1268
 *     - a + (a * -b)
1269
 *
1270
 * This final expression can be implemented as a single MAD(a, -b, a)
1271
 * instruction.
1272
 */
1273
bool
1274
glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1275
{
1276
   const int other_operand = 1 - try_operand;
1277
   st_src_reg a, b;
1278
 
1279
   ir_expression *expr = ir->operands[try_operand]->as_expression();
1280
   if (!expr || expr->operation != ir_unop_logic_not)
1281
      return false;
1282
 
1283
   ir->operands[other_operand]->accept(this);
1284
   a = this->result;
1285
   expr->operands[0]->accept(this);
1286
   b = this->result;
1287
 
1288
   b.negate = ~b.negate;
1289
 
1290
   this->result = get_temp(ir->type);
1291
   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1292
 
1293
   return true;
1294
}
1295
 
1296
bool
1297
glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1298
{
1299
   /* Emit saturates in the vertex shader only if SM 3.0 is supported.
1300
    */
1301
   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1302
       !st_context(this->ctx)->has_shader_model3) {
1303
      return false;
1304
   }
1305
 
1306
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1307
   if (!sat_src)
1308
      return false;
1309
 
1310
   sat_src->accept(this);
1311
   st_src_reg src = this->result;
1312
 
1313
   /* If we generated an expression instruction into a temporary in
1314
    * processing the saturate's operand, apply the saturate to that
1315
    * instruction.  Otherwise, generate a MOV to do the saturate.
1316
    *
1317
    * Note that we have to be careful to only do this optimization if
1318
    * the instruction in question was what generated src->result.  For
1319
    * example, ir_dereference_array might generate a MUL instruction
1320
    * to create the reladdr, and return us a src reg using that
1321
    * reladdr.  That MUL result is not the value we're trying to
1322
    * saturate.
1323
    */
1324
   ir_expression *sat_src_expr = sat_src->as_expression();
1325
   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
1326
			sat_src_expr->operation == ir_binop_add ||
1327
			sat_src_expr->operation == ir_binop_dot)) {
1328
      glsl_to_tgsi_instruction *new_inst;
1329
      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
1330
      new_inst->saturate = true;
1331
   } else {
1332
      this->result = get_temp(ir->type);
1333
      st_dst_reg result_dst = st_dst_reg(this->result);
1334
      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1335
      glsl_to_tgsi_instruction *inst;
1336
      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
1337
      inst->saturate = true;
1338
   }
1339
 
1340
   return true;
1341
}
1342
 
1343
void
1344
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1345
        			    st_src_reg *reg, int *num_reladdr)
1346
{
1347
   if (!reg->reladdr)
1348
      return;
1349
 
1350
   emit_arl(ir, address_reg, *reg->reladdr);
1351
 
1352
   if (*num_reladdr != 1) {
1353
      st_src_reg temp = get_temp(glsl_type::vec4_type);
1354
 
1355
      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1356
      *reg = temp;
1357
   }
1358
 
1359
   (*num_reladdr)--;
1360
}
1361
 
1362
void
1363
glsl_to_tgsi_visitor::visit(ir_expression *ir)
1364
{
1365
   unsigned int operand;
1366
   st_src_reg op[Elements(ir->operands)];
1367
   st_src_reg result_src;
1368
   st_dst_reg result_dst;
1369
 
1370
   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1371
    */
1372
   if (ir->operation == ir_binop_add) {
1373
      if (try_emit_mad(ir, 1))
1374
         return;
1375
      if (try_emit_mad(ir, 0))
1376
         return;
1377
   }
1378
 
1379
   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
1380
    */
1381
   if (ir->operation == ir_binop_logic_and) {
1382
      if (try_emit_mad_for_and_not(ir, 1))
1383
	 return;
1384
      if (try_emit_mad_for_and_not(ir, 0))
1385
	 return;
1386
   }
1387
 
1388
   if (try_emit_sat(ir))
1389
      return;
1390
 
1391
   if (ir->operation == ir_quadop_vector)
1392
      assert(!"ir_quadop_vector should have been lowered");
1393
 
1394
   for (operand = 0; operand < ir->get_num_operands(); operand++) {
1395
      this->result.file = PROGRAM_UNDEFINED;
1396
      ir->operands[operand]->accept(this);
1397
      if (this->result.file == PROGRAM_UNDEFINED) {
1398
         printf("Failed to get tree for expression operand:\n");
1399
         ir->operands[operand]->print();
1400
         printf("\n");
1401
         exit(1);
1402
      }
1403
      op[operand] = this->result;
1404
 
1405
      /* Matrix expression operands should have been broken down to vector
1406
       * operations already.
1407
       */
1408
      assert(!ir->operands[operand]->type->is_matrix());
1409
   }
1410
 
1411
   int vector_elements = ir->operands[0]->type->vector_elements;
1412
   if (ir->operands[1]) {
1413
      vector_elements = MAX2(vector_elements,
1414
        		     ir->operands[1]->type->vector_elements);
1415
   }
1416
 
1417
   this->result.file = PROGRAM_UNDEFINED;
1418
 
1419
   /* Storage for our result.  Ideally for an assignment we'd be using
1420
    * the actual storage for the result here, instead.
1421
    */
1422
   result_src = get_temp(ir->type);
1423
   /* convenience for the emit functions below. */
1424
   result_dst = st_dst_reg(result_src);
1425
   /* Limit writes to the channels that will be used by result_src later.
1426
    * This does limit this temp's use as a temporary for multi-instruction
1427
    * sequences.
1428
    */
1429
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1430
 
1431
   switch (ir->operation) {
1432
   case ir_unop_logic_not:
1433
      if (result_dst.type != GLSL_TYPE_FLOAT)
1434
         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1435
      else {
1436
         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
1437
          * older GPUs implement SEQ using multiple instructions (i915 uses two
1438
          * SGE instructions and a MUL instruction).  Since our logic values are
1439
          * 0.0 and 1.0, 1-x also implements !x.
1440
          */
1441
         op[0].negate = ~op[0].negate;
1442
         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1443
      }
1444
      break;
1445
   case ir_unop_neg:
1446
      if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1447
         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1448
      else {
1449
         op[0].negate = ~op[0].negate;
1450
         result_src = op[0];
1451
      }
1452
      break;
1453
   case ir_unop_abs:
1454
      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1455
      break;
1456
   case ir_unop_sign:
1457
      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1458
      break;
1459
   case ir_unop_rcp:
1460
      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1461
      break;
1462
 
1463
   case ir_unop_exp2:
1464
      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1465
      break;
1466
   case ir_unop_exp:
1467
   case ir_unop_log:
1468
      assert(!"not reached: should be handled by ir_explog_to_explog2");
1469
      break;
1470
   case ir_unop_log2:
1471
      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1472
      break;
1473
   case ir_unop_sin:
1474
      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1475
      break;
1476
   case ir_unop_cos:
1477
      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1478
      break;
1479
   case ir_unop_sin_reduced:
1480
      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1481
      break;
1482
   case ir_unop_cos_reduced:
1483
      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1484
      break;
1485
 
1486
   case ir_unop_dFdx:
1487
      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1488
      break;
1489
   case ir_unop_dFdy:
1490
   {
1491
      /* The X component contains 1 or -1 depending on whether the framebuffer
1492
       * is a FBO or the window system buffer, respectively.
1493
       * It is then multiplied with the source operand of DDY.
1494
       */
1495
      static const gl_state_index transform_y_state[STATE_LENGTH]
1496
         = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
1497
 
1498
      unsigned transform_y_index =
1499
         _mesa_add_state_reference(this->prog->Parameters,
1500
                                   transform_y_state);
1501
 
1502
      st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
1503
                                          transform_y_index,
1504
                                          glsl_type::vec4_type);
1505
      transform_y.swizzle = SWIZZLE_XXXX;
1506
 
1507
      st_src_reg temp = get_temp(glsl_type::vec4_type);
1508
 
1509
      emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
1510
      emit(ir, TGSI_OPCODE_DDY, result_dst, temp);
1511
      break;
1512
   }
1513
 
1514
   case ir_unop_noise: {
1515
      /* At some point, a motivated person could add a better
1516
       * implementation of noise.  Currently not even the nvidia
1517
       * binary drivers do anything more than this.  In any case, the
1518
       * place to do this is in the GL state tracker, not the poor
1519
       * driver.
1520
       */
1521
      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1522
      break;
1523
   }
1524
 
1525
   case ir_binop_add:
1526
      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1527
      break;
1528
   case ir_binop_sub:
1529
      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1530
      break;
1531
 
1532
   case ir_binop_mul:
1533
      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1534
      break;
1535
   case ir_binop_div:
1536
      if (result_dst.type == GLSL_TYPE_FLOAT)
1537
         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1538
      else
1539
         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1540
      break;
1541
   case ir_binop_mod:
1542
      if (result_dst.type == GLSL_TYPE_FLOAT)
1543
         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1544
      else
1545
         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1546
      break;
1547
 
1548
   case ir_binop_less:
1549
      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1550
      break;
1551
   case ir_binop_greater:
1552
      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1553
      break;
1554
   case ir_binop_lequal:
1555
      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1556
      break;
1557
   case ir_binop_gequal:
1558
      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1559
      break;
1560
   case ir_binop_equal:
1561
      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1562
      break;
1563
   case ir_binop_nequal:
1564
      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1565
      break;
1566
   case ir_binop_all_equal:
1567
      /* "==" operator producing a scalar boolean. */
1568
      if (ir->operands[0]->type->is_vector() ||
1569
          ir->operands[1]->type->is_vector()) {
1570
         st_src_reg temp = get_temp(native_integers ?
1571
               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1572
               glsl_type::vec4_type);
1573
 
1574
         if (native_integers) {
1575
            st_dst_reg temp_dst = st_dst_reg(temp);
1576
            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1577
 
1578
            emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1579
 
1580
            /* Emit 1-3 AND operations to combine the SEQ results. */
1581
            switch (ir->operands[0]->type->vector_elements) {
1582
            case 2:
1583
               break;
1584
            case 3:
1585
               temp_dst.writemask = WRITEMASK_Y;
1586
               temp1.swizzle = SWIZZLE_YYYY;
1587
               temp2.swizzle = SWIZZLE_ZZZZ;
1588
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1589
               break;
1590
            case 4:
1591
               temp_dst.writemask = WRITEMASK_X;
1592
               temp1.swizzle = SWIZZLE_XXXX;
1593
               temp2.swizzle = SWIZZLE_YYYY;
1594
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1595
               temp_dst.writemask = WRITEMASK_Y;
1596
               temp1.swizzle = SWIZZLE_ZZZZ;
1597
               temp2.swizzle = SWIZZLE_WWWW;
1598
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1599
            }
1600
 
1601
            temp1.swizzle = SWIZZLE_XXXX;
1602
            temp2.swizzle = SWIZZLE_YYYY;
1603
            emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1604
         } else {
1605
            emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1606
 
1607
            /* After the dot-product, the value will be an integer on the
1608
             * range [0,4].  Zero becomes 1.0, and positive values become zero.
1609
             */
1610
            emit_dp(ir, result_dst, temp, temp, vector_elements);
1611
 
1612
            /* Negating the result of the dot-product gives values on the range
1613
             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
1614
             * This is achieved using SGE.
1615
             */
1616
            st_src_reg sge_src = result_src;
1617
            sge_src.negate = ~sge_src.negate;
1618
            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1619
         }
1620
      } else {
1621
         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1622
      }
1623
      break;
1624
   case ir_binop_any_nequal:
1625
      /* "!=" operator producing a scalar boolean. */
1626
      if (ir->operands[0]->type->is_vector() ||
1627
          ir->operands[1]->type->is_vector()) {
1628
         st_src_reg temp = get_temp(native_integers ?
1629
               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1630
               glsl_type::vec4_type);
1631
         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1632
 
1633
         if (native_integers) {
1634
            st_dst_reg temp_dst = st_dst_reg(temp);
1635
            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1636
 
1637
            /* Emit 1-3 OR operations to combine the SNE results. */
1638
            switch (ir->operands[0]->type->vector_elements) {
1639
            case 2:
1640
               break;
1641
            case 3:
1642
               temp_dst.writemask = WRITEMASK_Y;
1643
               temp1.swizzle = SWIZZLE_YYYY;
1644
               temp2.swizzle = SWIZZLE_ZZZZ;
1645
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1646
               break;
1647
            case 4:
1648
               temp_dst.writemask = WRITEMASK_X;
1649
               temp1.swizzle = SWIZZLE_XXXX;
1650
               temp2.swizzle = SWIZZLE_YYYY;
1651
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1652
               temp_dst.writemask = WRITEMASK_Y;
1653
               temp1.swizzle = SWIZZLE_ZZZZ;
1654
               temp2.swizzle = SWIZZLE_WWWW;
1655
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1656
            }
1657
 
1658
            temp1.swizzle = SWIZZLE_XXXX;
1659
            temp2.swizzle = SWIZZLE_YYYY;
1660
            emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1661
         } else {
1662
            /* After the dot-product, the value will be an integer on the
1663
             * range [0,4].  Zero stays zero, and positive values become 1.0.
1664
             */
1665
            glsl_to_tgsi_instruction *const dp =
1666
                  emit_dp(ir, result_dst, temp, temp, vector_elements);
1667
            if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1668
               /* The clamping to [0,1] can be done for free in the fragment
1669
                * shader with a saturate.
1670
                */
1671
               dp->saturate = true;
1672
            } else {
1673
               /* Negating the result of the dot-product gives values on the range
1674
                * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1675
                * achieved using SLT.
1676
                */
1677
               st_src_reg slt_src = result_src;
1678
               slt_src.negate = ~slt_src.negate;
1679
               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1680
            }
1681
         }
1682
      } else {
1683
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1684
      }
1685
      break;
1686
 
1687
   case ir_unop_any: {
1688
      assert(ir->operands[0]->type->is_vector());
1689
 
1690
      /* After the dot-product, the value will be an integer on the
1691
       * range [0,4].  Zero stays zero, and positive values become 1.0.
1692
       */
1693
      glsl_to_tgsi_instruction *const dp =
1694
         emit_dp(ir, result_dst, op[0], op[0],
1695
                 ir->operands[0]->type->vector_elements);
1696
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1697
          result_dst.type == GLSL_TYPE_FLOAT) {
1698
	      /* The clamping to [0,1] can be done for free in the fragment
1699
	       * shader with a saturate.
1700
	       */
1701
	      dp->saturate = true;
1702
      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1703
	      /* Negating the result of the dot-product gives values on the range
1704
	       * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
1705
	       * is achieved using SLT.
1706
	       */
1707
	      st_src_reg slt_src = result_src;
1708
	      slt_src.negate = ~slt_src.negate;
1709
	      emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1710
      }
1711
      else {
1712
         /* Use SNE 0 if integers are being used as boolean values. */
1713
         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1714
      }
1715
      break;
1716
   }
1717
 
1718
   case ir_binop_logic_xor:
1719
      if (native_integers)
1720
         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1721
      else
1722
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1723
      break;
1724
 
1725
   case ir_binop_logic_or: {
1726
      if (native_integers) {
1727
         /* If integers are used as booleans, we can use an actual "or"
1728
          * instruction.
1729
          */
1730
         assert(native_integers);
1731
         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1732
      } else {
1733
         /* After the addition, the value will be an integer on the
1734
          * range [0,2].  Zero stays zero, and positive values become 1.0.
1735
          */
1736
         glsl_to_tgsi_instruction *add =
1737
            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1738
         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1739
            /* The clamping to [0,1] can be done for free in the fragment
1740
             * shader with a saturate if floats are being used as boolean values.
1741
             */
1742
            add->saturate = true;
1743
         } else {
1744
            /* Negating the result of the addition gives values on the range
1745
             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
1746
             * is achieved using SLT.
1747
             */
1748
            st_src_reg slt_src = result_src;
1749
            slt_src.negate = ~slt_src.negate;
1750
            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1751
         }
1752
      }
1753
      break;
1754
   }
1755
 
1756
   case ir_binop_logic_and:
1757
      /* If native integers are disabled, the bool args are stored as float 0.0
1758
       * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
1759
       * actual AND opcode.
1760
       */
1761
      if (native_integers)
1762
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1763
      else
1764
         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1765
      break;
1766
 
1767
   case ir_binop_dot:
1768
      assert(ir->operands[0]->type->is_vector());
1769
      assert(ir->operands[0]->type == ir->operands[1]->type);
1770
      emit_dp(ir, result_dst, op[0], op[1],
1771
              ir->operands[0]->type->vector_elements);
1772
      break;
1773
 
1774
   case ir_unop_sqrt:
1775
      if (have_sqrt) {
1776
         emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
1777
      }
1778
      else {
1779
         /* sqrt(x) = x * rsq(x). */
1780
         emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1781
         emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1782
         /* For incoming channels <= 0, set the result to 0. */
1783
         op[0].negate = ~op[0].negate;
1784
         emit(ir, TGSI_OPCODE_CMP, result_dst,
1785
              op[0], result_src, st_src_reg_for_float(0.0));
1786
      }
1787
      break;
1788
   case ir_unop_rsq:
1789
      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1790
      break;
1791
   case ir_unop_i2f:
1792
      if (native_integers) {
1793
         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1794
         break;
1795
      }
1796
      /* fallthrough to next case otherwise */
1797
   case ir_unop_b2f:
1798
      if (native_integers) {
1799
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1800
         break;
1801
      }
1802
      /* fallthrough to next case otherwise */
1803
   case ir_unop_i2u:
1804
   case ir_unop_u2i:
1805
      /* Converting between signed and unsigned integers is a no-op. */
1806
      result_src = op[0];
1807
      break;
1808
   case ir_unop_b2i:
1809
      if (native_integers) {
1810
         /* Booleans are stored as integers using ~0 for true and 0 for false.
1811
          * GLSL requires that int(bool) return 1 for true and 0 for false.
1812
          * This conversion is done with AND, but it could be done with NEG.
1813
          */
1814
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1815
      } else {
1816
         /* Booleans and integers are both stored as floats when native
1817
          * integers are disabled.
1818
          */
1819
         result_src = op[0];
1820
      }
1821
      break;
1822
   case ir_unop_f2i:
1823
      if (native_integers)
1824
         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1825
      else
1826
         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1827
      break;
1828
   case ir_unop_f2u:
1829
      if (native_integers)
1830
         emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
1831
      else
1832
         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1833
      break;
1834
   case ir_unop_bitcast_f2i:
1835
      result_src = op[0];
1836
      result_src.type = GLSL_TYPE_INT;
1837
      break;
1838
   case ir_unop_bitcast_f2u:
1839
      result_src = op[0];
1840
      result_src.type = GLSL_TYPE_UINT;
1841
      break;
1842
   case ir_unop_bitcast_i2f:
1843
   case ir_unop_bitcast_u2f:
1844
      result_src = op[0];
1845
      result_src.type = GLSL_TYPE_FLOAT;
1846
      break;
1847
   case ir_unop_f2b:
1848
      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1849
      break;
1850
   case ir_unop_i2b:
1851
      if (native_integers)
1852
         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1853
      else
1854
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1855
      break;
1856
   case ir_unop_trunc:
1857
      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1858
      break;
1859
   case ir_unop_ceil:
1860
      emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
1861
      break;
1862
   case ir_unop_floor:
1863
      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1864
      break;
1865
   case ir_unop_round_even:
1866
      emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
1867
      break;
1868
   case ir_unop_fract:
1869
      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1870
      break;
1871
 
1872
   case ir_binop_min:
1873
      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1874
      break;
1875
   case ir_binop_max:
1876
      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1877
      break;
1878
   case ir_binop_pow:
1879
      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1880
      break;
1881
 
1882
   case ir_unop_bit_not:
1883
      if (native_integers) {
1884
         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1885
         break;
1886
      }
1887
   case ir_unop_u2f:
1888
      if (native_integers) {
1889
         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1890
         break;
1891
      }
1892
   case ir_binop_lshift:
1893
      if (native_integers) {
1894
         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
1895
         break;
1896
      }
1897
   case ir_binop_rshift:
1898
      if (native_integers) {
1899
         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
1900
         break;
1901
      }
1902
   case ir_binop_bit_and:
1903
      if (native_integers) {
1904
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1905
         break;
1906
      }
1907
   case ir_binop_bit_xor:
1908
      if (native_integers) {
1909
         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1910
         break;
1911
      }
1912
   case ir_binop_bit_or:
1913
      if (native_integers) {
1914
         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1915
         break;
1916
      }
1917
 
1918
      assert(!"GLSL 1.30 features unsupported");
1919
      break;
1920
 
1921
   case ir_binop_ubo_load: {
1922
      ir_constant *uniform_block = ir->operands[0]->as_constant();
1923
      ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1924
      unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
1925
      st_src_reg index_reg = get_temp(glsl_type::uint_type);
1926
      st_src_reg cbuf;
1927
 
1928
      cbuf.type = glsl_type::vec4_type->base_type;
1929
      cbuf.file = PROGRAM_CONSTANT;
1930
      cbuf.index = 0;
1931
      cbuf.index2D = uniform_block->value.u[0] + 1;
1932
      cbuf.reladdr = NULL;
1933
      cbuf.negate = 0;
1934
 
1935
      assert(ir->type->is_vector() || ir->type->is_scalar());
1936
 
1937
      if (const_offset_ir) {
1938
         index_reg = st_src_reg_for_int(const_offset / 16);
1939
      } else {
1940
         emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4));
1941
      }
1942
 
1943
      cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
1944
      cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
1945
                                    const_offset % 16 / 4,
1946
                                    const_offset % 16 / 4,
1947
                                    const_offset % 16 / 4);
1948
 
1949
      cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
1950
      memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
1951
 
1952
      if (ir->type->base_type == GLSL_TYPE_BOOL) {
1953
         emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
1954
      } else {
1955
         emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
1956
      }
1957
      break;
1958
   }
1959
   case ir_triop_lrp:
1960
      /* note: we have to reorder the three args here */
1961
      emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
1962
      break;
1963
   case ir_unop_pack_snorm_2x16:
1964
   case ir_unop_pack_unorm_2x16:
1965
   case ir_unop_pack_half_2x16:
1966
   case ir_unop_pack_snorm_4x8:
1967
   case ir_unop_pack_unorm_4x8:
1968
   case ir_unop_unpack_snorm_2x16:
1969
   case ir_unop_unpack_unorm_2x16:
1970
   case ir_unop_unpack_half_2x16:
1971
   case ir_unop_unpack_half_2x16_split_x:
1972
   case ir_unop_unpack_half_2x16_split_y:
1973
   case ir_unop_unpack_snorm_4x8:
1974
   case ir_unop_unpack_unorm_4x8:
1975
   case ir_binop_pack_half_2x16_split:
1976
   case ir_unop_bitfield_reverse:
1977
   case ir_unop_bit_count:
1978
   case ir_unop_find_msb:
1979
   case ir_unop_find_lsb:
1980
   case ir_binop_bfm:
1981
   case ir_triop_bfi:
1982
   case ir_triop_bitfield_extract:
1983
   case ir_quadop_bitfield_insert:
1984
   case ir_quadop_vector:
1985
   case ir_binop_vector_extract:
1986
   case ir_triop_vector_insert:
1987
      /* This operation is not supported, or should have already been handled.
1988
       */
1989
      assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
1990
      break;
1991
   }
1992
 
1993
   this->result = result_src;
1994
}
1995
 
1996
 
1997
void
1998
glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1999
{
2000
   st_src_reg src;
2001
   int i;
2002
   int swizzle[4];
2003
 
2004
   /* Note that this is only swizzles in expressions, not those on the left
2005
    * hand side of an assignment, which do write masking.  See ir_assignment
2006
    * for that.
2007
    */
2008
 
2009
   ir->val->accept(this);
2010
   src = this->result;
2011
   assert(src.file != PROGRAM_UNDEFINED);
2012
 
2013
   for (i = 0; i < 4; i++) {
2014
      if (i < ir->type->vector_elements) {
2015
         switch (i) {
2016
         case 0:
2017
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
2018
            break;
2019
         case 1:
2020
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
2021
            break;
2022
         case 2:
2023
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
2024
            break;
2025
         case 3:
2026
            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
2027
            break;
2028
         }
2029
      } else {
2030
         /* If the type is smaller than a vec4, replicate the last
2031
          * channel out.
2032
          */
2033
         swizzle[i] = swizzle[ir->type->vector_elements - 1];
2034
      }
2035
   }
2036
 
2037
   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2038
 
2039
   this->result = src;
2040
}
2041
 
2042
void
2043
glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
2044
{
2045
   variable_storage *entry = find_variable_storage(ir->var);
2046
   ir_variable *var = ir->var;
2047
 
2048
   if (!entry) {
2049
      switch (var->mode) {
2050
      case ir_var_uniform:
2051
         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
2052
        				       var->location);
2053
         this->variables.push_tail(entry);
2054
         break;
2055
      case ir_var_shader_in:
2056
         /* The linker assigns locations for varyings and attributes,
2057
          * including deprecated builtins (like gl_Color), user-assign
2058
          * generic attributes (glBindVertexLocation), and
2059
          * user-defined varyings.
2060
          */
2061
         assert(var->location != -1);
2062
         entry = new(mem_ctx) variable_storage(var,
2063
                                               PROGRAM_INPUT,
2064
                                               var->location);
2065
         break;
2066
      case ir_var_shader_out:
2067
         assert(var->location != -1);
2068
         entry = new(mem_ctx) variable_storage(var,
2069
                                               PROGRAM_OUTPUT,
2070
                                               var->location + var->index);
2071
         break;
2072
      case ir_var_system_value:
2073
         entry = new(mem_ctx) variable_storage(var,
2074
                                               PROGRAM_SYSTEM_VALUE,
2075
                                               var->location);
2076
         break;
2077
      case ir_var_auto:
2078
      case ir_var_temporary:
2079
         st_src_reg src = get_temp(var->type);
2080
 
2081
         entry = new(mem_ctx) variable_storage(var, src.file, src.index);
2082
         this->variables.push_tail(entry);
2083
 
2084
         break;
2085
      }
2086
 
2087
      if (!entry) {
2088
         printf("Failed to make storage for %s\n", var->name);
2089
         exit(1);
2090
      }
2091
   }
2092
 
2093
   this->result = st_src_reg(entry->file, entry->index, var->type);
2094
   if (!native_integers)
2095
      this->result.type = GLSL_TYPE_FLOAT;
2096
}
2097
 
2098
void
2099
glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
2100
{
2101
   ir_constant *index;
2102
   st_src_reg src;
2103
   int element_size = type_size(ir->type);
2104
 
2105
   index = ir->array_index->constant_expression_value();
2106
 
2107
   ir->array->accept(this);
2108
   src = this->result;
2109
 
2110
   if (index) {
2111
      src.index += index->value.i[0] * element_size;
2112
   } else {
2113
      /* Variable index array dereference.  It eats the "vec4" of the
2114
       * base of the array and an index that offsets the TGSI register
2115
       * index.
2116
       */
2117
      ir->array_index->accept(this);
2118
 
2119
      st_src_reg index_reg;
2120
 
2121
      if (element_size == 1) {
2122
         index_reg = this->result;
2123
      } else {
2124
         index_reg = get_temp(native_integers ?
2125
                              glsl_type::int_type : glsl_type::float_type);
2126
 
2127
         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
2128
              this->result, st_src_reg_for_type(index_reg.type, element_size));
2129
      }
2130
 
2131
      /* If there was already a relative address register involved, add the
2132
       * new and the old together to get the new offset.
2133
       */
2134
      if (src.reladdr != NULL) {
2135
         st_src_reg accum_reg = get_temp(native_integers ?
2136
                                glsl_type::int_type : glsl_type::float_type);
2137
 
2138
         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
2139
              index_reg, *src.reladdr);
2140
 
2141
         index_reg = accum_reg;
2142
      }
2143
 
2144
      src.reladdr = ralloc(mem_ctx, st_src_reg);
2145
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2146
   }
2147
 
2148
   /* If the type is smaller than a vec4, replicate the last channel out. */
2149
   if (ir->type->is_scalar() || ir->type->is_vector())
2150
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
2151
   else
2152
      src.swizzle = SWIZZLE_NOOP;
2153
 
2154
   /* Change the register type to the element type of the array. */
2155
   src.type = ir->type->base_type;
2156
 
2157
   this->result = src;
2158
}
2159
 
2160
void
2161
glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2162
{
2163
   unsigned int i;
2164
   const glsl_type *struct_type = ir->record->type;
2165
   int offset = 0;
2166
 
2167
   ir->record->accept(this);
2168
 
2169
   for (i = 0; i < struct_type->length; i++) {
2170
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2171
         break;
2172
      offset += type_size(struct_type->fields.structure[i].type);
2173
   }
2174
 
2175
   /* If the type is smaller than a vec4, replicate the last channel out. */
2176
   if (ir->type->is_scalar() || ir->type->is_vector())
2177
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2178
   else
2179
      this->result.swizzle = SWIZZLE_NOOP;
2180
 
2181
   this->result.index += offset;
2182
   this->result.type = ir->type->base_type;
2183
}
2184
 
2185
/**
2186
 * We want to be careful in assignment setup to hit the actual storage
2187
 * instead of potentially using a temporary like we might with the
2188
 * ir_dereference handler.
2189
 */
2190
static st_dst_reg
2191
get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
2192
{
2193
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
2194
    * access of a vector, it must be separated into a series conditional moves
2195
    * before reaching this point (see ir_vec_index_to_cond_assign).
2196
    */
2197
   assert(ir->as_dereference());
2198
   ir_dereference_array *deref_array = ir->as_dereference_array();
2199
   if (deref_array) {
2200
      assert(!deref_array->array->type->is_vector());
2201
   }
2202
 
2203
   /* Use the rvalue deref handler for the most part.  We'll ignore
2204
    * swizzles in it and write swizzles using writemask, though.
2205
    */
2206
   ir->accept(v);
2207
   return st_dst_reg(v->result);
2208
}
2209
 
2210
/**
2211
 * Process the condition of a conditional assignment
2212
 *
2213
 * Examines the condition of a conditional assignment to generate the optimal
2214
 * first operand of a \c CMP instruction.  If the condition is a relational
2215
 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
2216
 * used as the source for the \c CMP instruction.  Otherwise the comparison
2217
 * is processed to a boolean result, and the boolean result is used as the
2218
 * operand to the CMP instruction.
2219
 */
2220
bool
2221
glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
2222
{
2223
   ir_rvalue *src_ir = ir;
2224
   bool negate = true;
2225
   bool switch_order = false;
2226
 
2227
   ir_expression *const expr = ir->as_expression();
2228
   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2229
      bool zero_on_left = false;
2230
 
2231
      if (expr->operands[0]->is_zero()) {
2232
         src_ir = expr->operands[1];
2233
         zero_on_left = true;
2234
      } else if (expr->operands[1]->is_zero()) {
2235
         src_ir = expr->operands[0];
2236
         zero_on_left = false;
2237
      }
2238
 
2239
      /*      a is -  0  +            -  0  +
2240
       * (a <  0)  T  F  F  ( a < 0)  T  F  F
2241
       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
2242
       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2243
       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2244
       * (a >  0)  F  F  T  (-a < 0)  F  F  T
2245
       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
2246
       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
2247
       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
2248
       *
2249
       * Note that exchanging the order of 0 and 'a' in the comparison simply
2250
       * means that the value of 'a' should be negated.
2251
       */
2252
      if (src_ir != ir) {
2253
         switch (expr->operation) {
2254
         case ir_binop_less:
2255
            switch_order = false;
2256
            negate = zero_on_left;
2257
            break;
2258
 
2259
         case ir_binop_greater:
2260
            switch_order = false;
2261
            negate = !zero_on_left;
2262
            break;
2263
 
2264
         case ir_binop_lequal:
2265
            switch_order = true;
2266
            negate = !zero_on_left;
2267
            break;
2268
 
2269
         case ir_binop_gequal:
2270
            switch_order = true;
2271
            negate = zero_on_left;
2272
            break;
2273
 
2274
         default:
2275
            /* This isn't the right kind of comparison afterall, so make sure
2276
             * the whole condition is visited.
2277
             */
2278
            src_ir = ir;
2279
            break;
2280
         }
2281
      }
2282
   }
2283
 
2284
   src_ir->accept(this);
2285
 
2286
   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2287
    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
2288
    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2289
    * computing the condition.
2290
    */
2291
   if (negate)
2292
      this->result.negate = ~this->result.negate;
2293
 
2294
   return switch_order;
2295
}
2296
 
2297
void
2298
glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
2299
                                     st_dst_reg *l, st_src_reg *r)
2300
{
2301
   if (type->base_type == GLSL_TYPE_STRUCT) {
2302
      for (unsigned int i = 0; i < type->length; i++) {
2303
         emit_block_mov(ir, type->fields.structure[i].type, l, r);
2304
      }
2305
      return;
2306
   }
2307
 
2308
   if (type->is_array()) {
2309
      for (unsigned int i = 0; i < type->length; i++) {
2310
         emit_block_mov(ir, type->fields.array, l, r);
2311
      }
2312
      return;
2313
   }
2314
 
2315
   if (type->is_matrix()) {
2316
      const struct glsl_type *vec_type;
2317
 
2318
      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
2319
					 type->vector_elements, 1);
2320
 
2321
      for (int i = 0; i < type->matrix_columns; i++) {
2322
         emit_block_mov(ir, vec_type, l, r);
2323
      }
2324
      return;
2325
   }
2326
 
2327
   assert(type->is_scalar() || type->is_vector());
2328
 
2329
   r->type = type->base_type;
2330
   emit(ir, TGSI_OPCODE_MOV, *l, *r);
2331
   l->index++;
2332
   r->index++;
2333
}
2334
 
2335
void
2336
glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2337
{
2338
   st_dst_reg l;
2339
   st_src_reg r;
2340
   int i;
2341
 
2342
   ir->rhs->accept(this);
2343
   r = this->result;
2344
 
2345
   l = get_assignment_lhs(ir->lhs, this);
2346
 
2347
   /* FINISHME: This should really set to the correct maximal writemask for each
2348
    * FINISHME: component written (in the loops below).  This case can only
2349
    * FINISHME: occur for matrices, arrays, and structures.
2350
    */
2351
   if (ir->write_mask == 0) {
2352
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2353
      l.writemask = WRITEMASK_XYZW;
2354
   } else if (ir->lhs->type->is_scalar() &&
2355
              ir->lhs->variable_referenced()->mode == ir_var_shader_out) {
2356
      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
2357
       * FINISHME: W component of fragment shader output zero, work correctly.
2358
       */
2359
      l.writemask = WRITEMASK_XYZW;
2360
   } else {
2361
      int swizzles[4];
2362
      int first_enabled_chan = 0;
2363
      int rhs_chan = 0;
2364
 
2365
      l.writemask = ir->write_mask;
2366
 
2367
      for (int i = 0; i < 4; i++) {
2368
         if (l.writemask & (1 << i)) {
2369
            first_enabled_chan = GET_SWZ(r.swizzle, i);
2370
            break;
2371
         }
2372
      }
2373
 
2374
      /* Swizzle a small RHS vector into the channels being written.
2375
       *
2376
       * glsl ir treats write_mask as dictating how many channels are
2377
       * present on the RHS while TGSI treats write_mask as just
2378
       * showing which channels of the vec4 RHS get written.
2379
       */
2380
      for (int i = 0; i < 4; i++) {
2381
         if (l.writemask & (1 << i))
2382
            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2383
         else
2384
            swizzles[i] = first_enabled_chan;
2385
      }
2386
      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2387
        			swizzles[2], swizzles[3]);
2388
   }
2389
 
2390
   assert(l.file != PROGRAM_UNDEFINED);
2391
   assert(r.file != PROGRAM_UNDEFINED);
2392
 
2393
   if (ir->condition) {
2394
      const bool switch_order = this->process_move_condition(ir->condition);
2395
      st_src_reg condition = this->result;
2396
 
2397
      for (i = 0; i < type_size(ir->lhs->type); i++) {
2398
         st_src_reg l_src = st_src_reg(l);
2399
         st_src_reg condition_temp = condition;
2400
         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
2401
 
2402
         if (native_integers) {
2403
            /* This is necessary because TGSI's CMP instruction expects the
2404
             * condition to be a float, and we store booleans as integers.
2405
             * TODO: really want to avoid i2f path and use UCMP. Requires
2406
             * changes to process_move_condition though too.
2407
             */
2408
            condition_temp = get_temp(glsl_type::vec4_type);
2409
            condition.negate = 0;
2410
            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
2411
            condition_temp.swizzle = condition.swizzle;
2412
         }
2413
 
2414
         if (switch_order) {
2415
            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
2416
         } else {
2417
            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
2418
         }
2419
 
2420
         l.index++;
2421
         r.index++;
2422
      }
2423
   } else if (ir->rhs->as_expression() &&
2424
              this->instructions.get_tail() &&
2425
              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2426
              type_size(ir->lhs->type) == 1 &&
2427
              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
2428
      /* To avoid emitting an extra MOV when assigning an expression to a
2429
       * variable, emit the last instruction of the expression again, but
2430
       * replace the destination register with the target of the assignment.
2431
       * Dead code elimination will remove the original instruction.
2432
       */
2433
      glsl_to_tgsi_instruction *inst, *new_inst;
2434
      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2435
      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
2436
      new_inst->saturate = inst->saturate;
2437
      inst->dead_mask = inst->dst.writemask;
2438
   } else {
2439
      emit_block_mov(ir, ir->rhs->type, &l, &r);
2440
   }
2441
}
2442
 
2443
 
2444
void
2445
glsl_to_tgsi_visitor::visit(ir_constant *ir)
2446
{
2447
   st_src_reg src;
2448
   GLfloat stack_vals[4] = { 0 };
2449
   gl_constant_value *values = (gl_constant_value *) stack_vals;
2450
   GLenum gl_type = GL_NONE;
2451
   unsigned int i;
2452
   static int in_array = 0;
2453
   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
2454
 
2455
   /* Unfortunately, 4 floats is all we can get into
2456
    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
2457
    * aggregate constant and move each constant value into it.  If we
2458
    * get lucky, copy propagation will eliminate the extra moves.
2459
    */
2460
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2461
      st_src_reg temp_base = get_temp(ir->type);
2462
      st_dst_reg temp = st_dst_reg(temp_base);
2463
 
2464
      foreach_iter(exec_list_iterator, iter, ir->components) {
2465
         ir_constant *field_value = (ir_constant *)iter.get();
2466
         int size = type_size(field_value->type);
2467
 
2468
         assert(size > 0);
2469
 
2470
         field_value->accept(this);
2471
         src = this->result;
2472
 
2473
         for (i = 0; i < (unsigned int)size; i++) {
2474
            emit(ir, TGSI_OPCODE_MOV, temp, src);
2475
 
2476
            src.index++;
2477
            temp.index++;
2478
         }
2479
      }
2480
      this->result = temp_base;
2481
      return;
2482
   }
2483
 
2484
   if (ir->type->is_array()) {
2485
      st_src_reg temp_base = get_temp(ir->type);
2486
      st_dst_reg temp = st_dst_reg(temp_base);
2487
      int size = type_size(ir->type->fields.array);
2488
 
2489
      assert(size > 0);
2490
      in_array++;
2491
 
2492
      for (i = 0; i < ir->type->length; i++) {
2493
         ir->array_elements[i]->accept(this);
2494
         src = this->result;
2495
         for (int j = 0; j < size; j++) {
2496
            emit(ir, TGSI_OPCODE_MOV, temp, src);
2497
 
2498
            src.index++;
2499
            temp.index++;
2500
         }
2501
      }
2502
      this->result = temp_base;
2503
      in_array--;
2504
      return;
2505
   }
2506
 
2507
   if (ir->type->is_matrix()) {
2508
      st_src_reg mat = get_temp(ir->type);
2509
      st_dst_reg mat_column = st_dst_reg(mat);
2510
 
2511
      for (i = 0; i < ir->type->matrix_columns; i++) {
2512
         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
2513
         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
2514
 
2515
         src = st_src_reg(file, -1, ir->type->base_type);
2516
         src.index = add_constant(file,
2517
                                  values,
2518
                                  ir->type->vector_elements,
2519
                                  GL_FLOAT,
2520
                                  &src.swizzle);
2521
         emit(ir, TGSI_OPCODE_MOV, mat_column, src);
2522
 
2523
         mat_column.index++;
2524
      }
2525
 
2526
      this->result = mat;
2527
      return;
2528
   }
2529
 
2530
   switch (ir->type->base_type) {
2531
   case GLSL_TYPE_FLOAT:
2532
      gl_type = GL_FLOAT;
2533
      for (i = 0; i < ir->type->vector_elements; i++) {
2534
         values[i].f = ir->value.f[i];
2535
      }
2536
      break;
2537
   case GLSL_TYPE_UINT:
2538
      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
2539
      for (i = 0; i < ir->type->vector_elements; i++) {
2540
         if (native_integers)
2541
            values[i].u = ir->value.u[i];
2542
         else
2543
            values[i].f = ir->value.u[i];
2544
      }
2545
      break;
2546
   case GLSL_TYPE_INT:
2547
      gl_type = native_integers ? GL_INT : GL_FLOAT;
2548
      for (i = 0; i < ir->type->vector_elements; i++) {
2549
         if (native_integers)
2550
            values[i].i = ir->value.i[i];
2551
         else
2552
            values[i].f = ir->value.i[i];
2553
      }
2554
      break;
2555
   case GLSL_TYPE_BOOL:
2556
      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
2557
      for (i = 0; i < ir->type->vector_elements; i++) {
2558
         if (native_integers)
2559
            values[i].u = ir->value.b[i] ? ~0 : 0;
2560
         else
2561
            values[i].f = ir->value.b[i];
2562
      }
2563
      break;
2564
   default:
2565
      assert(!"Non-float/uint/int/bool constant");
2566
   }
2567
 
2568
   this->result = st_src_reg(file, -1, ir->type);
2569
   this->result.index = add_constant(file,
2570
                                     values,
2571
                                     ir->type->vector_elements,
2572
                                     gl_type,
2573
                                     &this->result.swizzle);
2574
}
2575
 
2576
/**
 * Look up, or create on first use, the function_entry for a signature.
 *
 * A new entry gets the next signature id, temporary storage for each of the
 * signature's parameters (recorded in this->variables) and, for non-void
 * functions, a temporary for the return value.  The entry is appended to
 * this->function_signatures before being returned.
 */
function_entry *
glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
{
   /* Reuse the entry if this signature has been seen before. */
   foreach_iter(exec_list_iterator, entry_iter, this->function_signatures) {
      function_entry *const known = (function_entry *)entry_iter.get();

      if (known->sig == sig)
         return known;
   }

   function_entry *const entry = ralloc(mem_ctx, function_entry);
   entry->sig = sig;
   entry->sig_id = this->next_signature_id++;
   entry->bgn_inst = NULL;

   /* Give every parameter its own temporary. */
   foreach_iter(exec_list_iterator, param_iter, sig->parameters) {
      ir_variable *const param = (ir_variable *)param_iter.get();

      /* No storage is expected to exist for the parameter yet. */
      variable_storage *storage = find_variable_storage(param);
      assert(!storage);

      st_src_reg temp = get_temp(param->type);

      storage = new(mem_ctx) variable_storage(param, temp.file, temp.index);
      this->variables.push_tail(storage);
   }

   /* Void functions have no return register. */
   if (sig->return_type->is_void()) {
      entry->return_reg = undef_src;
   } else {
      entry->return_reg = get_temp(sig->return_type);
   }

   this->function_signatures.push_tail(entry);
   return entry;
}
2616
 
2617
void
2618
glsl_to_tgsi_visitor::visit(ir_call *ir)
2619
{
2620
   glsl_to_tgsi_instruction *call_inst;
2621
   ir_function_signature *sig = ir->callee;
2622
   function_entry *entry = get_function_signature(sig);
2623
   int i;
2624
 
2625
   /* Process in parameters. */
2626
   exec_list_iterator sig_iter = sig->parameters.iterator();
2627
   foreach_iter(exec_list_iterator, iter, *ir) {
2628
      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2629
      ir_variable *param = (ir_variable *)sig_iter.get();
2630
 
2631
      if (param->mode == ir_var_function_in ||
2632
          param->mode == ir_var_function_inout) {
2633
         variable_storage *storage = find_variable_storage(param);
2634
         assert(storage);
2635
 
2636
         param_rval->accept(this);
2637
         st_src_reg r = this->result;
2638
 
2639
         st_dst_reg l;
2640
         l.file = storage->file;
2641
         l.index = storage->index;
2642
         l.reladdr = NULL;
2643
         l.writemask = WRITEMASK_XYZW;
2644
         l.cond_mask = COND_TR;
2645
 
2646
         for (i = 0; i < type_size(param->type); i++) {
2647
            emit(ir, TGSI_OPCODE_MOV, l, r);
2648
            l.index++;
2649
            r.index++;
2650
         }
2651
      }
2652
 
2653
      sig_iter.next();
2654
   }
2655
   assert(!sig_iter.has_next());
2656
 
2657
   /* Emit call instruction */
2658
   call_inst = emit(ir, TGSI_OPCODE_CAL);
2659
   call_inst->function = entry;
2660
 
2661
   /* Process out parameters. */
2662
   sig_iter = sig->parameters.iterator();
2663
   foreach_iter(exec_list_iterator, iter, *ir) {
2664
      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2665
      ir_variable *param = (ir_variable *)sig_iter.get();
2666
 
2667
      if (param->mode == ir_var_function_out ||
2668
          param->mode == ir_var_function_inout) {
2669
         variable_storage *storage = find_variable_storage(param);
2670
         assert(storage);
2671
 
2672
         st_src_reg r;
2673
         r.file = storage->file;
2674
         r.index = storage->index;
2675
         r.reladdr = NULL;
2676
         r.swizzle = SWIZZLE_NOOP;
2677
         r.negate = 0;
2678
 
2679
         param_rval->accept(this);
2680
         st_dst_reg l = st_dst_reg(this->result);
2681
 
2682
         for (i = 0; i < type_size(param->type); i++) {
2683
            emit(ir, TGSI_OPCODE_MOV, l, r);
2684
            l.index++;
2685
            r.index++;
2686
         }
2687
      }
2688
 
2689
      sig_iter.next();
2690
   }
2691
   assert(!sig_iter.has_next());
2692
 
2693
   /* Process return value. */
2694
   this->result = entry->return_reg;
2695
}
2696
 
2697
void
2698
glsl_to_tgsi_visitor::visit(ir_texture *ir)
2699
{
2700
   st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset, sample_index;
2701
   st_dst_reg result_dst, coord_dst, cube_sc_dst;
2702
   glsl_to_tgsi_instruction *inst = NULL;
2703
   unsigned opcode = TGSI_OPCODE_NOP;
2704
   const glsl_type *sampler_type = ir->sampler->type;
2705
   bool is_cube_array = false;
2706
 
2707
   /* if we are a cube array sampler */
2708
   if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
2709
        sampler_type->sampler_array)) {
2710
      is_cube_array = true;
2711
   }
2712
 
2713
   if (ir->coordinate) {
2714
      ir->coordinate->accept(this);
2715
 
2716
      /* Put our coords in a temp.  We'll need to modify them for shadow,
2717
       * projection, or LOD, so the only case we'd use it as is is if
2718
       * we're doing plain old texturing.  The optimization passes on
2719
       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
2720
       */
2721
      coord = get_temp(glsl_type::vec4_type);
2722
      coord_dst = st_dst_reg(coord);
2723
      coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
2724
      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2725
   }
2726
 
2727
   if (ir->projector) {
2728
      ir->projector->accept(this);
2729
      projector = this->result;
2730
   }
2731
 
2732
   /* Storage for our result.  Ideally for an assignment we'd be using
2733
    * the actual storage for the result here, instead.
2734
    */
2735
   result_src = get_temp(ir->type);
2736
   result_dst = st_dst_reg(result_src);
2737
 
2738
   switch (ir->op) {
2739
   case ir_tex:
2740
      opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
2741
      if (ir->offset) {
2742
         ir->offset->accept(this);
2743
         offset = this->result;
2744
      }
2745
      break;
2746
   case ir_txb:
2747
      opcode = is_cube_array ? TGSI_OPCODE_TXB2 : TGSI_OPCODE_TXB;
2748
      ir->lod_info.bias->accept(this);
2749
      lod_info = this->result;
2750
      if (ir->offset) {
2751
         ir->offset->accept(this);
2752
         offset = this->result;
2753
      }
2754
      break;
2755
   case ir_txl:
2756
      opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
2757
      ir->lod_info.lod->accept(this);
2758
      lod_info = this->result;
2759
      if (ir->offset) {
2760
         ir->offset->accept(this);
2761
         offset = this->result;
2762
      }
2763
      break;
2764
   case ir_txd:
2765
      opcode = TGSI_OPCODE_TXD;
2766
      ir->lod_info.grad.dPdx->accept(this);
2767
      dx = this->result;
2768
      ir->lod_info.grad.dPdy->accept(this);
2769
      dy = this->result;
2770
      if (ir->offset) {
2771
         ir->offset->accept(this);
2772
         offset = this->result;
2773
      }
2774
      break;
2775
   case ir_txs:
2776
      opcode = TGSI_OPCODE_TXQ;
2777
      ir->lod_info.lod->accept(this);
2778
      lod_info = this->result;
2779
      break;
2780
   case ir_txf:
2781
      opcode = TGSI_OPCODE_TXF;
2782
      ir->lod_info.lod->accept(this);
2783
      lod_info = this->result;
2784
      if (ir->offset) {
2785
         ir->offset->accept(this);
2786
         offset = this->result;
2787
      }
2788
      break;
2789
   case ir_txf_ms:
2790
      opcode = TGSI_OPCODE_TXF;
2791
      ir->lod_info.sample_index->accept(this);
2792
      sample_index = this->result;
2793
      break;
2794
   case ir_lod:
2795
      assert(!"Unexpected ir_lod opcode");
2796
      break;
2797
   }
2798
 
2799
   if (ir->projector) {
2800
      if (opcode == TGSI_OPCODE_TEX) {
2801
         /* Slot the projector in as the last component of the coord. */
2802
         coord_dst.writemask = WRITEMASK_W;
2803
         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
2804
         coord_dst.writemask = WRITEMASK_XYZW;
2805
         opcode = TGSI_OPCODE_TXP;
2806
      } else {
2807
         st_src_reg coord_w = coord;
2808
         coord_w.swizzle = SWIZZLE_WWWW;
2809
 
2810
         /* For the other TEX opcodes there's no projective version
2811
          * since the last slot is taken up by LOD info.  Do the
2812
          * projective divide now.
2813
          */
2814
         coord_dst.writemask = WRITEMASK_W;
2815
         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
2816
 
2817
         /* In the case where we have to project the coordinates "by hand,"
2818
          * the shadow comparator value must also be projected.
2819
          */
2820
         st_src_reg tmp_src = coord;
2821
         if (ir->shadow_comparitor) {
2822
            /* Slot the shadow value in as the second to last component of the
2823
             * coord.
2824
             */
2825
            ir->shadow_comparitor->accept(this);
2826
 
2827
            tmp_src = get_temp(glsl_type::vec4_type);
2828
            st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2829
 
2830
	    /* Projective division not allowed for array samplers. */
2831
	    assert(!sampler_type->sampler_array);
2832
 
2833
            tmp_dst.writemask = WRITEMASK_Z;
2834
            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
2835
 
2836
            tmp_dst.writemask = WRITEMASK_XY;
2837
            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
2838
         }
2839
 
2840
         coord_dst.writemask = WRITEMASK_XYZ;
2841
         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
2842
 
2843
         coord_dst.writemask = WRITEMASK_XYZW;
2844
         coord.swizzle = SWIZZLE_XYZW;
2845
      }
2846
   }
2847
 
2848
   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
2849
    * comparator was put in the correct place (and projected) by the code,
2850
    * above, that handles by-hand projection.
2851
    */
2852
   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
2853
      /* Slot the shadow value in as the second to last component of the
2854
       * coord.
2855
       */
2856
      ir->shadow_comparitor->accept(this);
2857
 
2858
      if (is_cube_array) {
2859
         cube_sc = get_temp(glsl_type::float_type);
2860
         cube_sc_dst = st_dst_reg(cube_sc);
2861
         cube_sc_dst.writemask = WRITEMASK_X;
2862
         emit(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
2863
         cube_sc_dst.writemask = WRITEMASK_X;
2864
      }
2865
      else {
2866
         if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
2867
              sampler_type->sampler_array) ||
2868
             sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
2869
            coord_dst.writemask = WRITEMASK_W;
2870
         } else {
2871
            coord_dst.writemask = WRITEMASK_Z;
2872
         }
2873
 
2874
         emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2875
         coord_dst.writemask = WRITEMASK_XYZW;
2876
      }
2877
   }
2878
 
2879
   if (ir->op == ir_txf_ms) {
2880
      coord_dst.writemask = WRITEMASK_W;
2881
      emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
2882
      coord_dst.writemask = WRITEMASK_XYZW;
2883
   } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
2884
       opcode == TGSI_OPCODE_TXF) {
2885
      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
2886
      coord_dst.writemask = WRITEMASK_W;
2887
      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
2888
      coord_dst.writemask = WRITEMASK_XYZW;
2889
   }
2890
 
2891
   if (opcode == TGSI_OPCODE_TXD)
2892
      inst = emit(ir, opcode, result_dst, coord, dx, dy);
2893
   else if (opcode == TGSI_OPCODE_TXQ)
2894
      inst = emit(ir, opcode, result_dst, lod_info);
2895
   else if (opcode == TGSI_OPCODE_TXF) {
2896
      inst = emit(ir, opcode, result_dst, coord);
2897
   } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
2898
      inst = emit(ir, opcode, result_dst, coord, lod_info);
2899
   } else if (opcode == TGSI_OPCODE_TEX2) {
2900
      inst = emit(ir, opcode, result_dst, coord, cube_sc);
2901
   } else
2902
      inst = emit(ir, opcode, result_dst, coord);
2903
 
2904
   if (ir->shadow_comparitor)
2905
      inst->tex_shadow = GL_TRUE;
2906
 
2907
   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2908
        					   this->shader_program,
2909
        					   this->prog);
2910
 
2911
   if (ir->offset) {
2912
       inst->tex_offset_num_offset = 1;
2913
       inst->tex_offsets[0].Index = offset.index;
2914
       inst->tex_offsets[0].File = offset.file;
2915
       inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
2916
       inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
2917
       inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
2918
   }
2919
 
2920
   switch (sampler_type->sampler_dimensionality) {
2921
   case GLSL_SAMPLER_DIM_1D:
2922
      inst->tex_target = (sampler_type->sampler_array)
2923
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2924
      break;
2925
   case GLSL_SAMPLER_DIM_2D:
2926
      inst->tex_target = (sampler_type->sampler_array)
2927
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2928
      break;
2929
   case GLSL_SAMPLER_DIM_3D:
2930
      inst->tex_target = TEXTURE_3D_INDEX;
2931
      break;
2932
   case GLSL_SAMPLER_DIM_CUBE:
2933
      inst->tex_target = (sampler_type->sampler_array)
2934
         ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
2935
      break;
2936
   case GLSL_SAMPLER_DIM_RECT:
2937
      inst->tex_target = TEXTURE_RECT_INDEX;
2938
      break;
2939
   case GLSL_SAMPLER_DIM_BUF:
2940
      inst->tex_target = TEXTURE_BUFFER_INDEX;
2941
      break;
2942
   case GLSL_SAMPLER_DIM_EXTERNAL:
2943
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
2944
      break;
2945
   case GLSL_SAMPLER_DIM_MS:
2946
      inst->tex_target = (sampler_type->sampler_array)
2947
         ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
2948
      break;
2949
   default:
2950
      assert(!"Should not get here.");
2951
   }
2952
 
2953
   this->result = result_src;
2954
}
2955
 
2956
void
2957
glsl_to_tgsi_visitor::visit(ir_return *ir)
2958
{
2959
   if (ir->get_value()) {
2960
      st_dst_reg l;
2961
      int i;
2962
 
2963
      assert(current_function);
2964
 
2965
      ir->get_value()->accept(this);
2966
      st_src_reg r = this->result;
2967
 
2968
      l = st_dst_reg(current_function->return_reg);
2969
 
2970
      for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2971
         emit(ir, TGSI_OPCODE_MOV, l, r);
2972
         l.index++;
2973
         r.index++;
2974
      }
2975
   }
2976
 
2977
   emit(ir, TGSI_OPCODE_RET);
2978
}
2979
 
2980
void
2981
glsl_to_tgsi_visitor::visit(ir_discard *ir)
2982
{
2983
   if (ir->condition) {
2984
      ir->condition->accept(this);
2985
      this->result.negate = ~this->result.negate;
2986
      emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, this->result);
2987
   } else {
2988
      /* unconditional kil */
2989
      emit(ir, TGSI_OPCODE_KILL);
2990
   }
2991
}
2992
 
2993
void
2994
glsl_to_tgsi_visitor::visit(ir_if *ir)
2995
{
2996
   unsigned if_opcode;
2997
   glsl_to_tgsi_instruction *if_inst;
2998
 
2999
   ir->condition->accept(this);
3000
   assert(this->result.file != PROGRAM_UNDEFINED);
3001
 
3002
   if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
3003
 
3004
   if_inst = emit(ir->condition, if_opcode, undef_dst, this->result);
3005
 
3006
   this->instructions.push_tail(if_inst);
3007
 
3008
   visit_exec_list(&ir->then_instructions, this);
3009
 
3010
   if (!ir->else_instructions.is_empty()) {
3011
      emit(ir->condition, TGSI_OPCODE_ELSE);
3012
      visit_exec_list(&ir->else_instructions, this);
3013
   }
3014
 
3015
   if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
3016
}
3017
 
3018
/**
 * Set up an empty visitor.  A private ralloc context is created here and
 * released again by the destructor.
 */
glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
{
   /* No expression has been evaluated yet. */
   result.file = PROGRAM_UNDEFINED;
   current_function = NULL;

   /* Allocation counters. */
   next_temp = 1;
   next_array = 0;
   next_signature_id = 1;
   num_immediates = 0;
   num_address_regs = 0;

   /* Usage tracking and translation options. */
   samplers_used = 0;
   indirect_addr_consts = false;
   glsl_version = 0;
   native_integers = false;

   /* Memory context owned by this visitor. */
   mem_ctx = ralloc_context(NULL);

   /* Left unset here; assigned by whoever drives the translation. */
   ctx = NULL;
   prog = NULL;
   shader_program = NULL;
   options = NULL;
}
3037
 
3038
/** Release the ralloc context created by the constructor. */
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
{
   ralloc_free(this->mem_ctx);
}
3042
 
3043
/** C-linkage entry point that destroys a visitor with C++ delete. */
extern "C" void
free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
{
   delete v;
}
3047
 
3048
 
3049
/**
3050
 * Count resources used by the given gpu program (number of texture
3051
 * samplers, etc).
3052
 */
3053
static void
3054
count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
3055
{
3056
   v->samplers_used = 0;
3057
 
3058
   foreach_iter(exec_list_iterator, iter, v->instructions) {
3059
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3060
 
3061
      if (is_tex_instruction(inst->op)) {
3062
         v->samplers_used |= 1 << inst->sampler;
3063
 
3064
         if (inst->tex_shadow) {
3065
            prog->ShadowSamplers |= 1 << inst->sampler;
3066
         }
3067
      }
3068
   }
3069
 
3070
   prog->SamplersUsed = v->samplers_used;
3071
 
3072
   if (v->shader_program != NULL)
3073
      _mesa_update_shader_textures_used(v->shader_program, prog);
3074
}
3075
 
3076
static void
3077
set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
3078
        		struct gl_shader_program *shader_program,
3079
        		const char *name, const glsl_type *type,
3080
        		ir_constant *val)
3081
{
3082
   if (type->is_record()) {
3083
      ir_constant *field_constant;
3084
 
3085
      field_constant = (ir_constant *)val->components.get_head();
3086
 
3087
      for (unsigned int i = 0; i < type->length; i++) {
3088
         const glsl_type *field_type = type->fields.structure[i].type;
3089
         const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
3090
        				    type->fields.structure[i].name);
3091
         set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
3092
        			 field_type, field_constant);
3093
         field_constant = (ir_constant *)field_constant->next;
3094
      }
3095
      return;
3096
   }
3097
 
3098
   unsigned offset;
3099
   unsigned index = _mesa_get_uniform_location(ctx, shader_program, name,
3100
					       &offset);
3101
   if (offset == GL_INVALID_INDEX) {
3102
      fail_link(shader_program,
3103
        	"Couldn't find uniform for initializer %s\n", name);
3104
      return;
3105
   }
3106
   int loc = _mesa_uniform_merge_location_offset(shader_program, index, offset);
3107
 
3108
   for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
3109
      ir_constant *element;
3110
      const glsl_type *element_type;
3111
      if (type->is_array()) {
3112
         element = val->array_elements[i];
3113
         element_type = type->fields.array;
3114
      } else {
3115
         element = val;
3116
         element_type = type;
3117
      }
3118
 
3119
      void *values;
3120
 
3121
      if (element_type->base_type == GLSL_TYPE_BOOL) {
3122
         int *conv = ralloc_array(mem_ctx, int, element_type->components());
3123
         for (unsigned int j = 0; j < element_type->components(); j++) {
3124
            conv[j] = element->value.b[j];
3125
         }
3126
         values = (void *)conv;
3127
         element_type = glsl_type::get_instance(GLSL_TYPE_INT,
3128
        					element_type->vector_elements,
3129
        					1);
3130
      } else {
3131
         values = &element->value;
3132
      }
3133
 
3134
      if (element_type->is_matrix()) {
3135
         _mesa_uniform_matrix(ctx, shader_program,
3136
        		      element_type->matrix_columns,
3137
        		      element_type->vector_elements,
3138
        		      loc, 1, GL_FALSE, (GLfloat *)values);
3139
      } else {
3140
         _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
3141
        	       values, element_type->gl_type);
3142
      }
3143
 
3144
      loc++;
3145
   }
3146
}
3147
 
3148
/**
3149
 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
3150
 * are read from the given src in this instruction
3151
 */
3152
static int
3153
get_src_arg_mask(st_dst_reg dst, st_src_reg src)
3154
{
3155
   int read_mask = 0, comp;
3156
 
3157
   /* Now, given the src swizzle and the written channels, find which
3158
    * components are actually read
3159
    */
3160
   for (comp = 0; comp < 4; ++comp) {
3161
      const unsigned coord = GET_SWZ(src.swizzle, comp);
3162
      ASSERT(coord < 4);
3163
      if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
3164
         read_mask |= 1 << coord;
3165
   }
3166
 
3167
   return read_mask;
3168
}
3169
 
3170
/**
3171
 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
3172
 * instruction is the first instruction to write to register T0.  There are
3173
 * several lowering passes done in GLSL IR (e.g. branches and
3174
 * relative addressing) that create a large number of conditional assignments
3175
 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
3176
 *
3177
 * Here is why this conversion is safe:
3178
 * CMP T0, T1 T2 T0 can be expanded to:
3179
 * if (T1 < 0.0)
3180
 * 	MOV T0, T2;
3181
 * else
3182
 * 	MOV T0, T0;
3183
 *
3184
 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
3185
 * as the original program.  If (T1 < 0.0) evaluates to false, executing
3186
 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
3187
 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
3188
 * because any instruction that was going to read from T0 after this was going
3189
 * to read a garbage value anyway.
3190
 */
3191
void
3192
glsl_to_tgsi_visitor::simplify_cmp(void)
3193
{
3194
   unsigned *tempWrites;
3195
   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
3196
 
3197
   tempWrites = new unsigned[MAX_TEMPS];
3198
   if (!tempWrites) {
3199
      return;
3200
   }
3201
   memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
3202
   memset(outputWrites, 0, sizeof(outputWrites));
3203
 
3204
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3205
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3206
      unsigned prevWriteMask = 0;
3207
 
3208
      /* Give up if we encounter relative addressing or flow control. */
3209
      if (inst->dst.reladdr ||
3210
          tgsi_get_opcode_info(inst->op)->is_branch ||
3211
          inst->op == TGSI_OPCODE_BGNSUB ||
3212
          inst->op == TGSI_OPCODE_CONT ||
3213
          inst->op == TGSI_OPCODE_END ||
3214
          inst->op == TGSI_OPCODE_ENDSUB ||
3215
          inst->op == TGSI_OPCODE_RET) {
3216
         break;
3217
      }
3218
 
3219
      if (inst->dst.file == PROGRAM_OUTPUT) {
3220
         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
3221
         prevWriteMask = outputWrites[inst->dst.index];
3222
         outputWrites[inst->dst.index] |= inst->dst.writemask;
3223
      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
3224
         assert(inst->dst.index < MAX_TEMPS);
3225
         prevWriteMask = tempWrites[inst->dst.index];
3226
         tempWrites[inst->dst.index] |= inst->dst.writemask;
3227
      } else
3228
         continue;
3229
 
3230
      /* For a CMP to be considered a conditional write, the destination
3231
       * register and source register two must be the same. */
3232
      if (inst->op == TGSI_OPCODE_CMP
3233
          && !(inst->dst.writemask & prevWriteMask)
3234
          && inst->src[2].file == inst->dst.file
3235
          && inst->src[2].index == inst->dst.index
3236
          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
3237
 
3238
         inst->op = TGSI_OPCODE_MOV;
3239
         inst->src[0] = inst->src[1];
3240
      }
3241
   }
3242
 
3243
   delete [] tempWrites;
3244
}
3245
 
3246
/* Replaces all references to a temporary register index with another index. */
3247
void
3248
glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
3249
{
3250
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3251
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3252
      unsigned j;
3253
 
3254
      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3255
         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3256
             inst->src[j].index == index) {
3257
            inst->src[j].index = new_index;
3258
         }
3259
      }
3260
 
3261
      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3262
         inst->dst.index = new_index;
3263
      }
3264
   }
3265
}
3266
 
3267
int
3268
glsl_to_tgsi_visitor::get_first_temp_read(int index)
3269
{
3270
   int depth = 0; /* loop depth */
3271
   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3272
   unsigned i = 0, j;
3273
 
3274
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3275
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3276
 
3277
      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3278
         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3279
             inst->src[j].index == index) {
3280
            return (depth == 0) ? i : loop_start;
3281
         }
3282
      }
3283
 
3284
      if (inst->op == TGSI_OPCODE_BGNLOOP) {
3285
         if(depth++ == 0)
3286
            loop_start = i;
3287
      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3288
         if (--depth == 0)
3289
            loop_start = -1;
3290
      }
3291
      assert(depth >= 0);
3292
 
3293
      i++;
3294
   }
3295
 
3296
   return -1;
3297
}
3298
 
3299
int
3300
glsl_to_tgsi_visitor::get_first_temp_write(int index)
3301
{
3302
   int depth = 0; /* loop depth */
3303
   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3304
   int i = 0;
3305
 
3306
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3307
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3308
 
3309
      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3310
         return (depth == 0) ? i : loop_start;
3311
      }
3312
 
3313
      if (inst->op == TGSI_OPCODE_BGNLOOP) {
3314
         if(depth++ == 0)
3315
            loop_start = i;
3316
      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3317
         if (--depth == 0)
3318
            loop_start = -1;
3319
      }
3320
      assert(depth >= 0);
3321
 
3322
      i++;
3323
   }
3324
 
3325
   return -1;
3326
}
3327
 
3328
int
3329
glsl_to_tgsi_visitor::get_last_temp_read(int index)
3330
{
3331
   int depth = 0; /* loop depth */
3332
   int last = -1; /* index of last instruction that reads the temporary */
3333
   unsigned i = 0, j;
3334
 
3335
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3336
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3337
 
3338
      for (j=0; j < num_inst_src_regs(inst->op); j++) {
3339
         if (inst->src[j].file == PROGRAM_TEMPORARY &&
3340
             inst->src[j].index == index) {
3341
            last = (depth == 0) ? i : -2;
3342
         }
3343
      }
3344
 
3345
      if (inst->op == TGSI_OPCODE_BGNLOOP)
3346
         depth++;
3347
      else if (inst->op == TGSI_OPCODE_ENDLOOP)
3348
         if (--depth == 0 && last == -2)
3349
            last = i;
3350
      assert(depth >= 0);
3351
 
3352
      i++;
3353
   }
3354
 
3355
   assert(last >= -1);
3356
   return last;
3357
}
3358
 
3359
int
3360
glsl_to_tgsi_visitor::get_last_temp_write(int index)
3361
{
3362
   int depth = 0; /* loop depth */
3363
   int last = -1; /* index of last instruction that writes to the temporary */
3364
   int i = 0;
3365
 
3366
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3367
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3368
 
3369
      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
3370
         last = (depth == 0) ? i : -2;
3371
 
3372
      if (inst->op == TGSI_OPCODE_BGNLOOP)
3373
         depth++;
3374
      else if (inst->op == TGSI_OPCODE_ENDLOOP)
3375
         if (--depth == 0 && last == -2)
3376
            last = i;
3377
      assert(depth >= 0);
3378
 
3379
      i++;
3380
   }
3381
 
3382
   assert(last >= -1);
3383
   return last;
3384
}
3385
 
3386
/*
3387
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
3388
 * channels for copy propagation and updates following instructions to
3389
 * use the original versions.
3390
 *
3391
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3392
 * will occur.  As an example, a TXP production before this pass:
3393
 *
3394
 * 0: MOV TEMP[1], INPUT[4].xyyy;
3395
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3396
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
3397
 *
3398
 * and after:
3399
 *
3400
 * 0: MOV TEMP[1], INPUT[4].xyyy;
3401
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3402
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3403
 *
3404
 * which allows for dead code elimination on TEMP[1]'s writes.
3405
 */
3406
void
3407
glsl_to_tgsi_visitor::copy_propagate(void)
3408
{
3409
   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
3410
        					    glsl_to_tgsi_instruction *,
3411
        					    this->next_temp * 4);
3412
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3413
   int level = 0;
3414
 
3415
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3416
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3417
 
3418
      assert(inst->dst.file != PROGRAM_TEMPORARY
3419
             || inst->dst.index < this->next_temp);
3420
 
3421
      /* First, do any copy propagation possible into the src regs. */
3422
      for (int r = 0; r < 3; r++) {
3423
         glsl_to_tgsi_instruction *first = NULL;
3424
         bool good = true;
3425
         int acp_base = inst->src[r].index * 4;
3426
 
3427
         if (inst->src[r].file != PROGRAM_TEMPORARY ||
3428
             inst->src[r].reladdr)
3429
            continue;
3430
 
3431
         /* See if we can find entries in the ACP consisting of MOVs
3432
          * from the same src register for all the swizzled channels
3433
          * of this src register reference.
3434
          */
3435
         for (int i = 0; i < 4; i++) {
3436
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3437
            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
3438
 
3439
            if (!copy_chan) {
3440
               good = false;
3441
               break;
3442
            }
3443
 
3444
            assert(acp_level[acp_base + src_chan] <= level);
3445
 
3446
            if (!first) {
3447
               first = copy_chan;
3448
            } else {
3449
               if (first->src[0].file != copy_chan->src[0].file ||
3450
        	   first->src[0].index != copy_chan->src[0].index) {
3451
        	  good = false;
3452
        	  break;
3453
               }
3454
            }
3455
         }
3456
 
3457
         if (good) {
3458
            /* We've now validated that we can copy-propagate to
3459
             * replace this src register reference.  Do it.
3460
             */
3461
            inst->src[r].file = first->src[0].file;
3462
            inst->src[r].index = first->src[0].index;
3463
 
3464
            int swizzle = 0;
3465
            for (int i = 0; i < 4; i++) {
3466
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3467
               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
3468
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
3469
        		   (3 * i));
3470
            }
3471
            inst->src[r].swizzle = swizzle;
3472
         }
3473
      }
3474
 
3475
      switch (inst->op) {
3476
      case TGSI_OPCODE_BGNLOOP:
3477
      case TGSI_OPCODE_ENDLOOP:
3478
         /* End of a basic block, clear the ACP entirely. */
3479
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3480
         break;
3481
 
3482
      case TGSI_OPCODE_IF:
3483
      case TGSI_OPCODE_UIF:
3484
         ++level;
3485
         break;
3486
 
3487
      case TGSI_OPCODE_ENDIF:
3488
      case TGSI_OPCODE_ELSE:
3489
         /* Clear all channels written inside the block from the ACP, but
3490
          * leaving those that were not touched.
3491
          */
3492
         for (int r = 0; r < this->next_temp; r++) {
3493
            for (int c = 0; c < 4; c++) {
3494
               if (!acp[4 * r + c])
3495
        	  continue;
3496
 
3497
               if (acp_level[4 * r + c] >= level)
3498
        	  acp[4 * r + c] = NULL;
3499
            }
3500
         }
3501
         if (inst->op == TGSI_OPCODE_ENDIF)
3502
            --level;
3503
         break;
3504
 
3505
      default:
3506
         /* Continuing the block, clear any written channels from
3507
          * the ACP.
3508
          */
3509
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
3510
            /* Any temporary might be written, so no copy propagation
3511
             * across this instruction.
3512
             */
3513
            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3514
         } else if (inst->dst.file == PROGRAM_OUTPUT &&
3515
        	    inst->dst.reladdr) {
3516
            /* Any output might be written, so no copy propagation
3517
             * from outputs across this instruction.
3518
             */
3519
            for (int r = 0; r < this->next_temp; r++) {
3520
               for (int c = 0; c < 4; c++) {
3521
        	  if (!acp[4 * r + c])
3522
        	     continue;
3523
 
3524
        	  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3525
        	     acp[4 * r + c] = NULL;
3526
               }
3527
            }
3528
         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3529
        	    inst->dst.file == PROGRAM_OUTPUT) {
3530
            /* Clear where it's used as dst. */
3531
            if (inst->dst.file == PROGRAM_TEMPORARY) {
3532
               for (int c = 0; c < 4; c++) {
3533
        	  if (inst->dst.writemask & (1 << c)) {
3534
        	     acp[4 * inst->dst.index + c] = NULL;
3535
        	  }
3536
               }
3537
            }
3538
 
3539
            /* Clear where it's used as src. */
3540
            for (int r = 0; r < this->next_temp; r++) {
3541
               for (int c = 0; c < 4; c++) {
3542
        	  if (!acp[4 * r + c])
3543
        	     continue;
3544
 
3545
        	  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3546
 
3547
        	  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3548
        	      acp[4 * r + c]->src[0].index == inst->dst.index &&
3549
        	      inst->dst.writemask & (1 << src_chan))
3550
        	  {
3551
        	     acp[4 * r + c] = NULL;
3552
        	  }
3553
               }
3554
            }
3555
         }
3556
         break;
3557
      }
3558
 
3559
      /* If this is a copy, add it to the ACP. */
3560
      if (inst->op == TGSI_OPCODE_MOV &&
3561
          inst->dst.file == PROGRAM_TEMPORARY &&
3562
          !(inst->dst.file == inst->src[0].file &&
3563
             inst->dst.index == inst->src[0].index) &&
3564
          !inst->dst.reladdr &&
3565
          !inst->saturate &&
3566
          !inst->src[0].reladdr &&
3567
          !inst->src[0].negate) {
3568
         for (int i = 0; i < 4; i++) {
3569
            if (inst->dst.writemask & (1 << i)) {
3570
               acp[4 * inst->dst.index + i] = inst;
3571
               acp_level[4 * inst->dst.index + i] = level;
3572
            }
3573
         }
3574
      }
3575
   }
3576
 
3577
   ralloc_free(acp_level);
3578
   ralloc_free(acp);
3579
}
3580
 
3581
/*
3582
 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3583
 *
3584
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3585
 * will occur.  As an example, a TXP production after copy propagation but
3586
 * before this pass:
3587
 *
3588
 * 0: MOV TEMP[1], INPUT[4].xyyy;
3589
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3590
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3591
 *
3592
 * and after this pass:
3593
 *
3594
 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3595
 *
3596
 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3597
 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3598
 */
3599
void
3600
glsl_to_tgsi_visitor::eliminate_dead_code(void)
3601
{
3602
   int i;
3603
 
3604
   for (i=0; i < this->next_temp; i++) {
3605
      int last_read = get_last_temp_read(i);
3606
      int j = 0;
3607
 
3608
      foreach_iter(exec_list_iterator, iter, this->instructions) {
3609
         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3610
 
3611
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3612
             j > last_read)
3613
         {
3614
            iter.remove();
3615
            delete inst;
3616
         }
3617
 
3618
         j++;
3619
      }
3620
   }
3621
}
3622
 
3623
/*
3624
 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
3625
 * code elimination.  This is less primitive than eliminate_dead_code(), as it
3626
 * is per-channel and can detect consecutive writes without a read between them
3627
 * as dead code.  However, there is some dead code that can be eliminated by
3628
 * eliminate_dead_code() but not this function - for example, this function
3629
 * cannot eliminate an instruction writing to a register that is never read and
3630
 * is the only instruction writing to that register.
3631
 *
3632
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3633
 * will occur.
3634
 */
3635
int
3636
glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
3637
{
3638
   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
3639
                                                     glsl_to_tgsi_instruction *,
3640
                                                     this->next_temp * 4);
3641
   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3642
   int level = 0;
3643
   int removed = 0;
3644
 
3645
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3646
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3647
 
3648
      assert(inst->dst.file != PROGRAM_TEMPORARY
3649
             || inst->dst.index < this->next_temp);
3650
 
3651
      switch (inst->op) {
3652
      case TGSI_OPCODE_BGNLOOP:
3653
      case TGSI_OPCODE_ENDLOOP:
3654
      case TGSI_OPCODE_CONT:
3655
      case TGSI_OPCODE_BRK:
3656
         /* End of a basic block, clear the write array entirely.
3657
          *
3658
          * This keeps us from killing dead code when the writes are
3659
          * on either side of a loop, even when the register isn't touched
3660
          * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
3661
          * dead code of this type, so it shouldn't make a difference as long as
3662
          * the dead code elimination pass in the GLSL compiler does its job.
3663
          */
3664
         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3665
         break;
3666
 
3667
      case TGSI_OPCODE_ENDIF:
3668
      case TGSI_OPCODE_ELSE:
3669
         /* Promote the recorded level of all channels written inside the
3670
          * preceding if or else block to the level above the if/else block.
3671
          */
3672
         for (int r = 0; r < this->next_temp; r++) {
3673
            for (int c = 0; c < 4; c++) {
3674
               if (!writes[4 * r + c])
3675
        	         continue;
3676
 
3677
               if (write_level[4 * r + c] == level)
3678
        	         write_level[4 * r + c] = level-1;
3679
            }
3680
         }
3681
 
3682
         if(inst->op == TGSI_OPCODE_ENDIF)
3683
            --level;
3684
 
3685
         break;
3686
 
3687
      case TGSI_OPCODE_IF:
3688
      case TGSI_OPCODE_UIF:
3689
         ++level;
3690
         /* fallthrough to default case to mark the condition as read */
3691
 
3692
      default:
3693
         /* Continuing the block, clear any channels from the write array that
3694
          * are read by this instruction.
3695
          */
3696
         for (unsigned i = 0; i < Elements(inst->src); i++) {
3697
            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
3698
               /* Any temporary might be read, so no dead code elimination
3699
                * across this instruction.
3700
                */
3701
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3702
            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
3703
               /* Clear where it's used as src. */
3704
               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
3705
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
3706
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
3707
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
3708
 
3709
               for (int c = 0; c < 4; c++) {
3710
              	   if (src_chans & (1 << c)) {
3711
              	      writes[4 * inst->src[i].index + c] = NULL;
3712
              	   }
3713
               }
3714
            }
3715
         }
3716
         break;
3717
      }
3718
 
3719
      /* If this instruction writes to a temporary, add it to the write array.
3720
       * If there is already an instruction in the write array for one or more
3721
       * of the channels, flag that channel write as dead.
3722
       */
3723
      if (inst->dst.file == PROGRAM_TEMPORARY &&
3724
          !inst->dst.reladdr &&
3725
          !inst->saturate) {
3726
         for (int c = 0; c < 4; c++) {
3727
            if (inst->dst.writemask & (1 << c)) {
3728
               if (writes[4 * inst->dst.index + c]) {
3729
                  if (write_level[4 * inst->dst.index + c] < level)
3730
                     continue;
3731
                  else
3732
                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
3733
               }
3734
               writes[4 * inst->dst.index + c] = inst;
3735
               write_level[4 * inst->dst.index + c] = level;
3736
            }
3737
         }
3738
      }
3739
   }
3740
 
3741
   /* Anything still in the write array at this point is dead code. */
3742
   for (int r = 0; r < this->next_temp; r++) {
3743
      for (int c = 0; c < 4; c++) {
3744
         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
3745
         if (inst)
3746
            inst->dead_mask |= (1 << c);
3747
      }
3748
   }
3749
 
3750
   /* Now actually remove the instructions that are completely dead and update
3751
    * the writemask of other instructions with dead channels.
3752
    */
3753
   foreach_iter(exec_list_iterator, iter, this->instructions) {
3754
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3755
 
3756
      if (!inst->dead_mask || !inst->dst.writemask)
3757
         continue;
3758
      else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
3759
         iter.remove();
3760
         delete inst;
3761
         removed++;
3762
      } else
3763
         inst->dst.writemask &= ~(inst->dead_mask);
3764
   }
3765
 
3766
   ralloc_free(write_level);
3767
   ralloc_free(writes);
3768
 
3769
   return removed;
3770
}
3771
 
3772
/* Merges temporary registers together where possible to reduce the number of
3773
 * registers needed to run a program.
3774
 *
3775
 * Produces optimal code only after copy propagation and dead code elimination
3776
 * have been run. */
3777
void
3778
glsl_to_tgsi_visitor::merge_registers(void)
3779
{
3780
   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3781
   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3782
   int i, j;
3783
 
3784
   /* Read the indices of the last read and first write to each temp register
3785
    * into an array so that we don't have to traverse the instruction list as
3786
    * much. */
3787
   for (i=0; i < this->next_temp; i++) {
3788
      last_reads[i] = get_last_temp_read(i);
3789
      first_writes[i] = get_first_temp_write(i);
3790
   }
3791
 
3792
   /* Start looking for registers with non-overlapping usages that can be
3793
    * merged together. */
3794
   for (i=0; i < this->next_temp; i++) {
3795
      /* Don't touch unused registers. */
3796
      if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3797
 
3798
      for (j=0; j < this->next_temp; j++) {
3799
         /* Don't touch unused registers. */
3800
         if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3801
 
3802
         /* We can merge the two registers if the first write to j is after or
3803
          * in the same instruction as the last read from i.  Note that the
3804
          * register at index i will always be used earlier or at the same time
3805
          * as the register at index j. */
3806
         if (first_writes[i] <= first_writes[j] &&
3807
             last_reads[i] <= first_writes[j])
3808
         {
3809
            rename_temp_register(j, i); /* Replace all references to j with i.*/
3810
 
3811
            /* Update the first_writes and last_reads arrays with the new
3812
             * values for the merged register index, and mark the newly unused
3813
             * register index as such. */
3814
            last_reads[i] = last_reads[j];
3815
            first_writes[j] = -1;
3816
            last_reads[j] = -1;
3817
         }
3818
      }
3819
   }
3820
 
3821
   ralloc_free(last_reads);
3822
   ralloc_free(first_writes);
3823
}
3824
 
3825
/* Reassign indices to temporary registers by reusing unused indices created
3826
 * by optimization passes. */
3827
void
3828
glsl_to_tgsi_visitor::renumber_registers(void)
3829
{
3830
   int i = 0;
3831
   int new_index = 0;
3832
 
3833
   for (i=0; i < this->next_temp; i++) {
3834
      if (get_first_temp_read(i) < 0) continue;
3835
      if (i != new_index)
3836
         rename_temp_register(i, new_index);
3837
      new_index++;
3838
   }
3839
 
3840
   this->next_temp = new_index;
3841
}
3842
 
3843
/**
3844
 * Returns a fragment program which implements the current pixel transfer ops.
3845
 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
3846
 */
3847
extern "C" void
3848
get_pixel_transfer_visitor(struct st_fragment_program *fp,
3849
                           glsl_to_tgsi_visitor *original,
3850
                           int scale_and_bias, int pixel_maps)
3851
{
3852
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3853
   struct st_context *st = st_context(original->ctx);
3854
   struct gl_program *prog = &fp->Base.Base;
3855
   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
3856
   st_src_reg coord, src0;
3857
   st_dst_reg dst0;
3858
   glsl_to_tgsi_instruction *inst;
3859
 
3860
   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
3861
   v->ctx = original->ctx;
3862
   v->prog = prog;
3863
   v->shader_program = NULL;
3864
   v->glsl_version = original->glsl_version;
3865
   v->native_integers = original->native_integers;
3866
   v->options = original->options;
3867
   v->next_temp = original->next_temp;
3868
   v->num_address_regs = original->num_address_regs;
3869
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
3870
   v->indirect_addr_consts = original->indirect_addr_consts;
3871
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3872
   v->num_immediates = original->num_immediates;
3873
 
3874
   /*
3875
    * Get initial pixel color from the texture.
3876
    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
3877
    */
3878
   coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
3879
   src0 = v->get_temp(glsl_type::vec4_type);
3880
   dst0 = st_dst_reg(src0);
3881
   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3882
   inst->sampler = 0;
3883
   inst->tex_target = TEXTURE_2D_INDEX;
3884
 
3885
   prog->InputsRead |= VARYING_BIT_TEX0;
3886
   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
3887
   v->samplers_used |= (1 << 0);
3888
 
3889
   if (scale_and_bias) {
3890
      static const gl_state_index scale_state[STATE_LENGTH] =
3891
         { STATE_INTERNAL, STATE_PT_SCALE,
3892
           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3893
      static const gl_state_index bias_state[STATE_LENGTH] =
3894
         { STATE_INTERNAL, STATE_PT_BIAS,
3895
           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3896
      GLint scale_p, bias_p;
3897
      st_src_reg scale, bias;
3898
 
3899
      scale_p = _mesa_add_state_reference(params, scale_state);
3900
      bias_p = _mesa_add_state_reference(params, bias_state);
3901
 
3902
      /* MAD colorTemp, colorTemp, scale, bias; */
3903
      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
3904
      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
3905
      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
3906
   }
3907
 
3908
   if (pixel_maps) {
3909
      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
3910
      st_dst_reg temp_dst = st_dst_reg(temp);
3911
 
3912
      assert(st->pixel_xfer.pixelmap_texture);
3913
 
3914
      /* With a little effort, we can do four pixel map look-ups with
3915
       * two TEX instructions:
3916
       */
3917
 
3918
      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
3919
      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
3920
      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3921
      inst->sampler = 1;
3922
      inst->tex_target = TEXTURE_2D_INDEX;
3923
 
3924
      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
3925
      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
3926
      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
3927
      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3928
      inst->sampler = 1;
3929
      inst->tex_target = TEXTURE_2D_INDEX;
3930
 
3931
      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
3932
      v->samplers_used |= (1 << 1);
3933
 
3934
      /* MOV colorTemp, temp; */
3935
      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
3936
   }
3937
 
3938
   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3939
    * new visitor. */
3940
   foreach_iter(exec_list_iterator, iter, original->instructions) {
3941
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3942
      glsl_to_tgsi_instruction *newinst;
3943
      st_src_reg src_regs[3];
3944
 
3945
      if (inst->dst.file == PROGRAM_OUTPUT)
3946
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3947
 
3948
      for (int i=0; i<3; i++) {
3949
         src_regs[i] = inst->src[i];
3950
         if (src_regs[i].file == PROGRAM_INPUT &&
3951
             src_regs[i].index == VARYING_SLOT_COL0)
3952
         {
3953
            src_regs[i].file = PROGRAM_TEMPORARY;
3954
            src_regs[i].index = src0.index;
3955
         }
3956
         else if (src_regs[i].file == PROGRAM_INPUT)
3957
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
3958
      }
3959
 
3960
      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3961
      newinst->tex_target = inst->tex_target;
3962
   }
3963
 
3964
   /* Make modifications to fragment program info. */
3965
   prog->Parameters = _mesa_combine_parameter_lists(params,
3966
                                                    original->prog->Parameters);
3967
   _mesa_free_parameter_list(params);
3968
   count_resources(v, prog);
3969
   fp->glsl_to_tgsi = v;
3970
}
3971
 
3972
/**
3973
 * Make fragment program for glBitmap:
3974
 *   Sample the texture and kill the fragment if the bit is 0.
3975
 * This program will be combined with the user's fragment program.
3976
 *
3977
 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
3978
 */
3979
extern "C" void
3980
get_bitmap_visitor(struct st_fragment_program *fp,
3981
                   glsl_to_tgsi_visitor *original, int samplerIndex)
3982
{
3983
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3984
   struct st_context *st = st_context(original->ctx);
3985
   struct gl_program *prog = &fp->Base.Base;
3986
   st_src_reg coord, src0;
3987
   st_dst_reg dst0;
3988
   glsl_to_tgsi_instruction *inst;
3989
 
3990
   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
3991
   v->ctx = original->ctx;
3992
   v->prog = prog;
3993
   v->shader_program = NULL;
3994
   v->glsl_version = original->glsl_version;
3995
   v->native_integers = original->native_integers;
3996
   v->options = original->options;
3997
   v->next_temp = original->next_temp;
3998
   v->num_address_regs = original->num_address_regs;
3999
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
4000
   v->indirect_addr_consts = original->indirect_addr_consts;
4001
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
4002
   v->num_immediates = original->num_immediates;
4003
 
4004
   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
4005
   coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
4006
   src0 = v->get_temp(glsl_type::vec4_type);
4007
   dst0 = st_dst_reg(src0);
4008
   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
4009
   inst->sampler = samplerIndex;
4010
   inst->tex_target = TEXTURE_2D_INDEX;
4011
 
4012
   prog->InputsRead |= VARYING_BIT_TEX0;
4013
   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
4014
   v->samplers_used |= (1 << samplerIndex);
4015
 
4016
   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
4017
   src0.negate = NEGATE_XYZW;
4018
   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
4019
      src0.swizzle = SWIZZLE_XXXX;
4020
   inst = v->emit(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
4021
 
4022
   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
4023
    * new visitor. */
4024
   foreach_iter(exec_list_iterator, iter, original->instructions) {
4025
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
4026
      glsl_to_tgsi_instruction *newinst;
4027
      st_src_reg src_regs[3];
4028
 
4029
      if (inst->dst.file == PROGRAM_OUTPUT)
4030
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
4031
 
4032
      for (int i=0; i<3; i++) {
4033
         src_regs[i] = inst->src[i];
4034
         if (src_regs[i].file == PROGRAM_INPUT)
4035
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
4036
      }
4037
 
4038
      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
4039
      newinst->tex_target = inst->tex_target;
4040
   }
4041
 
4042
   /* Make modifications to fragment program info. */
4043
   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
4044
   count_resources(v, prog);
4045
   fp->glsl_to_tgsi = v;
4046
}
4047
 
4048
/* ------------------------- TGSI conversion stuff -------------------------- */
4049
/**
 * One recorded branch: the target passed to get_label() and the token slot
 * whose address get_label() hands back to its caller.
 */
struct label {
   unsigned branch_target, token;
};
4053
 
4054
/**
4055
 * Intermediate state used during shader translation.
4056
 */
4057
struct st_translate {
4058
   struct ureg_program *ureg;
4059
 
4060
   struct ureg_dst temps[MAX_TEMPS];
4061
   struct ureg_dst arrays[MAX_ARRAYS];
4062
   struct ureg_src *constants;
4063
   struct ureg_src *immediates;
4064
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
4065
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
4066
   struct ureg_dst address[1];
4067
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
4068
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
4069
 
4070
   unsigned array_sizes[MAX_ARRAYS];
4071
 
4072
   const GLuint *inputMapping;
4073
   const GLuint *outputMapping;
4074
 
4075
   /* For every instruction that contains a label (eg CALL), keep
4076
    * details so that we can go back afterwards and emit the correct
4077
    * tgsi instruction number for each label.
4078
    */
4079
   struct label *labels;
4080
   unsigned labels_size;
4081
   unsigned labels_count;
4082
 
4083
   /* Keep a record of the tgsi instruction number that each mesa
4084
    * instruction starts at, will be used to fix up labels after
4085
    * translation.
4086
    */
4087
   unsigned *insn;
4088
   unsigned insn_size;
4089
   unsigned insn_count;
4090
 
4091
   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
4092
 
4093
   boolean error;
4094
};
4095
 
4096
/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
4097
static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
4098
   TGSI_SEMANTIC_FACE,
4099
   TGSI_SEMANTIC_VERTEXID,
4100
   TGSI_SEMANTIC_INSTANCEID
4101
};
4102
 
4103
/**
4104
 * Make note of a branch to a label in the TGSI code.
4105
 * After we've emitted all instructions, we'll go over the list
4106
 * of labels built here and patch the TGSI code with the actual
4107
 * location of each label.
4108
 */
4109
static unsigned *get_label(struct st_translate *t, unsigned branch_target)
4110
{
4111
   unsigned i;
4112
 
4113
   if (t->labels_count + 1 >= t->labels_size) {
4114
      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
4115
      t->labels = (struct label *)realloc(t->labels,
4116
                                          t->labels_size * sizeof(struct label));
4117
      if (t->labels == NULL) {
4118
         static unsigned dummy;
4119
         t->error = TRUE;
4120
         return &dummy;
4121
      }
4122
   }
4123
 
4124
   i = t->labels_count++;
4125
   t->labels[i].branch_target = branch_target;
4126
   return &t->labels[i].token;
4127
}
4128
 
4129
/**
4130
 * Called prior to emitting the TGSI code for each instruction.
4131
 * Allocate additional space for instructions if needed.
4132
 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
4133
 * the next TGSI instruction.
4134
 */
4135
static void set_insn_start(struct st_translate *t, unsigned start)
4136
{
4137
   if (t->insn_count + 1 >= t->insn_size) {
4138
      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
4139
      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
4140
      if (t->insn == NULL) {
4141
         t->error = TRUE;
4142
         return;
4143
      }
4144
   }
4145
 
4146
   t->insn[t->insn_count++] = start;
4147
}
4148
 
4149
/**
4150
 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
4151
 */
4152
static struct ureg_src
4153
emit_immediate(struct st_translate *t,
4154
               gl_constant_value values[4],
4155
               int type, int size)
4156
{
4157
   struct ureg_program *ureg = t->ureg;
4158
 
4159
   switch(type)
4160
   {
4161
   case GL_FLOAT:
4162
      return ureg_DECL_immediate(ureg, &values[0].f, size);
4163
   case GL_INT:
4164
      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
4165
   case GL_UNSIGNED_INT:
4166
   case GL_BOOL:
4167
      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
4168
   default:
4169
      assert(!"should not get here - type must be float, int, uint, or bool");
4170
      return ureg_src_undef();
4171
   }
4172
}
4173
 
4174
/**
4175
 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
4176
 */
4177
static struct ureg_dst
4178
dst_register(struct st_translate *t,
4179
             gl_register_file file,
4180
             GLuint index)
4181
{
4182
   unsigned array;
4183
 
4184
   switch(file) {
4185
   case PROGRAM_UNDEFINED:
4186
      return ureg_dst_undef();
4187
 
4188
   case PROGRAM_TEMPORARY:
4189
      assert(index >= 0);
4190
      assert(index < (int) Elements(t->temps));
4191
 
4192
      if (ureg_dst_is_undef(t->temps[index]))
4193
         t->temps[index] = ureg_DECL_local_temporary(t->ureg);
4194
 
4195
      return t->temps[index];
4196
 
4197
   case PROGRAM_ARRAY:
4198
      array = index >> 16;
4199
 
4200
      assert(array >= 0);
4201
      assert(array < (int) Elements(t->arrays));
4202
 
4203
      if (ureg_dst_is_undef(t->arrays[array]))
4204
         t->arrays[array] = ureg_DECL_array_temporary(
4205
            t->ureg, t->array_sizes[array], TRUE);
4206
 
4207
      return ureg_dst_array_offset(t->arrays[array],
4208
                                   (int)(index & 0xFFFF) - 0x8000);
4209
 
4210
   case PROGRAM_OUTPUT:
4211
      if (t->procType == TGSI_PROCESSOR_VERTEX)
4212
         assert(index < VARYING_SLOT_MAX);
4213
      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
4214
         assert(index < FRAG_RESULT_MAX);
4215
      else
4216
         assert(index < VARYING_SLOT_MAX);
4217
 
4218
      assert(t->outputMapping[index] < Elements(t->outputs));
4219
 
4220
      return t->outputs[t->outputMapping[index]];
4221
 
4222
   case PROGRAM_ADDRESS:
4223
      return t->address[index];
4224
 
4225
   default:
4226
      assert(!"unknown dst register file");
4227
      return ureg_dst_undef();
4228
   }
4229
}
4230
 
4231
/**
4232
 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
4233
 */
4234
static struct ureg_src
4235
src_register(struct st_translate *t,
4236
             gl_register_file file,
4237
             GLint index, GLint index2D)
4238
{
4239
   switch(file) {
4240
   case PROGRAM_UNDEFINED:
4241
      return ureg_src_undef();
4242
 
4243
   case PROGRAM_TEMPORARY:
4244
   case PROGRAM_ARRAY:
4245
      return ureg_src(dst_register(t, file, index));
4246
 
4247
   case PROGRAM_ENV_PARAM:
4248
   case PROGRAM_LOCAL_PARAM:
4249
   case PROGRAM_UNIFORM:
4250
      assert(index >= 0);
4251
      return t->constants[index];
4252
   case PROGRAM_STATE_VAR:
4253
   case PROGRAM_CONSTANT:       /* ie, immediate */
4254
      if (index2D) {
4255
         struct ureg_src src;
4256
         src = ureg_src_register(TGSI_FILE_CONSTANT, 0);
4257
         src.Dimension = 1;
4258
         src.DimensionIndex = index2D;
4259
         return src;
4260
      } else if (index < 0)
4261
         return ureg_DECL_constant(t->ureg, 0);
4262
      else
4263
         return t->constants[index];
4264
 
4265
   case PROGRAM_IMMEDIATE:
4266
      return t->immediates[index];
4267
 
4268
   case PROGRAM_INPUT:
4269
      assert(t->inputMapping[index] < Elements(t->inputs));
4270
      return t->inputs[t->inputMapping[index]];
4271
 
4272
   case PROGRAM_OUTPUT:
4273
      assert(t->outputMapping[index] < Elements(t->outputs));
4274
      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
4275
 
4276
   case PROGRAM_ADDRESS:
4277
      return ureg_src(t->address[index]);
4278
 
4279
   case PROGRAM_SYSTEM_VALUE:
4280
      assert(index < (int) Elements(t->systemValues));
4281
      return t->systemValues[index];
4282
 
4283
   default:
4284
      assert(!"unknown src register file");
4285
      return ureg_src_undef();
4286
   }
4287
}
4288
 
4289
/**
4290
 * Create a TGSI ureg_dst register from an st_dst_reg.
4291
 */
4292
static struct ureg_dst
4293
translate_dst(struct st_translate *t,
4294
              const st_dst_reg *dst_reg,
4295
              bool saturate, bool clamp_color)
4296
{
4297
   struct ureg_dst dst = dst_register(t,
4298
                                      dst_reg->file,
4299
                                      dst_reg->index);
4300
 
4301
   dst = ureg_writemask(dst, dst_reg->writemask);
4302
 
4303
   if (saturate)
4304
      dst = ureg_saturate(dst);
4305
   else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
4306
      /* Clamp colors for ARB_color_buffer_float. */
4307
      switch (t->procType) {
4308
      case TGSI_PROCESSOR_VERTEX:
4309
         /* XXX if the geometry shader is present, this must be done there
4310
          * instead of here. */
4311
         if (dst_reg->index == VARYING_SLOT_COL0 ||
4312
             dst_reg->index == VARYING_SLOT_COL1 ||
4313
             dst_reg->index == VARYING_SLOT_BFC0 ||
4314
             dst_reg->index == VARYING_SLOT_BFC1) {
4315
            dst = ureg_saturate(dst);
4316
         }
4317
         break;
4318
 
4319
      case TGSI_PROCESSOR_FRAGMENT:
4320
         if (dst_reg->index >= FRAG_RESULT_COLOR) {
4321
            dst = ureg_saturate(dst);
4322
         }
4323
         break;
4324
      }
4325
   }
4326
 
4327
   if (dst_reg->reladdr != NULL) {
4328
      assert(dst_reg->file != PROGRAM_TEMPORARY);
4329
      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
4330
   }
4331
 
4332
   return dst;
4333
}
4334
 
4335
/**
4336
 * Create a TGSI ureg_src register from an st_src_reg.
4337
 */
4338
static struct ureg_src
4339
translate_src(struct st_translate *t, const st_src_reg *src_reg)
4340
{
4341
   struct ureg_src src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D);
4342
 
4343
   src = ureg_swizzle(src,
4344
                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
4345
                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
4346
                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
4347
                      GET_SWZ(src_reg->swizzle, 3) & 0x3);
4348
 
4349
   if ((src_reg->negate & 0xf) == NEGATE_XYZW)
4350
      src = ureg_negate(src);
4351
 
4352
   if (src_reg->reladdr != NULL) {
4353
      assert(src_reg->file != PROGRAM_TEMPORARY);
4354
      src = ureg_src_indirect(src, ureg_src(t->address[0]));
4355
   }
4356
 
4357
   return src;
4358
}
4359
 
4360
static struct tgsi_texture_offset
4361
translate_tex_offset(struct st_translate *t,
4362
                     const struct tgsi_texture_offset *in_offset)
4363
{
4364
   struct tgsi_texture_offset offset;
4365
   struct ureg_src imm_src;
4366
 
4367
   assert(in_offset->File == PROGRAM_IMMEDIATE);
4368
   imm_src = t->immediates[in_offset->Index];
4369
 
4370
   offset.File = imm_src.File;
4371
   offset.Index = imm_src.Index;
4372
   offset.SwizzleX = imm_src.SwizzleX;
4373
   offset.SwizzleY = imm_src.SwizzleY;
4374
   offset.SwizzleZ = imm_src.SwizzleZ;
4375
   offset.File = TGSI_FILE_IMMEDIATE;
4376
   offset.Padding = 0;
4377
 
4378
   return offset;
4379
}
4380
 
4381
static void
4382
compile_tgsi_instruction(struct st_translate *t,
4383
                         const glsl_to_tgsi_instruction *inst,
4384
                         bool clamp_dst_color_output)
4385
{
4386
   struct ureg_program *ureg = t->ureg;
4387
   GLuint i;
4388
   struct ureg_dst dst[1];
4389
   struct ureg_src src[4];
4390
   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
4391
 
4392
   unsigned num_dst;
4393
   unsigned num_src;
4394
   unsigned tex_target;
4395
 
4396
   num_dst = num_inst_dst_regs(inst->op);
4397
   num_src = num_inst_src_regs(inst->op);
4398
 
4399
   if (num_dst)
4400
      dst[0] = translate_dst(t,
4401
                             &inst->dst,
4402
                             inst->saturate,
4403
                             clamp_dst_color_output);
4404
 
4405
   for (i = 0; i < num_src; i++)
4406
      src[i] = translate_src(t, &inst->src[i]);
4407
 
4408
   switch(inst->op) {
4409
   case TGSI_OPCODE_BGNLOOP:
4410
   case TGSI_OPCODE_CAL:
4411
   case TGSI_OPCODE_ELSE:
4412
   case TGSI_OPCODE_ENDLOOP:
4413
   case TGSI_OPCODE_IF:
4414
   case TGSI_OPCODE_UIF:
4415
      assert(num_dst == 0);
4416
      ureg_label_insn(ureg,
4417
                      inst->op,
4418
                      src, num_src,
4419
                      get_label(t,
4420
                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
4421
      return;
4422
 
4423
   case TGSI_OPCODE_TEX:
4424
   case TGSI_OPCODE_TXB:
4425
   case TGSI_OPCODE_TXD:
4426
   case TGSI_OPCODE_TXL:
4427
   case TGSI_OPCODE_TXP:
4428
   case TGSI_OPCODE_TXQ:
4429
   case TGSI_OPCODE_TXF:
4430
   case TGSI_OPCODE_TEX2:
4431
   case TGSI_OPCODE_TXB2:
4432
   case TGSI_OPCODE_TXL2:
4433
      src[num_src++] = t->samplers[inst->sampler];
4434
      for (i = 0; i < inst->tex_offset_num_offset; i++) {
4435
         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
4436
      }
4437
      tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
4438
 
4439
      ureg_tex_insn(ureg,
4440
                    inst->op,
4441
                    dst, num_dst,
4442
                    tex_target,
4443
                    texoffsets, inst->tex_offset_num_offset,
4444
                    src, num_src);
4445
      return;
4446
 
4447
   case TGSI_OPCODE_SCS:
4448
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
4449
      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
4450
      break;
4451
 
4452
   default:
4453
      ureg_insn(ureg,
4454
                inst->op,
4455
                dst, num_dst,
4456
                src, num_src);
4457
      break;
4458
   }
4459
}
4460
 
4461
/**
4462
 * Emit the TGSI instructions for inverting and adjusting WPOS.
4463
 * This code is unavoidable because it also depends on whether
4464
 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
4465
 */
4466
static void
4467
emit_wpos_adjustment( struct st_translate *t,
4468
                      const struct gl_program *program,
4469
                      boolean invert,
4470
                      GLfloat adjX, GLfloat adjY[2])
4471
{
4472
   struct ureg_program *ureg = t->ureg;
4473
 
4474
   /* Fragment program uses fragment position input.
4475
    * Need to replace instances of INPUT[WPOS] with temp T
4476
    * where T = INPUT[WPOS] by y is inverted.
4477
    */
4478
   static const gl_state_index wposTransformState[STATE_LENGTH]
4479
      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
4480
          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4481
 
4482
   /* XXX: note we are modifying the incoming shader here!  Need to
4483
    * do this before emitting the constant decls below, or this
4484
    * will be missed:
4485
    */
4486
   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
4487
                                                       wposTransformState);
4488
 
4489
   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
4490
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
4491
   struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];
4492
 
4493
   /* First, apply the coordinate shift: */
4494
   if (adjX || adjY[0] || adjY[1]) {
4495
      if (adjY[0] != adjY[1]) {
4496
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
4497
          * depending on whether inversion is actually going to be applied
4498
          * or not, which is determined by testing against the inversion
4499
          * state variable used below, which will be either +1 or -1.
4500
          */
4501
         struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
4502
 
4503
         ureg_CMP(ureg, adj_temp,
4504
                  ureg_scalar(wpostrans, invert ? 2 : 0),
4505
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
4506
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
4507
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
4508
      } else {
4509
         ureg_ADD(ureg, wpos_temp, wpos_input,
4510
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
4511
      }
4512
      wpos_input = ureg_src(wpos_temp);
4513
   } else {
4514
      /* MOV wpos_temp, input[wpos]
4515
       */
4516
      ureg_MOV( ureg, wpos_temp, wpos_input );
4517
   }
4518
 
4519
   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
4520
    * inversion/identity, or the other way around if we're drawing to an FBO.
4521
    */
4522
   if (invert) {
4523
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
4524
       */
4525
      ureg_MAD( ureg,
4526
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
4527
                wpos_input,
4528
                ureg_scalar(wpostrans, 0),
4529
                ureg_scalar(wpostrans, 1));
4530
   } else {
4531
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
4532
       */
4533
      ureg_MAD( ureg,
4534
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
4535
                wpos_input,
4536
                ureg_scalar(wpostrans, 2),
4537
                ureg_scalar(wpostrans, 3));
4538
   }
4539
 
4540
   /* Use wpos_temp as position input from here on:
4541
    */
4542
   t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
4543
}
4544
 
4545
 
4546
/**
4547
 * Emit fragment position/ooordinate code.
4548
 */
4549
static void
4550
emit_wpos(struct st_context *st,
4551
          struct st_translate *t,
4552
          const struct gl_program *program,
4553
          struct ureg_program *ureg)
4554
{
4555
   const struct gl_fragment_program *fp =
4556
      (const struct gl_fragment_program *) program;
4557
   struct pipe_screen *pscreen = st->pipe->screen;
4558
   GLfloat adjX = 0.0f;
4559
   GLfloat adjY[2] = { 0.0f, 0.0f };
4560
   boolean invert = FALSE;
4561
 
4562
   /* Query the pixel center conventions supported by the pipe driver and set
4563
    * adjX, adjY to help out if it cannot handle the requested one internally.
4564
    *
4565
    * The bias of the y-coordinate depends on whether y-inversion takes place
4566
    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
4567
    * drawing to an FBO (causes additional inversion), and whether the the pipe
4568
    * driver origin and the requested origin differ (the latter condition is
4569
    * stored in the 'invert' variable).
4570
    *
4571
    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
4572
    *
4573
    * center shift only:
4574
    * i -> h: +0.5
4575
    * h -> i: -0.5
4576
    *
4577
    * inversion only:
4578
    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
4579
    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
4580
    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
4581
    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
4582
    *
4583
    * inversion and center shift:
4584
    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
4585
    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
4586
    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
4587
    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
4588
    */
4589
   if (fp->OriginUpperLeft) {
4590
      /* Fragment shader wants origin in upper-left */
4591
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
4592
         /* the driver supports upper-left origin */
4593
      }
4594
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
4595
         /* the driver supports lower-left origin, need to invert Y */
4596
         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4597
         invert = TRUE;
4598
      }
4599
      else
4600
         assert(0);
4601
   }
4602
   else {
4603
      /* Fragment shader wants origin in lower-left */
4604
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
4605
         /* the driver supports lower-left origin */
4606
         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4607
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
4608
         /* the driver supports upper-left origin, need to invert Y */
4609
         invert = TRUE;
4610
      else
4611
         assert(0);
4612
   }
4613
 
4614
   if (fp->PixelCenterInteger) {
4615
      /* Fragment shader wants pixel center integer */
4616
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4617
         /* the driver supports pixel center integer */
4618
         adjY[1] = 1.0f;
4619
         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4620
      }
4621
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4622
         /* the driver supports pixel center half integer, need to bias X,Y */
4623
         adjX = -0.5f;
4624
         adjY[0] = -0.5f;
4625
         adjY[1] = 0.5f;
4626
      }
4627
      else
4628
         assert(0);
4629
   }
4630
   else {
4631
      /* Fragment shader wants pixel center half integer */
4632
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4633
         /* the driver supports pixel center half integer */
4634
      }
4635
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4636
         /* the driver supports pixel center integer, need to bias X,Y */
4637
         adjX = adjY[0] = adjY[1] = 0.5f;
4638
         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4639
      }
4640
      else
4641
         assert(0);
4642
   }
4643
 
4644
   /* we invert after adjustment so that we avoid the MOV to temporary,
4645
    * and reuse the adjustment ADD instead */
4646
   emit_wpos_adjustment(t, program, invert, adjX, adjY);
4647
}
4648
 
4649
/**
4650
 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
4651
 * TGSI uses +1 for front, -1 for back.
4652
 * This function converts the TGSI value to the GL value.  Simply clamping/
4653
 * saturating the value to [0,1] does the job.
4654
 */
4655
static void
4656
emit_face_var(struct st_translate *t)
4657
{
4658
   struct ureg_program *ureg = t->ureg;
4659
   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
4660
   struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
4661
 
4662
   /* MOV_SAT face_temp, input[face] */
4663
   face_temp = ureg_saturate(face_temp);
4664
   ureg_MOV(ureg, face_temp, face_input);
4665
 
4666
   /* Use face_temp as face input from here on: */
4667
   t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
4668
}
4669
 
4670
static void
4671
emit_edgeflags(struct st_translate *t)
4672
{
4673
   struct ureg_program *ureg = t->ureg;
4674
   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
4675
   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
4676
 
4677
   ureg_MOV(ureg, edge_dst, edge_src);
4678
}
4679
 
4680
/**
4681
 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
4682
 * \param program  the program to translate
4683
 * \param numInputs  number of input registers used
4684
 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
4685
 *                      input indexes
4686
 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
4687
 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
4688
 *                            each input
4689
 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
4690
 * \param numOutputs  number of output registers used
4691
 * \param outputMapping  maps Mesa fragment program outputs to TGSI
4692
 *                       generic outputs
4693
 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
4694
 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
4695
 *                             each output
4696
 *
4697
 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
4698
 */
4699
extern "C" enum pipe_error
4700
st_translate_program(
4701
   struct gl_context *ctx,
4702
   uint procType,
4703
   struct ureg_program *ureg,
4704
   glsl_to_tgsi_visitor *program,
4705
   const struct gl_program *proginfo,
4706
   GLuint numInputs,
4707
   const GLuint inputMapping[],
4708
   const ubyte inputSemanticName[],
4709
   const ubyte inputSemanticIndex[],
4710
   const GLuint interpMode[],
4711
   const GLboolean is_centroid[],
4712
   GLuint numOutputs,
4713
   const GLuint outputMapping[],
4714
   const ubyte outputSemanticName[],
4715
   const ubyte outputSemanticIndex[],
4716
   boolean passthrough_edgeflags,
4717
   boolean clamp_color)
4718
{
4719
   struct st_translate *t;
4720
   unsigned i;
4721
   enum pipe_error ret = PIPE_OK;
4722
 
4723
   assert(numInputs <= Elements(t->inputs));
4724
   assert(numOutputs <= Elements(t->outputs));
4725
 
4726
   t = CALLOC_STRUCT(st_translate);
4727
   if (!t) {
4728
      ret = PIPE_ERROR_OUT_OF_MEMORY;
4729
      goto out;
4730
   }
4731
 
4732
   memset(t, 0, sizeof *t);
4733
 
4734
   t->procType = procType;
4735
   t->inputMapping = inputMapping;
4736
   t->outputMapping = outputMapping;
4737
   t->ureg = ureg;
4738
 
4739
   if (program->shader_program) {
4740
      for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
4741
         struct gl_uniform_storage *const storage =
4742
               &program->shader_program->UniformStorage[i];
4743
 
4744
         _mesa_uniform_detach_all_driver_storage(storage);
4745
      }
4746
   }
4747
 
4748
   /*
4749
    * Declare input attributes.
4750
    */
4751
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
4752
      for (i = 0; i < numInputs; i++) {
4753
         t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
4754
                                                        inputSemanticName[i],
4755
                                                        inputSemanticIndex[i],
4756
                                                        interpMode[i], 0,
4757
                                                        is_centroid[i]);
4758
      }
4759
 
4760
      if (proginfo->InputsRead & VARYING_BIT_POS) {
4761
         /* Must do this after setting up t->inputs, and before
4762
          * emitting constant references, below:
4763
          */
4764
          emit_wpos(st_context(ctx), t, proginfo, ureg);
4765
      }
4766
 
4767
      if (proginfo->InputsRead & VARYING_BIT_FACE)
4768
         emit_face_var(t);
4769
 
4770
      /*
4771
       * Declare output attributes.
4772
       */
4773
      for (i = 0; i < numOutputs; i++) {
4774
         switch (outputSemanticName[i]) {
4775
         case TGSI_SEMANTIC_POSITION:
4776
            t->outputs[i] = ureg_DECL_output(ureg,
4777
                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
4778
                                             outputSemanticIndex[i]);
4779
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
4780
            break;
4781
         case TGSI_SEMANTIC_STENCIL:
4782
            t->outputs[i] = ureg_DECL_output(ureg,
4783
                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
4784
                                             outputSemanticIndex[i]);
4785
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
4786
            break;
4787
         case TGSI_SEMANTIC_COLOR:
4788
            t->outputs[i] = ureg_DECL_output(ureg,
4789
                                             TGSI_SEMANTIC_COLOR,
4790
                                             outputSemanticIndex[i]);
4791
            break;
4792
         default:
4793
            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
4794
            ret = PIPE_ERROR_BAD_INPUT;
4795
            goto out;
4796
         }
4797
      }
4798
   }
4799
   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
4800
      for (i = 0; i < numInputs; i++) {
4801
         t->inputs[i] = ureg_DECL_gs_input(ureg,
4802
                                           i,
4803
                                           inputSemanticName[i],
4804
                                           inputSemanticIndex[i]);
4805
      }
4806
 
4807
      for (i = 0; i < numOutputs; i++) {
4808
         t->outputs[i] = ureg_DECL_output(ureg,
4809
                                          outputSemanticName[i],
4810
                                          outputSemanticIndex[i]);
4811
      }
4812
   }
4813
   else {
4814
      assert(procType == TGSI_PROCESSOR_VERTEX);
4815
 
4816
      for (i = 0; i < numInputs; i++) {
4817
         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
4818
      }
4819
 
4820
      for (i = 0; i < numOutputs; i++) {
4821
         t->outputs[i] = ureg_DECL_output(ureg,
4822
                                          outputSemanticName[i],
4823
                                          outputSemanticIndex[i]);
4824
      }
4825
      if (passthrough_edgeflags)
4826
         emit_edgeflags(t);
4827
   }
4828
 
4829
   /* Declare address register.
4830
    */
4831
   if (program->num_address_regs > 0) {
4832
      assert(program->num_address_regs == 1);
4833
      t->address[0] = ureg_DECL_address(ureg);
4834
   }
4835
 
4836
   /* Declare misc input registers
4837
    */
4838
   {
4839
      GLbitfield sysInputs = proginfo->SystemValuesRead;
4840
      unsigned numSys = 0;
4841
      for (i = 0; sysInputs; i++) {
4842
         if (sysInputs & (1 << i)) {
4843
            unsigned semName = mesa_sysval_to_semantic[i];
4844
            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
4845
            if (semName == TGSI_SEMANTIC_INSTANCEID ||
4846
                semName == TGSI_SEMANTIC_VERTEXID) {
4847
               /* From Gallium perspective, these system values are always
4848
                * integer, and require native integer support.  However, if
4849
                * native integer is supported on the vertex stage but not the
4850
                * pixel stage (e.g, i915g + draw), Mesa will generate IR that
4851
                * assumes these system values are floats. To resolve the
4852
                * inconsistency, we insert a U2F.
4853
                */
4854
               struct st_context *st = st_context(ctx);
4855
               struct pipe_screen *pscreen = st->pipe->screen;
4856
               assert(procType == TGSI_PROCESSOR_VERTEX);
4857
               assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
4858
               if (!ctx->Const.NativeIntegers) {
4859
                  struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
4860
                  ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
4861
                  t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
4862
               }
4863
            }
4864
            numSys++;
4865
            sysInputs &= ~(1 << i);
4866
         }
4867
      }
4868
   }
4869
 
4870
   /* Copy over array sizes
4871
    */
4872
   memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
4873
 
4874
   /* Emit constants and uniforms.  TGSI uses a single index space for these,
4875
    * so we put all the translated regs in t->constants.
4876
    */
4877
   if (proginfo->Parameters) {
4878
      t->constants = (struct ureg_src *)
4879
         calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
4880
      if (t->constants == NULL) {
4881
         ret = PIPE_ERROR_OUT_OF_MEMORY;
4882
         goto out;
4883
      }
4884
 
4885
      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
4886
         switch (proginfo->Parameters->Parameters[i].Type) {
4887
         case PROGRAM_ENV_PARAM:
4888
         case PROGRAM_LOCAL_PARAM:
4889
         case PROGRAM_STATE_VAR:
4890
         case PROGRAM_UNIFORM:
4891
            t->constants[i] = ureg_DECL_constant(ureg, i);
4892
            break;
4893
 
4894
         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
4895
          * addressing of the const buffer.
4896
          * FIXME: Be smarter and recognize param arrays:
4897
          * indirect addressing is only valid within the referenced
4898
          * array.
4899
          */
4900
         case PROGRAM_CONSTANT:
4901
            if (program->indirect_addr_consts)
4902
               t->constants[i] = ureg_DECL_constant(ureg, i);
4903
            else
4904
               t->constants[i] = emit_immediate(t,
4905
                                                proginfo->Parameters->ParameterValues[i],
4906
                                                proginfo->Parameters->Parameters[i].DataType,
4907
                                                4);
4908
            break;
4909
         default:
4910
            break;
4911
         }
4912
      }
4913
   }
4914
 
4915
   if (program->shader_program) {
4916
      unsigned num_ubos = program->shader_program->NumUniformBlocks;
4917
 
4918
      for (i = 0; i < num_ubos; i++) {
4919
         ureg_DECL_constant2D(t->ureg, 0, program->shader_program->UniformBlocks[i].UniformBufferSize / 4, i + 1);
4920
      }
4921
   }
4922
 
4923
   /* Emit immediate values.
4924
    */
4925
   t->immediates = (struct ureg_src *)
4926
      calloc(program->num_immediates, sizeof(struct ureg_src));
4927
   if (t->immediates == NULL) {
4928
      ret = PIPE_ERROR_OUT_OF_MEMORY;
4929
      goto out;
4930
   }
4931
   i = 0;
4932
   foreach_iter(exec_list_iterator, iter, program->immediates) {
4933
      immediate_storage *imm = (immediate_storage *)iter.get();
4934
      assert(i < program->num_immediates);
4935
      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
4936
   }
4937
   assert(i == program->num_immediates);
4938
 
4939
   /* texture samplers */
4940
   for (i = 0; i < ctx->Const.FragmentProgram.MaxTextureImageUnits; i++) {
4941
      if (program->samplers_used & (1 << i)) {
4942
         t->samplers[i] = ureg_DECL_sampler(ureg, i);
4943
      }
4944
   }
4945
 
4946
   /* Emit each instruction in turn:
4947
    */
4948
   foreach_iter(exec_list_iterator, iter, program->instructions) {
4949
      set_insn_start(t, ureg_get_instruction_number(ureg));
4950
      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
4951
                               clamp_color);
4952
   }
4953
 
4954
   /* Fix up all emitted labels:
4955
    */
4956
   for (i = 0; i < t->labels_count; i++) {
4957
      ureg_fixup_label(ureg, t->labels[i].token,
4958
                       t->insn[t->labels[i].branch_target]);
4959
   }
4960
 
4961
   if (program->shader_program) {
4962
      /* This has to be done last.  Any operation the can cause
4963
       * prog->ParameterValues to get reallocated (e.g., anything that adds a
4964
       * program constant) has to happen before creating this linkage.
4965
       */
4966
      for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4967
         if (program->shader_program->_LinkedShaders[i] == NULL)
4968
            continue;
4969
 
4970
         _mesa_associate_uniform_storage(ctx, program->shader_program,
4971
               program->shader_program->_LinkedShaders[i]->Program->Parameters);
4972
      }
4973
   }
4974
 
4975
out:
4976
   if (t) {
4977
      free(t->insn);
4978
      free(t->labels);
4979
      free(t->constants);
4980
      free(t->immediates);
4981
 
4982
      if (t->error) {
4983
         debug_printf("%s: translate error flag set\n", __FUNCTION__);
4984
      }
4985
 
4986
      free(t);
4987
   }
4988
 
4989
   return ret;
4990
}
4991
/* ----------------------------- End TGSI code ------------------------------ */
4992
 
4993
/**
4994
 * Convert a shader's GLSL IR into a Mesa gl_program, although without
4995
 * generating Mesa IR.
4996
 */
4997
static struct gl_program *
4998
get_mesa_program(struct gl_context *ctx,
4999
                 struct gl_shader_program *shader_program,
5000
                 struct gl_shader *shader)
5001
{
5002
   glsl_to_tgsi_visitor* v;
5003
   struct gl_program *prog;
5004
   GLenum target;
5005
   bool progress;
5006
   struct gl_shader_compiler_options *options =
5007
         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
5008
   struct pipe_screen *pscreen = ctx->st->pipe->screen;
5009
   unsigned ptarget;
5010
 
5011
   switch (shader->Type) {
5012
   case GL_VERTEX_SHADER:
5013
      target = GL_VERTEX_PROGRAM_ARB;
5014
      ptarget = PIPE_SHADER_VERTEX;
5015
      break;
5016
   case GL_FRAGMENT_SHADER:
5017
      target = GL_FRAGMENT_PROGRAM_ARB;
5018
      ptarget = PIPE_SHADER_FRAGMENT;
5019
      break;
5020
   case GL_GEOMETRY_SHADER:
5021
      target = GL_GEOMETRY_PROGRAM_NV;
5022
      ptarget = PIPE_SHADER_GEOMETRY;
5023
      break;
5024
   default:
5025
      assert(!"should not be reached");
5026
      return NULL;
5027
   }
5028
 
5029
   validate_ir_tree(shader->ir);
5030
 
5031
   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
5032
   if (!prog)
5033
      return NULL;
5034
   prog->Parameters = _mesa_new_parameter_list();
5035
   v = new glsl_to_tgsi_visitor();
5036
   v->ctx = ctx;
5037
   v->prog = prog;
5038
   v->shader_program = shader_program;
5039
   v->options = options;
5040
   v->glsl_version = ctx->Const.GLSLVersion;
5041
   v->native_integers = ctx->Const.NativeIntegers;
5042
 
5043
   v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
5044
                                            PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
5045
 
5046
   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
5047
					       prog->Parameters);
5048
 
5049
   /* Remove reads from output registers. */
5050
   lower_output_reads(shader->ir);
5051
 
5052
   /* Emit intermediate IR for main(). */
5053
   visit_exec_list(shader->ir, v);
5054
 
5055
   /* Now emit bodies for any functions that were used. */
5056
   do {
5057
      progress = GL_FALSE;
5058
 
5059
      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
5060
         function_entry *entry = (function_entry *)iter.get();
5061
 
5062
         if (!entry->bgn_inst) {
5063
            v->current_function = entry;
5064
 
5065
            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
5066
            entry->bgn_inst->function = entry;
5067
 
5068
            visit_exec_list(&entry->sig->body, v);
5069
 
5070
            glsl_to_tgsi_instruction *last;
5071
            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
5072
            if (last->op != TGSI_OPCODE_RET)
5073
               v->emit(NULL, TGSI_OPCODE_RET);
5074
 
5075
            glsl_to_tgsi_instruction *end;
5076
            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
5077
            end->function = entry;
5078
 
5079
            progress = GL_TRUE;
5080
         }
5081
      }
5082
   } while (progress);
5083
 
5084
#if 0
5085
   /* Print out some information (for debugging purposes) used by the
5086
    * optimization passes. */
5087
   for (i=0; i < v->next_temp; i++) {
5088
      int fr = v->get_first_temp_read(i);
5089
      int fw = v->get_first_temp_write(i);
5090
      int lr = v->get_last_temp_read(i);
5091
      int lw = v->get_last_temp_write(i);
5092
 
5093
      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
5094
      assert(fw <= fr);
5095
   }
5096
#endif
5097
 
5098
   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
5099
   v->simplify_cmp();
5100
   v->copy_propagate();
5101
   while (v->eliminate_dead_code_advanced());
5102
 
5103
   v->eliminate_dead_code();
5104
   v->merge_registers();
5105
   v->renumber_registers();
5106
 
5107
   /* Write the END instruction. */
5108
   v->emit(NULL, TGSI_OPCODE_END);
5109
 
5110
   if (ctx->Shader.Flags & GLSL_DUMP) {
5111
      printf("\n");
5112
      printf("GLSL IR for linked %s program %d:\n",
5113
             _mesa_glsl_shader_target_name(shader->Type),
5114
             shader_program->Name);
5115
      _mesa_print_ir(shader->ir, NULL);
5116
      printf("\n");
5117
      printf("\n");
5118
      fflush(stdout);
5119
   }
5120
 
5121
   prog->Instructions = NULL;
5122
   prog->NumInstructions = 0;
5123
 
5124
   do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
5125
   count_resources(v, prog);
5126
 
5127
   _mesa_reference_program(ctx, &shader->Program, prog);
5128
 
5129
   /* This has to be done last.  Any operation the can cause
5130
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
5131
    * program constant) has to happen before creating this linkage.
5132
    */
5133
   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
5134
   if (!shader_program->LinkStatus) {
5135
      return NULL;
5136
   }
5137
 
5138
   struct st_vertex_program *stvp;
5139
   struct st_fragment_program *stfp;
5140
   struct st_geometry_program *stgp;
5141
 
5142
   switch (shader->Type) {
5143
   case GL_VERTEX_SHADER:
5144
      stvp = (struct st_vertex_program *)prog;
5145
      stvp->glsl_to_tgsi = v;
5146
      break;
5147
   case GL_FRAGMENT_SHADER:
5148
      stfp = (struct st_fragment_program *)prog;
5149
      stfp->glsl_to_tgsi = v;
5150
      break;
5151
   case GL_GEOMETRY_SHADER:
5152
      stgp = (struct st_geometry_program *)prog;
5153
      stgp->glsl_to_tgsi = v;
5154
      break;
5155
   default:
5156
      assert(!"should not be reached");
5157
      return NULL;
5158
   }
5159
 
5160
   return prog;
5161
}
5162
 
5163
extern "C" {
5164
 
5165
struct gl_shader *
5166
st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
5167
{
5168
   struct gl_shader *shader;
5169
   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
5170
          type == GL_GEOMETRY_SHADER_ARB);
5171
   shader = rzalloc(NULL, struct gl_shader);
5172
   if (shader) {
5173
      shader->Type = type;
5174
      shader->Name = name;
5175
      _mesa_init_shader(ctx, shader);
5176
   }
5177
   return shader;
5178
}
5179
 
5180
struct gl_shader_program *
5181
st_new_shader_program(struct gl_context *ctx, GLuint name)
5182
{
5183
   struct gl_shader_program *shProg;
5184
   shProg = rzalloc(NULL, struct gl_shader_program);
5185
   if (shProg) {
5186
      shProg->Name = name;
5187
      _mesa_init_shader_program(ctx, shProg);
5188
   }
5189
   return shProg;
5190
}
5191
 
5192
/**
5193
 * Link a shader.
5194
 * Called via ctx->Driver.LinkShader()
5195
 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
5196
 * with code lowering and other optimizations.
5197
 */
5198
GLboolean
5199
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
5200
{
5201
   assert(prog->LinkStatus);
5202
 
5203
   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5204
      if (prog->_LinkedShaders[i] == NULL)
5205
         continue;
5206
 
5207
      bool progress;
5208
      exec_list *ir = prog->_LinkedShaders[i]->ir;
5209
      const struct gl_shader_compiler_options *options =
5210
            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
5211
 
5212
      /* If there are forms of indirect addressing that the driver
5213
       * cannot handle, perform the lowering pass.
5214
       */
5215
      if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
5216
          options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
5217
         lower_variable_index_to_cond_assign(ir,
5218
                                             options->EmitNoIndirectInput,
5219
                                             options->EmitNoIndirectOutput,
5220
                                             options->EmitNoIndirectTemp,
5221
                                             options->EmitNoIndirectUniform);
5222
      }
5223
 
5224
      if (ctx->Extensions.ARB_shading_language_packing) {
5225
         unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
5226
                               LOWER_UNPACK_SNORM_2x16 |
5227
                               LOWER_PACK_UNORM_2x16 |
5228
                               LOWER_UNPACK_UNORM_2x16 |
5229
                               LOWER_PACK_SNORM_4x8 |
5230
                               LOWER_UNPACK_SNORM_4x8 |
5231
                               LOWER_UNPACK_UNORM_4x8 |
5232
                               LOWER_PACK_UNORM_4x8 |
5233
                               LOWER_PACK_HALF_2x16 |
5234
                               LOWER_UNPACK_HALF_2x16;
5235
 
5236
         lower_packing_builtins(ir, lower_inst);
5237
      }
5238
 
5239
      do_mat_op_to_vec(ir);
5240
      lower_instructions(ir,
5241
                         MOD_TO_FRACT |
5242
                         DIV_TO_MUL_RCP |
5243
                         EXP_TO_EXP2 |
5244
                         LOG_TO_LOG2 |
5245
                         (options->EmitNoPow ? POW_TO_EXP2 : 0) |
5246
                         (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
5247
 
5248
      lower_ubo_reference(prog->_LinkedShaders[i], ir);
5249
      do_vec_index_to_cond_assign(ir);
5250
      lower_vector_insert(ir, true);
5251
      lower_quadop_vector(ir, false);
5252
      lower_noise(ir);
5253
      if (options->MaxIfDepth == 0) {
5254
         lower_discard(ir);
5255
      }
5256
 
5257
      do {
5258
         progress = false;
5259
 
5260
         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
5261
 
5262
         progress = do_common_optimization(ir, true, true,
5263
					   options->MaxUnrollIterations, options)
5264
	   || progress;
5265
 
5266
         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
5267
 
5268
      } while (progress);
5269
 
5270
      validate_ir_tree(ir);
5271
   }
5272
 
5273
   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5274
      struct gl_program *linked_prog;
5275
 
5276
      if (prog->_LinkedShaders[i] == NULL)
5277
         continue;
5278
 
5279
      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
5280
 
5281
      if (linked_prog) {
5282
	 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
5283
				 linked_prog);
5284
         if (!ctx->Driver.ProgramStringNotify(ctx,
5285
                                              _mesa_program_index_to_target(i),
5286
                                              linked_prog)) {
5287
	    _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
5288
				    NULL);
5289
            _mesa_reference_program(ctx, &linked_prog, NULL);
5290
            return GL_FALSE;
5291
         }
5292
      }
5293
 
5294
      _mesa_reference_program(ctx, &linked_prog, NULL);
5295
   }
5296
 
5297
   return GL_TRUE;
5298
}
5299
 
5300
void
5301
st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
5302
                                const GLuint outputMapping[],
5303
                                struct pipe_stream_output_info *so)
5304
{
5305
   unsigned i;
5306
   struct gl_transform_feedback_info *info =
5307
      &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
5308
 
5309
   for (i = 0; i < info->NumOutputs; i++) {
5310
      so->output[i].register_index =
5311
         outputMapping[info->Outputs[i].OutputRegister];
5312
      so->output[i].start_component = info->Outputs[i].ComponentOffset;
5313
      so->output[i].num_components = info->Outputs[i].NumComponents;
5314
      so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
5315
      so->output[i].dst_offset = info->Outputs[i].DstOffset;
5316
   }
5317
 
5318
   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
5319
      so->stride[i] = info->BufferStride[i];
5320
   }
5321
   so->num_outputs = info->NumOutputs;
5322
}
5323
 
5324
} /* extern "C" */