Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3770 Serge 1
/**************************************************************************
2
 *
3
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4
 * All Rights Reserved.
5
 * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the
9
 * "Software"), to deal in the Software without restriction, including
10
 * without limitation the rights to use, copy, modify, merge, publish,
11
 * distribute, sub license, and/or sell copies of the Software, and to
12
 * permit persons to whom the Software is furnished to do so, subject to
13
 * the following conditions:
14
 *
15
 * The above copyright notice and this permission notice (including the
16
 * next paragraph) shall be included in all copies or substantial portions
17
 * of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 *
27
 **************************************************************************/
28
 
29
/**
30
 * TGSI interpreter/executor.
31
 *
32
 * Flow control information:
33
 *
34
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36
 * care since a condition may be true for some quad components but false
37
 * for other components.
38
 *
39
 * We basically execute all statements (even if they're in the part of
40
 * an IF/ELSE clause that's "not taken") and use a special mask to
41
 * control writing to destination registers.  This is the ExecMask.
42
 * See store_dest().
43
 *
44
 * The ExecMask is computed from five other masks (CondMask, LoopMask, ContMask,
45
 * Switch.mask and FuncMask; see UPDATE_EXEC_MASK).  The first three are controlled
46
 * by the flow control instructions (namely IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47
 *
48
 *
49
 * Authors:
50
 *   Michal Krol
51
 *   Brian Paul
52
 */
53
 
54
#include "pipe/p_compiler.h"
55
#include "pipe/p_state.h"
56
#include "pipe/p_shader_tokens.h"
57
#include "tgsi/tgsi_dump.h"
58
#include "tgsi/tgsi_parse.h"
59
#include "tgsi/tgsi_util.h"
60
#include "tgsi_exec.h"
61
#include "util/u_memory.h"
62
#include "util/u_math.h"
63
 
64
 
65
#define DEBUG_EXECUTION 0
66
 
67
 
68
#define FAST_MATH 0
69
 
70
#define TILE_TOP_LEFT     0
71
#define TILE_TOP_RIGHT    1
72
#define TILE_BOTTOM_LEFT  2
73
#define TILE_BOTTOM_RIGHT 3
74
 
75
static void
76
micro_abs(union tgsi_exec_channel *dst,
77
          const union tgsi_exec_channel *src)
78
{
79
   dst->f[0] = fabsf(src->f[0]);
80
   dst->f[1] = fabsf(src->f[1]);
81
   dst->f[2] = fabsf(src->f[2]);
82
   dst->f[3] = fabsf(src->f[3]);
83
}
84
 
85
static void
86
micro_arl(union tgsi_exec_channel *dst,
87
          const union tgsi_exec_channel *src)
88
{
89
   dst->i[0] = (int)floorf(src->f[0]);
90
   dst->i[1] = (int)floorf(src->f[1]);
91
   dst->i[2] = (int)floorf(src->f[2]);
92
   dst->i[3] = (int)floorf(src->f[3]);
93
}
94
 
95
static void
96
micro_arr(union tgsi_exec_channel *dst,
97
          const union tgsi_exec_channel *src)
98
{
99
   dst->i[0] = (int)floorf(src->f[0] + 0.5f);
100
   dst->i[1] = (int)floorf(src->f[1] + 0.5f);
101
   dst->i[2] = (int)floorf(src->f[2] + 0.5f);
102
   dst->i[3] = (int)floorf(src->f[3] + 0.5f);
103
}
104
 
105
static void
106
micro_ceil(union tgsi_exec_channel *dst,
107
           const union tgsi_exec_channel *src)
108
{
109
   dst->f[0] = ceilf(src->f[0]);
110
   dst->f[1] = ceilf(src->f[1]);
111
   dst->f[2] = ceilf(src->f[2]);
112
   dst->f[3] = ceilf(src->f[3]);
113
}
114
 
115
static void
116
micro_clamp(union tgsi_exec_channel *dst,
117
            const union tgsi_exec_channel *src0,
118
            const union tgsi_exec_channel *src1,
119
            const union tgsi_exec_channel *src2)
120
{
121
   dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
122
   dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
123
   dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
124
   dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
125
}
126
 
127
static void
128
micro_cmp(union tgsi_exec_channel *dst,
129
          const union tgsi_exec_channel *src0,
130
          const union tgsi_exec_channel *src1,
131
          const union tgsi_exec_channel *src2)
132
{
133
   dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
134
   dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
135
   dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
136
   dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
137
}
138
 
139
static void
140
micro_cnd(union tgsi_exec_channel *dst,
141
          const union tgsi_exec_channel *src0,
142
          const union tgsi_exec_channel *src1,
143
          const union tgsi_exec_channel *src2)
144
{
145
   dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0];
146
   dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1];
147
   dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2];
148
   dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3];
149
}
150
 
151
static void
152
micro_cos(union tgsi_exec_channel *dst,
153
          const union tgsi_exec_channel *src)
154
{
155
   dst->f[0] = cosf(src->f[0]);
156
   dst->f[1] = cosf(src->f[1]);
157
   dst->f[2] = cosf(src->f[2]);
158
   dst->f[3] = cosf(src->f[3]);
159
}
160
 
161
static void
162
micro_ddx(union tgsi_exec_channel *dst,
163
          const union tgsi_exec_channel *src)
164
{
165
   dst->f[0] =
166
   dst->f[1] =
167
   dst->f[2] =
168
   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
169
}
170
 
171
static void
172
micro_ddy(union tgsi_exec_channel *dst,
173
          const union tgsi_exec_channel *src)
174
{
175
   dst->f[0] =
176
   dst->f[1] =
177
   dst->f[2] =
178
   dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
179
}
180
 
181
static void
182
micro_exp2(union tgsi_exec_channel *dst,
183
           const union tgsi_exec_channel *src)
184
{
185
#if FAST_MATH
186
   dst->f[0] = util_fast_exp2(src->f[0]);
187
   dst->f[1] = util_fast_exp2(src->f[1]);
188
   dst->f[2] = util_fast_exp2(src->f[2]);
189
   dst->f[3] = util_fast_exp2(src->f[3]);
190
#else
191
#if DEBUG
192
   /* Inf is okay for this instruction, so clamp it to silence assertions. */
193
   uint i;
194
   union tgsi_exec_channel clamped;
195
 
196
   for (i = 0; i < 4; i++) {
197
      if (src->f[i] > 127.99999f) {
198
         clamped.f[i] = 127.99999f;
199
      } else if (src->f[i] < -126.99999f) {
200
         clamped.f[i] = -126.99999f;
201
      } else {
202
         clamped.f[i] = src->f[i];
203
      }
204
   }
205
   src = &clamped;
206
#endif /* DEBUG */
207
 
208
   dst->f[0] = powf(2.0f, src->f[0]);
209
   dst->f[1] = powf(2.0f, src->f[1]);
210
   dst->f[2] = powf(2.0f, src->f[2]);
211
   dst->f[3] = powf(2.0f, src->f[3]);
212
#endif /* FAST_MATH */
213
}
214
 
215
static void
216
micro_flr(union tgsi_exec_channel *dst,
217
          const union tgsi_exec_channel *src)
218
{
219
   dst->f[0] = floorf(src->f[0]);
220
   dst->f[1] = floorf(src->f[1]);
221
   dst->f[2] = floorf(src->f[2]);
222
   dst->f[3] = floorf(src->f[3]);
223
}
224
 
225
static void
226
micro_frc(union tgsi_exec_channel *dst,
227
          const union tgsi_exec_channel *src)
228
{
229
   dst->f[0] = src->f[0] - floorf(src->f[0]);
230
   dst->f[1] = src->f[1] - floorf(src->f[1]);
231
   dst->f[2] = src->f[2] - floorf(src->f[2]);
232
   dst->f[3] = src->f[3] - floorf(src->f[3]);
233
}
234
 
235
static void
236
micro_iabs(union tgsi_exec_channel *dst,
237
           const union tgsi_exec_channel *src)
238
{
239
   dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
240
   dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
241
   dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
242
   dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
243
}
244
 
245
static void
246
micro_ineg(union tgsi_exec_channel *dst,
247
           const union tgsi_exec_channel *src)
248
{
249
   dst->i[0] = -src->i[0];
250
   dst->i[1] = -src->i[1];
251
   dst->i[2] = -src->i[2];
252
   dst->i[3] = -src->i[3];
253
}
254
 
255
static void
256
micro_lg2(union tgsi_exec_channel *dst,
257
          const union tgsi_exec_channel *src)
258
{
259
#if FAST_MATH
260
   dst->f[0] = util_fast_log2(src->f[0]);
261
   dst->f[1] = util_fast_log2(src->f[1]);
262
   dst->f[2] = util_fast_log2(src->f[2]);
263
   dst->f[3] = util_fast_log2(src->f[3]);
264
#else
265
   dst->f[0] = logf(src->f[0]) * 1.442695f;
266
   dst->f[1] = logf(src->f[1]) * 1.442695f;
267
   dst->f[2] = logf(src->f[2]) * 1.442695f;
268
   dst->f[3] = logf(src->f[3]) * 1.442695f;
269
#endif
270
}
271
 
272
static void
273
micro_lrp(union tgsi_exec_channel *dst,
274
          const union tgsi_exec_channel *src0,
275
          const union tgsi_exec_channel *src1,
276
          const union tgsi_exec_channel *src2)
277
{
278
   dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
279
   dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
280
   dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
281
   dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
282
}
283
 
284
static void
285
micro_mad(union tgsi_exec_channel *dst,
286
          const union tgsi_exec_channel *src0,
287
          const union tgsi_exec_channel *src1,
288
          const union tgsi_exec_channel *src2)
289
{
290
   dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
291
   dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
292
   dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
293
   dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
294
}
295
 
296
static void
297
micro_mov(union tgsi_exec_channel *dst,
298
          const union tgsi_exec_channel *src)
299
{
300
   dst->u[0] = src->u[0];
301
   dst->u[1] = src->u[1];
302
   dst->u[2] = src->u[2];
303
   dst->u[3] = src->u[3];
304
}
305
 
306
static void
307
micro_rcp(union tgsi_exec_channel *dst,
308
          const union tgsi_exec_channel *src)
309
{
310
#if 0 /* for debugging */
311
   assert(src->f[0] != 0.0f);
312
   assert(src->f[1] != 0.0f);
313
   assert(src->f[2] != 0.0f);
314
   assert(src->f[3] != 0.0f);
315
#endif
316
   dst->f[0] = 1.0f / src->f[0];
317
   dst->f[1] = 1.0f / src->f[1];
318
   dst->f[2] = 1.0f / src->f[2];
319
   dst->f[3] = 1.0f / src->f[3];
320
}
321
 
322
static void
323
micro_rnd(union tgsi_exec_channel *dst,
324
          const union tgsi_exec_channel *src)
325
{
326
   dst->f[0] = floorf(src->f[0] + 0.5f);
327
   dst->f[1] = floorf(src->f[1] + 0.5f);
328
   dst->f[2] = floorf(src->f[2] + 0.5f);
329
   dst->f[3] = floorf(src->f[3] + 0.5f);
330
}
331
 
332
static void
333
micro_rsq(union tgsi_exec_channel *dst,
334
          const union tgsi_exec_channel *src)
335
{
336
#if 0 /* for debugging */
337
   assert(src->f[0] != 0.0f);
338
   assert(src->f[1] != 0.0f);
339
   assert(src->f[2] != 0.0f);
340
   assert(src->f[3] != 0.0f);
341
#endif
342
   dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
343
   dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
344
   dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
345
   dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
346
}
347
 
348
static void
349
micro_sqrt(union tgsi_exec_channel *dst,
350
           const union tgsi_exec_channel *src)
351
{
352
   dst->f[0] = sqrtf(fabsf(src->f[0]));
353
   dst->f[1] = sqrtf(fabsf(src->f[1]));
354
   dst->f[2] = sqrtf(fabsf(src->f[2]));
355
   dst->f[3] = sqrtf(fabsf(src->f[3]));
356
}
357
 
358
static void
359
micro_seq(union tgsi_exec_channel *dst,
360
          const union tgsi_exec_channel *src0,
361
          const union tgsi_exec_channel *src1)
362
{
363
   dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
364
   dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
365
   dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
366
   dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
367
}
368
 
369
static void
370
micro_sge(union tgsi_exec_channel *dst,
371
          const union tgsi_exec_channel *src0,
372
          const union tgsi_exec_channel *src1)
373
{
374
   dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
375
   dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
376
   dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
377
   dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
378
}
379
 
380
static void
381
micro_sgn(union tgsi_exec_channel *dst,
382
          const union tgsi_exec_channel *src)
383
{
384
   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
385
   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
386
   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
387
   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
388
}
389
 
390
static void
391
micro_isgn(union tgsi_exec_channel *dst,
392
          const union tgsi_exec_channel *src)
393
{
394
   dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
395
   dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
396
   dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
397
   dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
398
}
399
 
400
static void
401
micro_sgt(union tgsi_exec_channel *dst,
402
          const union tgsi_exec_channel *src0,
403
          const union tgsi_exec_channel *src1)
404
{
405
   dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
406
   dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
407
   dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
408
   dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
409
}
410
 
411
static void
412
micro_sin(union tgsi_exec_channel *dst,
413
          const union tgsi_exec_channel *src)
414
{
415
   dst->f[0] = sinf(src->f[0]);
416
   dst->f[1] = sinf(src->f[1]);
417
   dst->f[2] = sinf(src->f[2]);
418
   dst->f[3] = sinf(src->f[3]);
419
}
420
 
421
static void
422
micro_sle(union tgsi_exec_channel *dst,
423
          const union tgsi_exec_channel *src0,
424
          const union tgsi_exec_channel *src1)
425
{
426
   dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
427
   dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
428
   dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
429
   dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
430
}
431
 
432
static void
433
micro_slt(union tgsi_exec_channel *dst,
434
          const union tgsi_exec_channel *src0,
435
          const union tgsi_exec_channel *src1)
436
{
437
   dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
438
   dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
439
   dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
440
   dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
441
}
442
 
443
static void
444
micro_sne(union tgsi_exec_channel *dst,
445
          const union tgsi_exec_channel *src0,
446
          const union tgsi_exec_channel *src1)
447
{
448
   dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
449
   dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
450
   dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
451
   dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
452
}
453
 
454
static void
455
micro_sfl(union tgsi_exec_channel *dst)
456
{
457
   dst->f[0] = 0.0f;
458
   dst->f[1] = 0.0f;
459
   dst->f[2] = 0.0f;
460
   dst->f[3] = 0.0f;
461
}
462
 
463
static void
464
micro_str(union tgsi_exec_channel *dst)
465
{
466
   dst->f[0] = 1.0f;
467
   dst->f[1] = 1.0f;
468
   dst->f[2] = 1.0f;
469
   dst->f[3] = 1.0f;
470
}
471
 
472
static void
473
micro_trunc(union tgsi_exec_channel *dst,
474
            const union tgsi_exec_channel *src)
475
{
476
   dst->f[0] = (float)(int)src->f[0];
477
   dst->f[1] = (float)(int)src->f[1];
478
   dst->f[2] = (float)(int)src->f[2];
479
   dst->f[3] = (float)(int)src->f[3];
480
}
481
 
482
 
483
/**
 * Identifies how the bits of an execution channel are to be interpreted:
 * as floats, signed integers or unsigned integers.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
488
 
489
/*
 * Short aliases for the locations of various utility registers.
 * Each location is a pair: _I is the register index, _C the channel.
 */

/* kill mask */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C

/* output */
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C

/* primitive */
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
498
 
499
 
500
/**
 * Recompute the execution mask of a machine.
 *
 * ExecMask becomes the bitwise AND of CondMask, LoopMask, ContMask,
 * Switch.mask and FuncMask.
 *
 * MACH is a pointer to the machine.  It is expanded several times, so it
 * must not have side effects; it is parenthesized so that any pointer
 * expression (e.g. &machine) can be passed.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->Switch.mask & \
                          (MACH)->FuncMask)
503
 
504
 
505
static const union tgsi_exec_channel ZeroVec =
506
   { { 0.0, 0.0, 0.0, 0.0 } };
507
 
508
static const union tgsi_exec_channel OneVec = {
509
   {1.0f, 1.0f, 1.0f, 1.0f}
510
};
511
 
512
static const union tgsi_exec_channel P128Vec = {
513
   {128.0f, 128.0f, 128.0f, 128.0f}
514
};
515
 
516
static const union tgsi_exec_channel M128Vec = {
517
   {-128.0f, -128.0f, -128.0f, -128.0f}
518
};
519
 
520
 
521
/**
522
 * Assert that none of the float values in 'chan' are infinite or NaN.
523
 * NaN and Inf may occur normally during program execution and should
524
 * not lead to crashes, etc.  But when debugging, it's helpful to catch
525
 * them.
526
 */
527
static INLINE void
528
check_inf_or_nan(const union tgsi_exec_channel *chan)
529
{
530
   assert(!util_is_inf_or_nan((chan)->f[0]));
531
   assert(!util_is_inf_or_nan((chan)->f[1]));
532
   assert(!util_is_inf_or_nan((chan)->f[2]));
533
   assert(!util_is_inf_or_nan((chan)->f[3]));
534
}
535
 
536
 
537
#ifdef DEBUG
/**
 * Debug helper: print the four float values of 'chan', prefixed by 'msg',
 * on a single line via debug_printf().
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const double c0 = chan->f[0];
   const double c1 = chan->f[1];
   const double c2 = chan->f[2];
   const double c3 = chan->f[3];

   debug_printf("%s = {%f, %f, %f, %f}\n", msg, c0, c1, c2, c3);
}
#endif
545
 
546
 
547
#ifdef DEBUG
/**
 * Debug helper: print temporary register 'index' of the machine, one line
 * per X/Y/Z/W component, each line listing that component's four values.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char component[4] = { 'X', 'Y', 'Z', 'W' };
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      debug_printf("  %c: { %f, %f, %f, %f }\n",
                   component[c],
                   mach->Temps[index].xyzw[c].f[0],
                   mach->Temps[index].xyzw[c].f[1],
                   mach->Temps[index].xyzw[c].f[2],
                   mach->Temps[index].xyzw[c].f[3]);
   }
}
#endif
564
 
565
 
566
void
567
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
568
                               unsigned num_bufs,
569
                               const void **bufs,
570
                               const unsigned *buf_sizes)
571
{
572
   unsigned i;
573
 
574
   for (i = 0; i < num_bufs; i++) {
575
      mach->Consts[i] = bufs[i];
576
      mach->ConstsSize[i] = buf_sizes[i];
577
   }
578
}
579
 
580
 
581
/**
582
 * Check if there's a potential src/dst register data dependency when
583
 * using SOA execution.
584
 * Example:
585
 *   MOV T, T.yxwz;
586
 * This would expand into:
587
 *   MOV t0, t1;
588
 *   MOV t1, t0;
589
 *   MOV t2, t3;
590
 *   MOV t3, t2;
591
 * The second instruction will have the wrong value for t0 if executed as-is.
592
 */
593
boolean
594
tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
595
{
596
   uint i, chan;
597
 
598
   uint writemask = inst->Dst[0].Register.WriteMask;
599
   if (writemask == TGSI_WRITEMASK_X ||
600
       writemask == TGSI_WRITEMASK_Y ||
601
       writemask == TGSI_WRITEMASK_Z ||
602
       writemask == TGSI_WRITEMASK_W ||
603
       writemask == TGSI_WRITEMASK_NONE) {
604
      /* no chance of data dependency */
605
      return FALSE;
606
   }
607
 
608
   /* loop over src regs */
609
   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
610
      if ((inst->Src[i].Register.File ==
611
           inst->Dst[0].Register.File) &&
612
          ((inst->Src[i].Register.Index ==
613
            inst->Dst[0].Register.Index) ||
614
           inst->Src[i].Register.Indirect ||
615
           inst->Dst[0].Register.Indirect)) {
616
         /* loop over dest channels */
617
         uint channelsWritten = 0x0;
618
         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
619
            if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
620
               /* check if we're reading a channel that's been written */
621
               uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
622
               if (channelsWritten & (1 << swizzle)) {
623
                  return TRUE;
624
               }
625
 
626
               channelsWritten |= (1 << chan);
627
            }
628
         }
629
      }
630
   }
631
   return FALSE;
632
}
633
 
634
 
635
/**
636
 * Initialize machine state by expanding tokens to full instructions,
637
 * allocating temporary storage, setting up constants, etc.
638
 * After this, we can call tgsi_exec_machine_run() many times.
639
 */
640
void
641
tgsi_exec_machine_bind_shader(
642
   struct tgsi_exec_machine *mach,
643
   const struct tgsi_token *tokens,
644
   struct tgsi_sampler *sampler)
645
{
646
   uint k;
647
   struct tgsi_parse_context parse;
648
   struct tgsi_full_instruction *instructions;
649
   struct tgsi_full_declaration *declarations;
650
   uint maxInstructions = 10, numInstructions = 0;
651
   uint maxDeclarations = 10, numDeclarations = 0;
652
 
653
#if 0
654
   tgsi_dump(tokens, 0);
655
#endif
656
 
657
   util_init_math();
658
 
659
 
660
   mach->Tokens = tokens;
661
   mach->Sampler = sampler;
662
 
663
   if (!tokens) {
664
      /* unbind and free all */
665
      FREE(mach->Declarations);
666
      mach->Declarations = NULL;
667
      mach->NumDeclarations = 0;
668
 
669
      FREE(mach->Instructions);
670
      mach->Instructions = NULL;
671
      mach->NumInstructions = 0;
672
 
673
      return;
674
   }
675
 
676
   k = tgsi_parse_init (&parse, mach->Tokens);
677
   if (k != TGSI_PARSE_OK) {
678
      debug_printf( "Problem parsing!\n" );
679
      return;
680
   }
681
 
682
   mach->Processor = parse.FullHeader.Processor.Processor;
683
   mach->ImmLimit = 0;
684
   mach->NumOutputs = 0;
685
 
686
   if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
687
       !mach->UsedGeometryShader) {
688
      struct tgsi_exec_vector *inputs;
689
      struct tgsi_exec_vector *outputs;
690
 
691
      inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
692
                            TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS,
693
                            16);
694
 
695
      if (!inputs)
696
         return;
697
 
698
      outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
699
                             TGSI_MAX_TOTAL_VERTICES, 16);
700
 
701
      if (!outputs) {
702
         align_free(inputs);
703
         return;
704
      }
705
 
706
      align_free(mach->Inputs);
707
      align_free(mach->Outputs);
708
 
709
      mach->Inputs = inputs;
710
      mach->Outputs = outputs;
711
      mach->UsedGeometryShader = TRUE;
712
   }
713
 
714
   declarations = (struct tgsi_full_declaration *)
715
      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
716
 
717
   if (!declarations) {
718
      return;
719
   }
720
 
721
   instructions = (struct tgsi_full_instruction *)
722
      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
723
 
724
   if (!instructions) {
725
      FREE( declarations );
726
      return;
727
   }
728
 
729
   while( !tgsi_parse_end_of_tokens( &parse ) ) {
730
      uint i;
731
 
732
      tgsi_parse_token( &parse );
733
      switch( parse.FullToken.Token.Type ) {
734
      case TGSI_TOKEN_TYPE_DECLARATION:
735
         /* save expanded declaration */
736
         if (numDeclarations == maxDeclarations) {
737
            declarations = REALLOC(declarations,
738
                                   maxDeclarations
739
                                   * sizeof(struct tgsi_full_declaration),
740
                                   (maxDeclarations + 10)
741
                                   * sizeof(struct tgsi_full_declaration));
742
            maxDeclarations += 10;
743
         }
744
         if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
745
            unsigned reg;
746
            for (reg = parse.FullToken.FullDeclaration.Range.First;
747
                 reg <= parse.FullToken.FullDeclaration.Range.Last;
748
                 ++reg) {
749
               ++mach->NumOutputs;
750
            }
751
         }
752
         memcpy(declarations + numDeclarations,
753
                &parse.FullToken.FullDeclaration,
754
                sizeof(declarations[0]));
755
         numDeclarations++;
756
         break;
757
 
758
      case TGSI_TOKEN_TYPE_IMMEDIATE:
759
         {
760
            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
761
            assert( size <= 4 );
762
            assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
763
 
764
            for( i = 0; i < size; i++ ) {
765
               mach->Imms[mach->ImmLimit][i] =
766
		  parse.FullToken.FullImmediate.u[i].Float;
767
            }
768
            mach->ImmLimit += 1;
769
         }
770
         break;
771
 
772
      case TGSI_TOKEN_TYPE_INSTRUCTION:
773
 
774
         /* save expanded instruction */
775
         if (numInstructions == maxInstructions) {
776
            instructions = REALLOC(instructions,
777
                                   maxInstructions
778
                                   * sizeof(struct tgsi_full_instruction),
779
                                   (maxInstructions + 10)
780
                                   * sizeof(struct tgsi_full_instruction));
781
            maxInstructions += 10;
782
         }
783
 
784
         memcpy(instructions + numInstructions,
785
                &parse.FullToken.FullInstruction,
786
                sizeof(instructions[0]));
787
 
788
         numInstructions++;
789
         break;
790
 
791
      case TGSI_TOKEN_TYPE_PROPERTY:
792
         break;
793
 
794
      default:
795
         assert( 0 );
796
      }
797
   }
798
   tgsi_parse_free (&parse);
799
 
800
   FREE(mach->Declarations);
801
   mach->Declarations = declarations;
802
   mach->NumDeclarations = numDeclarations;
803
 
804
   FREE(mach->Instructions);
805
   mach->Instructions = instructions;
806
   mach->NumInstructions = numInstructions;
807
}
808
 
809
 
810
struct tgsi_exec_machine *
811
tgsi_exec_machine_create( void )
812
{
813
   struct tgsi_exec_machine *mach;
814
   uint i;
815
 
816
   mach = align_malloc( sizeof *mach, 16 );
817
   if (!mach)
818
      goto fail;
819
 
820
   memset(mach, 0, sizeof(*mach));
821
 
822
   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
823
   mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
824
   mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
825
 
826
   mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
827
   mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
828
   if (!mach->Inputs || !mach->Outputs)
829
      goto fail;
830
 
831
   /* Setup constants needed by the SSE2 executor. */
832
   for( i = 0; i < 4; i++ ) {
833
      mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
834
      mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
835
      mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
836
      mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF;    /* not used */
837
      mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
838
      mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f;    /* not used */
839
      mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
840
      mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
841
      mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
842
      mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
843
   }
844
 
845
#ifdef DEBUG
846
   /* silence warnings */
847
   (void) print_chan;
848
   (void) print_temp;
849
#endif
850
 
851
   return mach;
852
 
853
fail:
854
   if (mach) {
855
      align_free(mach->Inputs);
856
      align_free(mach->Outputs);
857
      align_free(mach);
858
   }
859
   return NULL;
860
}
861
 
862
 
863
void
864
tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
865
{
866
   if (mach) {
867
      FREE(mach->Instructions);
868
      FREE(mach->Declarations);
869
 
870
      align_free(mach->Inputs);
871
      align_free(mach->Outputs);
872
 
873
      align_free(mach);
874
   }
875
}
876
 
877
static void
878
micro_add(union tgsi_exec_channel *dst,
879
          const union tgsi_exec_channel *src0,
880
          const union tgsi_exec_channel *src1)
881
{
882
   dst->f[0] = src0->f[0] + src1->f[0];
883
   dst->f[1] = src0->f[1] + src1->f[1];
884
   dst->f[2] = src0->f[2] + src1->f[2];
885
   dst->f[3] = src0->f[3] + src1->f[3];
886
}
887
 
888
static void
889
micro_div(
890
   union tgsi_exec_channel *dst,
891
   const union tgsi_exec_channel *src0,
892
   const union tgsi_exec_channel *src1 )
893
{
894
   if (src1->f[0] != 0) {
895
      dst->f[0] = src0->f[0] / src1->f[0];
896
   }
897
   if (src1->f[1] != 0) {
898
      dst->f[1] = src0->f[1] / src1->f[1];
899
   }
900
   if (src1->f[2] != 0) {
901
      dst->f[2] = src0->f[2] / src1->f[2];
902
   }
903
   if (src1->f[3] != 0) {
904
      dst->f[3] = src0->f[3] / src1->f[3];
905
   }
906
}
907
 
908
static void
909
micro_rcc(union tgsi_exec_channel *dst,
910
          const union tgsi_exec_channel *src)
911
{
912
   uint i;
913
 
914
   for (i = 0; i < 4; i++) {
915
      float recip = 1.0f / src->f[i];
916
 
917
      if (recip > 0.0f) {
918
         if (recip > 1.884467e+019f) {
919
            dst->f[i] = 1.884467e+019f;
920
         }
921
         else if (recip < 5.42101e-020f) {
922
            dst->f[i] = 5.42101e-020f;
923
         }
924
         else {
925
            dst->f[i] = recip;
926
         }
927
      }
928
      else {
929
         if (recip < -1.884467e+019f) {
930
            dst->f[i] = -1.884467e+019f;
931
         }
932
         else if (recip > -5.42101e-020f) {
933
            dst->f[i] = -5.42101e-020f;
934
         }
935
         else {
936
            dst->f[i] = recip;
937
         }
938
      }
939
   }
940
}
941
 
942
static void
943
micro_lt(
944
   union tgsi_exec_channel *dst,
945
   const union tgsi_exec_channel *src0,
946
   const union tgsi_exec_channel *src1,
947
   const union tgsi_exec_channel *src2,
948
   const union tgsi_exec_channel *src3 )
949
{
950
   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
951
   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
952
   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
953
   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
954
}
955
 
956
static void
957
micro_max(union tgsi_exec_channel *dst,
958
          const union tgsi_exec_channel *src0,
959
          const union tgsi_exec_channel *src1)
960
{
961
   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
962
   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
963
   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
964
   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
965
}
966
 
967
static void
968
micro_min(union tgsi_exec_channel *dst,
969
          const union tgsi_exec_channel *src0,
970
          const union tgsi_exec_channel *src1)
971
{
972
   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
973
   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
974
   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
975
   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
976
}
977
 
978
static void
979
micro_mul(union tgsi_exec_channel *dst,
980
          const union tgsi_exec_channel *src0,
981
          const union tgsi_exec_channel *src1)
982
{
983
   dst->f[0] = src0->f[0] * src1->f[0];
984
   dst->f[1] = src0->f[1] * src1->f[1];
985
   dst->f[2] = src0->f[2] * src1->f[2];
986
   dst->f[3] = src0->f[3] * src1->f[3];
987
}
988
 
989
static void
990
micro_neg(
991
   union tgsi_exec_channel *dst,
992
   const union tgsi_exec_channel *src )
993
{
994
   dst->f[0] = -src->f[0];
995
   dst->f[1] = -src->f[1];
996
   dst->f[2] = -src->f[2];
997
   dst->f[3] = -src->f[3];
998
}
999
 
1000
static void
1001
micro_pow(
1002
   union tgsi_exec_channel *dst,
1003
   const union tgsi_exec_channel *src0,
1004
   const union tgsi_exec_channel *src1 )
1005
{
1006
#if FAST_MATH
1007
   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1008
   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1009
   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1010
   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1011
#else
1012
   dst->f[0] = powf( src0->f[0], src1->f[0] );
1013
   dst->f[1] = powf( src0->f[1], src1->f[1] );
1014
   dst->f[2] = powf( src0->f[2], src1->f[2] );
1015
   dst->f[3] = powf( src0->f[3], src1->f[3] );
1016
#endif
1017
}
1018
 
1019
static void
1020
micro_sub(union tgsi_exec_channel *dst,
1021
          const union tgsi_exec_channel *src0,
1022
          const union tgsi_exec_channel *src1)
1023
{
1024
   dst->f[0] = src0->f[0] - src1->f[0];
1025
   dst->f[1] = src0->f[1] - src1->f[1];
1026
   dst->f[2] = src0->f[2] - src1->f[2];
1027
   dst->f[3] = src0->f[3] - src1->f[3];
1028
}
1029
 
1030
static void
1031
fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1032
                       const uint chan_index,
1033
                       const uint file,
1034
                       const uint swizzle,
1035
                       const union tgsi_exec_channel *index,
1036
                       const union tgsi_exec_channel *index2D,
1037
                       union tgsi_exec_channel *chan)
1038
{
1039
   uint i;
1040
 
1041
   assert(swizzle < 4);
1042
 
1043
   switch (file) {
1044
   case TGSI_FILE_CONSTANT:
1045
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1046
         assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1047
         assert(mach->Consts[index2D->i[i]]);
1048
 
1049
         if (index->i[i] < 0) {
1050
            chan->u[i] = 0;
1051
         } else {
1052
            /* NOTE: copying the const value as a uint instead of float */
1053
            const uint constbuf = index2D->i[i];
1054
            const uint *buf = (const uint *)mach->Consts[constbuf];
1055
            const int pos = index->i[i] * 4 + swizzle;
1056
            /* const buffer bounds check */
1057
            if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1058
               if (0) {
1059
                  /* Debug: print warning */
1060
                  static int count = 0;
1061
                  if (count++ < 100)
1062
                     debug_printf("TGSI Exec: const buffer index %d"
1063
                                  " out of bounds\n", pos);
1064
               }
1065
               chan->u[i] = 0;
1066
            }
1067
            else
1068
               chan->u[i] = buf[pos];
1069
         }
1070
      }
1071
      break;
1072
 
1073
   case TGSI_FILE_INPUT:
1074
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1075
         /*
1076
         if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1077
            debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1078
                         index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1079
                         index2D->i[i], index->i[i]);
1080
                         }*/
1081
         int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1082
         assert(pos >= 0);
1083
         assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1084
         chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1085
      }
1086
      break;
1087
 
1088
   case TGSI_FILE_SYSTEM_VALUE:
1089
      /* XXX no swizzling at this point.  Will be needed if we put
1090
       * gl_FragCoord, for example, in a sys value register.
1091
       */
1092
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1093
         chan->u[i] = mach->SystemValue[index->i[i]].u[i];
1094
      }
1095
      break;
1096
 
1097
   case TGSI_FILE_TEMPORARY:
1098
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1099
         assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1100
         assert(index2D->i[i] == 0);
1101
 
1102
         chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1103
      }
1104
      break;
1105
 
1106
   case TGSI_FILE_IMMEDIATE:
1107
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1108
         assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1109
         assert(index2D->i[i] == 0);
1110
 
1111
         chan->f[i] = mach->Imms[index->i[i]][swizzle];
1112
      }
1113
      break;
1114
 
1115
   case TGSI_FILE_ADDRESS:
1116
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1117
         assert(index->i[i] >= 0);
1118
         assert(index2D->i[i] == 0);
1119
 
1120
         chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1121
      }
1122
      break;
1123
 
1124
   case TGSI_FILE_PREDICATE:
1125
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1126
         assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1127
         assert(index2D->i[i] == 0);
1128
 
1129
         chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1130
      }
1131
      break;
1132
 
1133
   case TGSI_FILE_OUTPUT:
1134
      /* vertex/fragment output vars can be read too */
1135
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1136
         assert(index->i[i] >= 0);
1137
         assert(index2D->i[i] == 0);
1138
 
1139
         chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1140
      }
1141
      break;
1142
 
1143
   default:
1144
      assert(0);
1145
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1146
         chan->u[i] = 0;
1147
      }
1148
   }
1149
}
1150
 
1151
static void
1152
fetch_source(const struct tgsi_exec_machine *mach,
1153
             union tgsi_exec_channel *chan,
1154
             const struct tgsi_full_src_register *reg,
1155
             const uint chan_index,
1156
             enum tgsi_exec_datatype src_datatype)
1157
{
1158
   union tgsi_exec_channel index;
1159
   union tgsi_exec_channel index2D;
1160
   uint swizzle;
1161
 
1162
   /* We start with a direct index into a register file.
1163
    *
1164
    *    file[1],
1165
    *    where:
1166
    *       file = Register.File
1167
    *       [1] = Register.Index
1168
    */
1169
   index.i[0] =
1170
   index.i[1] =
1171
   index.i[2] =
1172
   index.i[3] = reg->Register.Index;
1173
 
1174
   /* There is an extra source register that indirectly subscripts
1175
    * a register file. The direct index now becomes an offset
1176
    * that is being added to the indirect register.
1177
    *
1178
    *    file[ind[2].x+1],
1179
    *    where:
1180
    *       ind = Indirect.File
1181
    *       [2] = Indirect.Index
1182
    *       .x = Indirect.SwizzleX
1183
    */
1184
   if (reg->Register.Indirect) {
1185
      union tgsi_exec_channel index2;
1186
      union tgsi_exec_channel indir_index;
1187
      const uint execmask = mach->ExecMask;
1188
      uint i;
1189
 
1190
      /* which address register (always zero now) */
1191
      index2.i[0] =
1192
      index2.i[1] =
1193
      index2.i[2] =
1194
      index2.i[3] = reg->Indirect.Index;
1195
      /* get current value of address register[swizzle] */
1196
      swizzle = reg->Indirect.Swizzle;
1197
      fetch_src_file_channel(mach,
1198
                             chan_index,
1199
                             reg->Indirect.File,
1200
                             swizzle,
1201
                             &index2,
1202
                             &ZeroVec,
1203
                             &indir_index);
1204
 
1205
      /* add value of address register to the offset */
1206
      index.i[0] += indir_index.i[0];
1207
      index.i[1] += indir_index.i[1];
1208
      index.i[2] += indir_index.i[2];
1209
      index.i[3] += indir_index.i[3];
1210
 
1211
      /* for disabled execution channels, zero-out the index to
1212
       * avoid using a potential garbage value.
1213
       */
1214
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1215
         if ((execmask & (1 << i)) == 0)
1216
            index.i[i] = 0;
1217
      }
1218
   }
1219
 
1220
   /* There is an extra source register that is a second
1221
    * subscript to a register file. Effectively it means that
1222
    * the register file is actually a 2D array of registers.
1223
    *
1224
    *    file[3][1],
1225
    *    where:
1226
    *       [3] = Dimension.Index
1227
    */
1228
   if (reg->Register.Dimension) {
1229
      index2D.i[0] =
1230
      index2D.i[1] =
1231
      index2D.i[2] =
1232
      index2D.i[3] = reg->Dimension.Index;
1233
 
1234
      /* Again, the second subscript index can be addressed indirectly
1235
       * identically to the first one.
1236
       * Nothing stops us from indirectly addressing the indirect register,
1237
       * but there is no need for that, so we won't exercise it.
1238
       *
1239
       *    file[ind[4].y+3][1],
1240
       *    where:
1241
       *       ind = DimIndirect.File
1242
       *       [4] = DimIndirect.Index
1243
       *       .y = DimIndirect.SwizzleX
1244
       */
1245
      if (reg->Dimension.Indirect) {
1246
         union tgsi_exec_channel index2;
1247
         union tgsi_exec_channel indir_index;
1248
         const uint execmask = mach->ExecMask;
1249
         uint i;
1250
 
1251
         index2.i[0] =
1252
         index2.i[1] =
1253
         index2.i[2] =
1254
         index2.i[3] = reg->DimIndirect.Index;
1255
 
1256
         swizzle = reg->DimIndirect.Swizzle;
1257
         fetch_src_file_channel(mach,
1258
                                chan_index,
1259
                                reg->DimIndirect.File,
1260
                                swizzle,
1261
                                &index2,
1262
                                &ZeroVec,
1263
                                &indir_index);
1264
 
1265
         index2D.i[0] += indir_index.i[0];
1266
         index2D.i[1] += indir_index.i[1];
1267
         index2D.i[2] += indir_index.i[2];
1268
         index2D.i[3] += indir_index.i[3];
1269
 
1270
         /* for disabled execution channels, zero-out the index to
1271
          * avoid using a potential garbage value.
1272
          */
1273
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1274
            if ((execmask & (1 << i)) == 0) {
1275
               index2D.i[i] = 0;
1276
            }
1277
         }
1278
      }
1279
 
1280
      /* If by any chance there was a need for a 3D array of register
1281
       * files, we would have to check whether Dimension is followed
1282
       * by a dimension register and continue the saga.
1283
       */
1284
   } else {
1285
      index2D.i[0] =
1286
      index2D.i[1] =
1287
      index2D.i[2] =
1288
      index2D.i[3] = 0;
1289
   }
1290
 
1291
   swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1292
   fetch_src_file_channel(mach,
1293
                          chan_index,
1294
                          reg->Register.File,
1295
                          swizzle,
1296
                          &index,
1297
                          &index2D,
1298
                          chan);
1299
 
1300
   if (reg->Register.Absolute) {
1301
      if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1302
         micro_abs(chan, chan);
1303
      } else {
1304
         micro_iabs(chan, chan);
1305
      }
1306
   }
1307
 
1308
   if (reg->Register.Negate) {
1309
      if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1310
         micro_neg(chan, chan);
1311
      } else {
1312
         micro_ineg(chan, chan);
1313
      }
1314
   }
1315
}
1316
 
1317
static void
1318
store_dest(struct tgsi_exec_machine *mach,
1319
           const union tgsi_exec_channel *chan,
1320
           const struct tgsi_full_dst_register *reg,
1321
           const struct tgsi_full_instruction *inst,
1322
           uint chan_index,
1323
           enum tgsi_exec_datatype dst_datatype)
1324
{
1325
   uint i;
1326
   union tgsi_exec_channel null;
1327
   union tgsi_exec_channel *dst;
1328
   union tgsi_exec_channel index2D;
1329
   uint execmask = mach->ExecMask;
1330
   int offset = 0;  /* indirection offset */
1331
   int index;
1332
 
1333
   /* for debugging */
1334
   if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1335
      check_inf_or_nan(chan);
1336
   }
1337
 
1338
   /* There is an extra source register that indirectly subscripts
1339
    * a register file. The direct index now becomes an offset
1340
    * that is being added to the indirect register.
1341
    *
1342
    *    file[ind[2].x+1],
1343
    *    where:
1344
    *       ind = Indirect.File
1345
    *       [2] = Indirect.Index
1346
    *       .x = Indirect.SwizzleX
1347
    */
1348
   if (reg->Register.Indirect) {
1349
      union tgsi_exec_channel index;
1350
      union tgsi_exec_channel indir_index;
1351
      uint swizzle;
1352
 
1353
      /* which address register (always zero for now) */
1354
      index.i[0] =
1355
      index.i[1] =
1356
      index.i[2] =
1357
      index.i[3] = reg->Indirect.Index;
1358
 
1359
      /* get current value of address register[swizzle] */
1360
      swizzle = reg->Indirect.Swizzle;
1361
 
1362
      /* fetch values from the address/indirection register */
1363
      fetch_src_file_channel(mach,
1364
                             chan_index,
1365
                             reg->Indirect.File,
1366
                             swizzle,
1367
                             &index,
1368
                             &ZeroVec,
1369
                             &indir_index);
1370
 
1371
      /* save indirection offset */
1372
      offset = indir_index.i[0];
1373
   }
1374
 
1375
   /* There is an extra source register that is a second
1376
    * subscript to a register file. Effectively it means that
1377
    * the register file is actually a 2D array of registers.
1378
    *
1379
    *    file[3][1],
1380
    *    where:
1381
    *       [3] = Dimension.Index
1382
    */
1383
   if (reg->Register.Dimension) {
1384
      index2D.i[0] =
1385
      index2D.i[1] =
1386
      index2D.i[2] =
1387
      index2D.i[3] = reg->Dimension.Index;
1388
 
1389
      /* Again, the second subscript index can be addressed indirectly
1390
       * identically to the first one.
1391
       * Nothing stops us from indirectly addressing the indirect register,
1392
       * but there is no need for that, so we won't exercise it.
1393
       *
1394
       *    file[ind[4].y+3][1],
1395
       *    where:
1396
       *       ind = DimIndirect.File
1397
       *       [4] = DimIndirect.Index
1398
       *       .y = DimIndirect.SwizzleX
1399
       */
1400
      if (reg->Dimension.Indirect) {
1401
         union tgsi_exec_channel index2;
1402
         union tgsi_exec_channel indir_index;
1403
         const uint execmask = mach->ExecMask;
1404
         unsigned swizzle;
1405
         uint i;
1406
 
1407
         index2.i[0] =
1408
         index2.i[1] =
1409
         index2.i[2] =
1410
         index2.i[3] = reg->DimIndirect.Index;
1411
 
1412
         swizzle = reg->DimIndirect.Swizzle;
1413
         fetch_src_file_channel(mach,
1414
                                chan_index,
1415
                                reg->DimIndirect.File,
1416
                                swizzle,
1417
                                &index2,
1418
                                &ZeroVec,
1419
                                &indir_index);
1420
 
1421
         index2D.i[0] += indir_index.i[0];
1422
         index2D.i[1] += indir_index.i[1];
1423
         index2D.i[2] += indir_index.i[2];
1424
         index2D.i[3] += indir_index.i[3];
1425
 
1426
         /* for disabled execution channels, zero-out the index to
1427
          * avoid using a potential garbage value.
1428
          */
1429
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1430
            if ((execmask & (1 << i)) == 0) {
1431
               index2D.i[i] = 0;
1432
            }
1433
         }
1434
      }
1435
 
1436
      /* If by any chance there was a need for a 3D array of register
1437
       * files, we would have to check whether Dimension is followed
1438
       * by a dimension register and continue the saga.
1439
       */
1440
   } else {
1441
      index2D.i[0] =
1442
      index2D.i[1] =
1443
      index2D.i[2] =
1444
      index2D.i[3] = 0;
1445
   }
1446
 
1447
   switch (reg->Register.File) {
1448
   case TGSI_FILE_NULL:
1449
      dst = &null;
1450
      break;
1451
 
1452
   case TGSI_FILE_OUTPUT:
1453
      index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1454
         + reg->Register.Index;
1455
      dst = &mach->Outputs[offset + index].xyzw[chan_index];
1456
#if 0
1457
      debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1458
                   mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1459
                   reg->Register.Index);
1460
      if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1461
         debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1462
         for (i = 0; i < TGSI_QUAD_SIZE; i++)
1463
            if (execmask & (1 << i))
1464
               debug_printf("%f, ", chan->f[i]);
1465
         debug_printf(")\n");
1466
      }
1467
#endif
1468
      break;
1469
 
1470
   case TGSI_FILE_TEMPORARY:
1471
      index = reg->Register.Index;
1472
      assert( index < TGSI_EXEC_NUM_TEMPS );
1473
      dst = &mach->Temps[offset + index].xyzw[chan_index];
1474
      break;
1475
 
1476
   case TGSI_FILE_ADDRESS:
1477
      index = reg->Register.Index;
1478
      dst = &mach->Addrs[index].xyzw[chan_index];
1479
      break;
1480
 
1481
   case TGSI_FILE_PREDICATE:
1482
      index = reg->Register.Index;
1483
      assert(index < TGSI_EXEC_NUM_PREDS);
1484
      dst = &mach->Predicates[index].xyzw[chan_index];
1485
      break;
1486
 
1487
   default:
1488
      assert( 0 );
1489
      return;
1490
   }
1491
 
1492
   if (inst->Instruction.Predicate) {
1493
      uint swizzle;
1494
      union tgsi_exec_channel *pred;
1495
 
1496
      switch (chan_index) {
1497
      case TGSI_CHAN_X:
1498
         swizzle = inst->Predicate.SwizzleX;
1499
         break;
1500
      case TGSI_CHAN_Y:
1501
         swizzle = inst->Predicate.SwizzleY;
1502
         break;
1503
      case TGSI_CHAN_Z:
1504
         swizzle = inst->Predicate.SwizzleZ;
1505
         break;
1506
      case TGSI_CHAN_W:
1507
         swizzle = inst->Predicate.SwizzleW;
1508
         break;
1509
      default:
1510
         assert(0);
1511
         return;
1512
      }
1513
 
1514
      assert(inst->Predicate.Index == 0);
1515
 
1516
      pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1517
 
1518
      if (inst->Predicate.Negate) {
1519
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1520
            if (pred->u[i]) {
1521
               execmask &= ~(1 << i);
1522
            }
1523
         }
1524
      } else {
1525
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1526
            if (!pred->u[i]) {
1527
               execmask &= ~(1 << i);
1528
            }
1529
         }
1530
      }
1531
   }
1532
 
1533
   switch (inst->Instruction.Saturate) {
1534
   case TGSI_SAT_NONE:
1535
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1536
         if (execmask & (1 << i))
1537
            dst->i[i] = chan->i[i];
1538
      break;
1539
 
1540
   case TGSI_SAT_ZERO_ONE:
1541
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1542
         if (execmask & (1 << i)) {
1543
            if (chan->f[i] < 0.0f)
1544
               dst->f[i] = 0.0f;
1545
            else if (chan->f[i] > 1.0f)
1546
               dst->f[i] = 1.0f;
1547
            else
1548
               dst->i[i] = chan->i[i];
1549
         }
1550
      break;
1551
 
1552
   case TGSI_SAT_MINUS_PLUS_ONE:
1553
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1554
         if (execmask & (1 << i)) {
1555
            if (chan->f[i] < -1.0f)
1556
               dst->f[i] = -1.0f;
1557
            else if (chan->f[i] > 1.0f)
1558
               dst->f[i] = 1.0f;
1559
            else
1560
               dst->i[i] = chan->i[i];
1561
         }
1562
      break;
1563
 
1564
   default:
1565
      assert( 0 );
1566
   }
1567
}
1568
 
1569
/* Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL as floating point data.  Expands to a fetch_source() call and
 * expects `mach` and `inst` to be in scope at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_FLOAT)

/* Same as FETCH, but the operand is treated as integer data. */
#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_INT)
1574
 
1575
 
1576
/**
1577
 * Execute ARB-style KIL which is predicated by a src register.
1578
 * Kill fragment if any of the four values is less than zero.
1579
 */
1580
static void
1581
exec_kil(struct tgsi_exec_machine *mach,
1582
         const struct tgsi_full_instruction *inst)
1583
{
1584
   uint uniquemask;
1585
   uint chan_index;
1586
   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1587
   union tgsi_exec_channel r[1];
1588
 
1589
   /* This mask stores component bits that were already tested. */
1590
   uniquemask = 0;
1591
 
1592
   for (chan_index = 0; chan_index < 4; chan_index++)
1593
   {
1594
      uint swizzle;
1595
      uint i;
1596
 
1597
      /* unswizzle channel */
1598
      swizzle = tgsi_util_get_full_src_register_swizzle (
1599
                        &inst->Src[0],
1600
                        chan_index);
1601
 
1602
      /* check if the component has not been already tested */
1603
      if (uniquemask & (1 << swizzle))
1604
         continue;
1605
      uniquemask |= 1 << swizzle;
1606
 
1607
      FETCH(&r[0], 0, chan_index);
1608
      for (i = 0; i < 4; i++)
1609
         if (r[0].f[i] < 0.0f)
1610
            kilmask |= 1 << i;
1611
   }
1612
 
1613
   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1614
}
1615
 
1616
/**
1617
 * Execute NVIDIA-style KIL which is predicated by a condition code.
1618
 * Kill fragment if the condition code is TRUE.
1619
 */
1620
static void
1621
exec_kilp(struct tgsi_exec_machine *mach,
1622
          const struct tgsi_full_instruction *inst)
1623
{
1624
   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1625
 
1626
   /* "unconditional" kil */
1627
   kilmask = mach->ExecMask;
1628
   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1629
}
1630
 
1631
static void
1632
emit_vertex(struct tgsi_exec_machine *mach)
1633
{
1634
   /* FIXME: check for exec mask correctly
1635
   unsigned i;
1636
   for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1637
         if ((mach->ExecMask & (1 << i)))
1638
   */
1639
   if (mach->ExecMask) {
1640
      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1641
      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1642
   }
1643
}
1644
 
1645
static void
1646
emit_primitive(struct tgsi_exec_machine *mach)
1647
{
1648
   unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1649
   /* FIXME: check for exec mask correctly
1650
   unsigned i;
1651
   for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1652
         if ((mach->ExecMask & (1 << i)))
1653
   */
1654
   if (mach->ExecMask) {
1655
      ++(*prim_count);
1656
      debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1657
      mach->Primitives[*prim_count] = 0;
1658
   }
1659
}
1660
 
1661
static void
1662
conditional_emit_primitive(struct tgsi_exec_machine *mach)
1663
{
1664
   if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1665
      int emitted_verts =
1666
         mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
1667
      if (emitted_verts) {
1668
         emit_primitive(mach);
1669
      }
1670
   }
1671
}
1672
 
1673
 
1674
/*
1675
 * Fetch four texture samples using STR texture coordinates.
1676
 */
1677
static void
1678
fetch_texel( struct tgsi_sampler *sampler,
1679
             const unsigned sview_idx,
1680
             const unsigned sampler_idx,
1681
             const union tgsi_exec_channel *s,
1682
             const union tgsi_exec_channel *t,
1683
             const union tgsi_exec_channel *p,
1684
             const union tgsi_exec_channel *c0,
1685
             const union tgsi_exec_channel *c1,
1686
             float derivs[3][2][TGSI_QUAD_SIZE],
1687
             const int8_t offset[3],
1688
             enum tgsi_sampler_control control,
1689
             union tgsi_exec_channel *r,
1690
             union tgsi_exec_channel *g,
1691
             union tgsi_exec_channel *b,
1692
             union tgsi_exec_channel *a )
1693
{
1694
   uint j;
1695
   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1696
 
1697
   /* FIXME: handle explicit derivs, offsets */
1698
   sampler->get_samples(sampler, sview_idx, sampler_idx,
1699
                        s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
1700
 
1701
   for (j = 0; j < 4; j++) {
1702
      r->f[j] = rgba[0][j];
1703
      g->f[j] = rgba[1][j];
1704
      b->f[j] = rgba[2][j];
1705
      a->f[j] = rgba[3][j];
1706
   }
1707
}
1708
 
1709
 
1710
/* Texture instruction variants, passed as the `modifier` of exec_tex(). */
#define TEX_MODIFIER_NONE           0  /* plain lookup, no extra argument */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided by the modifier value */
#define TEX_MODIFIER_LOD_BIAS       2  /* modifier value is a LOD bias */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* modifier value is an explicit LOD */
#define TEX_MODIFIER_LEVEL_ZERO     4  /* not accepted by exec_tex() */
1715
 
1716
 
1717
/*
1718
 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
1719
 */
1720
static void
1721
fetch_texel_offsets(struct tgsi_exec_machine *mach,
1722
                    const struct tgsi_full_instruction *inst,
1723
                    int8_t offsets[3])
1724
{
1725
   if (inst->Texture.NumOffsets == 1) {
1726
      union tgsi_exec_channel index;
1727
      union tgsi_exec_channel offset[3];
1728
      index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
1729
      fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1730
                             inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
1731
      fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1732
                             inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
1733
      fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1734
                             inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
1735
     offsets[0] = offset[0].i[0];
1736
     offsets[1] = offset[1].i[0];
1737
     offsets[2] = offset[2].i[0];
1738
   } else {
1739
     assert(inst->Texture.NumOffsets == 0);
1740
     offsets[0] = offsets[1] = offsets[2] = 0;
1741
   }
1742
}
1743
 
1744
 
1745
/*
1746
 * Fetch dx and dy values for one channel (s, t or r).
1747
 * Put dx values into one float array, dy values into another.
1748
 */
1749
static void
1750
fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
1751
                           const struct tgsi_full_instruction *inst,
1752
                           unsigned regdsrcx,
1753
                           unsigned chan,
1754
                           float derivs[2][TGSI_QUAD_SIZE])
1755
{
1756
   union tgsi_exec_channel d;
1757
   FETCH(&d, regdsrcx, chan);
1758
   derivs[0][0] = d.f[0];
1759
   derivs[0][1] = d.f[1];
1760
   derivs[0][2] = d.f[2];
1761
   derivs[0][3] = d.f[3];
1762
   FETCH(&d, regdsrcx + 1, chan);
1763
   derivs[1][0] = d.f[0];
1764
   derivs[1][1] = d.f[1];
1765
   derivs[1][2] = d.f[2];
1766
   derivs[1][3] = d.f[3];
1767
}
1768
 
1769
 
1770
/*
1771
 * execute a texture instruction.
1772
 *
1773
 * modifier is used to control the channel routing for the\
1774
 * instruction variants like proj, lod, and texture with lod bias.
1775
 * sampler indicates which src register the sampler is contained in.
1776
 */
1777
static void
1778
exec_tex(struct tgsi_exec_machine *mach,
1779
         const struct tgsi_full_instruction *inst,
1780
         uint modifier, uint sampler)
1781
{
1782
   const uint unit = inst->Src[sampler].Register.Index;
1783
   const union tgsi_exec_channel *args[5], *proj = NULL;
1784
   union tgsi_exec_channel r[5];
1785
   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
1786
   uint chan;
1787
   int8_t offsets[3];
1788
   int dim, shadow_ref, i;
1789
 
1790
   /* always fetch all 3 offsets, overkill but keeps code simple */
1791
   fetch_texel_offsets(mach, inst, offsets);
1792
 
1793
   assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
1794
   assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
1795
 
1796
   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref);
1797
 
1798
   assert(dim <= 4);
1799
   if (shadow_ref >= 0)
1800
      assert(shadow_ref >= dim && shadow_ref < Elements(args));
1801
 
1802
   /* fetch modifier to the last argument */
1803
   if (modifier != TEX_MODIFIER_NONE) {
1804
      const int last = Elements(args) - 1;
1805
 
1806
      /* fetch modifier from src0.w or src1.x */
1807
      if (sampler == 1) {
1808
         assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
1809
         FETCH(&r[last], 0, TGSI_CHAN_W);
1810
      }
1811
      else {
1812
         assert(shadow_ref != 4);
1813
         FETCH(&r[last], 1, TGSI_CHAN_X);
1814
      }
1815
 
1816
      if (modifier != TEX_MODIFIER_PROJECTED) {
1817
         args[last] = &r[last];
1818
      }
1819
      else {
1820
         proj = &r[last];
1821
         args[last] = &ZeroVec;
1822
      }
1823
 
1824
      /* point unused arguments to zero vector */
1825
      for (i = dim; i < last; i++)
1826
         args[i] = &ZeroVec;
1827
 
1828
      if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
1829
         control = tgsi_sampler_lod_explicit;
1830
      else if (modifier == TEX_MODIFIER_LOD_BIAS)
1831
         control = tgsi_sampler_lod_bias;
1832
   }
1833
   else {
1834
      for (i = dim; i < Elements(args); i++)
1835
         args[i] = &ZeroVec;
1836
   }
1837
 
1838
   /* fetch coordinates */
1839
   for (i = 0; i < dim; i++) {
1840
      FETCH(&r[i], 0, TGSI_CHAN_X + i);
1841
 
1842
      if (proj)
1843
         micro_div(&r[i], &r[i], proj);
1844
 
1845
      args[i] = &r[i];
1846
   }
1847
 
1848
   /* fetch reference value */
1849
   if (shadow_ref >= 0) {
1850
      FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
1851
 
1852
      if (proj)
1853
         micro_div(&r[shadow_ref], &r[shadow_ref], proj);
1854
 
1855
      args[shadow_ref] = &r[shadow_ref];
1856
   }
1857
 
1858
   fetch_texel(mach->Sampler, unit, unit,
1859
         args[0], args[1], args[2], args[3], args[4],
1860
         NULL, offsets, control,
1861
         &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
1862
 
1863
#if 0
1864
   debug_printf("fetch r: %g %g %g %g\n",
1865
         r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
1866
   debug_printf("fetch g: %g %g %g %g\n",
1867
         r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
1868
   debug_printf("fetch b: %g %g %g %g\n",
1869
         r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
1870
   debug_printf("fetch a: %g %g %g %g\n",
1871
         r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
1872
#endif
1873
 
1874
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1875
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1876
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1877
      }
1878
   }
1879
}
1880
 
1881
 
1882
static void
1883
exec_txd(struct tgsi_exec_machine *mach,
1884
         const struct tgsi_full_instruction *inst)
1885
{
1886
   const uint unit = inst->Src[3].Register.Index;
1887
   union tgsi_exec_channel r[4];
1888
   float derivs[3][2][TGSI_QUAD_SIZE];
1889
   uint chan;
1890
   int8_t offsets[3];
1891
 
1892
   /* always fetch all 3 offsets, overkill but keeps code simple */
1893
   fetch_texel_offsets(mach, inst, offsets);
1894
 
1895
   switch (inst->Texture.Texture) {
1896
   case TGSI_TEXTURE_1D:
1897
      FETCH(&r[0], 0, TGSI_CHAN_X);
1898
 
1899
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1900
 
1901
      fetch_texel(mach->Sampler, unit, unit,
1902
                  &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
1903
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1904
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
1905
      break;
1906
 
1907
   case TGSI_TEXTURE_SHADOW1D:
1908
   case TGSI_TEXTURE_1D_ARRAY:
1909
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1910
      /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
1911
      FETCH(&r[0], 0, TGSI_CHAN_X);
1912
      FETCH(&r[1], 0, TGSI_CHAN_Y);
1913
      FETCH(&r[2], 0, TGSI_CHAN_Z);
1914
 
1915
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1916
 
1917
      fetch_texel(mach->Sampler, unit, unit,
1918
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
1919
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1920
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
1921
      break;
1922
 
1923
   case TGSI_TEXTURE_2D:
1924
   case TGSI_TEXTURE_RECT:
1925
      FETCH(&r[0], 0, TGSI_CHAN_X);
1926
      FETCH(&r[1], 0, TGSI_CHAN_Y);
1927
 
1928
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1929
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
1930
 
1931
      fetch_texel(mach->Sampler, unit, unit,
1932
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
1933
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1934
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
1935
      break;
1936
 
1937
 
1938
   case TGSI_TEXTURE_SHADOW2D:
1939
   case TGSI_TEXTURE_SHADOWRECT:
1940
   case TGSI_TEXTURE_2D_ARRAY:
1941
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1942
      /* only SHADOW2D_ARRAY actually needs W */
1943
      FETCH(&r[0], 0, TGSI_CHAN_X);
1944
      FETCH(&r[1], 0, TGSI_CHAN_Y);
1945
      FETCH(&r[2], 0, TGSI_CHAN_Z);
1946
      FETCH(&r[3], 0, TGSI_CHAN_W);
1947
 
1948
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1949
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
1950
 
1951
      fetch_texel(mach->Sampler, unit, unit,
1952
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
1953
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1954
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
1955
      break;
1956
 
1957
   case TGSI_TEXTURE_3D:
1958
   case TGSI_TEXTURE_CUBE:
1959
   case TGSI_TEXTURE_CUBE_ARRAY:
1960
      /* only TEXTURE_CUBE_ARRAY actually needs W */
1961
      FETCH(&r[0], 0, TGSI_CHAN_X);
1962
      FETCH(&r[1], 0, TGSI_CHAN_Y);
1963
      FETCH(&r[2], 0, TGSI_CHAN_Z);
1964
      FETCH(&r[3], 0, TGSI_CHAN_W);
1965
 
1966
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1967
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
1968
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
1969
 
1970
      fetch_texel(mach->Sampler, unit, unit,
1971
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
1972
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1973
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
1974
      break;
1975
 
1976
   default:
1977
      assert(0);
1978
   }
1979
 
1980
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1981
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1982
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1983
      }
1984
   }
1985
}
1986
 
1987
 
1988
static void
1989
exec_txf(struct tgsi_exec_machine *mach,
1990
         const struct tgsi_full_instruction *inst)
1991
{
1992
   const uint unit = inst->Src[1].Register.Index;
1993
   union tgsi_exec_channel r[4];
1994
   uint chan;
1995
   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1996
   int j;
1997
   int8_t offsets[3];
1998
   unsigned target;
1999
 
2000
   /* always fetch all 3 offsets, overkill but keeps code simple */
2001
   fetch_texel_offsets(mach, inst, offsets);
2002
 
2003
   IFETCH(&r[3], 0, TGSI_CHAN_W);
2004
 
2005
   if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
2006
      target = mach->SamplerViews[unit].Resource;
2007
   }
2008
   else {
2009
      target = inst->Texture.Texture;
2010
   }
2011
   switch(target) {
2012
   case TGSI_TEXTURE_3D:
2013
   case TGSI_TEXTURE_2D_ARRAY:
2014
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
2015
      IFETCH(&r[2], 0, TGSI_CHAN_Z);
2016
      /* fallthrough */
2017
   case TGSI_TEXTURE_2D:
2018
   case TGSI_TEXTURE_RECT:
2019
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
2020
   case TGSI_TEXTURE_SHADOW2D:
2021
   case TGSI_TEXTURE_SHADOWRECT:
2022
   case TGSI_TEXTURE_1D_ARRAY:
2023
      IFETCH(&r[1], 0, TGSI_CHAN_Y);
2024
      /* fallthrough */
2025
   case TGSI_TEXTURE_BUFFER:
2026
   case TGSI_TEXTURE_1D:
2027
   case TGSI_TEXTURE_SHADOW1D:
2028
      IFETCH(&r[0], 0, TGSI_CHAN_X);
2029
      break;
2030
   default:
2031
      assert(0);
2032
      break;
2033
   }
2034
 
2035
   mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2036
                            offsets, rgba);
2037
 
2038
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2039
      r[0].f[j] = rgba[0][j];
2040
      r[1].f[j] = rgba[1][j];
2041
      r[2].f[j] = rgba[2][j];
2042
      r[3].f[j] = rgba[3][j];
2043
   }
2044
 
2045
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2046
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2047
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2048
      }
2049
   }
2050
}
2051
 
2052
static void
2053
exec_txq(struct tgsi_exec_machine *mach,
2054
         const struct tgsi_full_instruction *inst)
2055
{
2056
   const uint unit = inst->Src[1].Register.Index;
2057
   int result[4];
2058
   union tgsi_exec_channel r[4], src;
2059
   uint chan;
2060
   int i,j;
2061
 
2062
   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2063
 
2064
   mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2065
 
2066
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2067
      for (j = 0; j < 4; j++) {
2068
         r[j].i[i] = result[j];
2069
      }
2070
   }
2071
 
2072
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2073
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2074
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2075
                    TGSI_EXEC_DATA_INT);
2076
      }
2077
   }
2078
}
2079
 
2080
static void
2081
exec_sample(struct tgsi_exec_machine *mach,
2082
            const struct tgsi_full_instruction *inst,
2083
            uint modifier, boolean compare)
2084
{
2085
   const uint resource_unit = inst->Src[1].Register.Index;
2086
   const uint sampler_unit = inst->Src[2].Register.Index;
2087
   union tgsi_exec_channel r[4], c1;
2088
   const union tgsi_exec_channel *lod = &ZeroVec;
2089
   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
2090
   uint chan;
2091
   int8_t offsets[3];
2092
 
2093
   /* always fetch all 3 offsets, overkill but keeps code simple */
2094
   fetch_texel_offsets(mach, inst, offsets);
2095
 
2096
   assert(modifier != TEX_MODIFIER_PROJECTED);
2097
 
2098
   if (modifier != TEX_MODIFIER_NONE) {
2099
      if (modifier == TEX_MODIFIER_LOD_BIAS) {
2100
         FETCH(&c1, 3, TGSI_CHAN_X);
2101
         lod = &c1;
2102
         control = tgsi_sampler_lod_bias;
2103
      }
2104
      else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2105
         FETCH(&c1, 3, TGSI_CHAN_X);
2106
         lod = &c1;
2107
         control = tgsi_sampler_lod_explicit;
2108
      }
2109
      else {
2110
         assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2111
         control = tgsi_sampler_lod_zero;
2112
      }
2113
   }
2114
 
2115
   FETCH(&r[0], 0, TGSI_CHAN_X);
2116
 
2117
   switch (mach->SamplerViews[resource_unit].Resource) {
2118
   case TGSI_TEXTURE_1D:
2119
      if (compare) {
2120
         FETCH(&r[2], 3, TGSI_CHAN_X);
2121
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2122
                     &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2123
                     NULL, offsets, control,
2124
                     &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2125
      }
2126
      else {
2127
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2128
                     &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2129
                     NULL, offsets, control,
2130
                     &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2131
      }
2132
      break;
2133
 
2134
   case TGSI_TEXTURE_1D_ARRAY:
2135
   case TGSI_TEXTURE_2D:
2136
   case TGSI_TEXTURE_RECT:
2137
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2138
      if (compare) {
2139
         FETCH(&r[2], 3, TGSI_CHAN_X);
2140
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2141
                     &r[0], &r[1], &r[2], &ZeroVec, lod,    /* S, T, P, C, LOD */
2142
                     NULL, offsets, control,
2143
                     &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2144
      }
2145
      else {
2146
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2147
                     &r[0], &r[1], &ZeroVec, &ZeroVec, lod,    /* S, T, P, C, LOD */
2148
                     NULL, offsets, control,
2149
                     &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2150
      }
2151
      break;
2152
 
2153
   case TGSI_TEXTURE_2D_ARRAY:
2154
   case TGSI_TEXTURE_3D:
2155
   case TGSI_TEXTURE_CUBE:
2156
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2157
      FETCH(&r[2], 0, TGSI_CHAN_Z);
2158
      if(compare) {
2159
         FETCH(&r[3], 3, TGSI_CHAN_X);
2160
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2161
                     &r[0], &r[1], &r[2], &r[3], lod,
2162
                     NULL, offsets, control,
2163
                     &r[0], &r[1], &r[2], &r[3]);
2164
      }
2165
      else {
2166
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2167
                     &r[0], &r[1], &r[2], &ZeroVec, lod,
2168
                     NULL, offsets, control,
2169
                     &r[0], &r[1], &r[2], &r[3]);
2170
      }
2171
      break;
2172
 
2173
   case TGSI_TEXTURE_CUBE_ARRAY:
2174
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2175
      FETCH(&r[2], 0, TGSI_CHAN_Z);
2176
      FETCH(&r[3], 0, TGSI_CHAN_W);
2177
      if(compare) {
2178
         FETCH(&r[4], 3, TGSI_CHAN_X);
2179
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2180
                     &r[0], &r[1], &r[2], &r[3], &r[4],
2181
                     NULL, offsets, control,
2182
                     &r[0], &r[1], &r[2], &r[3]);
2183
      }
2184
      else {
2185
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2186
                     &r[0], &r[1], &r[2], &r[3], lod,
2187
                     NULL, offsets, control,
2188
                     &r[0], &r[1], &r[2], &r[3]);
2189
      }
2190
      break;
2191
 
2192
 
2193
   default:
2194
      assert(0);
2195
   }
2196
 
2197
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2198
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2199
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2200
      }
2201
   }
2202
}
2203
 
2204
static void
2205
exec_sample_d(struct tgsi_exec_machine *mach,
2206
              const struct tgsi_full_instruction *inst)
2207
{
2208
   const uint resource_unit = inst->Src[1].Register.Index;
2209
   const uint sampler_unit = inst->Src[2].Register.Index;
2210
   union tgsi_exec_channel r[4];
2211
   float derivs[3][2][TGSI_QUAD_SIZE];
2212
   uint chan;
2213
   int8_t offsets[3];
2214
 
2215
   /* always fetch all 3 offsets, overkill but keeps code simple */
2216
   fetch_texel_offsets(mach, inst, offsets);
2217
 
2218
   FETCH(&r[0], 0, TGSI_CHAN_X);
2219
 
2220
   switch (mach->SamplerViews[resource_unit].Resource) {
2221
   case TGSI_TEXTURE_1D:
2222
   case TGSI_TEXTURE_1D_ARRAY:
2223
      /* only 1D array actually needs Y */
2224
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2225
 
2226
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2227
 
2228
      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2229
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2230
                  derivs, offsets, tgsi_sampler_derivs_explicit,
2231
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2232
      break;
2233
 
2234
   case TGSI_TEXTURE_2D:
2235
   case TGSI_TEXTURE_RECT:
2236
   case TGSI_TEXTURE_2D_ARRAY:
2237
      /* only 2D array actually needs Z */
2238
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2239
      FETCH(&r[2], 0, TGSI_CHAN_Z);
2240
 
2241
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2242
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2243
 
2244
      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2245
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
2246
                  derivs, offsets, tgsi_sampler_derivs_explicit,
2247
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2248
      break;
2249
 
2250
   case TGSI_TEXTURE_3D:
2251
   case TGSI_TEXTURE_CUBE:
2252
   case TGSI_TEXTURE_CUBE_ARRAY:
2253
      /* only cube array actually needs W */
2254
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2255
      FETCH(&r[2], 0, TGSI_CHAN_Z);
2256
      FETCH(&r[3], 0, TGSI_CHAN_W);
2257
 
2258
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2259
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2260
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2261
 
2262
      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2263
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2264
                  derivs, offsets, tgsi_sampler_derivs_explicit,
2265
                  &r[0], &r[1], &r[2], &r[3]);
2266
      break;
2267
 
2268
   default:
2269
      assert(0);
2270
   }
2271
 
2272
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2273
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2274
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2275
      }
2276
   }
2277
}
2278
 
2279
 
2280
/**
2281
 * Evaluate a constant-valued coefficient at the position of the
2282
 * current quad.
2283
 */
2284
static void
2285
eval_constant_coef(
2286
   struct tgsi_exec_machine *mach,
2287
   unsigned attrib,
2288
   unsigned chan )
2289
{
2290
   unsigned i;
2291
 
2292
   for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2293
      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2294
   }
2295
}
2296
 
2297
/**
2298
 * Evaluate a linear-valued coefficient at the position of the
2299
 * current quad.
2300
 */
2301
static void
2302
eval_linear_coef(
2303
   struct tgsi_exec_machine *mach,
2304
   unsigned attrib,
2305
   unsigned chan )
2306
{
2307
   const float x = mach->QuadPos.xyzw[0].f[0];
2308
   const float y = mach->QuadPos.xyzw[1].f[0];
2309
   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2310
   const float dady = mach->InterpCoefs[attrib].dady[chan];
2311
   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2312
   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2313
   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2314
   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2315
   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2316
}
2317
 
2318
/**
2319
 * Evaluate a perspective-valued coefficient at the position of the
2320
 * current quad.
2321
 */
2322
static void
2323
eval_perspective_coef(
2324
   struct tgsi_exec_machine *mach,
2325
   unsigned attrib,
2326
   unsigned chan )
2327
{
2328
   const float x = mach->QuadPos.xyzw[0].f[0];
2329
   const float y = mach->QuadPos.xyzw[1].f[0];
2330
   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2331
   const float dady = mach->InterpCoefs[attrib].dady[chan];
2332
   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2333
   const float *w = mach->QuadPos.xyzw[3].f;
2334
   /* divide by W here */
2335
   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2336
   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2337
   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2338
   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2339
}
2340
 
2341
 
2342
typedef void (* eval_coef_func)(
2343
   struct tgsi_exec_machine *mach,
2344
   unsigned attrib,
2345
   unsigned chan );
2346
 
2347
static void
2348
exec_declaration(struct tgsi_exec_machine *mach,
2349
                 const struct tgsi_full_declaration *decl)
2350
{
2351
   if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2352
      mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2353
      return;
2354
   }
2355
 
2356
   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2357
      if (decl->Declaration.File == TGSI_FILE_INPUT) {
2358
         uint first, last, mask;
2359
 
2360
         first = decl->Range.First;
2361
         last = decl->Range.Last;
2362
         mask = decl->Declaration.UsageMask;
2363
 
2364
         /* XXX we could remove this special-case code since
2365
          * mach->InterpCoefs[first].a0 should already have the
2366
          * front/back-face value.  But we should first update the
2367
          * ureg code to emit the right UsageMask value (WRITEMASK_X).
2368
          * Then, we could remove the tgsi_exec_machine::Face field.
2369
          */
2370
         /* XXX make FACE a system value */
2371
         if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2372
            uint i;
2373
 
2374
            assert(decl->Semantic.Index == 0);
2375
            assert(first == last);
2376
 
2377
            for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2378
               mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2379
            }
2380
         } else {
2381
            eval_coef_func eval;
2382
            uint i, j;
2383
 
2384
            switch (decl->Interp.Interpolate) {
2385
            case TGSI_INTERPOLATE_CONSTANT:
2386
               eval = eval_constant_coef;
2387
               break;
2388
 
2389
            case TGSI_INTERPOLATE_LINEAR:
2390
               eval = eval_linear_coef;
2391
               break;
2392
 
2393
            case TGSI_INTERPOLATE_PERSPECTIVE:
2394
               eval = eval_perspective_coef;
2395
               break;
2396
 
2397
            case TGSI_INTERPOLATE_COLOR:
2398
               eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2399
               break;
2400
 
2401
            default:
2402
               assert(0);
2403
               return;
2404
            }
2405
 
2406
            for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2407
               if (mask & (1 << j)) {
2408
                  for (i = first; i <= last; i++) {
2409
                     eval(mach, i, j);
2410
                  }
2411
               }
2412
            }
2413
         }
2414
 
2415
         if (DEBUG_EXECUTION) {
2416
            uint i, j;
2417
            for (i = first; i <= last; ++i) {
2418
               debug_printf("IN[%2u] = ", i);
2419
               for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2420
                  if (j > 0) {
2421
                     debug_printf("         ");
2422
                  }
2423
                  debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
2424
                               mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
2425
                               mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
2426
                               mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
2427
                               mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
2428
               }
2429
            }
2430
         }
2431
      }
2432
   }
2433
 
2434
   if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
2435
      mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First;
2436
   }
2437
}
2438
 
2439
 
2440
typedef void (* micro_op)(union tgsi_exec_channel *dst);
2441
 
2442
static void
2443
exec_vector(struct tgsi_exec_machine *mach,
2444
            const struct tgsi_full_instruction *inst,
2445
            micro_op op,
2446
            enum tgsi_exec_datatype dst_datatype)
2447
{
2448
   unsigned int chan;
2449
 
2450
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2451
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2452
         union tgsi_exec_channel dst;
2453
 
2454
         op(&dst);
2455
         store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2456
      }
2457
   }
2458
}
2459
 
2460
typedef void (* micro_unary_op)(union tgsi_exec_channel *dst,
2461
                                const union tgsi_exec_channel *src);
2462
 
2463
static void
2464
exec_scalar_unary(struct tgsi_exec_machine *mach,
2465
                  const struct tgsi_full_instruction *inst,
2466
                  micro_unary_op op,
2467
                  enum tgsi_exec_datatype dst_datatype,
2468
                  enum tgsi_exec_datatype src_datatype)
2469
{
2470
   unsigned int chan;
2471
   union tgsi_exec_channel src;
2472
   union tgsi_exec_channel dst;
2473
 
2474
   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
2475
   op(&dst, &src);
2476
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2477
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2478
         store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2479
      }
2480
   }
2481
}
2482
 
2483
static void
2484
exec_vector_unary(struct tgsi_exec_machine *mach,
2485
                  const struct tgsi_full_instruction *inst,
2486
                  micro_unary_op op,
2487
                  enum tgsi_exec_datatype dst_datatype,
2488
                  enum tgsi_exec_datatype src_datatype)
2489
{
2490
   unsigned int chan;
2491
   struct tgsi_exec_vector dst;
2492
 
2493
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2494
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2495
         union tgsi_exec_channel src;
2496
 
2497
         fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2498
         op(&dst.xyzw[chan], &src);
2499
      }
2500
   }
2501
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2502
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2503
         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2504
      }
2505
   }
2506
}
2507
 
2508
typedef void (* micro_binary_op)(union tgsi_exec_channel *dst,
2509
                                 const union tgsi_exec_channel *src0,
2510
                                 const union tgsi_exec_channel *src1);
2511
 
2512
static void
2513
exec_scalar_binary(struct tgsi_exec_machine *mach,
2514
                   const struct tgsi_full_instruction *inst,
2515
                   micro_binary_op op,
2516
                   enum tgsi_exec_datatype dst_datatype,
2517
                   enum tgsi_exec_datatype src_datatype)
2518
{
2519
   unsigned int chan;
2520
   union tgsi_exec_channel src[2];
2521
   union tgsi_exec_channel dst;
2522
 
2523
   fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
2524
   fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_Y, src_datatype);
2525
   op(&dst, &src[0], &src[1]);
2526
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2527
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2528
         store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2529
      }
2530
   }
2531
}
2532
 
2533
static void
2534
exec_vector_binary(struct tgsi_exec_machine *mach,
2535
                   const struct tgsi_full_instruction *inst,
2536
                   micro_binary_op op,
2537
                   enum tgsi_exec_datatype dst_datatype,
2538
                   enum tgsi_exec_datatype src_datatype)
2539
{
2540
   unsigned int chan;
2541
   struct tgsi_exec_vector dst;
2542
 
2543
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2544
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2545
         union tgsi_exec_channel src[2];
2546
 
2547
         fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2548
         fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2549
         op(&dst.xyzw[chan], &src[0], &src[1]);
2550
      }
2551
   }
2552
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2553
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2554
         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2555
      }
2556
   }
2557
}
2558
 
2559
typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst,
2560
                                  const union tgsi_exec_channel *src0,
2561
                                  const union tgsi_exec_channel *src1,
2562
                                  const union tgsi_exec_channel *src2);
2563
 
2564
static void
2565
exec_vector_trinary(struct tgsi_exec_machine *mach,
2566
                    const struct tgsi_full_instruction *inst,
2567
                    micro_trinary_op op,
2568
                    enum tgsi_exec_datatype dst_datatype,
2569
                    enum tgsi_exec_datatype src_datatype)
2570
{
2571
   unsigned int chan;
2572
   struct tgsi_exec_vector dst;
2573
 
2574
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2575
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2576
         union tgsi_exec_channel src[3];
2577
 
2578
         fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2579
         fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2580
         fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
2581
         op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
2582
      }
2583
   }
2584
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2585
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2586
         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2587
      }
2588
   }
2589
}
2590
 
2591
static void
2592
exec_dp3(struct tgsi_exec_machine *mach,
2593
         const struct tgsi_full_instruction *inst)
2594
{
2595
   unsigned int chan;
2596
   union tgsi_exec_channel arg[3];
2597
 
2598
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2599
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2600
   micro_mul(&arg[2], &arg[0], &arg[1]);
2601
 
2602
   for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
2603
      fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2604
      fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2605
      micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2606
   }
2607
 
2608
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2609
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2610
         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2611
      }
2612
   }
2613
}
2614
 
2615
static void
2616
exec_dp4(struct tgsi_exec_machine *mach,
2617
         const struct tgsi_full_instruction *inst)
2618
{
2619
   unsigned int chan;
2620
   union tgsi_exec_channel arg[3];
2621
 
2622
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2623
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2624
   micro_mul(&arg[2], &arg[0], &arg[1]);
2625
 
2626
   for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
2627
      fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2628
      fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2629
      micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2630
   }
2631
 
2632
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2633
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2634
         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2635
      }
2636
   }
2637
}
2638
 
2639
static void
2640
exec_dp2a(struct tgsi_exec_machine *mach,
2641
          const struct tgsi_full_instruction *inst)
2642
{
2643
   unsigned int chan;
2644
   union tgsi_exec_channel arg[3];
2645
 
2646
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2647
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2648
   micro_mul(&arg[2], &arg[0], &arg[1]);
2649
 
2650
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2651
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2652
   micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2653
 
2654
   fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2655
   micro_add(&arg[0], &arg[0], &arg[1]);
2656
 
2657
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2658
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2659
         store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2660
      }
2661
   }
2662
}
2663
 
2664
static void
2665
exec_dph(struct tgsi_exec_machine *mach,
2666
         const struct tgsi_full_instruction *inst)
2667
{
2668
   unsigned int chan;
2669
   union tgsi_exec_channel arg[3];
2670
 
2671
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2672
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2673
   micro_mul(&arg[2], &arg[0], &arg[1]);
2674
 
2675
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2676
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2677
   micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2678
 
2679
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2680
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2681
   micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2682
 
2683
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2684
   micro_add(&arg[0], &arg[0], &arg[1]);
2685
 
2686
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2687
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2688
         store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2689
      }
2690
   }
2691
}
2692
 
2693
static void
2694
exec_dp2(struct tgsi_exec_machine *mach,
2695
         const struct tgsi_full_instruction *inst)
2696
{
2697
   unsigned int chan;
2698
   union tgsi_exec_channel arg[3];
2699
 
2700
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2701
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2702
   micro_mul(&arg[2], &arg[0], &arg[1]);
2703
 
2704
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2705
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2706
   micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2707
 
2708
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2709
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2710
         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2711
      }
2712
   }
2713
}
2714
 
2715
static void
2716
exec_nrm4(struct tgsi_exec_machine *mach,
2717
          const struct tgsi_full_instruction *inst)
2718
{
2719
   unsigned int chan;
2720
   union tgsi_exec_channel arg[4];
2721
   union tgsi_exec_channel scale;
2722
 
2723
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2724
   micro_mul(&scale, &arg[0], &arg[0]);
2725
 
2726
   for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
2727
      union tgsi_exec_channel product;
2728
 
2729
      fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2730
      micro_mul(&product, &arg[chan], &arg[chan]);
2731
      micro_add(&scale, &scale, &product);
2732
   }
2733
 
2734
   micro_rsq(&scale, &scale);
2735
 
2736
   for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_W; chan++) {
2737
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2738
         micro_mul(&arg[chan], &arg[chan], &scale);
2739
         store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2740
      }
2741
   }
2742
}
2743
 
2744
static void
2745
exec_nrm3(struct tgsi_exec_machine *mach,
2746
          const struct tgsi_full_instruction *inst)
2747
{
2748
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2749
      unsigned int chan;
2750
      union tgsi_exec_channel arg[3];
2751
      union tgsi_exec_channel scale;
2752
 
2753
      fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2754
      micro_mul(&scale, &arg[0], &arg[0]);
2755
 
2756
      for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
2757
         union tgsi_exec_channel product;
2758
 
2759
         fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2760
         micro_mul(&product, &arg[chan], &arg[chan]);
2761
         micro_add(&scale, &scale, &product);
2762
      }
2763
 
2764
      micro_rsq(&scale, &scale);
2765
 
2766
      for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_Z; chan++) {
2767
         if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2768
            micro_mul(&arg[chan], &arg[chan], &scale);
2769
            store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2770
         }
2771
      }
2772
   }
2773
 
2774
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2775
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2776
   }
2777
}
2778
 
2779
static void
2780
exec_scs(struct tgsi_exec_machine *mach,
2781
         const struct tgsi_full_instruction *inst)
2782
{
2783
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
2784
      union tgsi_exec_channel arg;
2785
      union tgsi_exec_channel result;
2786
 
2787
      fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2788
 
2789
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2790
         micro_cos(&result, &arg);
2791
         store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2792
      }
2793
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2794
         micro_sin(&result, &arg);
2795
         store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2796
      }
2797
   }
2798
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2799
      store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2800
   }
2801
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2802
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2803
   }
2804
}
2805
 
2806
static void
2807
exec_x2d(struct tgsi_exec_machine *mach,
2808
         const struct tgsi_full_instruction *inst)
2809
{
2810
   union tgsi_exec_channel r[4];
2811
   union tgsi_exec_channel d[2];
2812
 
2813
   fetch_source(mach, &r[0], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2814
   fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2815
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) {
2816
      fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2817
      micro_mul(&r[2], &r[2], &r[0]);
2818
      fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2819
      micro_mul(&r[3], &r[3], &r[1]);
2820
      micro_add(&r[2], &r[2], &r[3]);
2821
      fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2822
      micro_add(&d[0], &r[2], &r[3]);
2823
   }
2824
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) {
2825
      fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2826
      micro_mul(&r[2], &r[2], &r[0]);
2827
      fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2828
      micro_mul(&r[3], &r[3], &r[1]);
2829
      micro_add(&r[2], &r[2], &r[3]);
2830
      fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2831
      micro_add(&d[1], &r[2], &r[3]);
2832
   }
2833
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2834
      store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2835
   }
2836
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2837
      store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2838
   }
2839
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2840
      store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2841
   }
2842
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2843
      store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2844
   }
2845
}
2846
 
2847
static void
2848
exec_rfl(struct tgsi_exec_machine *mach,
2849
         const struct tgsi_full_instruction *inst)
2850
{
2851
   union tgsi_exec_channel r[9];
2852
 
2853
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2854
      /* r0 = dp3(src0, src0) */
2855
      fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2856
      micro_mul(&r[0], &r[2], &r[2]);
2857
      fetch_source(mach, &r[4], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2858
      micro_mul(&r[8], &r[4], &r[4]);
2859
      micro_add(&r[0], &r[0], &r[8]);
2860
      fetch_source(mach, &r[6], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2861
      micro_mul(&r[8], &r[6], &r[6]);
2862
      micro_add(&r[0], &r[0], &r[8]);
2863
 
2864
      /* r1 = dp3(src0, src1) */
2865
      fetch_source(mach, &r[3], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2866
      micro_mul(&r[1], &r[2], &r[3]);
2867
      fetch_source(mach, &r[5], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2868
      micro_mul(&r[8], &r[4], &r[5]);
2869
      micro_add(&r[1], &r[1], &r[8]);
2870
      fetch_source(mach, &r[7], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2871
      micro_mul(&r[8], &r[6], &r[7]);
2872
      micro_add(&r[1], &r[1], &r[8]);
2873
 
2874
      /* r1 = 2 * r1 / r0 */
2875
      micro_add(&r[1], &r[1], &r[1]);
2876
      micro_div(&r[1], &r[1], &r[0]);
2877
 
2878
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2879
         micro_mul(&r[2], &r[2], &r[1]);
2880
         micro_sub(&r[2], &r[2], &r[3]);
2881
         store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2882
      }
2883
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2884
         micro_mul(&r[4], &r[4], &r[1]);
2885
         micro_sub(&r[4], &r[4], &r[5]);
2886
         store_dest(mach, &r[4], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2887
      }
2888
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2889
         micro_mul(&r[6], &r[6], &r[1]);
2890
         micro_sub(&r[6], &r[6], &r[7]);
2891
         store_dest(mach, &r[6], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2892
      }
2893
   }
2894
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2895
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2896
   }
2897
}
2898
 
2899
static void
2900
exec_xpd(struct tgsi_exec_machine *mach,
2901
         const struct tgsi_full_instruction *inst)
2902
{
2903
   union tgsi_exec_channel r[6];
2904
   union tgsi_exec_channel d[3];
2905
 
2906
   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2907
   fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2908
 
2909
   micro_mul(&r[2], &r[0], &r[1]);
2910
 
2911
   fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2912
   fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2913
 
2914
   micro_mul(&r[5], &r[3], &r[4] );
2915
   micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
2916
 
2917
   fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2918
 
2919
   micro_mul(&r[3], &r[3], &r[2]);
2920
 
2921
   fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2922
 
2923
   micro_mul(&r[1], &r[1], &r[5]);
2924
   micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
2925
 
2926
   micro_mul(&r[5], &r[5], &r[4]);
2927
   micro_mul(&r[0], &r[0], &r[2]);
2928
   micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
2929
 
2930
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2931
      store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2932
   }
2933
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2934
      store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2935
   }
2936
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2937
      store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2938
   }
2939
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2940
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2941
   }
2942
}
2943
 
2944
static void
2945
exec_dst(struct tgsi_exec_machine *mach,
2946
         const struct tgsi_full_instruction *inst)
2947
{
2948
   union tgsi_exec_channel r[2];
2949
   union tgsi_exec_channel d[4];
2950
 
2951
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2952
      fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2953
      fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2954
      micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
2955
   }
2956
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2957
      fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2958
   }
2959
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2960
      fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2961
   }
2962
 
2963
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2964
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2965
   }
2966
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2967
      store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2968
   }
2969
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2970
      store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2971
   }
2972
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2973
      store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2974
   }
2975
}
2976
 
2977
static void
2978
exec_log(struct tgsi_exec_machine *mach,
2979
         const struct tgsi_full_instruction *inst)
2980
{
2981
   union tgsi_exec_channel r[3];
2982
 
2983
   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2984
   micro_abs(&r[2], &r[0]);  /* r2 = abs(r0) */
2985
   micro_lg2(&r[1], &r[2]);  /* r1 = lg2(r2) */
2986
   micro_flr(&r[0], &r[1]);  /* r0 = floor(r1) */
2987
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2988
      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2989
   }
2990
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2991
      micro_exp2(&r[0], &r[0]);       /* r0 = 2 ^ r0 */
2992
      micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
2993
      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2994
   }
2995
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2996
      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2997
   }
2998
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2999
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3000
   }
3001
}
3002
 
3003
static void
3004
exec_exp(struct tgsi_exec_machine *mach,
3005
         const struct tgsi_full_instruction *inst)
3006
{
3007
   union tgsi_exec_channel r[3];
3008
 
3009
   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3010
   micro_flr(&r[1], &r[0]);  /* r1 = floor(r0) */
3011
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3012
      micro_exp2(&r[2], &r[1]);       /* r2 = 2 ^ r1 */
3013
      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3014
   }
3015
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3016
      micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3017
      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3018
   }
3019
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3020
      micro_exp2(&r[2], &r[0]);       /* r2 = 2 ^ r0 */
3021
      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3022
   }
3023
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3024
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3025
   }
3026
}
3027
 
3028
static void
3029
exec_lit(struct tgsi_exec_machine *mach,
3030
         const struct tgsi_full_instruction *inst)
3031
{
3032
   union tgsi_exec_channel r[3];
3033
   union tgsi_exec_channel d[3];
3034
 
3035
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3036
      fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3037
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3038
         fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3039
         micro_max(&r[1], &r[1], &ZeroVec);
3040
 
3041
         fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3042
         micro_min(&r[2], &r[2], &P128Vec);
3043
         micro_max(&r[2], &r[2], &M128Vec);
3044
         micro_pow(&r[1], &r[1], &r[2]);
3045
         micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3046
         store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3047
      }
3048
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3049
         micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3050
         store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3051
      }
3052
   }
3053
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3054
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3055
   }
3056
 
3057
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3058
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3059
   }
3060
}
3061
 
3062
static void
3063
exec_break(struct tgsi_exec_machine *mach)
3064
{
3065
   if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3066
      /* turn off loop channels for each enabled exec channel */
3067
      mach->LoopMask &= ~mach->ExecMask;
3068
      /* Todo: if mach->LoopMask == 0, jump to end of loop */
3069
      UPDATE_EXEC_MASK(mach);
3070
   } else {
3071
      assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3072
 
3073
      mach->Switch.mask = 0x0;
3074
 
3075
      UPDATE_EXEC_MASK(mach);
3076
   }
3077
}
3078
 
3079
static void
3080
exec_switch(struct tgsi_exec_machine *mach,
3081
            const struct tgsi_full_instruction *inst)
3082
{
3083
   assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3084
   assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3085
 
3086
   mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3087
   fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3088
   mach->Switch.mask = 0x0;
3089
   mach->Switch.defaultMask = 0x0;
3090
 
3091
   mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3092
   mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3093
 
3094
   UPDATE_EXEC_MASK(mach);
3095
}
3096
 
3097
static void
3098
exec_case(struct tgsi_exec_machine *mach,
3099
          const struct tgsi_full_instruction *inst)
3100
{
3101
   uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3102
   union tgsi_exec_channel src;
3103
   uint mask = 0;
3104
 
3105
   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3106
 
3107
   if (mach->Switch.selector.u[0] == src.u[0]) {
3108
      mask |= 0x1;
3109
   }
3110
   if (mach->Switch.selector.u[1] == src.u[1]) {
3111
      mask |= 0x2;
3112
   }
3113
   if (mach->Switch.selector.u[2] == src.u[2]) {
3114
      mask |= 0x4;
3115
   }
3116
   if (mach->Switch.selector.u[3] == src.u[3]) {
3117
      mask |= 0x8;
3118
   }
3119
 
3120
   mach->Switch.defaultMask |= mask;
3121
 
3122
   mach->Switch.mask |= mask & prevMask;
3123
 
3124
   UPDATE_EXEC_MASK(mach);
3125
}
3126
 
3127
/* FIXME: this will only work if default is last */
3128
static void
3129
exec_default(struct tgsi_exec_machine *mach)
3130
{
3131
   uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3132
 
3133
   mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3134
 
3135
   UPDATE_EXEC_MASK(mach);
3136
}
3137
 
3138
static void
3139
exec_endswitch(struct tgsi_exec_machine *mach)
3140
{
3141
   mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3142
   mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3143
 
3144
   UPDATE_EXEC_MASK(mach);
3145
}
3146
 
3147
static void
3148
micro_i2f(union tgsi_exec_channel *dst,
3149
          const union tgsi_exec_channel *src)
3150
{
3151
   dst->f[0] = (float)src->i[0];
3152
   dst->f[1] = (float)src->i[1];
3153
   dst->f[2] = (float)src->i[2];
3154
   dst->f[3] = (float)src->i[3];
3155
}
3156
 
3157
static void
3158
micro_not(union tgsi_exec_channel *dst,
3159
          const union tgsi_exec_channel *src)
3160
{
3161
   dst->u[0] = ~src->u[0];
3162
   dst->u[1] = ~src->u[1];
3163
   dst->u[2] = ~src->u[2];
3164
   dst->u[3] = ~src->u[3];
3165
}
3166
 
3167
static void
3168
micro_shl(union tgsi_exec_channel *dst,
3169
          const union tgsi_exec_channel *src0,
3170
          const union tgsi_exec_channel *src1)
3171
{
3172
   dst->u[0] = src0->u[0] << src1->u[0];
3173
   dst->u[1] = src0->u[1] << src1->u[1];
3174
   dst->u[2] = src0->u[2] << src1->u[2];
3175
   dst->u[3] = src0->u[3] << src1->u[3];
3176
}
3177
 
3178
static void
3179
micro_and(union tgsi_exec_channel *dst,
3180
          const union tgsi_exec_channel *src0,
3181
          const union tgsi_exec_channel *src1)
3182
{
3183
   dst->u[0] = src0->u[0] & src1->u[0];
3184
   dst->u[1] = src0->u[1] & src1->u[1];
3185
   dst->u[2] = src0->u[2] & src1->u[2];
3186
   dst->u[3] = src0->u[3] & src1->u[3];
3187
}
3188
 
3189
static void
3190
micro_or(union tgsi_exec_channel *dst,
3191
         const union tgsi_exec_channel *src0,
3192
         const union tgsi_exec_channel *src1)
3193
{
3194
   dst->u[0] = src0->u[0] | src1->u[0];
3195
   dst->u[1] = src0->u[1] | src1->u[1];
3196
   dst->u[2] = src0->u[2] | src1->u[2];
3197
   dst->u[3] = src0->u[3] | src1->u[3];
3198
}
3199
 
3200
static void
3201
micro_xor(union tgsi_exec_channel *dst,
3202
          const union tgsi_exec_channel *src0,
3203
          const union tgsi_exec_channel *src1)
3204
{
3205
   dst->u[0] = src0->u[0] ^ src1->u[0];
3206
   dst->u[1] = src0->u[1] ^ src1->u[1];
3207
   dst->u[2] = src0->u[2] ^ src1->u[2];
3208
   dst->u[3] = src0->u[3] ^ src1->u[3];
3209
}
3210
 
3211
static void
3212
micro_mod(union tgsi_exec_channel *dst,
3213
          const union tgsi_exec_channel *src0,
3214
          const union tgsi_exec_channel *src1)
3215
{
3216
   dst->i[0] = src0->i[0] % src1->i[0];
3217
   dst->i[1] = src0->i[1] % src1->i[1];
3218
   dst->i[2] = src0->i[2] % src1->i[2];
3219
   dst->i[3] = src0->i[3] % src1->i[3];
3220
}
3221
 
3222
static void
3223
micro_f2i(union tgsi_exec_channel *dst,
3224
          const union tgsi_exec_channel *src)
3225
{
3226
   dst->i[0] = (int)src->f[0];
3227
   dst->i[1] = (int)src->f[1];
3228
   dst->i[2] = (int)src->f[2];
3229
   dst->i[3] = (int)src->f[3];
3230
}
3231
 
3232
static void
3233
micro_idiv(union tgsi_exec_channel *dst,
3234
           const union tgsi_exec_channel *src0,
3235
           const union tgsi_exec_channel *src1)
3236
{
3237
   dst->i[0] = src0->i[0] / src1->i[0];
3238
   dst->i[1] = src0->i[1] / src1->i[1];
3239
   dst->i[2] = src0->i[2] / src1->i[2];
3240
   dst->i[3] = src0->i[3] / src1->i[3];
3241
}
3242
 
3243
static void
3244
micro_imax(union tgsi_exec_channel *dst,
3245
           const union tgsi_exec_channel *src0,
3246
           const union tgsi_exec_channel *src1)
3247
{
3248
   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
3249
   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
3250
   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
3251
   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
3252
}
3253
 
3254
static void
3255
micro_imin(union tgsi_exec_channel *dst,
3256
           const union tgsi_exec_channel *src0,
3257
           const union tgsi_exec_channel *src1)
3258
{
3259
   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
3260
   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
3261
   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
3262
   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
3263
}
3264
 
3265
static void
3266
micro_isge(union tgsi_exec_channel *dst,
3267
           const union tgsi_exec_channel *src0,
3268
           const union tgsi_exec_channel *src1)
3269
{
3270
   dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
3271
   dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
3272
   dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
3273
   dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
3274
}
3275
 
3276
static void
3277
micro_ishr(union tgsi_exec_channel *dst,
3278
           const union tgsi_exec_channel *src0,
3279
           const union tgsi_exec_channel *src1)
3280
{
3281
   dst->i[0] = src0->i[0] >> src1->i[0];
3282
   dst->i[1] = src0->i[1] >> src1->i[1];
3283
   dst->i[2] = src0->i[2] >> src1->i[2];
3284
   dst->i[3] = src0->i[3] >> src1->i[3];
3285
}
3286
 
3287
static void
3288
micro_islt(union tgsi_exec_channel *dst,
3289
           const union tgsi_exec_channel *src0,
3290
           const union tgsi_exec_channel *src1)
3291
{
3292
   dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
3293
   dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
3294
   dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
3295
   dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
3296
}
3297
 
3298
static void
3299
micro_f2u(union tgsi_exec_channel *dst,
3300
          const union tgsi_exec_channel *src)
3301
{
3302
   dst->u[0] = (uint)src->f[0];
3303
   dst->u[1] = (uint)src->f[1];
3304
   dst->u[2] = (uint)src->f[2];
3305
   dst->u[3] = (uint)src->f[3];
3306
}
3307
 
3308
static void
3309
micro_u2f(union tgsi_exec_channel *dst,
3310
          const union tgsi_exec_channel *src)
3311
{
3312
   dst->f[0] = (float)src->u[0];
3313
   dst->f[1] = (float)src->u[1];
3314
   dst->f[2] = (float)src->u[2];
3315
   dst->f[3] = (float)src->u[3];
3316
}
3317
 
3318
static void
3319
micro_uadd(union tgsi_exec_channel *dst,
3320
           const union tgsi_exec_channel *src0,
3321
           const union tgsi_exec_channel *src1)
3322
{
3323
   dst->u[0] = src0->u[0] + src1->u[0];
3324
   dst->u[1] = src0->u[1] + src1->u[1];
3325
   dst->u[2] = src0->u[2] + src1->u[2];
3326
   dst->u[3] = src0->u[3] + src1->u[3];
3327
}
3328
 
3329
static void
3330
micro_udiv(union tgsi_exec_channel *dst,
3331
           const union tgsi_exec_channel *src0,
3332
           const union tgsi_exec_channel *src1)
3333
{
3334
   dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
3335
   dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
3336
   dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
3337
   dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
3338
}
3339
 
3340
static void
3341
micro_umad(union tgsi_exec_channel *dst,
3342
           const union tgsi_exec_channel *src0,
3343
           const union tgsi_exec_channel *src1,
3344
           const union tgsi_exec_channel *src2)
3345
{
3346
   dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
3347
   dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
3348
   dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
3349
   dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
3350
}
3351
 
3352
static void
3353
micro_umax(union tgsi_exec_channel *dst,
3354
           const union tgsi_exec_channel *src0,
3355
           const union tgsi_exec_channel *src1)
3356
{
3357
   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
3358
   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
3359
   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
3360
   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
3361
}
3362
 
3363
static void
3364
micro_umin(union tgsi_exec_channel *dst,
3365
           const union tgsi_exec_channel *src0,
3366
           const union tgsi_exec_channel *src1)
3367
{
3368
   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
3369
   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
3370
   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
3371
   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
3372
}
3373
 
3374
static void
3375
micro_umod(union tgsi_exec_channel *dst,
3376
           const union tgsi_exec_channel *src0,
3377
           const union tgsi_exec_channel *src1)
3378
{
3379
   dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
3380
   dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
3381
   dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
3382
   dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
3383
}
3384
 
3385
static void
3386
micro_umul(union tgsi_exec_channel *dst,
3387
           const union tgsi_exec_channel *src0,
3388
           const union tgsi_exec_channel *src1)
3389
{
3390
   dst->u[0] = src0->u[0] * src1->u[0];
3391
   dst->u[1] = src0->u[1] * src1->u[1];
3392
   dst->u[2] = src0->u[2] * src1->u[2];
3393
   dst->u[3] = src0->u[3] * src1->u[3];
3394
}
3395
 
3396
static void
3397
micro_useq(union tgsi_exec_channel *dst,
3398
           const union tgsi_exec_channel *src0,
3399
           const union tgsi_exec_channel *src1)
3400
{
3401
   dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
3402
   dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
3403
   dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
3404
   dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
3405
}
3406
 
3407
static void
3408
micro_usge(union tgsi_exec_channel *dst,
3409
           const union tgsi_exec_channel *src0,
3410
           const union tgsi_exec_channel *src1)
3411
{
3412
   dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
3413
   dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
3414
   dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
3415
   dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
3416
}
3417
 
3418
static void
3419
micro_ushr(union tgsi_exec_channel *dst,
3420
           const union tgsi_exec_channel *src0,
3421
           const union tgsi_exec_channel *src1)
3422
{
3423
   dst->u[0] = src0->u[0] >> src1->u[0];
3424
   dst->u[1] = src0->u[1] >> src1->u[1];
3425
   dst->u[2] = src0->u[2] >> src1->u[2];
3426
   dst->u[3] = src0->u[3] >> src1->u[3];
3427
}
3428
 
3429
static void
3430
micro_uslt(union tgsi_exec_channel *dst,
3431
           const union tgsi_exec_channel *src0,
3432
           const union tgsi_exec_channel *src1)
3433
{
3434
   dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
3435
   dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
3436
   dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
3437
   dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
3438
}
3439
 
3440
static void
3441
micro_usne(union tgsi_exec_channel *dst,
3442
           const union tgsi_exec_channel *src0,
3443
           const union tgsi_exec_channel *src1)
3444
{
3445
   dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
3446
   dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
3447
   dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
3448
   dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
3449
}
3450
 
3451
static void
3452
micro_uarl(union tgsi_exec_channel *dst,
3453
           const union tgsi_exec_channel *src)
3454
{
3455
   dst->i[0] = src->u[0];
3456
   dst->i[1] = src->u[1];
3457
   dst->i[2] = src->u[2];
3458
   dst->i[3] = src->u[3];
3459
}
3460
 
3461
static void
3462
micro_ucmp(union tgsi_exec_channel *dst,
3463
           const union tgsi_exec_channel *src0,
3464
           const union tgsi_exec_channel *src1,
3465
           const union tgsi_exec_channel *src2)
3466
{
3467
   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
3468
   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
3469
   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
3470
   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
3471
}
3472
 
3473
static void
3474
exec_instruction(
3475
   struct tgsi_exec_machine *mach,
3476
   const struct tgsi_full_instruction *inst,
3477
   int *pc )
3478
{
3479
   union tgsi_exec_channel r[10];
3480
 
3481
   (*pc)++;
3482
 
3483
   switch (inst->Instruction.Opcode) {
3484
   case TGSI_OPCODE_ARL:
3485
      exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3486
      break;
3487
 
3488
   case TGSI_OPCODE_MOV:
3489
      exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3490
      break;
3491
 
3492
   case TGSI_OPCODE_LIT:
3493
      exec_lit(mach, inst);
3494
      break;
3495
 
3496
   case TGSI_OPCODE_RCP:
3497
      exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3498
      break;
3499
 
3500
   case TGSI_OPCODE_RSQ:
3501
      exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3502
      break;
3503
 
3504
   case TGSI_OPCODE_EXP:
3505
      exec_exp(mach, inst);
3506
      break;
3507
 
3508
   case TGSI_OPCODE_LOG:
3509
      exec_log(mach, inst);
3510
      break;
3511
 
3512
   case TGSI_OPCODE_MUL:
3513
      exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3514
      break;
3515
 
3516
   case TGSI_OPCODE_ADD:
3517
      exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3518
      break;
3519
 
3520
   case TGSI_OPCODE_DP3:
3521
      exec_dp3(mach, inst);
3522
      break;
3523
 
3524
   case TGSI_OPCODE_DP4:
3525
      exec_dp4(mach, inst);
3526
      break;
3527
 
3528
   case TGSI_OPCODE_DST:
3529
      exec_dst(mach, inst);
3530
      break;
3531
 
3532
   case TGSI_OPCODE_MIN:
3533
      exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3534
      break;
3535
 
3536
   case TGSI_OPCODE_MAX:
3537
      exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3538
      break;
3539
 
3540
   case TGSI_OPCODE_SLT:
3541
      exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3542
      break;
3543
 
3544
   case TGSI_OPCODE_SGE:
3545
      exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3546
      break;
3547
 
3548
   case TGSI_OPCODE_MAD:
3549
      exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3550
      break;
3551
 
3552
   case TGSI_OPCODE_SUB:
3553
      exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3554
      break;
3555
 
3556
   case TGSI_OPCODE_LRP:
3557
      exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3558
      break;
3559
 
3560
   case TGSI_OPCODE_CND:
3561
      exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3562
      break;
3563
 
3564
   case TGSI_OPCODE_SQRT:
3565
      exec_vector_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3566
      break;
3567
 
3568
   case TGSI_OPCODE_DP2A:
3569
      exec_dp2a(mach, inst);
3570
      break;
3571
 
3572
   case TGSI_OPCODE_FRC:
3573
      exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3574
      break;
3575
 
3576
   case TGSI_OPCODE_CLAMP:
3577
      exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3578
      break;
3579
 
3580
   case TGSI_OPCODE_FLR:
3581
      exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3582
      break;
3583
 
3584
   case TGSI_OPCODE_ROUND:
3585
      exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3586
      break;
3587
 
3588
   case TGSI_OPCODE_EX2:
3589
      exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3590
      break;
3591
 
3592
   case TGSI_OPCODE_LG2:
3593
      exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3594
      break;
3595
 
3596
   case TGSI_OPCODE_POW:
3597
      exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3598
      break;
3599
 
3600
   case TGSI_OPCODE_XPD:
3601
      exec_xpd(mach, inst);
3602
      break;
3603
 
3604
   case TGSI_OPCODE_ABS:
3605
      exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3606
      break;
3607
 
3608
   case TGSI_OPCODE_RCC:
3609
      exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3610
      break;
3611
 
3612
   case TGSI_OPCODE_DPH:
3613
      exec_dph(mach, inst);
3614
      break;
3615
 
3616
   case TGSI_OPCODE_COS:
3617
      exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3618
      break;
3619
 
3620
   case TGSI_OPCODE_DDX:
3621
      exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3622
      break;
3623
 
3624
   case TGSI_OPCODE_DDY:
3625
      exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3626
      break;
3627
 
3628
   case TGSI_OPCODE_KILP:
3629
      exec_kilp (mach, inst);
3630
      break;
3631
 
3632
   case TGSI_OPCODE_KIL:
3633
      exec_kil (mach, inst);
3634
      break;
3635
 
3636
   case TGSI_OPCODE_PK2H:
3637
      assert (0);
3638
      break;
3639
 
3640
   case TGSI_OPCODE_PK2US:
3641
      assert (0);
3642
      break;
3643
 
3644
   case TGSI_OPCODE_PK4B:
3645
      assert (0);
3646
      break;
3647
 
3648
   case TGSI_OPCODE_PK4UB:
3649
      assert (0);
3650
      break;
3651
 
3652
   case TGSI_OPCODE_RFL:
3653
      exec_rfl(mach, inst);
3654
      break;
3655
 
3656
   case TGSI_OPCODE_SEQ:
3657
      exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3658
      break;
3659
 
3660
   case TGSI_OPCODE_SFL:
3661
      exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT);
3662
      break;
3663
 
3664
   case TGSI_OPCODE_SGT:
3665
      exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3666
      break;
3667
 
3668
   case TGSI_OPCODE_SIN:
3669
      exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3670
      break;
3671
 
3672
   case TGSI_OPCODE_SLE:
3673
      exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3674
      break;
3675
 
3676
   case TGSI_OPCODE_SNE:
3677
      exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3678
      break;
3679
 
3680
   case TGSI_OPCODE_STR:
3681
      exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT);
3682
      break;
3683
 
3684
   case TGSI_OPCODE_TEX:
3685
      /* simple texture lookup */
3686
      /* src[0] = texcoord */
3687
      /* src[1] = sampler unit */
3688
      exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
3689
      break;
3690
 
3691
   case TGSI_OPCODE_TXB:
3692
      /* Texture lookup with lod bias */
3693
      /* src[0] = texcoord (src[0].w = LOD bias) */
3694
      /* src[1] = sampler unit */
3695
      exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
3696
      break;
3697
 
3698
   case TGSI_OPCODE_TXD:
3699
      /* Texture lookup with explict partial derivatives */
3700
      /* src[0] = texcoord */
3701
      /* src[1] = d[strq]/dx */
3702
      /* src[2] = d[strq]/dy */
3703
      /* src[3] = sampler unit */
3704
      exec_txd(mach, inst);
3705
      break;
3706
 
3707
   case TGSI_OPCODE_TXL:
3708
      /* Texture lookup with explit LOD */
3709
      /* src[0] = texcoord (src[0].w = LOD) */
3710
      /* src[1] = sampler unit */
3711
      exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
3712
      break;
3713
 
3714
   case TGSI_OPCODE_TXP:
3715
      /* Texture lookup with projection */
3716
      /* src[0] = texcoord (src[0].w = projection) */
3717
      /* src[1] = sampler unit */
3718
      exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
3719
      break;
3720
 
3721
   case TGSI_OPCODE_UP2H:
3722
      assert (0);
3723
      break;
3724
 
3725
   case TGSI_OPCODE_UP2US:
3726
      assert (0);
3727
      break;
3728
 
3729
   case TGSI_OPCODE_UP4B:
3730
      assert (0);
3731
      break;
3732
 
3733
   case TGSI_OPCODE_UP4UB:
3734
      assert (0);
3735
      break;
3736
 
3737
   case TGSI_OPCODE_X2D:
3738
      exec_x2d(mach, inst);
3739
      break;
3740
 
3741
   case TGSI_OPCODE_ARA:
3742
      assert (0);
3743
      break;
3744
 
3745
   case TGSI_OPCODE_ARR:
3746
      exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3747
      break;
3748
 
3749
   case TGSI_OPCODE_BRA:
3750
      assert (0);
3751
      break;
3752
 
3753
   case TGSI_OPCODE_CAL:
3754
      /* skip the call if no execution channels are enabled */
3755
      if (mach->ExecMask) {
3756
         /* do the call */
3757
 
3758
         /* First, record the depths of the execution stacks.
3759
          * This is important for deeply nested/looped return statements.
3760
          * We have to unwind the stacks by the correct amount.  For a
3761
          * real code generator, we could determine the number of entries
3762
          * to pop off each stack with simple static analysis and avoid
3763
          * implementing this data structure at run time.
3764
          */
3765
         mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
3766
         mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
3767
         mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
3768
         mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
3769
         mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
3770
         /* note that PC was already incremented above */
3771
         mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
3772
 
3773
         mach->CallStackTop++;
3774
 
3775
         /* Second, push the Cond, Loop, Cont, Func stacks */
3776
         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3777
         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3778
         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3779
         assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3780
         assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3781
         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
3782
 
3783
         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3784
         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3785
         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3786
         mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3787
         mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3788
         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
3789
 
3790
         /* Finally, jump to the subroutine */
3791
         *pc = inst->Label.Label;
3792
      }
3793
      break;
3794
 
3795
   case TGSI_OPCODE_RET:
3796
      mach->FuncMask &= ~mach->ExecMask;
3797
      UPDATE_EXEC_MASK(mach);
3798
 
3799
      if (mach->FuncMask == 0x0) {
3800
         /* really return now (otherwise, keep executing */
3801
 
3802
         if (mach->CallStackTop == 0) {
3803
            /* returning from main() */
3804
            mach->CondStackTop = 0;
3805
            mach->LoopStackTop = 0;
3806
            *pc = -1;
3807
            return;
3808
         }
3809
 
3810
         assert(mach->CallStackTop > 0);
3811
         mach->CallStackTop--;
3812
 
3813
         mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3814
         mach->CondMask = mach->CondStack[mach->CondStackTop];
3815
 
3816
         mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3817
         mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3818
 
3819
         mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3820
         mach->ContMask = mach->ContStack[mach->ContStackTop];
3821
 
3822
         mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3823
         mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3824
 
3825
         mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3826
         mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3827
 
3828
         assert(mach->FuncStackTop > 0);
3829
         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3830
 
3831
         *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3832
 
3833
         UPDATE_EXEC_MASK(mach);
3834
      }
3835
      break;
3836
 
3837
   case TGSI_OPCODE_SSG:
3838
      exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3839
      break;
3840
 
3841
   case TGSI_OPCODE_CMP:
3842
      exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3843
      break;
3844
 
3845
   case TGSI_OPCODE_SCS:
3846
      exec_scs(mach, inst);
3847
      break;
3848
 
3849
   case TGSI_OPCODE_NRM:
3850
      exec_nrm3(mach, inst);
3851
      break;
3852
 
3853
   case TGSI_OPCODE_NRM4:
3854
      exec_nrm4(mach, inst);
3855
      break;
3856
 
3857
   case TGSI_OPCODE_DIV:
3858
      exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3859
      break;
3860
 
3861
   case TGSI_OPCODE_DP2:
3862
      exec_dp2(mach, inst);
3863
      break;
3864
 
3865
   case TGSI_OPCODE_IF:
3866
      /* push CondMask */
3867
      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3868
      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3869
      FETCH( &r[0], 0, TGSI_CHAN_X );
3870
      /* update CondMask */
3871
      if( ! r[0].f[0] ) {
3872
         mach->CondMask &= ~0x1;
3873
      }
3874
      if( ! r[0].f[1] ) {
3875
         mach->CondMask &= ~0x2;
3876
      }
3877
      if( ! r[0].f[2] ) {
3878
         mach->CondMask &= ~0x4;
3879
      }
3880
      if( ! r[0].f[3] ) {
3881
         mach->CondMask &= ~0x8;
3882
      }
3883
      UPDATE_EXEC_MASK(mach);
3884
      /* Todo: If CondMask==0, jump to ELSE */
3885
      break;
3886
 
3887
   case TGSI_OPCODE_UIF:
3888
      /* push CondMask */
3889
      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3890
      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3891
      IFETCH( &r[0], 0, TGSI_CHAN_X );
3892
      /* update CondMask */
3893
      if( ! r[0].u[0] ) {
3894
         mach->CondMask &= ~0x1;
3895
      }
3896
      if( ! r[0].u[1] ) {
3897
         mach->CondMask &= ~0x2;
3898
      }
3899
      if( ! r[0].u[2] ) {
3900
         mach->CondMask &= ~0x4;
3901
      }
3902
      if( ! r[0].u[3] ) {
3903
         mach->CondMask &= ~0x8;
3904
      }
3905
      UPDATE_EXEC_MASK(mach);
3906
      /* Todo: If CondMask==0, jump to ELSE */
3907
      break;
3908
 
3909
   case TGSI_OPCODE_ELSE:
3910
      /* invert CondMask wrt previous mask */
3911
      {
3912
         uint prevMask;
3913
         assert(mach->CondStackTop > 0);
3914
         prevMask = mach->CondStack[mach->CondStackTop - 1];
3915
         mach->CondMask = ~mach->CondMask & prevMask;
3916
         UPDATE_EXEC_MASK(mach);
3917
         /* Todo: If CondMask==0, jump to ENDIF */
3918
      }
3919
      break;
3920
 
3921
   case TGSI_OPCODE_ENDIF:
3922
      /* pop CondMask */
3923
      assert(mach->CondStackTop > 0);
3924
      mach->CondMask = mach->CondStack[--mach->CondStackTop];
3925
      UPDATE_EXEC_MASK(mach);
3926
      break;
3927
 
3928
   case TGSI_OPCODE_END:
3929
      /* make sure we end primitives which haven't
3930
       * been explicitly emitted */
3931
      conditional_emit_primitive(mach);
3932
      /* halt execution */
3933
      *pc = -1;
3934
      break;
3935
 
3936
   case TGSI_OPCODE_PUSHA:
3937
      assert (0);
3938
      break;
3939
 
3940
   case TGSI_OPCODE_POPA:
3941
      assert (0);
3942
      break;
3943
 
3944
   case TGSI_OPCODE_CEIL:
3945
      exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3946
      break;
3947
 
3948
   case TGSI_OPCODE_I2F:
3949
      exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3950
      break;
3951
 
3952
   case TGSI_OPCODE_NOT:
3953
      exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3954
      break;
3955
 
3956
   case TGSI_OPCODE_TRUNC:
3957
      exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3958
      break;
3959
 
3960
   case TGSI_OPCODE_SHL:
3961
      exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3962
      break;
3963
 
3964
   case TGSI_OPCODE_AND:
3965
      exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3966
      break;
3967
 
3968
   case TGSI_OPCODE_OR:
3969
      exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3970
      break;
3971
 
3972
   case TGSI_OPCODE_MOD:
3973
      exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3974
      break;
3975
 
3976
   case TGSI_OPCODE_XOR:
3977
      exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3978
      break;
3979
 
3980
   case TGSI_OPCODE_SAD:
3981
      assert (0);
3982
      break;
3983
 
3984
   case TGSI_OPCODE_TXF:
3985
      exec_txf(mach, inst);
3986
      break;
3987
 
3988
   case TGSI_OPCODE_TXQ:
3989
      exec_txq(mach, inst);
3990
      break;
3991
 
3992
   case TGSI_OPCODE_EMIT:
3993
      emit_vertex(mach);
3994
      break;
3995
 
3996
   case TGSI_OPCODE_ENDPRIM:
3997
      emit_primitive(mach);
3998
      break;
3999
 
4000
   case TGSI_OPCODE_BGNLOOP:
4001
      /* push LoopMask and ContMasks */
4002
      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4003
      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4004
      assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4005
      assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
4006
 
4007
      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
4008
      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
4009
      mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
4010
      mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
4011
      mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
4012
      break;
4013
 
4014
   case TGSI_OPCODE_ENDLOOP:
4015
      /* Restore ContMask, but don't pop */
4016
      assert(mach->ContStackTop > 0);
4017
      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
4018
      UPDATE_EXEC_MASK(mach);
4019
      if (mach->ExecMask) {
4020
         /* repeat loop: jump to instruction just past BGNLOOP */
4021
         assert(mach->LoopLabelStackTop > 0);
4022
         *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
4023
      }
4024
      else {
4025
         /* exit loop: pop LoopMask */
4026
         assert(mach->LoopStackTop > 0);
4027
         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
4028
         /* pop ContMask */
4029
         assert(mach->ContStackTop > 0);
4030
         mach->ContMask = mach->ContStack[--mach->ContStackTop];
4031
         assert(mach->LoopLabelStackTop > 0);
4032
         --mach->LoopLabelStackTop;
4033
 
4034
         mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
4035
      }
4036
      UPDATE_EXEC_MASK(mach);
4037
      break;
4038
 
4039
   case TGSI_OPCODE_BRK:
4040
      exec_break(mach);
4041
      break;
4042
 
4043
   case TGSI_OPCODE_CONT:
4044
      /* turn off cont channels for each enabled exec channel */
4045
      mach->ContMask &= ~mach->ExecMask;
4046
      /* Todo: if mach->LoopMask == 0, jump to end of loop */
4047
      UPDATE_EXEC_MASK(mach);
4048
      break;
4049
 
4050
   case TGSI_OPCODE_BGNSUB:
4051
      /* no-op */
4052
      break;
4053
 
4054
   case TGSI_OPCODE_ENDSUB:
4055
      /*
4056
       * XXX: This really should be a no-op. We should never reach this opcode.
4057
       */
4058
 
4059
      assert(mach->CallStackTop > 0);
4060
      mach->CallStackTop--;
4061
 
4062
      mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
4063
      mach->CondMask = mach->CondStack[mach->CondStackTop];
4064
 
4065
      mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
4066
      mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
4067
 
4068
      mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
4069
      mach->ContMask = mach->ContStack[mach->ContStackTop];
4070
 
4071
      mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
4072
      mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
4073
 
4074
      mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
4075
      mach->BreakType = mach->BreakStack[mach->BreakStackTop];
4076
 
4077
      assert(mach->FuncStackTop > 0);
4078
      mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
4079
 
4080
      *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
4081
 
4082
      UPDATE_EXEC_MASK(mach);
4083
      break;
4084
 
4085
   case TGSI_OPCODE_NOP:
4086
      break;
4087
 
4088
   case TGSI_OPCODE_BREAKC:
4089
      IFETCH(&r[0], 0, TGSI_CHAN_X);
4090
      /* update CondMask */
4091
      if (r[0].u[0] && (mach->ExecMask & 0x1)) {
4092
         mach->LoopMask &= ~0x1;
4093
      }
4094
      if (r[0].u[1] && (mach->ExecMask & 0x2)) {
4095
         mach->LoopMask &= ~0x2;
4096
      }
4097
      if (r[0].u[2] && (mach->ExecMask & 0x4)) {
4098
         mach->LoopMask &= ~0x4;
4099
      }
4100
      if (r[0].u[3] && (mach->ExecMask & 0x8)) {
4101
         mach->LoopMask &= ~0x8;
4102
      }
4103
      /* Todo: if mach->LoopMask == 0, jump to end of loop */
4104
      UPDATE_EXEC_MASK(mach);
4105
      break;
4106
 
4107
   case TGSI_OPCODE_F2I:
4108
      exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
4109
      break;
4110
 
4111
   case TGSI_OPCODE_IDIV:
4112
      exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4113
      break;
4114
 
4115
   case TGSI_OPCODE_IMAX:
4116
      exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4117
      break;
4118
 
4119
   case TGSI_OPCODE_IMIN:
4120
      exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4121
      break;
4122
 
4123
   case TGSI_OPCODE_INEG:
4124
      exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4125
      break;
4126
 
4127
   case TGSI_OPCODE_ISGE:
4128
      exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4129
      break;
4130
 
4131
   case TGSI_OPCODE_ISHR:
4132
      exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4133
      break;
4134
 
4135
   case TGSI_OPCODE_ISLT:
4136
      exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4137
      break;
4138
 
4139
   case TGSI_OPCODE_F2U:
4140
      exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
4141
      break;
4142
 
4143
   case TGSI_OPCODE_U2F:
4144
      exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
4145
      break;
4146
 
4147
   case TGSI_OPCODE_UADD:
4148
      exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4149
      break;
4150
 
4151
   case TGSI_OPCODE_UDIV:
4152
      exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4153
      break;
4154
 
4155
   case TGSI_OPCODE_UMAD:
4156
      exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4157
      break;
4158
 
4159
   case TGSI_OPCODE_UMAX:
4160
      exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4161
      break;
4162
 
4163
   case TGSI_OPCODE_UMIN:
4164
      exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4165
      break;
4166
 
4167
   case TGSI_OPCODE_UMOD:
4168
      exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4169
      break;
4170
 
4171
   case TGSI_OPCODE_UMUL:
4172
      exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4173
      break;
4174
 
4175
   case TGSI_OPCODE_USEQ:
4176
      exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4177
      break;
4178
 
4179
   case TGSI_OPCODE_USGE:
4180
      exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4181
      break;
4182
 
4183
   case TGSI_OPCODE_USHR:
4184
      exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4185
      break;
4186
 
4187
   case TGSI_OPCODE_USLT:
4188
      exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4189
      break;
4190
 
4191
   case TGSI_OPCODE_USNE:
4192
      exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4193
      break;
4194
 
4195
   case TGSI_OPCODE_SWITCH:
4196
      exec_switch(mach, inst);
4197
      break;
4198
 
4199
   case TGSI_OPCODE_CASE:
4200
      exec_case(mach, inst);
4201
      break;
4202
 
4203
   case TGSI_OPCODE_DEFAULT:
4204
      exec_default(mach);
4205
      break;
4206
 
4207
   case TGSI_OPCODE_ENDSWITCH:
4208
      exec_endswitch(mach);
4209
      break;
4210
 
4211
   case TGSI_OPCODE_SAMPLE_I:
4212
      exec_txf(mach, inst);
4213
      break;
4214
 
4215
   case TGSI_OPCODE_SAMPLE_I_MS:
4216
      assert(0);
4217
      break;
4218
 
4219
   case TGSI_OPCODE_SAMPLE:
4220
      exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
4221
      break;
4222
 
4223
   case TGSI_OPCODE_SAMPLE_B:
4224
      exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
4225
      break;
4226
 
4227
   case TGSI_OPCODE_SAMPLE_C:
4228
      exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
4229
      break;
4230
 
4231
   case TGSI_OPCODE_SAMPLE_C_LZ:
4232
      exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
4233
      break;
4234
 
4235
   case TGSI_OPCODE_SAMPLE_D:
4236
      exec_sample_d(mach, inst);
4237
      break;
4238
 
4239
   case TGSI_OPCODE_SAMPLE_L:
4240
      exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
4241
      break;
4242
 
4243
   case TGSI_OPCODE_GATHER4:
4244
      assert(0);
4245
      break;
4246
 
4247
   case TGSI_OPCODE_SVIEWINFO:
4248
      exec_txq(mach, inst);
4249
      break;
4250
 
4251
   case TGSI_OPCODE_SAMPLE_POS:
4252
      assert(0);
4253
      break;
4254
 
4255
   case TGSI_OPCODE_SAMPLE_INFO:
4256
      assert(0);
4257
      break;
4258
 
4259
   case TGSI_OPCODE_UARL:
4260
      exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
4261
      break;
4262
 
4263
   case TGSI_OPCODE_UCMP:
4264
      exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4265
      break;
4266
 
4267
   case TGSI_OPCODE_IABS:
4268
      exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4269
      break;
4270
 
4271
   case TGSI_OPCODE_ISSG:
4272
      exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4273
      break;
4274
 
4275
   case TGSI_OPCODE_TEX2:
4276
      /* simple texture lookup */
4277
      /* src[0] = texcoord */
4278
      /* src[1] = compare */
4279
      /* src[2] = sampler unit */
4280
      exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
4281
      break;
4282
   case TGSI_OPCODE_TXB2:
4283
      /* simple texture lookup */
4284
      /* src[0] = texcoord */
4285
      /* src[1] = bias */
4286
      /* src[2] = sampler unit */
4287
      exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
4288
      break;
4289
   case TGSI_OPCODE_TXL2:
4290
      /* simple texture lookup */
4291
      /* src[0] = texcoord */
4292
      /* src[1] = lod */
4293
      /* src[2] = sampler unit */
4294
      exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
4295
      break;
4296
   default:
4297
      assert( 0 );
4298
   }
4299
}
4300
 
4301
 
4302
/**
4303
 * Run TGSI interpreter.
4304
 * \return bitmask of "alive" quad components
4305
 */
4306
uint
4307
tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
4308
{
4309
   uint i;
4310
   int pc = 0;
4311
   uint default_mask = 0xf;
4312
 
4313
   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
4314
   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
4315
 
4316
   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
4317
      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
4318
      mach->Primitives[0] = 0;
4319
      /* GS runs on a single primitive for now */
4320
      default_mask = 0x1;
4321
   }
4322
 
4323
   mach->CondMask = default_mask;
4324
   mach->LoopMask = default_mask;
4325
   mach->ContMask = default_mask;
4326
   mach->FuncMask = default_mask;
4327
   mach->ExecMask = default_mask;
4328
 
4329
   mach->Switch.mask = default_mask;
4330
 
4331
   assert(mach->CondStackTop == 0);
4332
   assert(mach->LoopStackTop == 0);
4333
   assert(mach->ContStackTop == 0);
4334
   assert(mach->SwitchStackTop == 0);
4335
   assert(mach->BreakStackTop == 0);
4336
   assert(mach->CallStackTop == 0);
4337
 
4338
 
4339
   /* execute declarations (interpolants) */
4340
   for (i = 0; i < mach->NumDeclarations; i++) {
4341
      exec_declaration( mach, mach->Declarations+i );
4342
   }
4343
 
4344
   {
4345
#if DEBUG_EXECUTION
4346
      struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
4347
      struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
4348
      uint inst = 1;
4349
 
4350
      memset(mach->Temps, 0, sizeof(temps));
4351
      memset(mach->Outputs, 0, sizeof(outputs));
4352
      memset(temps, 0, sizeof(temps));
4353
      memset(outputs, 0, sizeof(outputs));
4354
#endif
4355
 
4356
      /* execute instructions, until pc is set to -1 */
4357
      while (pc != -1) {
4358
 
4359
#if DEBUG_EXECUTION
4360
         uint i;
4361
 
4362
         tgsi_dump_instruction(&mach->Instructions[pc], inst++);
4363
#endif
4364
 
4365
         assert(pc < (int) mach->NumInstructions);
4366
         exec_instruction(mach, mach->Instructions + pc, &pc);
4367
 
4368
#if DEBUG_EXECUTION
4369
         for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
4370
            if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
4371
               uint j;
4372
 
4373
               memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
4374
               debug_printf("TEMP[%2u] = ", i);
4375
               for (j = 0; j < 4; j++) {
4376
                  if (j > 0) {
4377
                     debug_printf("           ");
4378
                  }
4379
                  debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4380
                               temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
4381
                               temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
4382
                               temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
4383
                               temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
4384
               }
4385
            }
4386
         }
4387
         for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
4388
            if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
4389
               uint j;
4390
 
4391
               memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
4392
               debug_printf("OUT[%2u] =  ", i);
4393
               for (j = 0; j < 4; j++) {
4394
                  if (j > 0) {
4395
                     debug_printf("           ");
4396
                  }
4397
                  debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4398
                               outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
4399
                               outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
4400
                               outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
4401
                               outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
4402
               }
4403
            }
4404
         }
4405
#endif
4406
      }
4407
   }
4408
 
4409
#if 0
4410
   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
4411
   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
4412
      /*
4413
       * Scale back depth component.
4414
       */
4415
      for (i = 0; i < 4; i++)
4416
         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
4417
   }
4418
#endif
4419
 
4420
   /* Strictly speaking, these assertions aren't really needed but they
4421
    * can potentially catch some bugs in the control flow code.
4422
    */
4423
   assert(mach->CondStackTop == 0);
4424
   assert(mach->LoopStackTop == 0);
4425
   assert(mach->ContStackTop == 0);
4426
   assert(mach->SwitchStackTop == 0);
4427
   assert(mach->BreakStackTop == 0);
4428
   assert(mach->CallStackTop == 0);
4429
 
4430
   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4431
}