Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/**************************************************************************
2
 *
3
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4
 * All Rights Reserved.
5
 * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the
9
 * "Software"), to deal in the Software without restriction, including
10
 * without limitation the rights to use, copy, modify, merge, publish,
11
 * distribute, sub license, and/or sell copies of the Software, and to
12
 * permit persons to whom the Software is furnished to do so, subject to
13
 * the following conditions:
14
 *
15
 * The above copyright notice and this permission notice (including the
16
 * next paragraph) shall be included in all copies or substantial portions
17
 * of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 *
27
 **************************************************************************/
28
 
29
/**
30
 * TGSI interpreter/executor.
31
 *
32
 * Flow control information:
33
 *
34
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36
 * care since a condition may be true for some quad components but false
37
 * for other components.
38
 *
39
 * We basically execute all statements (even if they're in the part of
40
 * an IF/ELSE clause that's "not taken") and use a special mask to
41
 * control writing to destination registers.  This is the ExecMask.
42
 * See store_dest().
43
 *
44
 * The ExecMask is computed from three other masks (CondMask, LoopMask and
45
 * ContMask) which are controlled by the flow control instructions (namely:
46
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47
 *
48
 *
49
 * Authors:
50
 *   Michal Krol
51
 *   Brian Paul
52
 */
53
 
54
#include "pipe/p_compiler.h"
55
#include "pipe/p_state.h"
56
#include "pipe/p_shader_tokens.h"
57
#include "tgsi/tgsi_dump.h"
58
#include "tgsi/tgsi_parse.h"
59
#include "tgsi/tgsi_util.h"
60
#include "tgsi_exec.h"
61
#include "util/u_memory.h"
62
#include "util/u_math.h"
63
 
64
 
65
/* NOTE(review): not referenced in the visible part of this file; presumably
 * gates extra execution tracing further down -- confirm before relying on it. */
#define DEBUG_EXECUTION 0
66
 
67
 
68
/* When non-zero, micro_exp2(), micro_lg2() and micro_pow() call the
 * util_fast_* helpers instead of powf()/logf(). */
#define FAST_MATH 0
69
 
70
/*
 * Lane indices of the four pixels of a quad inside a tgsi_exec_channel.
 * micro_ddx() and micro_ddy() use them to pick the lanes to difference.
 */
#define TILE_TOP_LEFT     0
71
#define TILE_TOP_RIGHT    1
72
#define TILE_BOTTOM_LEFT  2
73
#define TILE_BOTTOM_RIGHT 3
74
 
75
static void
76
micro_abs(union tgsi_exec_channel *dst,
77
          const union tgsi_exec_channel *src)
78
{
79
   dst->f[0] = fabsf(src->f[0]);
80
   dst->f[1] = fabsf(src->f[1]);
81
   dst->f[2] = fabsf(src->f[2]);
82
   dst->f[3] = fabsf(src->f[3]);
83
}
84
 
85
static void
86
micro_arl(union tgsi_exec_channel *dst,
87
          const union tgsi_exec_channel *src)
88
{
89
   dst->i[0] = (int)floorf(src->f[0]);
90
   dst->i[1] = (int)floorf(src->f[1]);
91
   dst->i[2] = (int)floorf(src->f[2]);
92
   dst->i[3] = (int)floorf(src->f[3]);
93
}
94
 
95
static void
96
micro_arr(union tgsi_exec_channel *dst,
97
          const union tgsi_exec_channel *src)
98
{
99
   dst->i[0] = (int)floorf(src->f[0] + 0.5f);
100
   dst->i[1] = (int)floorf(src->f[1] + 0.5f);
101
   dst->i[2] = (int)floorf(src->f[2] + 0.5f);
102
   dst->i[3] = (int)floorf(src->f[3] + 0.5f);
103
}
104
 
105
static void
106
micro_ceil(union tgsi_exec_channel *dst,
107
           const union tgsi_exec_channel *src)
108
{
109
   dst->f[0] = ceilf(src->f[0]);
110
   dst->f[1] = ceilf(src->f[1]);
111
   dst->f[2] = ceilf(src->f[2]);
112
   dst->f[3] = ceilf(src->f[3]);
113
}
114
 
115
static void
116
micro_clamp(union tgsi_exec_channel *dst,
117
            const union tgsi_exec_channel *src0,
118
            const union tgsi_exec_channel *src1,
119
            const union tgsi_exec_channel *src2)
120
{
121
   dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
122
   dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
123
   dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
124
   dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
125
}
126
 
127
static void
128
micro_cmp(union tgsi_exec_channel *dst,
129
          const union tgsi_exec_channel *src0,
130
          const union tgsi_exec_channel *src1,
131
          const union tgsi_exec_channel *src2)
132
{
133
   dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
134
   dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
135
   dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
136
   dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
137
}
138
 
139
static void
140
micro_cnd(union tgsi_exec_channel *dst,
141
          const union tgsi_exec_channel *src0,
142
          const union tgsi_exec_channel *src1,
143
          const union tgsi_exec_channel *src2)
144
{
145
   dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0];
146
   dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1];
147
   dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2];
148
   dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3];
149
}
150
 
151
static void
152
micro_cos(union tgsi_exec_channel *dst,
153
          const union tgsi_exec_channel *src)
154
{
155
   dst->f[0] = cosf(src->f[0]);
156
   dst->f[1] = cosf(src->f[1]);
157
   dst->f[2] = cosf(src->f[2]);
158
   dst->f[3] = cosf(src->f[3]);
159
}
160
 
161
static void
162
micro_ddx(union tgsi_exec_channel *dst,
163
          const union tgsi_exec_channel *src)
164
{
165
   dst->f[0] =
166
   dst->f[1] =
167
   dst->f[2] =
168
   dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
169
}
170
 
171
static void
172
micro_ddy(union tgsi_exec_channel *dst,
173
          const union tgsi_exec_channel *src)
174
{
175
   dst->f[0] =
176
   dst->f[1] =
177
   dst->f[2] =
178
   dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
179
}
180
 
181
static void
182
micro_exp2(union tgsi_exec_channel *dst,
183
           const union tgsi_exec_channel *src)
184
{
185
#if FAST_MATH
186
   dst->f[0] = util_fast_exp2(src->f[0]);
187
   dst->f[1] = util_fast_exp2(src->f[1]);
188
   dst->f[2] = util_fast_exp2(src->f[2]);
189
   dst->f[3] = util_fast_exp2(src->f[3]);
190
#else
191
#if DEBUG
192
   /* Inf is okay for this instruction, so clamp it to silence assertions. */
193
   uint i;
194
   union tgsi_exec_channel clamped;
195
 
196
   for (i = 0; i < 4; i++) {
197
      if (src->f[i] > 127.99999f) {
198
         clamped.f[i] = 127.99999f;
199
      } else if (src->f[i] < -126.99999f) {
200
         clamped.f[i] = -126.99999f;
201
      } else {
202
         clamped.f[i] = src->f[i];
203
      }
204
   }
205
   src = &clamped;
206
#endif /* DEBUG */
207
 
208
   dst->f[0] = powf(2.0f, src->f[0]);
209
   dst->f[1] = powf(2.0f, src->f[1]);
210
   dst->f[2] = powf(2.0f, src->f[2]);
211
   dst->f[3] = powf(2.0f, src->f[3]);
212
#endif /* FAST_MATH */
213
}
214
 
215
static void
216
micro_flr(union tgsi_exec_channel *dst,
217
          const union tgsi_exec_channel *src)
218
{
219
   dst->f[0] = floorf(src->f[0]);
220
   dst->f[1] = floorf(src->f[1]);
221
   dst->f[2] = floorf(src->f[2]);
222
   dst->f[3] = floorf(src->f[3]);
223
}
224
 
225
static void
226
micro_frc(union tgsi_exec_channel *dst,
227
          const union tgsi_exec_channel *src)
228
{
229
   dst->f[0] = src->f[0] - floorf(src->f[0]);
230
   dst->f[1] = src->f[1] - floorf(src->f[1]);
231
   dst->f[2] = src->f[2] - floorf(src->f[2]);
232
   dst->f[3] = src->f[3] - floorf(src->f[3]);
233
}
234
 
235
static void
236
micro_iabs(union tgsi_exec_channel *dst,
237
           const union tgsi_exec_channel *src)
238
{
239
   dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
240
   dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
241
   dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
242
   dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
243
}
244
 
245
static void
246
micro_ineg(union tgsi_exec_channel *dst,
247
           const union tgsi_exec_channel *src)
248
{
249
   dst->i[0] = -src->i[0];
250
   dst->i[1] = -src->i[1];
251
   dst->i[2] = -src->i[2];
252
   dst->i[3] = -src->i[3];
253
}
254
 
255
static void
256
micro_lg2(union tgsi_exec_channel *dst,
257
          const union tgsi_exec_channel *src)
258
{
259
#if FAST_MATH
260
   dst->f[0] = util_fast_log2(src->f[0]);
261
   dst->f[1] = util_fast_log2(src->f[1]);
262
   dst->f[2] = util_fast_log2(src->f[2]);
263
   dst->f[3] = util_fast_log2(src->f[3]);
264
#else
265
   dst->f[0] = logf(src->f[0]) * 1.442695f;
266
   dst->f[1] = logf(src->f[1]) * 1.442695f;
267
   dst->f[2] = logf(src->f[2]) * 1.442695f;
268
   dst->f[3] = logf(src->f[3]) * 1.442695f;
269
#endif
270
}
271
 
272
static void
273
micro_lrp(union tgsi_exec_channel *dst,
274
          const union tgsi_exec_channel *src0,
275
          const union tgsi_exec_channel *src1,
276
          const union tgsi_exec_channel *src2)
277
{
278
   dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
279
   dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
280
   dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
281
   dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
282
}
283
 
284
static void
285
micro_mad(union tgsi_exec_channel *dst,
286
          const union tgsi_exec_channel *src0,
287
          const union tgsi_exec_channel *src1,
288
          const union tgsi_exec_channel *src2)
289
{
290
   dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
291
   dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
292
   dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
293
   dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
294
}
295
 
296
static void
297
micro_mov(union tgsi_exec_channel *dst,
298
          const union tgsi_exec_channel *src)
299
{
300
   dst->u[0] = src->u[0];
301
   dst->u[1] = src->u[1];
302
   dst->u[2] = src->u[2];
303
   dst->u[3] = src->u[3];
304
}
305
 
306
static void
307
micro_rcp(union tgsi_exec_channel *dst,
308
          const union tgsi_exec_channel *src)
309
{
310
#if 0 /* for debugging */
311
   assert(src->f[0] != 0.0f);
312
   assert(src->f[1] != 0.0f);
313
   assert(src->f[2] != 0.0f);
314
   assert(src->f[3] != 0.0f);
315
#endif
316
   dst->f[0] = 1.0f / src->f[0];
317
   dst->f[1] = 1.0f / src->f[1];
318
   dst->f[2] = 1.0f / src->f[2];
319
   dst->f[3] = 1.0f / src->f[3];
320
}
321
 
322
static void
323
micro_rnd(union tgsi_exec_channel *dst,
324
          const union tgsi_exec_channel *src)
325
{
326
   dst->f[0] = floorf(src->f[0] + 0.5f);
327
   dst->f[1] = floorf(src->f[1] + 0.5f);
328
   dst->f[2] = floorf(src->f[2] + 0.5f);
329
   dst->f[3] = floorf(src->f[3] + 0.5f);
330
}
331
 
332
static void
333
micro_rsq(union tgsi_exec_channel *dst,
334
          const union tgsi_exec_channel *src)
335
{
336
#if 0 /* for debugging */
337
   assert(src->f[0] != 0.0f);
338
   assert(src->f[1] != 0.0f);
339
   assert(src->f[2] != 0.0f);
340
   assert(src->f[3] != 0.0f);
341
#endif
342
   dst->f[0] = 1.0f / sqrtf(src->f[0]);
343
   dst->f[1] = 1.0f / sqrtf(src->f[1]);
344
   dst->f[2] = 1.0f / sqrtf(src->f[2]);
345
   dst->f[3] = 1.0f / sqrtf(src->f[3]);
346
}
347
 
348
static void
349
micro_sqrt(union tgsi_exec_channel *dst,
350
           const union tgsi_exec_channel *src)
351
{
352
   dst->f[0] = sqrtf(src->f[0]);
353
   dst->f[1] = sqrtf(src->f[1]);
354
   dst->f[2] = sqrtf(src->f[2]);
355
   dst->f[3] = sqrtf(src->f[3]);
356
}
357
 
358
static void
359
micro_seq(union tgsi_exec_channel *dst,
360
          const union tgsi_exec_channel *src0,
361
          const union tgsi_exec_channel *src1)
362
{
363
   dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
364
   dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
365
   dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
366
   dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
367
}
368
 
369
static void
370
micro_sge(union tgsi_exec_channel *dst,
371
          const union tgsi_exec_channel *src0,
372
          const union tgsi_exec_channel *src1)
373
{
374
   dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
375
   dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
376
   dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
377
   dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
378
}
379
 
380
static void
381
micro_sgn(union tgsi_exec_channel *dst,
382
          const union tgsi_exec_channel *src)
383
{
384
   dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
385
   dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
386
   dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
387
   dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
388
}
389
 
390
static void
391
micro_isgn(union tgsi_exec_channel *dst,
392
          const union tgsi_exec_channel *src)
393
{
394
   dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
395
   dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
396
   dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
397
   dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
398
}
399
 
400
static void
401
micro_sgt(union tgsi_exec_channel *dst,
402
          const union tgsi_exec_channel *src0,
403
          const union tgsi_exec_channel *src1)
404
{
405
   dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
406
   dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
407
   dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
408
   dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
409
}
410
 
411
static void
412
micro_sin(union tgsi_exec_channel *dst,
413
          const union tgsi_exec_channel *src)
414
{
415
   dst->f[0] = sinf(src->f[0]);
416
   dst->f[1] = sinf(src->f[1]);
417
   dst->f[2] = sinf(src->f[2]);
418
   dst->f[3] = sinf(src->f[3]);
419
}
420
 
421
static void
422
micro_sle(union tgsi_exec_channel *dst,
423
          const union tgsi_exec_channel *src0,
424
          const union tgsi_exec_channel *src1)
425
{
426
   dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
427
   dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
428
   dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
429
   dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
430
}
431
 
432
static void
433
micro_slt(union tgsi_exec_channel *dst,
434
          const union tgsi_exec_channel *src0,
435
          const union tgsi_exec_channel *src1)
436
{
437
   dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
438
   dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
439
   dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
440
   dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
441
}
442
 
443
static void
444
micro_sne(union tgsi_exec_channel *dst,
445
          const union tgsi_exec_channel *src0,
446
          const union tgsi_exec_channel *src1)
447
{
448
   dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
449
   dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
450
   dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
451
   dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
452
}
453
 
454
static void
455
micro_sfl(union tgsi_exec_channel *dst)
456
{
457
   dst->f[0] = 0.0f;
458
   dst->f[1] = 0.0f;
459
   dst->f[2] = 0.0f;
460
   dst->f[3] = 0.0f;
461
}
462
 
463
static void
464
micro_str(union tgsi_exec_channel *dst)
465
{
466
   dst->f[0] = 1.0f;
467
   dst->f[1] = 1.0f;
468
   dst->f[2] = 1.0f;
469
   dst->f[3] = 1.0f;
470
}
471
 
472
static void
473
micro_trunc(union tgsi_exec_channel *dst,
474
            const union tgsi_exec_channel *src)
475
{
476
   dst->f[0] = (float)(int)src->f[0];
477
   dst->f[1] = (float)(int)src->f[1];
478
   dst->f[2] = (float)(int)src->f[2];
479
   dst->f[3] = (float)(int)src->f[3];
480
}
481
 
482
 
483
/*
 * NOTE(review): not referenced in the visible part of the file; judging by
 * the names it selects the float / signed / unsigned view of a channel --
 * confirm against the users further down.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
488
 
489
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 * Each name is a plain alias for the TGSI_EXEC_TEMP_* constant of the same
 * suffix (presumably declared in tgsi_exec.h -- not visible from here).
 */
492
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
493
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C
494
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
495
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C
496
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
497
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
498
 
499
 
500
/**
 * Recompute the execution mask as the AND of the conditional, loop,
 * continue, switch and function masks.
 * MACH is a pointer expression; it is parenthesized on every use so that
 * arguments such as "base + 1" expand correctly, and the whole expansion
 * is parenthesized so it is a single expression.
 * Note that MACH is evaluated several times: do not pass an expression
 * with side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->Switch.mask & \
                          (MACH)->FuncMask)
503
 
504
 
505
static const union tgsi_exec_channel ZeroVec =
506
   { { 0.0, 0.0, 0.0, 0.0 } };
507
 
508
static const union tgsi_exec_channel OneVec = {
509
   {1.0f, 1.0f, 1.0f, 1.0f}
510
};
511
 
512
static const union tgsi_exec_channel P128Vec = {
513
   {128.0f, 128.0f, 128.0f, 128.0f}
514
};
515
 
516
static const union tgsi_exec_channel M128Vec = {
517
   {-128.0f, -128.0f, -128.0f, -128.0f}
518
};
519
 
520
 
521
/**
522
 * Assert that none of the float values in 'chan' are infinite or NaN.
523
 * NaN and Inf may occur normally during program execution and should
524
 * not lead to crashes, etc.  But when debugging, it's helpful to catch
525
 * them.
526
 */
527
static INLINE void
528
check_inf_or_nan(const union tgsi_exec_channel *chan)
529
{
530
   assert(!util_is_inf_or_nan((chan)->f[0]));
531
   assert(!util_is_inf_or_nan((chan)->f[1]));
532
   assert(!util_is_inf_or_nan((chan)->f[2]));
533
   assert(!util_is_inf_or_nan((chan)->f[3]));
534
}
535
 
536
 
537
#ifdef DEBUG
/** Print the four float lanes of 'chan' on one line, prefixed by 'msg'. */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lane = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg,
                lane[0],
                lane[1],
                lane[2],
                lane[3]);
}
#endif
545
 
546
 
547
#ifdef DEBUG
/**
 * Print temporary register 'index' of the machine: a header line followed
 * by one line per X/Y/Z/W channel with its four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);

   for (c = 0; c < 4; c++) {
      const union tgsi_exec_channel *ch = &vec->xyzw[c];

      debug_printf("  %c: { %f, %f, %f, %f }\n",
                   "XYZW"[c],
                   ch->f[0], ch->f[1], ch->f[2], ch->f[3]);
   }
}
#endif
564
 
565
 
566
void
567
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
568
                               unsigned num_bufs,
569
                               const void **bufs,
570
                               const unsigned *buf_sizes)
571
{
572
   unsigned i;
573
 
574
   for (i = 0; i < num_bufs; i++) {
575
      mach->Consts[i] = bufs[i];
576
      mach->ConstsSize[i] = buf_sizes[i];
577
   }
578
}
579
 
580
 
581
/**
582
 * Check if there's a potential src/dst register data dependency when
583
 * using SOA execution.
584
 * Example:
585
 *   MOV T, T.yxwz;
586
 * This would expand into:
587
 *   MOV t0, t1;
588
 *   MOV t1, t0;
589
 *   MOV t2, t3;
590
 *   MOV t3, t2;
591
 * The second instruction will have the wrong value for t0 if executed as-is.
592
 */
593
boolean
594
tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
595
{
596
   uint i, chan;
597
 
598
   uint writemask = inst->Dst[0].Register.WriteMask;
599
   if (writemask == TGSI_WRITEMASK_X ||
600
       writemask == TGSI_WRITEMASK_Y ||
601
       writemask == TGSI_WRITEMASK_Z ||
602
       writemask == TGSI_WRITEMASK_W ||
603
       writemask == TGSI_WRITEMASK_NONE) {
604
      /* no chance of data dependency */
605
      return FALSE;
606
   }
607
 
608
   /* loop over src regs */
609
   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
610
      if ((inst->Src[i].Register.File ==
611
           inst->Dst[0].Register.File) &&
612
          ((inst->Src[i].Register.Index ==
613
            inst->Dst[0].Register.Index) ||
614
           inst->Src[i].Register.Indirect ||
615
           inst->Dst[0].Register.Indirect)) {
616
         /* loop over dest channels */
617
         uint channelsWritten = 0x0;
618
         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
619
            if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
620
               /* check if we're reading a channel that's been written */
621
               uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
622
               if (channelsWritten & (1 << swizzle)) {
623
                  return TRUE;
624
               }
625
 
626
               channelsWritten |= (1 << chan);
627
            }
628
         }
629
      }
630
   }
631
   return FALSE;
632
}
633
 
634
 
635
/**
636
 * Initialize machine state by expanding tokens to full instructions,
637
 * allocating temporary storage, setting up constants, etc.
638
 * After this, we can call tgsi_exec_machine_run() many times.
639
 */
640
void
641
tgsi_exec_machine_bind_shader(
642
   struct tgsi_exec_machine *mach,
643
   const struct tgsi_token *tokens,
644
   struct tgsi_sampler *sampler)
645
{
646
   uint k;
647
   struct tgsi_parse_context parse;
648
   struct tgsi_full_instruction *instructions;
649
   struct tgsi_full_declaration *declarations;
650
   uint maxInstructions = 10, numInstructions = 0;
651
   uint maxDeclarations = 10, numDeclarations = 0;
652
 
653
#if 0
654
   tgsi_dump(tokens, 0);
655
#endif
656
 
657
   util_init_math();
658
 
659
 
660
   mach->Tokens = tokens;
661
   mach->Sampler = sampler;
662
 
663
   if (!tokens) {
664
      /* unbind and free all */
665
      FREE(mach->Declarations);
666
      mach->Declarations = NULL;
667
      mach->NumDeclarations = 0;
668
 
669
      FREE(mach->Instructions);
670
      mach->Instructions = NULL;
671
      mach->NumInstructions = 0;
672
 
673
      return;
674
   }
675
 
676
   k = tgsi_parse_init (&parse, mach->Tokens);
677
   if (k != TGSI_PARSE_OK) {
678
      debug_printf( "Problem parsing!\n" );
679
      return;
680
   }
681
 
682
   mach->Processor = parse.FullHeader.Processor.Processor;
683
   mach->ImmLimit = 0;
684
   mach->NumOutputs = 0;
685
 
686
   if (mach->Processor == TGSI_PROCESSOR_GEOMETRY &&
687
       !mach->UsedGeometryShader) {
688
      struct tgsi_exec_vector *inputs;
689
      struct tgsi_exec_vector *outputs;
690
 
691
      inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
692
                            TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS,
693
                            16);
694
 
695
      if (!inputs)
696
         return;
697
 
698
      outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
699
                             TGSI_MAX_TOTAL_VERTICES, 16);
700
 
701
      if (!outputs) {
702
         align_free(inputs);
703
         return;
704
      }
705
 
706
      align_free(mach->Inputs);
707
      align_free(mach->Outputs);
708
 
709
      mach->Inputs = inputs;
710
      mach->Outputs = outputs;
711
      mach->UsedGeometryShader = TRUE;
712
   }
713
 
714
   declarations = (struct tgsi_full_declaration *)
715
      MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
716
 
717
   if (!declarations) {
718
      return;
719
   }
720
 
721
   instructions = (struct tgsi_full_instruction *)
722
      MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
723
 
724
   if (!instructions) {
725
      FREE( declarations );
726
      return;
727
   }
728
 
729
   while( !tgsi_parse_end_of_tokens( &parse ) ) {
730
      uint i;
731
 
732
      tgsi_parse_token( &parse );
733
      switch( parse.FullToken.Token.Type ) {
734
      case TGSI_TOKEN_TYPE_DECLARATION:
735
         /* save expanded declaration */
736
         if (numDeclarations == maxDeclarations) {
737
            declarations = REALLOC(declarations,
738
                                   maxDeclarations
739
                                   * sizeof(struct tgsi_full_declaration),
740
                                   (maxDeclarations + 10)
741
                                   * sizeof(struct tgsi_full_declaration));
742
            maxDeclarations += 10;
743
         }
744
         if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
745
            unsigned reg;
746
            for (reg = parse.FullToken.FullDeclaration.Range.First;
747
                 reg <= parse.FullToken.FullDeclaration.Range.Last;
748
                 ++reg) {
749
               ++mach->NumOutputs;
750
            }
751
         }
752
         memcpy(declarations + numDeclarations,
753
                &parse.FullToken.FullDeclaration,
754
                sizeof(declarations[0]));
755
         numDeclarations++;
756
         break;
757
 
758
      case TGSI_TOKEN_TYPE_IMMEDIATE:
759
         {
760
            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
761
            assert( size <= 4 );
762
            assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
763
 
764
            for( i = 0; i < size; i++ ) {
765
               mach->Imms[mach->ImmLimit][i] =
766
		  parse.FullToken.FullImmediate.u[i].Float;
767
            }
768
            mach->ImmLimit += 1;
769
         }
770
         break;
771
 
772
      case TGSI_TOKEN_TYPE_INSTRUCTION:
773
 
774
         /* save expanded instruction */
775
         if (numInstructions == maxInstructions) {
776
            instructions = REALLOC(instructions,
777
                                   maxInstructions
778
                                   * sizeof(struct tgsi_full_instruction),
779
                                   (maxInstructions + 10)
780
                                   * sizeof(struct tgsi_full_instruction));
781
            maxInstructions += 10;
782
         }
783
 
784
         memcpy(instructions + numInstructions,
785
                &parse.FullToken.FullInstruction,
786
                sizeof(instructions[0]));
787
 
788
         numInstructions++;
789
         break;
790
 
791
      case TGSI_TOKEN_TYPE_PROPERTY:
792
         break;
793
 
794
      default:
795
         assert( 0 );
796
      }
797
   }
798
   tgsi_parse_free (&parse);
799
 
800
   FREE(mach->Declarations);
801
   mach->Declarations = declarations;
802
   mach->NumDeclarations = numDeclarations;
803
 
804
   FREE(mach->Instructions);
805
   mach->Instructions = instructions;
806
   mach->NumInstructions = numInstructions;
807
}
808
 
809
 
810
struct tgsi_exec_machine *
811
tgsi_exec_machine_create( void )
812
{
813
   struct tgsi_exec_machine *mach;
814
   uint i;
815
 
816
   mach = align_malloc( sizeof *mach, 16 );
817
   if (!mach)
818
      goto fail;
819
 
820
   memset(mach, 0, sizeof(*mach));
821
 
822
   mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
823
   mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
824
   mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
825
 
826
   mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
827
   mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16);
828
   if (!mach->Inputs || !mach->Outputs)
829
      goto fail;
830
 
831
   /* Setup constants needed by the SSE2 executor. */
832
   for( i = 0; i < 4; i++ ) {
833
      mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
834
      mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
835
      mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
836
      mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF;    /* not used */
837
      mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
838
      mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f;    /* not used */
839
      mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
840
      mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
841
      mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
842
      mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
843
   }
844
 
845
#ifdef DEBUG
846
   /* silence warnings */
847
   (void) print_chan;
848
   (void) print_temp;
849
#endif
850
 
851
   return mach;
852
 
853
fail:
854
   if (mach) {
855
      align_free(mach->Inputs);
856
      align_free(mach->Outputs);
857
      align_free(mach);
858
   }
859
   return NULL;
860
}
861
 
862
 
863
void
864
tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
865
{
866
   if (mach) {
867
      FREE(mach->Instructions);
868
      FREE(mach->Declarations);
869
 
870
      align_free(mach->Inputs);
871
      align_free(mach->Outputs);
872
 
873
      align_free(mach);
874
   }
875
}
876
 
877
static void
878
micro_add(union tgsi_exec_channel *dst,
879
          const union tgsi_exec_channel *src0,
880
          const union tgsi_exec_channel *src1)
881
{
882
   dst->f[0] = src0->f[0] + src1->f[0];
883
   dst->f[1] = src0->f[1] + src1->f[1];
884
   dst->f[2] = src0->f[2] + src1->f[2];
885
   dst->f[3] = src0->f[3] + src1->f[3];
886
}
887
 
888
static void
889
micro_div(
890
   union tgsi_exec_channel *dst,
891
   const union tgsi_exec_channel *src0,
892
   const union tgsi_exec_channel *src1 )
893
{
894
   if (src1->f[0] != 0) {
895
      dst->f[0] = src0->f[0] / src1->f[0];
896
   }
897
   if (src1->f[1] != 0) {
898
      dst->f[1] = src0->f[1] / src1->f[1];
899
   }
900
   if (src1->f[2] != 0) {
901
      dst->f[2] = src0->f[2] / src1->f[2];
902
   }
903
   if (src1->f[3] != 0) {
904
      dst->f[3] = src0->f[3] / src1->f[3];
905
   }
906
}
907
 
908
static void
909
micro_rcc(union tgsi_exec_channel *dst,
910
          const union tgsi_exec_channel *src)
911
{
912
   uint i;
913
 
914
   for (i = 0; i < 4; i++) {
915
      float recip = 1.0f / src->f[i];
916
 
917
      if (recip > 0.0f) {
918
         if (recip > 1.884467e+019f) {
919
            dst->f[i] = 1.884467e+019f;
920
         }
921
         else if (recip < 5.42101e-020f) {
922
            dst->f[i] = 5.42101e-020f;
923
         }
924
         else {
925
            dst->f[i] = recip;
926
         }
927
      }
928
      else {
929
         if (recip < -1.884467e+019f) {
930
            dst->f[i] = -1.884467e+019f;
931
         }
932
         else if (recip > -5.42101e-020f) {
933
            dst->f[i] = -5.42101e-020f;
934
         }
935
         else {
936
            dst->f[i] = recip;
937
         }
938
      }
939
   }
940
}
941
 
942
static void
943
micro_lt(
944
   union tgsi_exec_channel *dst,
945
   const union tgsi_exec_channel *src0,
946
   const union tgsi_exec_channel *src1,
947
   const union tgsi_exec_channel *src2,
948
   const union tgsi_exec_channel *src3 )
949
{
950
   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
951
   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
952
   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
953
   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
954
}
955
 
956
static void
957
micro_max(union tgsi_exec_channel *dst,
958
          const union tgsi_exec_channel *src0,
959
          const union tgsi_exec_channel *src1)
960
{
961
   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
962
   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
963
   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
964
   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
965
}
966
 
967
static void
968
micro_min(union tgsi_exec_channel *dst,
969
          const union tgsi_exec_channel *src0,
970
          const union tgsi_exec_channel *src1)
971
{
972
   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
973
   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
974
   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
975
   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
976
}
977
 
978
static void
979
micro_mul(union tgsi_exec_channel *dst,
980
          const union tgsi_exec_channel *src0,
981
          const union tgsi_exec_channel *src1)
982
{
983
   dst->f[0] = src0->f[0] * src1->f[0];
984
   dst->f[1] = src0->f[1] * src1->f[1];
985
   dst->f[2] = src0->f[2] * src1->f[2];
986
   dst->f[3] = src0->f[3] * src1->f[3];
987
}
988
 
989
static void
990
micro_neg(
991
   union tgsi_exec_channel *dst,
992
   const union tgsi_exec_channel *src )
993
{
994
   dst->f[0] = -src->f[0];
995
   dst->f[1] = -src->f[1];
996
   dst->f[2] = -src->f[2];
997
   dst->f[3] = -src->f[3];
998
}
999
 
1000
static void
1001
micro_pow(
1002
   union tgsi_exec_channel *dst,
1003
   const union tgsi_exec_channel *src0,
1004
   const union tgsi_exec_channel *src1 )
1005
{
1006
#if FAST_MATH
1007
   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1008
   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1009
   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1010
   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1011
#else
1012
   dst->f[0] = powf( src0->f[0], src1->f[0] );
1013
   dst->f[1] = powf( src0->f[1], src1->f[1] );
1014
   dst->f[2] = powf( src0->f[2], src1->f[2] );
1015
   dst->f[3] = powf( src0->f[3], src1->f[3] );
1016
#endif
1017
}
1018
 
1019
static void
1020
micro_sub(union tgsi_exec_channel *dst,
1021
          const union tgsi_exec_channel *src0,
1022
          const union tgsi_exec_channel *src1)
1023
{
1024
   dst->f[0] = src0->f[0] - src1->f[0];
1025
   dst->f[1] = src0->f[1] - src1->f[1];
1026
   dst->f[2] = src0->f[2] - src1->f[2];
1027
   dst->f[3] = src0->f[3] - src1->f[3];
1028
}
1029
 
1030
static void
1031
fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1032
                       const uint chan_index,
1033
                       const uint file,
1034
                       const uint swizzle,
1035
                       const union tgsi_exec_channel *index,
1036
                       const union tgsi_exec_channel *index2D,
1037
                       union tgsi_exec_channel *chan)
1038
{
1039
   uint i;
1040
 
1041
   assert(swizzle < 4);
1042
 
1043
   switch (file) {
1044
   case TGSI_FILE_CONSTANT:
1045
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1046
         assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1047
         assert(mach->Consts[index2D->i[i]]);
1048
 
1049
         if (index->i[i] < 0) {
1050
            chan->u[i] = 0;
1051
         } else {
1052
            /* NOTE: copying the const value as a uint instead of float */
1053
            const uint constbuf = index2D->i[i];
1054
            const uint *buf = (const uint *)mach->Consts[constbuf];
1055
            const int pos = index->i[i] * 4 + swizzle;
1056
            /* const buffer bounds check */
1057
            if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1058
               if (0) {
1059
                  /* Debug: print warning */
1060
                  static int count = 0;
1061
                  if (count++ < 100)
1062
                     debug_printf("TGSI Exec: const buffer index %d"
1063
                                  " out of bounds\n", pos);
1064
               }
1065
               chan->u[i] = 0;
1066
            }
1067
            else
1068
               chan->u[i] = buf[pos];
1069
         }
1070
      }
1071
      break;
1072
 
1073
   case TGSI_FILE_INPUT:
1074
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1075
         /*
1076
         if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1077
            debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1078
                         index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1079
                         index2D->i[i], index->i[i]);
1080
                         }*/
1081
         int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1082
         assert(pos >= 0);
1083
         assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1084
         chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1085
      }
1086
      break;
1087
 
1088
   case TGSI_FILE_SYSTEM_VALUE:
1089
      /* XXX no swizzling at this point.  Will be needed if we put
1090
       * gl_FragCoord, for example, in a sys value register.
1091
       */
1092
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1093
         chan->u[i] = mach->SystemValue[index->i[i]].u[i];
1094
      }
1095
      break;
1096
 
1097
   case TGSI_FILE_TEMPORARY:
1098
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1099
         assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1100
         assert(index2D->i[i] == 0);
1101
 
1102
         chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1103
      }
1104
      break;
1105
 
1106
   case TGSI_FILE_IMMEDIATE:
1107
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1108
         assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1109
         assert(index2D->i[i] == 0);
1110
 
1111
         chan->f[i] = mach->Imms[index->i[i]][swizzle];
1112
      }
1113
      break;
1114
 
1115
   case TGSI_FILE_ADDRESS:
1116
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1117
         assert(index->i[i] >= 0);
1118
         assert(index2D->i[i] == 0);
1119
 
1120
         chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1121
      }
1122
      break;
1123
 
1124
   case TGSI_FILE_PREDICATE:
1125
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1126
         assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1127
         assert(index2D->i[i] == 0);
1128
 
1129
         chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1130
      }
1131
      break;
1132
 
1133
   case TGSI_FILE_OUTPUT:
1134
      /* vertex/fragment output vars can be read too */
1135
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1136
         assert(index->i[i] >= 0);
1137
         assert(index2D->i[i] == 0);
1138
 
1139
         chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1140
      }
1141
      break;
1142
 
1143
   default:
1144
      assert(0);
1145
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1146
         chan->u[i] = 0;
1147
      }
1148
   }
1149
}
1150
 
1151
static void
1152
fetch_source(const struct tgsi_exec_machine *mach,
1153
             union tgsi_exec_channel *chan,
1154
             const struct tgsi_full_src_register *reg,
1155
             const uint chan_index,
1156
             enum tgsi_exec_datatype src_datatype)
1157
{
1158
   union tgsi_exec_channel index;
1159
   union tgsi_exec_channel index2D;
1160
   uint swizzle;
1161
 
1162
   /* We start with a direct index into a register file.
1163
    *
1164
    *    file[1],
1165
    *    where:
1166
    *       file = Register.File
1167
    *       [1] = Register.Index
1168
    */
1169
   index.i[0] =
1170
   index.i[1] =
1171
   index.i[2] =
1172
   index.i[3] = reg->Register.Index;
1173
 
1174
   /* There is an extra source register that indirectly subscripts
1175
    * a register file. The direct index now becomes an offset
1176
    * that is being added to the indirect register.
1177
    *
1178
    *    file[ind[2].x+1],
1179
    *    where:
1180
    *       ind = Indirect.File
1181
    *       [2] = Indirect.Index
1182
    *       .x = Indirect.SwizzleX
1183
    */
1184
   if (reg->Register.Indirect) {
1185
      union tgsi_exec_channel index2;
1186
      union tgsi_exec_channel indir_index;
1187
      const uint execmask = mach->ExecMask;
1188
      uint i;
1189
 
1190
      /* which address register (always zero now) */
1191
      index2.i[0] =
1192
      index2.i[1] =
1193
      index2.i[2] =
1194
      index2.i[3] = reg->Indirect.Index;
1195
      /* get current value of address register[swizzle] */
1196
      swizzle = reg->Indirect.Swizzle;
1197
      fetch_src_file_channel(mach,
1198
                             chan_index,
1199
                             reg->Indirect.File,
1200
                             swizzle,
1201
                             &index2,
1202
                             &ZeroVec,
1203
                             &indir_index);
1204
 
1205
      /* add value of address register to the offset */
1206
      index.i[0] += indir_index.i[0];
1207
      index.i[1] += indir_index.i[1];
1208
      index.i[2] += indir_index.i[2];
1209
      index.i[3] += indir_index.i[3];
1210
 
1211
      /* for disabled execution channels, zero-out the index to
1212
       * avoid using a potential garbage value.
1213
       */
1214
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1215
         if ((execmask & (1 << i)) == 0)
1216
            index.i[i] = 0;
1217
      }
1218
   }
1219
 
1220
   /* There is an extra source register that is a second
1221
    * subscript to a register file. Effectively it means that
1222
    * the register file is actually a 2D array of registers.
1223
    *
1224
    *    file[3][1],
1225
    *    where:
1226
    *       [3] = Dimension.Index
1227
    */
1228
   if (reg->Register.Dimension) {
1229
      index2D.i[0] =
1230
      index2D.i[1] =
1231
      index2D.i[2] =
1232
      index2D.i[3] = reg->Dimension.Index;
1233
 
1234
      /* Again, the second subscript index can be addressed indirectly
1235
       * identically to the first one.
1236
       * Nothing stops us from indirectly addressing the indirect register,
1237
       * but there is no need for that, so we won't exercise it.
1238
       *
1239
       *    file[ind[4].y+3][1],
1240
       *    where:
1241
       *       ind = DimIndirect.File
1242
       *       [4] = DimIndirect.Index
1243
       *       .y = DimIndirect.SwizzleX
1244
       */
1245
      if (reg->Dimension.Indirect) {
1246
         union tgsi_exec_channel index2;
1247
         union tgsi_exec_channel indir_index;
1248
         const uint execmask = mach->ExecMask;
1249
         uint i;
1250
 
1251
         index2.i[0] =
1252
         index2.i[1] =
1253
         index2.i[2] =
1254
         index2.i[3] = reg->DimIndirect.Index;
1255
 
1256
         swizzle = reg->DimIndirect.Swizzle;
1257
         fetch_src_file_channel(mach,
1258
                                chan_index,
1259
                                reg->DimIndirect.File,
1260
                                swizzle,
1261
                                &index2,
1262
                                &ZeroVec,
1263
                                &indir_index);
1264
 
1265
         index2D.i[0] += indir_index.i[0];
1266
         index2D.i[1] += indir_index.i[1];
1267
         index2D.i[2] += indir_index.i[2];
1268
         index2D.i[3] += indir_index.i[3];
1269
 
1270
         /* for disabled execution channels, zero-out the index to
1271
          * avoid using a potential garbage value.
1272
          */
1273
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1274
            if ((execmask & (1 << i)) == 0) {
1275
               index2D.i[i] = 0;
1276
            }
1277
         }
1278
      }
1279
 
1280
      /* If by any chance there was a need for a 3D array of register
1281
       * files, we would have to check whether Dimension is followed
1282
       * by a dimension register and continue the saga.
1283
       */
1284
   } else {
1285
      index2D.i[0] =
1286
      index2D.i[1] =
1287
      index2D.i[2] =
1288
      index2D.i[3] = 0;
1289
   }
1290
 
1291
   swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1292
   fetch_src_file_channel(mach,
1293
                          chan_index,
1294
                          reg->Register.File,
1295
                          swizzle,
1296
                          &index,
1297
                          &index2D,
1298
                          chan);
1299
 
1300
   if (reg->Register.Absolute) {
1301
      if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1302
         micro_abs(chan, chan);
1303
      } else {
1304
         micro_iabs(chan, chan);
1305
      }
1306
   }
1307
 
1308
   if (reg->Register.Negate) {
1309
      if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1310
         micro_neg(chan, chan);
1311
      } else {
1312
         micro_ineg(chan, chan);
1313
      }
1314
   }
1315
}
1316
 
1317
static void
1318
store_dest(struct tgsi_exec_machine *mach,
1319
           const union tgsi_exec_channel *chan,
1320
           const struct tgsi_full_dst_register *reg,
1321
           const struct tgsi_full_instruction *inst,
1322
           uint chan_index,
1323
           enum tgsi_exec_datatype dst_datatype)
1324
{
1325
   uint i;
1326
   union tgsi_exec_channel null;
1327
   union tgsi_exec_channel *dst;
1328
   union tgsi_exec_channel index2D;
1329
   uint execmask = mach->ExecMask;
1330
   int offset = 0;  /* indirection offset */
1331
   int index;
1332
 
1333
   /* for debugging */
1334
   if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1335
      check_inf_or_nan(chan);
1336
   }
1337
 
1338
   /* There is an extra source register that indirectly subscripts
1339
    * a register file. The direct index now becomes an offset
1340
    * that is being added to the indirect register.
1341
    *
1342
    *    file[ind[2].x+1],
1343
    *    where:
1344
    *       ind = Indirect.File
1345
    *       [2] = Indirect.Index
1346
    *       .x = Indirect.SwizzleX
1347
    */
1348
   if (reg->Register.Indirect) {
1349
      union tgsi_exec_channel index;
1350
      union tgsi_exec_channel indir_index;
1351
      uint swizzle;
1352
 
1353
      /* which address register (always zero for now) */
1354
      index.i[0] =
1355
      index.i[1] =
1356
      index.i[2] =
1357
      index.i[3] = reg->Indirect.Index;
1358
 
1359
      /* get current value of address register[swizzle] */
1360
      swizzle = reg->Indirect.Swizzle;
1361
 
1362
      /* fetch values from the address/indirection register */
1363
      fetch_src_file_channel(mach,
1364
                             chan_index,
1365
                             reg->Indirect.File,
1366
                             swizzle,
1367
                             &index,
1368
                             &ZeroVec,
1369
                             &indir_index);
1370
 
1371
      /* save indirection offset */
1372
      offset = indir_index.i[0];
1373
   }
1374
 
1375
   /* There is an extra source register that is a second
1376
    * subscript to a register file. Effectively it means that
1377
    * the register file is actually a 2D array of registers.
1378
    *
1379
    *    file[3][1],
1380
    *    where:
1381
    *       [3] = Dimension.Index
1382
    */
1383
   if (reg->Register.Dimension) {
1384
      index2D.i[0] =
1385
      index2D.i[1] =
1386
      index2D.i[2] =
1387
      index2D.i[3] = reg->Dimension.Index;
1388
 
1389
      /* Again, the second subscript index can be addressed indirectly
1390
       * identically to the first one.
1391
       * Nothing stops us from indirectly addressing the indirect register,
1392
       * but there is no need for that, so we won't exercise it.
1393
       *
1394
       *    file[ind[4].y+3][1],
1395
       *    where:
1396
       *       ind = DimIndirect.File
1397
       *       [4] = DimIndirect.Index
1398
       *       .y = DimIndirect.SwizzleX
1399
       */
1400
      if (reg->Dimension.Indirect) {
1401
         union tgsi_exec_channel index2;
1402
         union tgsi_exec_channel indir_index;
1403
         const uint execmask = mach->ExecMask;
1404
         unsigned swizzle;
1405
         uint i;
1406
 
1407
         index2.i[0] =
1408
         index2.i[1] =
1409
         index2.i[2] =
1410
         index2.i[3] = reg->DimIndirect.Index;
1411
 
1412
         swizzle = reg->DimIndirect.Swizzle;
1413
         fetch_src_file_channel(mach,
1414
                                chan_index,
1415
                                reg->DimIndirect.File,
1416
                                swizzle,
1417
                                &index2,
1418
                                &ZeroVec,
1419
                                &indir_index);
1420
 
1421
         index2D.i[0] += indir_index.i[0];
1422
         index2D.i[1] += indir_index.i[1];
1423
         index2D.i[2] += indir_index.i[2];
1424
         index2D.i[3] += indir_index.i[3];
1425
 
1426
         /* for disabled execution channels, zero-out the index to
1427
          * avoid using a potential garbage value.
1428
          */
1429
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1430
            if ((execmask & (1 << i)) == 0) {
1431
               index2D.i[i] = 0;
1432
            }
1433
         }
1434
      }
1435
 
1436
      /* If by any chance there was a need for a 3D array of register
1437
       * files, we would have to check whether Dimension is followed
1438
       * by a dimension register and continue the saga.
1439
       */
1440
   } else {
1441
      index2D.i[0] =
1442
      index2D.i[1] =
1443
      index2D.i[2] =
1444
      index2D.i[3] = 0;
1445
   }
1446
 
1447
   switch (reg->Register.File) {
1448
   case TGSI_FILE_NULL:
1449
      dst = &null;
1450
      break;
1451
 
1452
   case TGSI_FILE_OUTPUT:
1453
      index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1454
         + reg->Register.Index;
1455
      dst = &mach->Outputs[offset + index].xyzw[chan_index];
1456
#if 0
1457
      debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1458
                   mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1459
                   reg->Register.Index);
1460
      if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1461
         debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1462
         for (i = 0; i < TGSI_QUAD_SIZE; i++)
1463
            if (execmask & (1 << i))
1464
               debug_printf("%f, ", chan->f[i]);
1465
         debug_printf(")\n");
1466
      }
1467
#endif
1468
      break;
1469
 
1470
   case TGSI_FILE_TEMPORARY:
1471
      index = reg->Register.Index;
1472
      assert( index < TGSI_EXEC_NUM_TEMPS );
1473
      dst = &mach->Temps[offset + index].xyzw[chan_index];
1474
      break;
1475
 
1476
   case TGSI_FILE_ADDRESS:
1477
      index = reg->Register.Index;
1478
      dst = &mach->Addrs[index].xyzw[chan_index];
1479
      break;
1480
 
1481
   case TGSI_FILE_PREDICATE:
1482
      index = reg->Register.Index;
1483
      assert(index < TGSI_EXEC_NUM_PREDS);
1484
      dst = &mach->Predicates[index].xyzw[chan_index];
1485
      break;
1486
 
1487
   default:
1488
      assert( 0 );
1489
      return;
1490
   }
1491
 
1492
   if (inst->Instruction.Predicate) {
1493
      uint swizzle;
1494
      union tgsi_exec_channel *pred;
1495
 
1496
      switch (chan_index) {
1497
      case TGSI_CHAN_X:
1498
         swizzle = inst->Predicate.SwizzleX;
1499
         break;
1500
      case TGSI_CHAN_Y:
1501
         swizzle = inst->Predicate.SwizzleY;
1502
         break;
1503
      case TGSI_CHAN_Z:
1504
         swizzle = inst->Predicate.SwizzleZ;
1505
         break;
1506
      case TGSI_CHAN_W:
1507
         swizzle = inst->Predicate.SwizzleW;
1508
         break;
1509
      default:
1510
         assert(0);
1511
         return;
1512
      }
1513
 
1514
      assert(inst->Predicate.Index == 0);
1515
 
1516
      pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1517
 
1518
      if (inst->Predicate.Negate) {
1519
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1520
            if (pred->u[i]) {
1521
               execmask &= ~(1 << i);
1522
            }
1523
         }
1524
      } else {
1525
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1526
            if (!pred->u[i]) {
1527
               execmask &= ~(1 << i);
1528
            }
1529
         }
1530
      }
1531
   }
1532
 
1533
   switch (inst->Instruction.Saturate) {
1534
   case TGSI_SAT_NONE:
1535
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1536
         if (execmask & (1 << i))
1537
            dst->i[i] = chan->i[i];
1538
      break;
1539
 
1540
   case TGSI_SAT_ZERO_ONE:
1541
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1542
         if (execmask & (1 << i)) {
1543
            if (chan->f[i] < 0.0f)
1544
               dst->f[i] = 0.0f;
1545
            else if (chan->f[i] > 1.0f)
1546
               dst->f[i] = 1.0f;
1547
            else
1548
               dst->i[i] = chan->i[i];
1549
         }
1550
      break;
1551
 
1552
   case TGSI_SAT_MINUS_PLUS_ONE:
1553
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
1554
         if (execmask & (1 << i)) {
1555
            if (chan->f[i] < -1.0f)
1556
               dst->f[i] = -1.0f;
1557
            else if (chan->f[i] > 1.0f)
1558
               dst->f[i] = 1.0f;
1559
            else
1560
               dst->i[i] = chan->i[i];
1561
         }
1562
      break;
1563
 
1564
   default:
1565
      assert( 0 );
1566
   }
1567
}
1568
 
1569
/* Shorthands for fetch_source() on source operand INDEX, channel CHAN,
 * storing into VAL.  FETCH passes the float datatype, IFETCH the integer
 * one.  Both expect `mach` and `inst` to be in scope where they are used.
 */
#define FETCH(VAL,INDEX,CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL,INDEX,CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
1574
 
1575
 
1576
/**
1577
 * Execute ARB-style KIL which is predicated by a src register.
1578
 * Kill fragment if any of the four values is less than zero.
1579
 */
1580
static void
1581
exec_kill_if(struct tgsi_exec_machine *mach,
1582
             const struct tgsi_full_instruction *inst)
1583
{
1584
   uint uniquemask;
1585
   uint chan_index;
1586
   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1587
   union tgsi_exec_channel r[1];
1588
 
1589
   /* This mask stores component bits that were already tested. */
1590
   uniquemask = 0;
1591
 
1592
   for (chan_index = 0; chan_index < 4; chan_index++)
1593
   {
1594
      uint swizzle;
1595
      uint i;
1596
 
1597
      /* unswizzle channel */
1598
      swizzle = tgsi_util_get_full_src_register_swizzle (
1599
                        &inst->Src[0],
1600
                        chan_index);
1601
 
1602
      /* check if the component has not been already tested */
1603
      if (uniquemask & (1 << swizzle))
1604
         continue;
1605
      uniquemask |= 1 << swizzle;
1606
 
1607
      FETCH(&r[0], 0, chan_index);
1608
      for (i = 0; i < 4; i++)
1609
         if (r[0].f[i] < 0.0f)
1610
            kilmask |= 1 << i;
1611
   }
1612
 
1613
   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1614
}
1615
 
1616
/**
1617
 * Unconditional fragment kill/discard.
1618
 */
1619
static void
1620
exec_kill(struct tgsi_exec_machine *mach,
1621
          const struct tgsi_full_instruction *inst)
1622
{
1623
   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1624
 
1625
   /* kill fragment for all fragments currently executing */
1626
   kilmask = mach->ExecMask;
1627
   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
1628
}
1629
 
1630
static void
1631
emit_vertex(struct tgsi_exec_machine *mach)
1632
{
1633
   /* FIXME: check for exec mask correctly
1634
   unsigned i;
1635
   for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1636
         if ((mach->ExecMask & (1 << i)))
1637
   */
1638
   if (mach->ExecMask) {
1639
      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
1640
      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
1641
   }
1642
}
1643
 
1644
static void
1645
emit_primitive(struct tgsi_exec_machine *mach)
1646
{
1647
   unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
1648
   /* FIXME: check for exec mask correctly
1649
   unsigned i;
1650
   for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1651
         if ((mach->ExecMask & (1 << i)))
1652
   */
1653
   if (mach->ExecMask) {
1654
      ++(*prim_count);
1655
      debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
1656
      mach->Primitives[*prim_count] = 0;
1657
   }
1658
}
1659
 
1660
static void
1661
conditional_emit_primitive(struct tgsi_exec_machine *mach)
1662
{
1663
   if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
1664
      int emitted_verts =
1665
         mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
1666
      if (emitted_verts) {
1667
         emit_primitive(mach);
1668
      }
1669
   }
1670
}
1671
 
1672
 
1673
/*
1674
 * Fetch four texture samples using STR texture coordinates.
1675
 */
1676
static void
1677
fetch_texel( struct tgsi_sampler *sampler,
1678
             const unsigned sview_idx,
1679
             const unsigned sampler_idx,
1680
             const union tgsi_exec_channel *s,
1681
             const union tgsi_exec_channel *t,
1682
             const union tgsi_exec_channel *p,
1683
             const union tgsi_exec_channel *c0,
1684
             const union tgsi_exec_channel *c1,
1685
             float derivs[3][2][TGSI_QUAD_SIZE],
1686
             const int8_t offset[3],
1687
             enum tgsi_sampler_control control,
1688
             union tgsi_exec_channel *r,
1689
             union tgsi_exec_channel *g,
1690
             union tgsi_exec_channel *b,
1691
             union tgsi_exec_channel *a )
1692
{
1693
   uint j;
1694
   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1695
 
1696
   /* FIXME: handle explicit derivs, offsets */
1697
   sampler->get_samples(sampler, sview_idx, sampler_idx,
1698
                        s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
1699
 
1700
   for (j = 0; j < 4; j++) {
1701
      r->f[j] = rgba[0][j];
1702
      g->f[j] = rgba[1][j];
1703
      b->f[j] = rgba[2][j];
1704
      a->f[j] = rgba[3][j];
1705
   }
1706
}
1707
 
1708
 
1709
/* Values for the `modifier` argument of exec_tex(). */
#define TEX_MODIFIER_NONE         0  /* no extra argument is fetched */
#define TEX_MODIFIER_PROJECTED    1  /* coordinates are divided by the argument */
#define TEX_MODIFIER_LOD_BIAS     2  /* argument is passed on with lod_bias control */
#define TEX_MODIFIER_EXPLICIT_LOD 3  /* argument is passed on with lod_explicit control */
#define TEX_MODIFIER_LEVEL_ZERO   4  /* exec_tex() asserts it is never given this */
1714
 
1715
 
1716
/*
1717
 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
1718
 */
1719
static void
1720
fetch_texel_offsets(struct tgsi_exec_machine *mach,
1721
                    const struct tgsi_full_instruction *inst,
1722
                    int8_t offsets[3])
1723
{
1724
   if (inst->Texture.NumOffsets == 1) {
1725
      union tgsi_exec_channel index;
1726
      union tgsi_exec_channel offset[3];
1727
      index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
1728
      fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1729
                             inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
1730
      fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1731
                             inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
1732
      fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
1733
                             inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
1734
     offsets[0] = offset[0].i[0];
1735
     offsets[1] = offset[1].i[0];
1736
     offsets[2] = offset[2].i[0];
1737
   } else {
1738
     assert(inst->Texture.NumOffsets == 0);
1739
     offsets[0] = offsets[1] = offsets[2] = 0;
1740
   }
1741
}
1742
 
1743
 
1744
/*
1745
 * Fetch dx and dy values for one channel (s, t or r).
1746
 * Put dx values into one float array, dy values into another.
1747
 */
1748
static void
1749
fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
1750
                           const struct tgsi_full_instruction *inst,
1751
                           unsigned regdsrcx,
1752
                           unsigned chan,
1753
                           float derivs[2][TGSI_QUAD_SIZE])
1754
{
1755
   union tgsi_exec_channel d;
1756
   FETCH(&d, regdsrcx, chan);
1757
   derivs[0][0] = d.f[0];
1758
   derivs[0][1] = d.f[1];
1759
   derivs[0][2] = d.f[2];
1760
   derivs[0][3] = d.f[3];
1761
   FETCH(&d, regdsrcx + 1, chan);
1762
   derivs[1][0] = d.f[0];
1763
   derivs[1][1] = d.f[1];
1764
   derivs[1][2] = d.f[2];
1765
   derivs[1][3] = d.f[3];
1766
}
1767
 
1768
 
1769
/*
1770
 * execute a texture instruction.
1771
 *
1772
 * modifier is used to control the channel routing for the\
1773
 * instruction variants like proj, lod, and texture with lod bias.
1774
 * sampler indicates which src register the sampler is contained in.
1775
 */
1776
static void
1777
exec_tex(struct tgsi_exec_machine *mach,
1778
         const struct tgsi_full_instruction *inst,
1779
         uint modifier, uint sampler)
1780
{
1781
   const uint unit = inst->Src[sampler].Register.Index;
1782
   const union tgsi_exec_channel *args[5], *proj = NULL;
1783
   union tgsi_exec_channel r[5];
1784
   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
1785
   uint chan;
1786
   int8_t offsets[3];
1787
   int dim, shadow_ref, i;
1788
 
1789
   /* always fetch all 3 offsets, overkill but keeps code simple */
1790
   fetch_texel_offsets(mach, inst, offsets);
1791
 
1792
   assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
1793
   assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
1794
 
1795
   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref);
1796
 
1797
   assert(dim <= 4);
1798
   if (shadow_ref >= 0)
1799
      assert(shadow_ref >= dim && shadow_ref < Elements(args));
1800
 
1801
   /* fetch modifier to the last argument */
1802
   if (modifier != TEX_MODIFIER_NONE) {
1803
      const int last = Elements(args) - 1;
1804
 
1805
      /* fetch modifier from src0.w or src1.x */
1806
      if (sampler == 1) {
1807
         assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
1808
         FETCH(&r[last], 0, TGSI_CHAN_W);
1809
      }
1810
      else {
1811
         assert(shadow_ref != 4);
1812
         FETCH(&r[last], 1, TGSI_CHAN_X);
1813
      }
1814
 
1815
      if (modifier != TEX_MODIFIER_PROJECTED) {
1816
         args[last] = &r[last];
1817
      }
1818
      else {
1819
         proj = &r[last];
1820
         args[last] = &ZeroVec;
1821
      }
1822
 
1823
      /* point unused arguments to zero vector */
1824
      for (i = dim; i < last; i++)
1825
         args[i] = &ZeroVec;
1826
 
1827
      if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
1828
         control = tgsi_sampler_lod_explicit;
1829
      else if (modifier == TEX_MODIFIER_LOD_BIAS)
1830
         control = tgsi_sampler_lod_bias;
1831
   }
1832
   else {
1833
      for (i = dim; i < Elements(args); i++)
1834
         args[i] = &ZeroVec;
1835
   }
1836
 
1837
   /* fetch coordinates */
1838
   for (i = 0; i < dim; i++) {
1839
      FETCH(&r[i], 0, TGSI_CHAN_X + i);
1840
 
1841
      if (proj)
1842
         micro_div(&r[i], &r[i], proj);
1843
 
1844
      args[i] = &r[i];
1845
   }
1846
 
1847
   /* fetch reference value */
1848
   if (shadow_ref >= 0) {
1849
      FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
1850
 
1851
      if (proj)
1852
         micro_div(&r[shadow_ref], &r[shadow_ref], proj);
1853
 
1854
      args[shadow_ref] = &r[shadow_ref];
1855
   }
1856
 
1857
   fetch_texel(mach->Sampler, unit, unit,
1858
         args[0], args[1], args[2], args[3], args[4],
1859
         NULL, offsets, control,
1860
         &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
1861
 
1862
#if 0
1863
   debug_printf("fetch r: %g %g %g %g\n",
1864
         r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
1865
   debug_printf("fetch g: %g %g %g %g\n",
1866
         r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
1867
   debug_printf("fetch b: %g %g %g %g\n",
1868
         r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
1869
   debug_printf("fetch a: %g %g %g %g\n",
1870
         r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
1871
#endif
1872
 
1873
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1874
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1875
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1876
      }
1877
   }
1878
}
1879
 
1880
 
1881
static void
1882
exec_txd(struct tgsi_exec_machine *mach,
1883
         const struct tgsi_full_instruction *inst)
1884
{
1885
   const uint unit = inst->Src[3].Register.Index;
1886
   union tgsi_exec_channel r[4];
1887
   float derivs[3][2][TGSI_QUAD_SIZE];
1888
   uint chan;
1889
   int8_t offsets[3];
1890
 
1891
   /* always fetch all 3 offsets, overkill but keeps code simple */
1892
   fetch_texel_offsets(mach, inst, offsets);
1893
 
1894
   switch (inst->Texture.Texture) {
1895
   case TGSI_TEXTURE_1D:
1896
      FETCH(&r[0], 0, TGSI_CHAN_X);
1897
 
1898
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1899
 
1900
      fetch_texel(mach->Sampler, unit, unit,
1901
                  &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
1902
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1903
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
1904
      break;
1905
 
1906
   case TGSI_TEXTURE_SHADOW1D:
1907
   case TGSI_TEXTURE_1D_ARRAY:
1908
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1909
      /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
1910
      FETCH(&r[0], 0, TGSI_CHAN_X);
1911
      FETCH(&r[1], 0, TGSI_CHAN_Y);
1912
      FETCH(&r[2], 0, TGSI_CHAN_Z);
1913
 
1914
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1915
 
1916
      fetch_texel(mach->Sampler, unit, unit,
1917
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
1918
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1919
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
1920
      break;
1921
 
1922
   case TGSI_TEXTURE_2D:
1923
   case TGSI_TEXTURE_RECT:
1924
      FETCH(&r[0], 0, TGSI_CHAN_X);
1925
      FETCH(&r[1], 0, TGSI_CHAN_Y);
1926
 
1927
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1928
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
1929
 
1930
      fetch_texel(mach->Sampler, unit, unit,
1931
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
1932
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1933
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
1934
      break;
1935
 
1936
 
1937
   case TGSI_TEXTURE_SHADOW2D:
1938
   case TGSI_TEXTURE_SHADOWRECT:
1939
   case TGSI_TEXTURE_2D_ARRAY:
1940
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1941
      /* only SHADOW2D_ARRAY actually needs W */
1942
      FETCH(&r[0], 0, TGSI_CHAN_X);
1943
      FETCH(&r[1], 0, TGSI_CHAN_Y);
1944
      FETCH(&r[2], 0, TGSI_CHAN_Z);
1945
      FETCH(&r[3], 0, TGSI_CHAN_W);
1946
 
1947
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1948
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
1949
 
1950
      fetch_texel(mach->Sampler, unit, unit,
1951
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
1952
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1953
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
1954
      break;
1955
 
1956
   case TGSI_TEXTURE_3D:
1957
   case TGSI_TEXTURE_CUBE:
1958
   case TGSI_TEXTURE_CUBE_ARRAY:
1959
      /* only TEXTURE_CUBE_ARRAY actually needs W */
1960
      FETCH(&r[0], 0, TGSI_CHAN_X);
1961
      FETCH(&r[1], 0, TGSI_CHAN_Y);
1962
      FETCH(&r[2], 0, TGSI_CHAN_Z);
1963
      FETCH(&r[3], 0, TGSI_CHAN_W);
1964
 
1965
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
1966
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
1967
      fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
1968
 
1969
      fetch_texel(mach->Sampler, unit, unit,
1970
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
1971
                  derivs, offsets, tgsi_sampler_derivs_explicit,
1972
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
1973
      break;
1974
 
1975
   default:
1976
      assert(0);
1977
   }
1978
 
1979
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1980
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1981
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
1982
      }
1983
   }
1984
}
1985
 
1986
 
1987
static void
1988
exec_txf(struct tgsi_exec_machine *mach,
1989
         const struct tgsi_full_instruction *inst)
1990
{
1991
   const uint unit = inst->Src[1].Register.Index;
1992
   union tgsi_exec_channel r[4];
1993
   uint chan;
1994
   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1995
   int j;
1996
   int8_t offsets[3];
1997
   unsigned target;
1998
 
1999
   /* always fetch all 3 offsets, overkill but keeps code simple */
2000
   fetch_texel_offsets(mach, inst, offsets);
2001
 
2002
   IFETCH(&r[3], 0, TGSI_CHAN_W);
2003
 
2004
   if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) {
2005
      target = mach->SamplerViews[unit].Resource;
2006
   }
2007
   else {
2008
      target = inst->Texture.Texture;
2009
   }
2010
   switch(target) {
2011
   case TGSI_TEXTURE_3D:
2012
   case TGSI_TEXTURE_2D_ARRAY:
2013
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
2014
      IFETCH(&r[2], 0, TGSI_CHAN_Z);
2015
      /* fallthrough */
2016
   case TGSI_TEXTURE_2D:
2017
   case TGSI_TEXTURE_RECT:
2018
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
2019
   case TGSI_TEXTURE_SHADOW2D:
2020
   case TGSI_TEXTURE_SHADOWRECT:
2021
   case TGSI_TEXTURE_1D_ARRAY:
2022
      IFETCH(&r[1], 0, TGSI_CHAN_Y);
2023
      /* fallthrough */
2024
   case TGSI_TEXTURE_BUFFER:
2025
   case TGSI_TEXTURE_1D:
2026
   case TGSI_TEXTURE_SHADOW1D:
2027
      IFETCH(&r[0], 0, TGSI_CHAN_X);
2028
      break;
2029
   default:
2030
      assert(0);
2031
      break;
2032
   }
2033
 
2034
   mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2035
                            offsets, rgba);
2036
 
2037
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2038
      r[0].f[j] = rgba[0][j];
2039
      r[1].f[j] = rgba[1][j];
2040
      r[2].f[j] = rgba[2][j];
2041
      r[3].f[j] = rgba[3][j];
2042
   }
2043
 
2044
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2045
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2046
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2047
      }
2048
   }
2049
}
2050
 
2051
static void
2052
exec_txq(struct tgsi_exec_machine *mach,
2053
         const struct tgsi_full_instruction *inst)
2054
{
2055
   const uint unit = inst->Src[1].Register.Index;
2056
   int result[4];
2057
   union tgsi_exec_channel r[4], src;
2058
   uint chan;
2059
   int i,j;
2060
 
2061
   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2062
 
2063
   mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2064
 
2065
   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2066
      for (j = 0; j < 4; j++) {
2067
         r[j].i[i] = result[j];
2068
      }
2069
   }
2070
 
2071
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2072
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2073
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2074
                    TGSI_EXEC_DATA_INT);
2075
      }
2076
   }
2077
}
2078
 
2079
static void
2080
exec_sample(struct tgsi_exec_machine *mach,
2081
            const struct tgsi_full_instruction *inst,
2082
            uint modifier, boolean compare)
2083
{
2084
   const uint resource_unit = inst->Src[1].Register.Index;
2085
   const uint sampler_unit = inst->Src[2].Register.Index;
2086
   union tgsi_exec_channel r[4], c1;
2087
   const union tgsi_exec_channel *lod = &ZeroVec;
2088
   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
2089
   uint chan;
2090
   int8_t offsets[3];
2091
 
2092
   /* always fetch all 3 offsets, overkill but keeps code simple */
2093
   fetch_texel_offsets(mach, inst, offsets);
2094
 
2095
   assert(modifier != TEX_MODIFIER_PROJECTED);
2096
 
2097
   if (modifier != TEX_MODIFIER_NONE) {
2098
      if (modifier == TEX_MODIFIER_LOD_BIAS) {
2099
         FETCH(&c1, 3, TGSI_CHAN_X);
2100
         lod = &c1;
2101
         control = tgsi_sampler_lod_bias;
2102
      }
2103
      else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2104
         FETCH(&c1, 3, TGSI_CHAN_X);
2105
         lod = &c1;
2106
         control = tgsi_sampler_lod_explicit;
2107
      }
2108
      else {
2109
         assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2110
         control = tgsi_sampler_lod_zero;
2111
      }
2112
   }
2113
 
2114
   FETCH(&r[0], 0, TGSI_CHAN_X);
2115
 
2116
   switch (mach->SamplerViews[resource_unit].Resource) {
2117
   case TGSI_TEXTURE_1D:
2118
      if (compare) {
2119
         FETCH(&r[2], 3, TGSI_CHAN_X);
2120
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2121
                     &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2122
                     NULL, offsets, control,
2123
                     &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2124
      }
2125
      else {
2126
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2127
                     &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2128
                     NULL, offsets, control,
2129
                     &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2130
      }
2131
      break;
2132
 
2133
   case TGSI_TEXTURE_1D_ARRAY:
2134
   case TGSI_TEXTURE_2D:
2135
   case TGSI_TEXTURE_RECT:
2136
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2137
      if (compare) {
2138
         FETCH(&r[2], 3, TGSI_CHAN_X);
2139
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2140
                     &r[0], &r[1], &r[2], &ZeroVec, lod,    /* S, T, P, C, LOD */
2141
                     NULL, offsets, control,
2142
                     &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2143
      }
2144
      else {
2145
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2146
                     &r[0], &r[1], &ZeroVec, &ZeroVec, lod,    /* S, T, P, C, LOD */
2147
                     NULL, offsets, control,
2148
                     &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2149
      }
2150
      break;
2151
 
2152
   case TGSI_TEXTURE_2D_ARRAY:
2153
   case TGSI_TEXTURE_3D:
2154
   case TGSI_TEXTURE_CUBE:
2155
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2156
      FETCH(&r[2], 0, TGSI_CHAN_Z);
2157
      if(compare) {
2158
         FETCH(&r[3], 3, TGSI_CHAN_X);
2159
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2160
                     &r[0], &r[1], &r[2], &r[3], lod,
2161
                     NULL, offsets, control,
2162
                     &r[0], &r[1], &r[2], &r[3]);
2163
      }
2164
      else {
2165
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2166
                     &r[0], &r[1], &r[2], &ZeroVec, lod,
2167
                     NULL, offsets, control,
2168
                     &r[0], &r[1], &r[2], &r[3]);
2169
      }
2170
      break;
2171
 
2172
   case TGSI_TEXTURE_CUBE_ARRAY:
2173
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2174
      FETCH(&r[2], 0, TGSI_CHAN_Z);
2175
      FETCH(&r[3], 0, TGSI_CHAN_W);
2176
      if(compare) {
2177
         FETCH(&r[4], 3, TGSI_CHAN_X);
2178
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2179
                     &r[0], &r[1], &r[2], &r[3], &r[4],
2180
                     NULL, offsets, control,
2181
                     &r[0], &r[1], &r[2], &r[3]);
2182
      }
2183
      else {
2184
         fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2185
                     &r[0], &r[1], &r[2], &r[3], lod,
2186
                     NULL, offsets, control,
2187
                     &r[0], &r[1], &r[2], &r[3]);
2188
      }
2189
      break;
2190
 
2191
 
2192
   default:
2193
      assert(0);
2194
   }
2195
 
2196
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2197
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2198
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2199
      }
2200
   }
2201
}
2202
 
2203
static void
2204
exec_sample_d(struct tgsi_exec_machine *mach,
2205
              const struct tgsi_full_instruction *inst)
2206
{
2207
   const uint resource_unit = inst->Src[1].Register.Index;
2208
   const uint sampler_unit = inst->Src[2].Register.Index;
2209
   union tgsi_exec_channel r[4];
2210
   float derivs[3][2][TGSI_QUAD_SIZE];
2211
   uint chan;
2212
   int8_t offsets[3];
2213
 
2214
   /* always fetch all 3 offsets, overkill but keeps code simple */
2215
   fetch_texel_offsets(mach, inst, offsets);
2216
 
2217
   FETCH(&r[0], 0, TGSI_CHAN_X);
2218
 
2219
   switch (mach->SamplerViews[resource_unit].Resource) {
2220
   case TGSI_TEXTURE_1D:
2221
   case TGSI_TEXTURE_1D_ARRAY:
2222
      /* only 1D array actually needs Y */
2223
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2224
 
2225
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2226
 
2227
      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2228
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2229
                  derivs, offsets, tgsi_sampler_derivs_explicit,
2230
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2231
      break;
2232
 
2233
   case TGSI_TEXTURE_2D:
2234
   case TGSI_TEXTURE_RECT:
2235
   case TGSI_TEXTURE_2D_ARRAY:
2236
      /* only 2D array actually needs Z */
2237
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2238
      FETCH(&r[2], 0, TGSI_CHAN_Z);
2239
 
2240
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2241
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2242
 
2243
      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2244
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
2245
                  derivs, offsets, tgsi_sampler_derivs_explicit,
2246
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2247
      break;
2248
 
2249
   case TGSI_TEXTURE_3D:
2250
   case TGSI_TEXTURE_CUBE:
2251
   case TGSI_TEXTURE_CUBE_ARRAY:
2252
      /* only cube array actually needs W */
2253
      FETCH(&r[1], 0, TGSI_CHAN_Y);
2254
      FETCH(&r[2], 0, TGSI_CHAN_Z);
2255
      FETCH(&r[3], 0, TGSI_CHAN_W);
2256
 
2257
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2258
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2259
      fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2260
 
2261
      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2262
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2263
                  derivs, offsets, tgsi_sampler_derivs_explicit,
2264
                  &r[0], &r[1], &r[2], &r[3]);
2265
      break;
2266
 
2267
   default:
2268
      assert(0);
2269
   }
2270
 
2271
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2272
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2273
         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2274
      }
2275
   }
2276
}
2277
 
2278
 
2279
/**
2280
 * Evaluate a constant-valued coefficient at the position of the
2281
 * current quad.
2282
 */
2283
static void
2284
eval_constant_coef(
2285
   struct tgsi_exec_machine *mach,
2286
   unsigned attrib,
2287
   unsigned chan )
2288
{
2289
   unsigned i;
2290
 
2291
   for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2292
      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2293
   }
2294
}
2295
 
2296
/**
2297
 * Evaluate a linear-valued coefficient at the position of the
2298
 * current quad.
2299
 */
2300
static void
2301
eval_linear_coef(
2302
   struct tgsi_exec_machine *mach,
2303
   unsigned attrib,
2304
   unsigned chan )
2305
{
2306
   const float x = mach->QuadPos.xyzw[0].f[0];
2307
   const float y = mach->QuadPos.xyzw[1].f[0];
2308
   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2309
   const float dady = mach->InterpCoefs[attrib].dady[chan];
2310
   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2311
   mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2312
   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2313
   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2314
   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2315
}
2316
 
2317
/**
2318
 * Evaluate a perspective-valued coefficient at the position of the
2319
 * current quad.
2320
 */
2321
static void
2322
eval_perspective_coef(
2323
   struct tgsi_exec_machine *mach,
2324
   unsigned attrib,
2325
   unsigned chan )
2326
{
2327
   const float x = mach->QuadPos.xyzw[0].f[0];
2328
   const float y = mach->QuadPos.xyzw[1].f[0];
2329
   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2330
   const float dady = mach->InterpCoefs[attrib].dady[chan];
2331
   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2332
   const float *w = mach->QuadPos.xyzw[3].f;
2333
   /* divide by W here */
2334
   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2335
   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2336
   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2337
   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2338
}
2339
 
2340
 
2341
/** Signature shared by the eval_*_coef() attribute interpolators. */
typedef void (* eval_coef_func)(struct tgsi_exec_machine *mach,
                                unsigned attrib,
                                unsigned chan);
2345
 
2346
static void
2347
exec_declaration(struct tgsi_exec_machine *mach,
2348
                 const struct tgsi_full_declaration *decl)
2349
{
2350
   if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2351
      mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2352
      return;
2353
   }
2354
 
2355
   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
2356
      if (decl->Declaration.File == TGSI_FILE_INPUT) {
2357
         uint first, last, mask;
2358
 
2359
         first = decl->Range.First;
2360
         last = decl->Range.Last;
2361
         mask = decl->Declaration.UsageMask;
2362
 
2363
         /* XXX we could remove this special-case code since
2364
          * mach->InterpCoefs[first].a0 should already have the
2365
          * front/back-face value.  But we should first update the
2366
          * ureg code to emit the right UsageMask value (WRITEMASK_X).
2367
          * Then, we could remove the tgsi_exec_machine::Face field.
2368
          */
2369
         /* XXX make FACE a system value */
2370
         if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2371
            uint i;
2372
 
2373
            assert(decl->Semantic.Index == 0);
2374
            assert(first == last);
2375
 
2376
            for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2377
               mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2378
            }
2379
         } else {
2380
            eval_coef_func eval;
2381
            uint i, j;
2382
 
2383
            switch (decl->Interp.Interpolate) {
2384
            case TGSI_INTERPOLATE_CONSTANT:
2385
               eval = eval_constant_coef;
2386
               break;
2387
 
2388
            case TGSI_INTERPOLATE_LINEAR:
2389
               eval = eval_linear_coef;
2390
               break;
2391
 
2392
            case TGSI_INTERPOLATE_PERSPECTIVE:
2393
               eval = eval_perspective_coef;
2394
               break;
2395
 
2396
            case TGSI_INTERPOLATE_COLOR:
2397
               eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2398
               break;
2399
 
2400
            default:
2401
               assert(0);
2402
               return;
2403
            }
2404
 
2405
            for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2406
               if (mask & (1 << j)) {
2407
                  for (i = first; i <= last; i++) {
2408
                     eval(mach, i, j);
2409
                  }
2410
               }
2411
            }
2412
         }
2413
 
2414
         if (DEBUG_EXECUTION) {
2415
            uint i, j;
2416
            for (i = first; i <= last; ++i) {
2417
               debug_printf("IN[%2u] = ", i);
2418
               for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2419
                  if (j > 0) {
2420
                     debug_printf("         ");
2421
                  }
2422
                  debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
2423
                               mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
2424
                               mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
2425
                               mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
2426
                               mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
2427
               }
2428
            }
2429
         }
2430
      }
2431
   }
2432
 
2433
   if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
2434
      mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First;
2435
   }
2436
}
2437
 
2438
 
2439
/** Per-channel operation that takes no source operand and writes \p result. */
typedef void (* micro_op)(union tgsi_exec_channel *result);
2440
 
2441
static void
2442
exec_vector(struct tgsi_exec_machine *mach,
2443
            const struct tgsi_full_instruction *inst,
2444
            micro_op op,
2445
            enum tgsi_exec_datatype dst_datatype)
2446
{
2447
   unsigned int chan;
2448
 
2449
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2450
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2451
         union tgsi_exec_channel dst;
2452
 
2453
         op(&dst);
2454
         store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2455
      }
2456
   }
2457
}
2458
 
2459
/** Per-channel operation computing \p result from a single operand. */
typedef void (* micro_unary_op)(union tgsi_exec_channel *result,
                                const union tgsi_exec_channel *operand);
2461
 
2462
static void
2463
exec_scalar_unary(struct tgsi_exec_machine *mach,
2464
                  const struct tgsi_full_instruction *inst,
2465
                  micro_unary_op op,
2466
                  enum tgsi_exec_datatype dst_datatype,
2467
                  enum tgsi_exec_datatype src_datatype)
2468
{
2469
   unsigned int chan;
2470
   union tgsi_exec_channel src;
2471
   union tgsi_exec_channel dst;
2472
 
2473
   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
2474
   op(&dst, &src);
2475
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2476
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2477
         store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2478
      }
2479
   }
2480
}
2481
 
2482
static void
2483
exec_vector_unary(struct tgsi_exec_machine *mach,
2484
                  const struct tgsi_full_instruction *inst,
2485
                  micro_unary_op op,
2486
                  enum tgsi_exec_datatype dst_datatype,
2487
                  enum tgsi_exec_datatype src_datatype)
2488
{
2489
   unsigned int chan;
2490
   struct tgsi_exec_vector dst;
2491
 
2492
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2493
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2494
         union tgsi_exec_channel src;
2495
 
2496
         fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2497
         op(&dst.xyzw[chan], &src);
2498
      }
2499
   }
2500
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2501
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2502
         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2503
      }
2504
   }
2505
}
2506
 
2507
/** Per-channel operation computing \p result from two operands. */
typedef void (* micro_binary_op)(union tgsi_exec_channel *result,
                                 const union tgsi_exec_channel *lhs,
                                 const union tgsi_exec_channel *rhs);
2510
 
2511
static void
2512
exec_scalar_binary(struct tgsi_exec_machine *mach,
2513
                   const struct tgsi_full_instruction *inst,
2514
                   micro_binary_op op,
2515
                   enum tgsi_exec_datatype dst_datatype,
2516
                   enum tgsi_exec_datatype src_datatype)
2517
{
2518
   unsigned int chan;
2519
   union tgsi_exec_channel src[2];
2520
   union tgsi_exec_channel dst;
2521
 
2522
   fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
2523
   fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
2524
   op(&dst, &src[0], &src[1]);
2525
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2526
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2527
         store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
2528
      }
2529
   }
2530
}
2531
 
2532
static void
2533
exec_vector_binary(struct tgsi_exec_machine *mach,
2534
                   const struct tgsi_full_instruction *inst,
2535
                   micro_binary_op op,
2536
                   enum tgsi_exec_datatype dst_datatype,
2537
                   enum tgsi_exec_datatype src_datatype)
2538
{
2539
   unsigned int chan;
2540
   struct tgsi_exec_vector dst;
2541
 
2542
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2543
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2544
         union tgsi_exec_channel src[2];
2545
 
2546
         fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2547
         fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2548
         op(&dst.xyzw[chan], &src[0], &src[1]);
2549
      }
2550
   }
2551
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2552
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2553
         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2554
      }
2555
   }
2556
}
2557
 
2558
/** Per-channel operation computing \p result from three operands. */
typedef void (* micro_trinary_op)(union tgsi_exec_channel *result,
                                  const union tgsi_exec_channel *a,
                                  const union tgsi_exec_channel *b,
                                  const union tgsi_exec_channel *c);
2562
 
2563
static void
2564
exec_vector_trinary(struct tgsi_exec_machine *mach,
2565
                    const struct tgsi_full_instruction *inst,
2566
                    micro_trinary_op op,
2567
                    enum tgsi_exec_datatype dst_datatype,
2568
                    enum tgsi_exec_datatype src_datatype)
2569
{
2570
   unsigned int chan;
2571
   struct tgsi_exec_vector dst;
2572
 
2573
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2574
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2575
         union tgsi_exec_channel src[3];
2576
 
2577
         fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2578
         fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2579
         fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
2580
         op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
2581
      }
2582
   }
2583
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2584
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2585
         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
2586
      }
2587
   }
2588
}
2589
 
2590
static void
2591
exec_dp3(struct tgsi_exec_machine *mach,
2592
         const struct tgsi_full_instruction *inst)
2593
{
2594
   unsigned int chan;
2595
   union tgsi_exec_channel arg[3];
2596
 
2597
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2598
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2599
   micro_mul(&arg[2], &arg[0], &arg[1]);
2600
 
2601
   for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
2602
      fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2603
      fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2604
      micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2605
   }
2606
 
2607
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2608
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2609
         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2610
      }
2611
   }
2612
}
2613
 
2614
static void
2615
exec_dp4(struct tgsi_exec_machine *mach,
2616
         const struct tgsi_full_instruction *inst)
2617
{
2618
   unsigned int chan;
2619
   union tgsi_exec_channel arg[3];
2620
 
2621
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2622
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2623
   micro_mul(&arg[2], &arg[0], &arg[1]);
2624
 
2625
   for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
2626
      fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2627
      fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
2628
      micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2629
   }
2630
 
2631
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2632
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2633
         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2634
      }
2635
   }
2636
}
2637
 
2638
static void
2639
exec_dp2a(struct tgsi_exec_machine *mach,
2640
          const struct tgsi_full_instruction *inst)
2641
{
2642
   unsigned int chan;
2643
   union tgsi_exec_channel arg[3];
2644
 
2645
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2646
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2647
   micro_mul(&arg[2], &arg[0], &arg[1]);
2648
 
2649
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2650
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2651
   micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2652
 
2653
   fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2654
   micro_add(&arg[0], &arg[0], &arg[1]);
2655
 
2656
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2657
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2658
         store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2659
      }
2660
   }
2661
}
2662
 
2663
static void
2664
exec_dph(struct tgsi_exec_machine *mach,
2665
         const struct tgsi_full_instruction *inst)
2666
{
2667
   unsigned int chan;
2668
   union tgsi_exec_channel arg[3];
2669
 
2670
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2671
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2672
   micro_mul(&arg[2], &arg[0], &arg[1]);
2673
 
2674
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2675
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2676
   micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2677
 
2678
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2679
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2680
   micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
2681
 
2682
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2683
   micro_add(&arg[0], &arg[0], &arg[1]);
2684
 
2685
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2686
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2687
         store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2688
      }
2689
   }
2690
}
2691
 
2692
static void
2693
exec_dp2(struct tgsi_exec_machine *mach,
2694
         const struct tgsi_full_instruction *inst)
2695
{
2696
   unsigned int chan;
2697
   union tgsi_exec_channel arg[3];
2698
 
2699
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2700
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2701
   micro_mul(&arg[2], &arg[0], &arg[1]);
2702
 
2703
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2704
   fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2705
   micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
2706
 
2707
   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2708
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2709
         store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2710
      }
2711
   }
2712
}
2713
 
2714
static void
2715
exec_nrm4(struct tgsi_exec_machine *mach,
2716
          const struct tgsi_full_instruction *inst)
2717
{
2718
   unsigned int chan;
2719
   union tgsi_exec_channel arg[4];
2720
   union tgsi_exec_channel scale;
2721
 
2722
   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2723
   micro_mul(&scale, &arg[0], &arg[0]);
2724
 
2725
   for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
2726
      union tgsi_exec_channel product;
2727
 
2728
      fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2729
      micro_mul(&product, &arg[chan], &arg[chan]);
2730
      micro_add(&scale, &scale, &product);
2731
   }
2732
 
2733
   micro_rsq(&scale, &scale);
2734
 
2735
   for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_W; chan++) {
2736
      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2737
         micro_mul(&arg[chan], &arg[chan], &scale);
2738
         store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2739
      }
2740
   }
2741
}
2742
 
2743
static void
2744
exec_nrm3(struct tgsi_exec_machine *mach,
2745
          const struct tgsi_full_instruction *inst)
2746
{
2747
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2748
      unsigned int chan;
2749
      union tgsi_exec_channel arg[3];
2750
      union tgsi_exec_channel scale;
2751
 
2752
      fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2753
      micro_mul(&scale, &arg[0], &arg[0]);
2754
 
2755
      for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
2756
         union tgsi_exec_channel product;
2757
 
2758
         fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
2759
         micro_mul(&product, &arg[chan], &arg[chan]);
2760
         micro_add(&scale, &scale, &product);
2761
      }
2762
 
2763
      micro_rsq(&scale, &scale);
2764
 
2765
      for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_Z; chan++) {
2766
         if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2767
            micro_mul(&arg[chan], &arg[chan], &scale);
2768
            store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2769
         }
2770
      }
2771
   }
2772
 
2773
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2774
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2775
   }
2776
}
2777
 
2778
static void
2779
exec_scs(struct tgsi_exec_machine *mach,
2780
         const struct tgsi_full_instruction *inst)
2781
{
2782
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
2783
      union tgsi_exec_channel arg;
2784
      union tgsi_exec_channel result;
2785
 
2786
      fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2787
 
2788
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2789
         micro_cos(&result, &arg);
2790
         store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2791
      }
2792
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2793
         micro_sin(&result, &arg);
2794
         store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2795
      }
2796
   }
2797
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2798
      store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2799
   }
2800
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2801
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2802
   }
2803
}
2804
 
2805
static void
2806
exec_x2d(struct tgsi_exec_machine *mach,
2807
         const struct tgsi_full_instruction *inst)
2808
{
2809
   union tgsi_exec_channel r[4];
2810
   union tgsi_exec_channel d[2];
2811
 
2812
   fetch_source(mach, &r[0], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2813
   fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2814
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) {
2815
      fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2816
      micro_mul(&r[2], &r[2], &r[0]);
2817
      fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2818
      micro_mul(&r[3], &r[3], &r[1]);
2819
      micro_add(&r[2], &r[2], &r[3]);
2820
      fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2821
      micro_add(&d[0], &r[2], &r[3]);
2822
   }
2823
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) {
2824
      fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2825
      micro_mul(&r[2], &r[2], &r[0]);
2826
      fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2827
      micro_mul(&r[3], &r[3], &r[1]);
2828
      micro_add(&r[2], &r[2], &r[3]);
2829
      fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2830
      micro_add(&d[1], &r[2], &r[3]);
2831
   }
2832
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2833
      store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2834
   }
2835
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2836
      store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2837
   }
2838
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2839
      store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2840
   }
2841
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2842
      store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2843
   }
2844
}
2845
 
2846
static void
2847
exec_rfl(struct tgsi_exec_machine *mach,
2848
         const struct tgsi_full_instruction *inst)
2849
{
2850
   union tgsi_exec_channel r[9];
2851
 
2852
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
2853
      /* r0 = dp3(src0, src0) */
2854
      fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2855
      micro_mul(&r[0], &r[2], &r[2]);
2856
      fetch_source(mach, &r[4], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2857
      micro_mul(&r[8], &r[4], &r[4]);
2858
      micro_add(&r[0], &r[0], &r[8]);
2859
      fetch_source(mach, &r[6], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2860
      micro_mul(&r[8], &r[6], &r[6]);
2861
      micro_add(&r[0], &r[0], &r[8]);
2862
 
2863
      /* r1 = dp3(src0, src1) */
2864
      fetch_source(mach, &r[3], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2865
      micro_mul(&r[1], &r[2], &r[3]);
2866
      fetch_source(mach, &r[5], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2867
      micro_mul(&r[8], &r[4], &r[5]);
2868
      micro_add(&r[1], &r[1], &r[8]);
2869
      fetch_source(mach, &r[7], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2870
      micro_mul(&r[8], &r[6], &r[7]);
2871
      micro_add(&r[1], &r[1], &r[8]);
2872
 
2873
      /* r1 = 2 * r1 / r0 */
2874
      micro_add(&r[1], &r[1], &r[1]);
2875
      micro_div(&r[1], &r[1], &r[0]);
2876
 
2877
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2878
         micro_mul(&r[2], &r[2], &r[1]);
2879
         micro_sub(&r[2], &r[2], &r[3]);
2880
         store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2881
      }
2882
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2883
         micro_mul(&r[4], &r[4], &r[1]);
2884
         micro_sub(&r[4], &r[4], &r[5]);
2885
         store_dest(mach, &r[4], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2886
      }
2887
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2888
         micro_mul(&r[6], &r[6], &r[1]);
2889
         micro_sub(&r[6], &r[6], &r[7]);
2890
         store_dest(mach, &r[6], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2891
      }
2892
   }
2893
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2894
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2895
   }
2896
}
2897
 
2898
static void
2899
exec_xpd(struct tgsi_exec_machine *mach,
2900
         const struct tgsi_full_instruction *inst)
2901
{
2902
   union tgsi_exec_channel r[6];
2903
   union tgsi_exec_channel d[3];
2904
 
2905
   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2906
   fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2907
 
2908
   micro_mul(&r[2], &r[0], &r[1]);
2909
 
2910
   fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2911
   fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2912
 
2913
   micro_mul(&r[5], &r[3], &r[4] );
2914
   micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
2915
 
2916
   fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2917
 
2918
   micro_mul(&r[3], &r[3], &r[2]);
2919
 
2920
   fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2921
 
2922
   micro_mul(&r[1], &r[1], &r[5]);
2923
   micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
2924
 
2925
   micro_mul(&r[5], &r[5], &r[4]);
2926
   micro_mul(&r[0], &r[0], &r[2]);
2927
   micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
2928
 
2929
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2930
      store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2931
   }
2932
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2933
      store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2934
   }
2935
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2936
      store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2937
   }
2938
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2939
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2940
   }
2941
}
2942
 
2943
static void
2944
exec_dst(struct tgsi_exec_machine *mach,
2945
         const struct tgsi_full_instruction *inst)
2946
{
2947
   union tgsi_exec_channel r[2];
2948
   union tgsi_exec_channel d[4];
2949
 
2950
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2951
      fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2952
      fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2953
      micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
2954
   }
2955
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2956
      fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2957
   }
2958
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2959
      fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2960
   }
2961
 
2962
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2963
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2964
   }
2965
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2966
      store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2967
   }
2968
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2969
      store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2970
   }
2971
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2972
      store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2973
   }
2974
}
2975
 
2976
static void
2977
exec_log(struct tgsi_exec_machine *mach,
2978
         const struct tgsi_full_instruction *inst)
2979
{
2980
   union tgsi_exec_channel r[3];
2981
 
2982
   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2983
   micro_abs(&r[2], &r[0]);  /* r2 = abs(r0) */
2984
   micro_lg2(&r[1], &r[2]);  /* r1 = lg2(r2) */
2985
   micro_flr(&r[0], &r[1]);  /* r0 = floor(r1) */
2986
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2987
      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
2988
   }
2989
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2990
      micro_exp2(&r[0], &r[0]);       /* r0 = 2 ^ r0 */
2991
      micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
2992
      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
2993
   }
2994
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
2995
      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
2996
   }
2997
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
2998
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
2999
   }
3000
}
3001
 
3002
static void
3003
exec_exp(struct tgsi_exec_machine *mach,
3004
         const struct tgsi_full_instruction *inst)
3005
{
3006
   union tgsi_exec_channel r[3];
3007
 
3008
   fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3009
   micro_flr(&r[1], &r[0]);  /* r1 = floor(r0) */
3010
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3011
      micro_exp2(&r[2], &r[1]);       /* r2 = 2 ^ r1 */
3012
      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3013
   }
3014
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3015
      micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3016
      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3017
   }
3018
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3019
      micro_exp2(&r[2], &r[0]);       /* r2 = 2 ^ r0 */
3020
      store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3021
   }
3022
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3023
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3024
   }
3025
}
3026
 
3027
static void
3028
exec_lit(struct tgsi_exec_machine *mach,
3029
         const struct tgsi_full_instruction *inst)
3030
{
3031
   union tgsi_exec_channel r[3];
3032
   union tgsi_exec_channel d[3];
3033
 
3034
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3035
      fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3036
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3037
         fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3038
         micro_max(&r[1], &r[1], &ZeroVec);
3039
 
3040
         fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3041
         micro_min(&r[2], &r[2], &P128Vec);
3042
         micro_max(&r[2], &r[2], &M128Vec);
3043
         micro_pow(&r[1], &r[1], &r[2]);
3044
         micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3045
         store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3046
      }
3047
      if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3048
         micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3049
         store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3050
      }
3051
   }
3052
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3053
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3054
   }
3055
 
3056
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3057
      store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3058
   }
3059
}
3060
 
3061
static void
3062
exec_break(struct tgsi_exec_machine *mach)
3063
{
3064
   if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3065
      /* turn off loop channels for each enabled exec channel */
3066
      mach->LoopMask &= ~mach->ExecMask;
3067
      /* Todo: if mach->LoopMask == 0, jump to end of loop */
3068
      UPDATE_EXEC_MASK(mach);
3069
   } else {
3070
      assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3071
 
3072
      mach->Switch.mask = 0x0;
3073
 
3074
      UPDATE_EXEC_MASK(mach);
3075
   }
3076
}
3077
 
3078
static void
3079
exec_switch(struct tgsi_exec_machine *mach,
3080
            const struct tgsi_full_instruction *inst)
3081
{
3082
   assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3083
   assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3084
 
3085
   mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3086
   fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3087
   mach->Switch.mask = 0x0;
3088
   mach->Switch.defaultMask = 0x0;
3089
 
3090
   mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3091
   mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3092
 
3093
   UPDATE_EXEC_MASK(mach);
3094
}
3095
 
3096
static void
3097
exec_case(struct tgsi_exec_machine *mach,
3098
          const struct tgsi_full_instruction *inst)
3099
{
3100
   uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3101
   union tgsi_exec_channel src;
3102
   uint mask = 0;
3103
 
3104
   fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3105
 
3106
   if (mach->Switch.selector.u[0] == src.u[0]) {
3107
      mask |= 0x1;
3108
   }
3109
   if (mach->Switch.selector.u[1] == src.u[1]) {
3110
      mask |= 0x2;
3111
   }
3112
   if (mach->Switch.selector.u[2] == src.u[2]) {
3113
      mask |= 0x4;
3114
   }
3115
   if (mach->Switch.selector.u[3] == src.u[3]) {
3116
      mask |= 0x8;
3117
   }
3118
 
3119
   mach->Switch.defaultMask |= mask;
3120
 
3121
   mach->Switch.mask |= mask & prevMask;
3122
 
3123
   UPDATE_EXEC_MASK(mach);
3124
}
3125
 
3126
/* FIXME: this will only work if default is last */
3127
static void
3128
exec_default(struct tgsi_exec_machine *mach)
3129
{
3130
   uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3131
 
3132
   mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3133
 
3134
   UPDATE_EXEC_MASK(mach);
3135
}
3136
 
3137
static void
3138
exec_endswitch(struct tgsi_exec_machine *mach)
3139
{
3140
   mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3141
   mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3142
 
3143
   UPDATE_EXEC_MASK(mach);
3144
}
3145
 
3146
static void
3147
micro_i2f(union tgsi_exec_channel *dst,
3148
          const union tgsi_exec_channel *src)
3149
{
3150
   dst->f[0] = (float)src->i[0];
3151
   dst->f[1] = (float)src->i[1];
3152
   dst->f[2] = (float)src->i[2];
3153
   dst->f[3] = (float)src->i[3];
3154
}
3155
 
3156
static void
3157
micro_not(union tgsi_exec_channel *dst,
3158
          const union tgsi_exec_channel *src)
3159
{
3160
   dst->u[0] = ~src->u[0];
3161
   dst->u[1] = ~src->u[1];
3162
   dst->u[2] = ~src->u[2];
3163
   dst->u[3] = ~src->u[3];
3164
}
3165
 
3166
static void
3167
micro_shl(union tgsi_exec_channel *dst,
3168
          const union tgsi_exec_channel *src0,
3169
          const union tgsi_exec_channel *src1)
3170
{
3171
   dst->u[0] = src0->u[0] << src1->u[0];
3172
   dst->u[1] = src0->u[1] << src1->u[1];
3173
   dst->u[2] = src0->u[2] << src1->u[2];
3174
   dst->u[3] = src0->u[3] << src1->u[3];
3175
}
3176
 
3177
static void
3178
micro_and(union tgsi_exec_channel *dst,
3179
          const union tgsi_exec_channel *src0,
3180
          const union tgsi_exec_channel *src1)
3181
{
3182
   dst->u[0] = src0->u[0] & src1->u[0];
3183
   dst->u[1] = src0->u[1] & src1->u[1];
3184
   dst->u[2] = src0->u[2] & src1->u[2];
3185
   dst->u[3] = src0->u[3] & src1->u[3];
3186
}
3187
 
3188
static void
3189
micro_or(union tgsi_exec_channel *dst,
3190
         const union tgsi_exec_channel *src0,
3191
         const union tgsi_exec_channel *src1)
3192
{
3193
   dst->u[0] = src0->u[0] | src1->u[0];
3194
   dst->u[1] = src0->u[1] | src1->u[1];
3195
   dst->u[2] = src0->u[2] | src1->u[2];
3196
   dst->u[3] = src0->u[3] | src1->u[3];
3197
}
3198
 
3199
static void
3200
micro_xor(union tgsi_exec_channel *dst,
3201
          const union tgsi_exec_channel *src0,
3202
          const union tgsi_exec_channel *src1)
3203
{
3204
   dst->u[0] = src0->u[0] ^ src1->u[0];
3205
   dst->u[1] = src0->u[1] ^ src1->u[1];
3206
   dst->u[2] = src0->u[2] ^ src1->u[2];
3207
   dst->u[3] = src0->u[3] ^ src1->u[3];
3208
}
3209
 
3210
static void
3211
micro_mod(union tgsi_exec_channel *dst,
3212
          const union tgsi_exec_channel *src0,
3213
          const union tgsi_exec_channel *src1)
3214
{
3215
   dst->i[0] = src0->i[0] % src1->i[0];
3216
   dst->i[1] = src0->i[1] % src1->i[1];
3217
   dst->i[2] = src0->i[2] % src1->i[2];
3218
   dst->i[3] = src0->i[3] % src1->i[3];
3219
}
3220
 
3221
static void
3222
micro_f2i(union tgsi_exec_channel *dst,
3223
          const union tgsi_exec_channel *src)
3224
{
3225
   dst->i[0] = (int)src->f[0];
3226
   dst->i[1] = (int)src->f[1];
3227
   dst->i[2] = (int)src->f[2];
3228
   dst->i[3] = (int)src->f[3];
3229
}
3230
 
3231
static void
3232
micro_idiv(union tgsi_exec_channel *dst,
3233
           const union tgsi_exec_channel *src0,
3234
           const union tgsi_exec_channel *src1)
3235
{
3236
   dst->i[0] = src0->i[0] / src1->i[0];
3237
   dst->i[1] = src0->i[1] / src1->i[1];
3238
   dst->i[2] = src0->i[2] / src1->i[2];
3239
   dst->i[3] = src0->i[3] / src1->i[3];
3240
}
3241
 
3242
static void
3243
micro_imax(union tgsi_exec_channel *dst,
3244
           const union tgsi_exec_channel *src0,
3245
           const union tgsi_exec_channel *src1)
3246
{
3247
   dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
3248
   dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
3249
   dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
3250
   dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
3251
}
3252
 
3253
static void
3254
micro_imin(union tgsi_exec_channel *dst,
3255
           const union tgsi_exec_channel *src0,
3256
           const union tgsi_exec_channel *src1)
3257
{
3258
   dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
3259
   dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
3260
   dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
3261
   dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
3262
}
3263
 
3264
static void
3265
micro_isge(union tgsi_exec_channel *dst,
3266
           const union tgsi_exec_channel *src0,
3267
           const union tgsi_exec_channel *src1)
3268
{
3269
   dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
3270
   dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
3271
   dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
3272
   dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
3273
}
3274
 
3275
static void
3276
micro_ishr(union tgsi_exec_channel *dst,
3277
           const union tgsi_exec_channel *src0,
3278
           const union tgsi_exec_channel *src1)
3279
{
3280
   dst->i[0] = src0->i[0] >> src1->i[0];
3281
   dst->i[1] = src0->i[1] >> src1->i[1];
3282
   dst->i[2] = src0->i[2] >> src1->i[2];
3283
   dst->i[3] = src0->i[3] >> src1->i[3];
3284
}
3285
 
3286
static void
3287
micro_islt(union tgsi_exec_channel *dst,
3288
           const union tgsi_exec_channel *src0,
3289
           const union tgsi_exec_channel *src1)
3290
{
3291
   dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
3292
   dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
3293
   dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
3294
   dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
3295
}
3296
 
3297
static void
3298
micro_f2u(union tgsi_exec_channel *dst,
3299
          const union tgsi_exec_channel *src)
3300
{
3301
   dst->u[0] = (uint)src->f[0];
3302
   dst->u[1] = (uint)src->f[1];
3303
   dst->u[2] = (uint)src->f[2];
3304
   dst->u[3] = (uint)src->f[3];
3305
}
3306
 
3307
static void
3308
micro_u2f(union tgsi_exec_channel *dst,
3309
          const union tgsi_exec_channel *src)
3310
{
3311
   dst->f[0] = (float)src->u[0];
3312
   dst->f[1] = (float)src->u[1];
3313
   dst->f[2] = (float)src->u[2];
3314
   dst->f[3] = (float)src->u[3];
3315
}
3316
 
3317
static void
3318
micro_uadd(union tgsi_exec_channel *dst,
3319
           const union tgsi_exec_channel *src0,
3320
           const union tgsi_exec_channel *src1)
3321
{
3322
   dst->u[0] = src0->u[0] + src1->u[0];
3323
   dst->u[1] = src0->u[1] + src1->u[1];
3324
   dst->u[2] = src0->u[2] + src1->u[2];
3325
   dst->u[3] = src0->u[3] + src1->u[3];
3326
}
3327
 
3328
static void
3329
micro_udiv(union tgsi_exec_channel *dst,
3330
           const union tgsi_exec_channel *src0,
3331
           const union tgsi_exec_channel *src1)
3332
{
3333
   dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
3334
   dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
3335
   dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
3336
   dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
3337
}
3338
 
3339
static void
3340
micro_umad(union tgsi_exec_channel *dst,
3341
           const union tgsi_exec_channel *src0,
3342
           const union tgsi_exec_channel *src1,
3343
           const union tgsi_exec_channel *src2)
3344
{
3345
   dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
3346
   dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
3347
   dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
3348
   dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
3349
}
3350
 
3351
static void
3352
micro_umax(union tgsi_exec_channel *dst,
3353
           const union tgsi_exec_channel *src0,
3354
           const union tgsi_exec_channel *src1)
3355
{
3356
   dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
3357
   dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
3358
   dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
3359
   dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
3360
}
3361
 
3362
static void
3363
micro_umin(union tgsi_exec_channel *dst,
3364
           const union tgsi_exec_channel *src0,
3365
           const union tgsi_exec_channel *src1)
3366
{
3367
   dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
3368
   dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
3369
   dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
3370
   dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
3371
}
3372
 
3373
static void
3374
micro_umod(union tgsi_exec_channel *dst,
3375
           const union tgsi_exec_channel *src0,
3376
           const union tgsi_exec_channel *src1)
3377
{
3378
   dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
3379
   dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
3380
   dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
3381
   dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
3382
}
3383
 
3384
static void
3385
micro_umul(union tgsi_exec_channel *dst,
3386
           const union tgsi_exec_channel *src0,
3387
           const union tgsi_exec_channel *src1)
3388
{
3389
   dst->u[0] = src0->u[0] * src1->u[0];
3390
   dst->u[1] = src0->u[1] * src1->u[1];
3391
   dst->u[2] = src0->u[2] * src1->u[2];
3392
   dst->u[3] = src0->u[3] * src1->u[3];
3393
}
3394
 
3395
static void
3396
micro_useq(union tgsi_exec_channel *dst,
3397
           const union tgsi_exec_channel *src0,
3398
           const union tgsi_exec_channel *src1)
3399
{
3400
   dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
3401
   dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
3402
   dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
3403
   dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
3404
}
3405
 
3406
static void
3407
micro_usge(union tgsi_exec_channel *dst,
3408
           const union tgsi_exec_channel *src0,
3409
           const union tgsi_exec_channel *src1)
3410
{
3411
   dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
3412
   dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
3413
   dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
3414
   dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
3415
}
3416
 
3417
static void
3418
micro_ushr(union tgsi_exec_channel *dst,
3419
           const union tgsi_exec_channel *src0,
3420
           const union tgsi_exec_channel *src1)
3421
{
3422
   dst->u[0] = src0->u[0] >> src1->u[0];
3423
   dst->u[1] = src0->u[1] >> src1->u[1];
3424
   dst->u[2] = src0->u[2] >> src1->u[2];
3425
   dst->u[3] = src0->u[3] >> src1->u[3];
3426
}
3427
 
3428
static void
3429
micro_uslt(union tgsi_exec_channel *dst,
3430
           const union tgsi_exec_channel *src0,
3431
           const union tgsi_exec_channel *src1)
3432
{
3433
   dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
3434
   dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
3435
   dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
3436
   dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
3437
}
3438
 
3439
static void
3440
micro_usne(union tgsi_exec_channel *dst,
3441
           const union tgsi_exec_channel *src0,
3442
           const union tgsi_exec_channel *src1)
3443
{
3444
   dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
3445
   dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
3446
   dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
3447
   dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
3448
}
3449
 
3450
static void
3451
micro_uarl(union tgsi_exec_channel *dst,
3452
           const union tgsi_exec_channel *src)
3453
{
3454
   dst->i[0] = src->u[0];
3455
   dst->i[1] = src->u[1];
3456
   dst->i[2] = src->u[2];
3457
   dst->i[3] = src->u[3];
3458
}
3459
 
3460
static void
3461
micro_ucmp(union tgsi_exec_channel *dst,
3462
           const union tgsi_exec_channel *src0,
3463
           const union tgsi_exec_channel *src1,
3464
           const union tgsi_exec_channel *src2)
3465
{
3466
   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
3467
   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
3468
   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
3469
   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
3470
}
3471
 
3472
static void
3473
exec_instruction(
3474
   struct tgsi_exec_machine *mach,
3475
   const struct tgsi_full_instruction *inst,
3476
   int *pc )
3477
{
3478
   union tgsi_exec_channel r[10];
3479
 
3480
   (*pc)++;
3481
 
3482
   switch (inst->Instruction.Opcode) {
3483
   case TGSI_OPCODE_ARL:
3484
      exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3485
      break;
3486
 
3487
   case TGSI_OPCODE_MOV:
3488
      exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
3489
      break;
3490
 
3491
   case TGSI_OPCODE_LIT:
3492
      exec_lit(mach, inst);
3493
      break;
3494
 
3495
   case TGSI_OPCODE_RCP:
3496
      exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3497
      break;
3498
 
3499
   case TGSI_OPCODE_RSQ:
3500
      exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3501
      break;
3502
 
3503
   case TGSI_OPCODE_EXP:
3504
      exec_exp(mach, inst);
3505
      break;
3506
 
3507
   case TGSI_OPCODE_LOG:
3508
      exec_log(mach, inst);
3509
      break;
3510
 
3511
   case TGSI_OPCODE_MUL:
3512
      exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3513
      break;
3514
 
3515
   case TGSI_OPCODE_ADD:
3516
      exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3517
      break;
3518
 
3519
   case TGSI_OPCODE_DP3:
3520
      exec_dp3(mach, inst);
3521
      break;
3522
 
3523
   case TGSI_OPCODE_DP4:
3524
      exec_dp4(mach, inst);
3525
      break;
3526
 
3527
   case TGSI_OPCODE_DST:
3528
      exec_dst(mach, inst);
3529
      break;
3530
 
3531
   case TGSI_OPCODE_MIN:
3532
      exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3533
      break;
3534
 
3535
   case TGSI_OPCODE_MAX:
3536
      exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3537
      break;
3538
 
3539
   case TGSI_OPCODE_SLT:
3540
      exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3541
      break;
3542
 
3543
   case TGSI_OPCODE_SGE:
3544
      exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3545
      break;
3546
 
3547
   case TGSI_OPCODE_MAD:
3548
      exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3549
      break;
3550
 
3551
   case TGSI_OPCODE_SUB:
3552
      exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3553
      break;
3554
 
3555
   case TGSI_OPCODE_LRP:
3556
      exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3557
      break;
3558
 
3559
   case TGSI_OPCODE_CND:
3560
      exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3561
      break;
3562
 
3563
   case TGSI_OPCODE_SQRT:
3564
      exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3565
      break;
3566
 
3567
   case TGSI_OPCODE_DP2A:
3568
      exec_dp2a(mach, inst);
3569
      break;
3570
 
3571
   case TGSI_OPCODE_FRC:
3572
      exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3573
      break;
3574
 
3575
   case TGSI_OPCODE_CLAMP:
3576
      exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3577
      break;
3578
 
3579
   case TGSI_OPCODE_FLR:
3580
      exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3581
      break;
3582
 
3583
   case TGSI_OPCODE_ROUND:
3584
      exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3585
      break;
3586
 
3587
   case TGSI_OPCODE_EX2:
3588
      exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3589
      break;
3590
 
3591
   case TGSI_OPCODE_LG2:
3592
      exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3593
      break;
3594
 
3595
   case TGSI_OPCODE_POW:
3596
      exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3597
      break;
3598
 
3599
   case TGSI_OPCODE_XPD:
3600
      exec_xpd(mach, inst);
3601
      break;
3602
 
3603
   case TGSI_OPCODE_ABS:
3604
      exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3605
      break;
3606
 
3607
   case TGSI_OPCODE_RCC:
3608
      exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3609
      break;
3610
 
3611
   case TGSI_OPCODE_DPH:
3612
      exec_dph(mach, inst);
3613
      break;
3614
 
3615
   case TGSI_OPCODE_COS:
3616
      exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3617
      break;
3618
 
3619
   case TGSI_OPCODE_DDX:
3620
      exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3621
      break;
3622
 
3623
   case TGSI_OPCODE_DDY:
3624
      exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3625
      break;
3626
 
3627
   case TGSI_OPCODE_KILL:
3628
      exec_kill (mach, inst);
3629
      break;
3630
 
3631
   case TGSI_OPCODE_KILL_IF:
3632
      exec_kill_if (mach, inst);
3633
      break;
3634
 
3635
   case TGSI_OPCODE_PK2H:
3636
      assert (0);
3637
      break;
3638
 
3639
   case TGSI_OPCODE_PK2US:
3640
      assert (0);
3641
      break;
3642
 
3643
   case TGSI_OPCODE_PK4B:
3644
      assert (0);
3645
      break;
3646
 
3647
   case TGSI_OPCODE_PK4UB:
3648
      assert (0);
3649
      break;
3650
 
3651
   case TGSI_OPCODE_RFL:
3652
      exec_rfl(mach, inst);
3653
      break;
3654
 
3655
   case TGSI_OPCODE_SEQ:
3656
      exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3657
      break;
3658
 
3659
   case TGSI_OPCODE_SFL:
3660
      exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT);
3661
      break;
3662
 
3663
   case TGSI_OPCODE_SGT:
3664
      exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3665
      break;
3666
 
3667
   case TGSI_OPCODE_SIN:
3668
      exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3669
      break;
3670
 
3671
   case TGSI_OPCODE_SLE:
3672
      exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3673
      break;
3674
 
3675
   case TGSI_OPCODE_SNE:
3676
      exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3677
      break;
3678
 
3679
   case TGSI_OPCODE_STR:
3680
      exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT);
3681
      break;
3682
 
3683
   case TGSI_OPCODE_TEX:
3684
      /* simple texture lookup */
3685
      /* src[0] = texcoord */
3686
      /* src[1] = sampler unit */
3687
      exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
3688
      break;
3689
 
3690
   case TGSI_OPCODE_TXB:
3691
      /* Texture lookup with lod bias */
3692
      /* src[0] = texcoord (src[0].w = LOD bias) */
3693
      /* src[1] = sampler unit */
3694
      exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
3695
      break;
3696
 
3697
   case TGSI_OPCODE_TXD:
3698
      /* Texture lookup with explict partial derivatives */
3699
      /* src[0] = texcoord */
3700
      /* src[1] = d[strq]/dx */
3701
      /* src[2] = d[strq]/dy */
3702
      /* src[3] = sampler unit */
3703
      exec_txd(mach, inst);
3704
      break;
3705
 
3706
   case TGSI_OPCODE_TXL:
3707
      /* Texture lookup with explit LOD */
3708
      /* src[0] = texcoord (src[0].w = LOD) */
3709
      /* src[1] = sampler unit */
3710
      exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
3711
      break;
3712
 
3713
   case TGSI_OPCODE_TXP:
3714
      /* Texture lookup with projection */
3715
      /* src[0] = texcoord (src[0].w = projection) */
3716
      /* src[1] = sampler unit */
3717
      exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
3718
      break;
3719
 
3720
   case TGSI_OPCODE_UP2H:
3721
      assert (0);
3722
      break;
3723
 
3724
   case TGSI_OPCODE_UP2US:
3725
      assert (0);
3726
      break;
3727
 
3728
   case TGSI_OPCODE_UP4B:
3729
      assert (0);
3730
      break;
3731
 
3732
   case TGSI_OPCODE_UP4UB:
3733
      assert (0);
3734
      break;
3735
 
3736
   case TGSI_OPCODE_X2D:
3737
      exec_x2d(mach, inst);
3738
      break;
3739
 
3740
   case TGSI_OPCODE_ARA:
3741
      assert (0);
3742
      break;
3743
 
3744
   case TGSI_OPCODE_ARR:
3745
      exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
3746
      break;
3747
 
3748
   case TGSI_OPCODE_BRA:
3749
      assert (0);
3750
      break;
3751
 
3752
   case TGSI_OPCODE_CAL:
3753
      /* skip the call if no execution channels are enabled */
3754
      if (mach->ExecMask) {
3755
         /* do the call */
3756
 
3757
         /* First, record the depths of the execution stacks.
3758
          * This is important for deeply nested/looped return statements.
3759
          * We have to unwind the stacks by the correct amount.  For a
3760
          * real code generator, we could determine the number of entries
3761
          * to pop off each stack with simple static analysis and avoid
3762
          * implementing this data structure at run time.
3763
          */
3764
         mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
3765
         mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
3766
         mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
3767
         mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
3768
         mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
3769
         /* note that PC was already incremented above */
3770
         mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
3771
 
3772
         mach->CallStackTop++;
3773
 
3774
         /* Second, push the Cond, Loop, Cont, Func stacks */
3775
         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3776
         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3777
         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
3778
         assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3779
         assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3780
         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
3781
 
3782
         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3783
         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
3784
         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
3785
         mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3786
         mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3787
         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
3788
 
3789
         /* Finally, jump to the subroutine */
3790
         *pc = inst->Label.Label;
3791
      }
3792
      break;
3793
 
3794
   case TGSI_OPCODE_RET:
3795
      mach->FuncMask &= ~mach->ExecMask;
3796
      UPDATE_EXEC_MASK(mach);
3797
 
3798
      if (mach->FuncMask == 0x0) {
3799
         /* really return now (otherwise, keep executing */
3800
 
3801
         if (mach->CallStackTop == 0) {
3802
            /* returning from main() */
3803
            mach->CondStackTop = 0;
3804
            mach->LoopStackTop = 0;
3805
            *pc = -1;
3806
            return;
3807
         }
3808
 
3809
         assert(mach->CallStackTop > 0);
3810
         mach->CallStackTop--;
3811
 
3812
         mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
3813
         mach->CondMask = mach->CondStack[mach->CondStackTop];
3814
 
3815
         mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
3816
         mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
3817
 
3818
         mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
3819
         mach->ContMask = mach->ContStack[mach->ContStackTop];
3820
 
3821
         mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
3822
         mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
3823
 
3824
         mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
3825
         mach->BreakType = mach->BreakStack[mach->BreakStackTop];
3826
 
3827
         assert(mach->FuncStackTop > 0);
3828
         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
3829
 
3830
         *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
3831
 
3832
         UPDATE_EXEC_MASK(mach);
3833
      }
3834
      break;
3835
 
3836
   case TGSI_OPCODE_SSG:
3837
      exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3838
      break;
3839
 
3840
   case TGSI_OPCODE_CMP:
3841
      exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3842
      break;
3843
 
3844
   case TGSI_OPCODE_SCS:
3845
      exec_scs(mach, inst);
3846
      break;
3847
 
3848
   case TGSI_OPCODE_NRM:
3849
      exec_nrm3(mach, inst);
3850
      break;
3851
 
3852
   case TGSI_OPCODE_NRM4:
3853
      exec_nrm4(mach, inst);
3854
      break;
3855
 
3856
   case TGSI_OPCODE_DIV:
3857
      exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3858
      break;
3859
 
3860
   case TGSI_OPCODE_DP2:
3861
      exec_dp2(mach, inst);
3862
      break;
3863
 
3864
   case TGSI_OPCODE_IF:
3865
      /* push CondMask */
3866
      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3867
      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3868
      FETCH( &r[0], 0, TGSI_CHAN_X );
3869
      /* update CondMask */
3870
      if( ! r[0].f[0] ) {
3871
         mach->CondMask &= ~0x1;
3872
      }
3873
      if( ! r[0].f[1] ) {
3874
         mach->CondMask &= ~0x2;
3875
      }
3876
      if( ! r[0].f[2] ) {
3877
         mach->CondMask &= ~0x4;
3878
      }
3879
      if( ! r[0].f[3] ) {
3880
         mach->CondMask &= ~0x8;
3881
      }
3882
      UPDATE_EXEC_MASK(mach);
3883
      /* Todo: If CondMask==0, jump to ELSE */
3884
      break;
3885
 
3886
   case TGSI_OPCODE_UIF:
3887
      /* push CondMask */
3888
      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
3889
      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
3890
      IFETCH( &r[0], 0, TGSI_CHAN_X );
3891
      /* update CondMask */
3892
      if( ! r[0].u[0] ) {
3893
         mach->CondMask &= ~0x1;
3894
      }
3895
      if( ! r[0].u[1] ) {
3896
         mach->CondMask &= ~0x2;
3897
      }
3898
      if( ! r[0].u[2] ) {
3899
         mach->CondMask &= ~0x4;
3900
      }
3901
      if( ! r[0].u[3] ) {
3902
         mach->CondMask &= ~0x8;
3903
      }
3904
      UPDATE_EXEC_MASK(mach);
3905
      /* Todo: If CondMask==0, jump to ELSE */
3906
      break;
3907
 
3908
   case TGSI_OPCODE_ELSE:
3909
      /* invert CondMask wrt previous mask */
3910
      {
3911
         uint prevMask;
3912
         assert(mach->CondStackTop > 0);
3913
         prevMask = mach->CondStack[mach->CondStackTop - 1];
3914
         mach->CondMask = ~mach->CondMask & prevMask;
3915
         UPDATE_EXEC_MASK(mach);
3916
         /* Todo: If CondMask==0, jump to ENDIF */
3917
      }
3918
      break;
3919
 
3920
   case TGSI_OPCODE_ENDIF:
3921
      /* pop CondMask */
3922
      assert(mach->CondStackTop > 0);
3923
      mach->CondMask = mach->CondStack[--mach->CondStackTop];
3924
      UPDATE_EXEC_MASK(mach);
3925
      break;
3926
 
3927
   case TGSI_OPCODE_END:
3928
      /* make sure we end primitives which haven't
3929
       * been explicitly emitted */
3930
      conditional_emit_primitive(mach);
3931
      /* halt execution */
3932
      *pc = -1;
3933
      break;
3934
 
3935
   case TGSI_OPCODE_PUSHA:
3936
      assert (0);
3937
      break;
3938
 
3939
   case TGSI_OPCODE_POPA:
3940
      assert (0);
3941
      break;
3942
 
3943
   case TGSI_OPCODE_CEIL:
3944
      exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3945
      break;
3946
 
3947
   case TGSI_OPCODE_I2F:
3948
      exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
3949
      break;
3950
 
3951
   case TGSI_OPCODE_NOT:
3952
      exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3953
      break;
3954
 
3955
   case TGSI_OPCODE_TRUNC:
3956
      exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
3957
      break;
3958
 
3959
   case TGSI_OPCODE_SHL:
3960
      exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3961
      break;
3962
 
3963
   case TGSI_OPCODE_AND:
3964
      exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3965
      break;
3966
 
3967
   case TGSI_OPCODE_OR:
3968
      exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3969
      break;
3970
 
3971
   case TGSI_OPCODE_MOD:
3972
      exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
3973
      break;
3974
 
3975
   case TGSI_OPCODE_XOR:
3976
      exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
3977
      break;
3978
 
3979
   case TGSI_OPCODE_SAD:
3980
      assert (0);
3981
      break;
3982
 
3983
   case TGSI_OPCODE_TXF:
3984
      exec_txf(mach, inst);
3985
      break;
3986
 
3987
   case TGSI_OPCODE_TXQ:
3988
      exec_txq(mach, inst);
3989
      break;
3990
 
3991
   case TGSI_OPCODE_EMIT:
3992
      emit_vertex(mach);
3993
      break;
3994
 
3995
   case TGSI_OPCODE_ENDPRIM:
3996
      emit_primitive(mach);
3997
      break;
3998
 
3999
   case TGSI_OPCODE_BGNLOOP:
4000
      /* push LoopMask and ContMasks */
4001
      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4002
      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4003
      assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
4004
      assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
4005
 
4006
      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
4007
      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
4008
      mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
4009
      mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
4010
      mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
4011
      break;
4012
 
4013
   case TGSI_OPCODE_ENDLOOP:
4014
      /* Restore ContMask, but don't pop */
4015
      assert(mach->ContStackTop > 0);
4016
      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
4017
      UPDATE_EXEC_MASK(mach);
4018
      if (mach->ExecMask) {
4019
         /* repeat loop: jump to instruction just past BGNLOOP */
4020
         assert(mach->LoopLabelStackTop > 0);
4021
         *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
4022
      }
4023
      else {
4024
         /* exit loop: pop LoopMask */
4025
         assert(mach->LoopStackTop > 0);
4026
         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
4027
         /* pop ContMask */
4028
         assert(mach->ContStackTop > 0);
4029
         mach->ContMask = mach->ContStack[--mach->ContStackTop];
4030
         assert(mach->LoopLabelStackTop > 0);
4031
         --mach->LoopLabelStackTop;
4032
 
4033
         mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
4034
      }
4035
      UPDATE_EXEC_MASK(mach);
4036
      break;
4037
 
4038
   case TGSI_OPCODE_BRK:
4039
      exec_break(mach);
4040
      break;
4041
 
4042
   case TGSI_OPCODE_CONT:
4043
      /* turn off cont channels for each enabled exec channel */
4044
      mach->ContMask &= ~mach->ExecMask;
4045
      /* Todo: if mach->LoopMask == 0, jump to end of loop */
4046
      UPDATE_EXEC_MASK(mach);
4047
      break;
4048
 
4049
   case TGSI_OPCODE_BGNSUB:
4050
      /* no-op */
4051
      break;
4052
 
4053
   case TGSI_OPCODE_ENDSUB:
4054
      /*
4055
       * XXX: This really should be a no-op. We should never reach this opcode.
4056
       */
4057
 
4058
      assert(mach->CallStackTop > 0);
4059
      mach->CallStackTop--;
4060
 
4061
      mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
4062
      mach->CondMask = mach->CondStack[mach->CondStackTop];
4063
 
4064
      mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
4065
      mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
4066
 
4067
      mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
4068
      mach->ContMask = mach->ContStack[mach->ContStackTop];
4069
 
4070
      mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
4071
      mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
4072
 
4073
      mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
4074
      mach->BreakType = mach->BreakStack[mach->BreakStackTop];
4075
 
4076
      assert(mach->FuncStackTop > 0);
4077
      mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
4078
 
4079
      *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
4080
 
4081
      UPDATE_EXEC_MASK(mach);
4082
      break;
4083
 
4084
   case TGSI_OPCODE_NOP:
4085
      break;
4086
 
4087
   case TGSI_OPCODE_BREAKC:
4088
      IFETCH(&r[0], 0, TGSI_CHAN_X);
4089
      /* update CondMask */
4090
      if (r[0].u[0] && (mach->ExecMask & 0x1)) {
4091
         mach->LoopMask &= ~0x1;
4092
      }
4093
      if (r[0].u[1] && (mach->ExecMask & 0x2)) {
4094
         mach->LoopMask &= ~0x2;
4095
      }
4096
      if (r[0].u[2] && (mach->ExecMask & 0x4)) {
4097
         mach->LoopMask &= ~0x4;
4098
      }
4099
      if (r[0].u[3] && (mach->ExecMask & 0x8)) {
4100
         mach->LoopMask &= ~0x8;
4101
      }
4102
      /* Todo: if mach->LoopMask == 0, jump to end of loop */
4103
      UPDATE_EXEC_MASK(mach);
4104
      break;
4105
 
4106
   case TGSI_OPCODE_F2I:
4107
      exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
4108
      break;
4109
 
4110
   case TGSI_OPCODE_IDIV:
4111
      exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4112
      break;
4113
 
4114
   case TGSI_OPCODE_IMAX:
4115
      exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4116
      break;
4117
 
4118
   case TGSI_OPCODE_IMIN:
4119
      exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4120
      break;
4121
 
4122
   case TGSI_OPCODE_INEG:
4123
      exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4124
      break;
4125
 
4126
   case TGSI_OPCODE_ISGE:
4127
      exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4128
      break;
4129
 
4130
   case TGSI_OPCODE_ISHR:
4131
      exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4132
      break;
4133
 
4134
   case TGSI_OPCODE_ISLT:
4135
      exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4136
      break;
4137
 
4138
   case TGSI_OPCODE_F2U:
4139
      exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
4140
      break;
4141
 
4142
   case TGSI_OPCODE_U2F:
4143
      exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
4144
      break;
4145
 
4146
   case TGSI_OPCODE_UADD:
4147
      exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4148
      break;
4149
 
4150
   case TGSI_OPCODE_UDIV:
4151
      exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4152
      break;
4153
 
4154
   case TGSI_OPCODE_UMAD:
4155
      exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4156
      break;
4157
 
4158
   case TGSI_OPCODE_UMAX:
4159
      exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4160
      break;
4161
 
4162
   case TGSI_OPCODE_UMIN:
4163
      exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4164
      break;
4165
 
4166
   case TGSI_OPCODE_UMOD:
4167
      exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4168
      break;
4169
 
4170
   case TGSI_OPCODE_UMUL:
4171
      exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4172
      break;
4173
 
4174
   case TGSI_OPCODE_USEQ:
4175
      exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4176
      break;
4177
 
4178
   case TGSI_OPCODE_USGE:
4179
      exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4180
      break;
4181
 
4182
   case TGSI_OPCODE_USHR:
4183
      exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4184
      break;
4185
 
4186
   case TGSI_OPCODE_USLT:
4187
      exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4188
      break;
4189
 
4190
   case TGSI_OPCODE_USNE:
4191
      exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4192
      break;
4193
 
4194
   case TGSI_OPCODE_SWITCH:
4195
      exec_switch(mach, inst);
4196
      break;
4197
 
4198
   case TGSI_OPCODE_CASE:
4199
      exec_case(mach, inst);
4200
      break;
4201
 
4202
   case TGSI_OPCODE_DEFAULT:
4203
      exec_default(mach);
4204
      break;
4205
 
4206
   case TGSI_OPCODE_ENDSWITCH:
4207
      exec_endswitch(mach);
4208
      break;
4209
 
4210
   case TGSI_OPCODE_SAMPLE_I:
4211
      exec_txf(mach, inst);
4212
      break;
4213
 
4214
   case TGSI_OPCODE_SAMPLE_I_MS:
4215
      assert(0);
4216
      break;
4217
 
4218
   case TGSI_OPCODE_SAMPLE:
4219
      exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
4220
      break;
4221
 
4222
   case TGSI_OPCODE_SAMPLE_B:
4223
      exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
4224
      break;
4225
 
4226
   case TGSI_OPCODE_SAMPLE_C:
4227
      exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
4228
      break;
4229
 
4230
   case TGSI_OPCODE_SAMPLE_C_LZ:
4231
      exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
4232
      break;
4233
 
4234
   case TGSI_OPCODE_SAMPLE_D:
4235
      exec_sample_d(mach, inst);
4236
      break;
4237
 
4238
   case TGSI_OPCODE_SAMPLE_L:
4239
      exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
4240
      break;
4241
 
4242
   case TGSI_OPCODE_GATHER4:
4243
      assert(0);
4244
      break;
4245
 
4246
   case TGSI_OPCODE_SVIEWINFO:
4247
      exec_txq(mach, inst);
4248
      break;
4249
 
4250
   case TGSI_OPCODE_SAMPLE_POS:
4251
      assert(0);
4252
      break;
4253
 
4254
   case TGSI_OPCODE_SAMPLE_INFO:
4255
      assert(0);
4256
      break;
4257
 
4258
   case TGSI_OPCODE_UARL:
4259
      exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
4260
      break;
4261
 
4262
   case TGSI_OPCODE_UCMP:
4263
      exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
4264
      break;
4265
 
4266
   case TGSI_OPCODE_IABS:
4267
      exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4268
      break;
4269
 
4270
   case TGSI_OPCODE_ISSG:
4271
      exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
4272
      break;
4273
 
4274
   case TGSI_OPCODE_TEX2:
4275
      /* simple texture lookup */
4276
      /* src[0] = texcoord */
4277
      /* src[1] = compare */
4278
      /* src[2] = sampler unit */
4279
      exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
4280
      break;
4281
   case TGSI_OPCODE_TXB2:
4282
      /* simple texture lookup */
4283
      /* src[0] = texcoord */
4284
      /* src[1] = bias */
4285
      /* src[2] = sampler unit */
4286
      exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
4287
      break;
4288
   case TGSI_OPCODE_TXL2:
4289
      /* simple texture lookup */
4290
      /* src[0] = texcoord */
4291
      /* src[1] = lod */
4292
      /* src[2] = sampler unit */
4293
      exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
4294
      break;
4295
   default:
4296
      assert( 0 );
4297
   }
4298
}
4299
 
4300
 
4301
/**
4302
 * Run TGSI interpreter.
4303
 * \return bitmask of "alive" quad components
4304
 */
4305
uint
4306
tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
4307
{
4308
   uint i;
4309
   int pc = 0;
4310
   uint default_mask = 0xf;
4311
 
4312
   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
4313
   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
4314
 
4315
   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
4316
      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
4317
      mach->Primitives[0] = 0;
4318
      /* GS runs on a single primitive for now */
4319
      default_mask = 0x1;
4320
   }
4321
 
4322
   mach->CondMask = default_mask;
4323
   mach->LoopMask = default_mask;
4324
   mach->ContMask = default_mask;
4325
   mach->FuncMask = default_mask;
4326
   mach->ExecMask = default_mask;
4327
 
4328
   mach->Switch.mask = default_mask;
4329
 
4330
   assert(mach->CondStackTop == 0);
4331
   assert(mach->LoopStackTop == 0);
4332
   assert(mach->ContStackTop == 0);
4333
   assert(mach->SwitchStackTop == 0);
4334
   assert(mach->BreakStackTop == 0);
4335
   assert(mach->CallStackTop == 0);
4336
 
4337
 
4338
   /* execute declarations (interpolants) */
4339
   for (i = 0; i < mach->NumDeclarations; i++) {
4340
      exec_declaration( mach, mach->Declarations+i );
4341
   }
4342
 
4343
   {
4344
#if DEBUG_EXECUTION
4345
      struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
4346
      struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
4347
      uint inst = 1;
4348
 
4349
      memset(mach->Temps, 0, sizeof(temps));
4350
      memset(mach->Outputs, 0, sizeof(outputs));
4351
      memset(temps, 0, sizeof(temps));
4352
      memset(outputs, 0, sizeof(outputs));
4353
#endif
4354
 
4355
      /* execute instructions, until pc is set to -1 */
4356
      while (pc != -1) {
4357
 
4358
#if DEBUG_EXECUTION
4359
         uint i;
4360
 
4361
         tgsi_dump_instruction(&mach->Instructions[pc], inst++);
4362
#endif
4363
 
4364
         assert(pc < (int) mach->NumInstructions);
4365
         exec_instruction(mach, mach->Instructions + pc, &pc);
4366
 
4367
#if DEBUG_EXECUTION
4368
         for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
4369
            if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
4370
               uint j;
4371
 
4372
               memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
4373
               debug_printf("TEMP[%2u] = ", i);
4374
               for (j = 0; j < 4; j++) {
4375
                  if (j > 0) {
4376
                     debug_printf("           ");
4377
                  }
4378
                  debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4379
                               temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
4380
                               temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
4381
                               temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
4382
                               temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
4383
               }
4384
            }
4385
         }
4386
         for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
4387
            if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
4388
               uint j;
4389
 
4390
               memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
4391
               debug_printf("OUT[%2u] =  ", i);
4392
               for (j = 0; j < 4; j++) {
4393
                  if (j > 0) {
4394
                     debug_printf("           ");
4395
                  }
4396
                  debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
4397
                               outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
4398
                               outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
4399
                               outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
4400
                               outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
4401
               }
4402
            }
4403
         }
4404
#endif
4405
      }
4406
   }
4407
 
4408
#if 0
4409
   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
4410
   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
4411
      /*
4412
       * Scale back depth component.
4413
       */
4414
      for (i = 0; i < 4; i++)
4415
         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
4416
   }
4417
#endif
4418
 
4419
   /* Strictly speaking, these assertions aren't really needed but they
4420
    * can potentially catch some bugs in the control flow code.
4421
    */
4422
   assert(mach->CondStackTop == 0);
4423
   assert(mach->LoopStackTop == 0);
4424
   assert(mach->ContStackTop == 0);
4425
   assert(mach->SwitchStackTop == 0);
4426
   assert(mach->BreakStackTop == 0);
4427
   assert(mach->CallStackTop == 0);
4428
 
4429
   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4430
}