Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3770 | Serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
||
4 | * All Rights Reserved. |
||
5 | * Copyright 2009-2010 VMware, Inc. All rights Reserved. |
||
6 | * |
||
7 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
8 | * copy of this software and associated documentation files (the |
||
9 | * "Software"), to deal in the Software without restriction, including |
||
10 | * without limitation the rights to use, copy, modify, merge, publish, |
||
11 | * distribute, sub license, and/or sell copies of the Software, and to |
||
12 | * permit persons to whom the Software is furnished to do so, subject to |
||
13 | * the following conditions: |
||
14 | * |
||
15 | * The above copyright notice and this permission notice (including the |
||
16 | * next paragraph) shall be included in all copies or substantial portions |
||
17 | * of the Software. |
||
18 | * |
||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
22 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
23 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
24 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
25 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
26 | * |
||
27 | **************************************************************************/ |
||
28 | |||
29 | /** |
||
30 | * TGSI interpreter/executor. |
||
31 | * |
||
32 | * Flow control information: |
||
33 | * |
||
34 | * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) |
||
35 | * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special |
||
36 | * care since a condition may be true for some quad components but false |
||
37 | * for other components. |
||
38 | * |
||
39 | * We basically execute all statements (even if they're in the part of |
||
40 | * an IF/ELSE clause that's "not taken") and use a special mask to |
||
41 | * control writing to destination registers. This is the ExecMask. |
||
42 | * See store_dest(). |
||
43 | * |
||
44 | * The ExecMask is computed from five other masks (CondMask, LoopMask, |
||
45 | * ContMask, Switch.mask and FuncMask; see UPDATE_EXEC_MASK). The first three |
||
46 | * are controlled by flow control instructions (IF/ELSE/ENDIF, LOOP/ENDLOOP, CONT). |
||
47 | * |
||
48 | * |
||
49 | * Authors: |
||
50 | * Michal Krol |
||
51 | * Brian Paul |
||
52 | */ |
||
53 | |||
54 | #include "pipe/p_compiler.h" |
||
55 | #include "pipe/p_state.h" |
||
56 | #include "pipe/p_shader_tokens.h" |
||
57 | #include "tgsi/tgsi_dump.h" |
||
58 | #include "tgsi/tgsi_parse.h" |
||
59 | #include "tgsi/tgsi_util.h" |
||
60 | #include "tgsi_exec.h" |
||
61 | #include "util/u_memory.h" |
||
62 | #include "util/u_math.h" |
||
63 | |||
64 | |||
65 | #define DEBUG_EXECUTION 0 |
||
66 | |||
67 | |||
68 | #define FAST_MATH 0 |
||
69 | |||
70 | #define TILE_TOP_LEFT 0 |
||
71 | #define TILE_TOP_RIGHT 1 |
||
72 | #define TILE_BOTTOM_LEFT 2 |
||
73 | #define TILE_BOTTOM_RIGHT 3 |
||
74 | |||
75 | static void |
||
76 | micro_abs(union tgsi_exec_channel *dst, |
||
77 | const union tgsi_exec_channel *src) |
||
78 | { |
||
79 | dst->f[0] = fabsf(src->f[0]); |
||
80 | dst->f[1] = fabsf(src->f[1]); |
||
81 | dst->f[2] = fabsf(src->f[2]); |
||
82 | dst->f[3] = fabsf(src->f[3]); |
||
83 | } |
||
84 | |||
85 | static void |
||
86 | micro_arl(union tgsi_exec_channel *dst, |
||
87 | const union tgsi_exec_channel *src) |
||
88 | { |
||
89 | dst->i[0] = (int)floorf(src->f[0]); |
||
90 | dst->i[1] = (int)floorf(src->f[1]); |
||
91 | dst->i[2] = (int)floorf(src->f[2]); |
||
92 | dst->i[3] = (int)floorf(src->f[3]); |
||
93 | } |
||
94 | |||
95 | static void |
||
96 | micro_arr(union tgsi_exec_channel *dst, |
||
97 | const union tgsi_exec_channel *src) |
||
98 | { |
||
99 | dst->i[0] = (int)floorf(src->f[0] + 0.5f); |
||
100 | dst->i[1] = (int)floorf(src->f[1] + 0.5f); |
||
101 | dst->i[2] = (int)floorf(src->f[2] + 0.5f); |
||
102 | dst->i[3] = (int)floorf(src->f[3] + 0.5f); |
||
103 | } |
||
104 | |||
105 | static void |
||
106 | micro_ceil(union tgsi_exec_channel *dst, |
||
107 | const union tgsi_exec_channel *src) |
||
108 | { |
||
109 | dst->f[0] = ceilf(src->f[0]); |
||
110 | dst->f[1] = ceilf(src->f[1]); |
||
111 | dst->f[2] = ceilf(src->f[2]); |
||
112 | dst->f[3] = ceilf(src->f[3]); |
||
113 | } |
||
114 | |||
115 | static void |
||
116 | micro_clamp(union tgsi_exec_channel *dst, |
||
117 | const union tgsi_exec_channel *src0, |
||
118 | const union tgsi_exec_channel *src1, |
||
119 | const union tgsi_exec_channel *src2) |
||
120 | { |
||
121 | dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; |
||
122 | dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; |
||
123 | dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; |
||
124 | dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; |
||
125 | } |
||
126 | |||
127 | static void |
||
128 | micro_cmp(union tgsi_exec_channel *dst, |
||
129 | const union tgsi_exec_channel *src0, |
||
130 | const union tgsi_exec_channel *src1, |
||
131 | const union tgsi_exec_channel *src2) |
||
132 | { |
||
133 | dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; |
||
134 | dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; |
||
135 | dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; |
||
136 | dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; |
||
137 | } |
||
138 | |||
139 | static void |
||
140 | micro_cnd(union tgsi_exec_channel *dst, |
||
141 | const union tgsi_exec_channel *src0, |
||
142 | const union tgsi_exec_channel *src1, |
||
143 | const union tgsi_exec_channel *src2) |
||
144 | { |
||
145 | dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; |
||
146 | dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; |
||
147 | dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; |
||
148 | dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3]; |
||
149 | } |
||
150 | |||
151 | static void |
||
152 | micro_cos(union tgsi_exec_channel *dst, |
||
153 | const union tgsi_exec_channel *src) |
||
154 | { |
||
155 | dst->f[0] = cosf(src->f[0]); |
||
156 | dst->f[1] = cosf(src->f[1]); |
||
157 | dst->f[2] = cosf(src->f[2]); |
||
158 | dst->f[3] = cosf(src->f[3]); |
||
159 | } |
||
160 | |||
161 | static void |
||
162 | micro_ddx(union tgsi_exec_channel *dst, |
||
163 | const union tgsi_exec_channel *src) |
||
164 | { |
||
165 | dst->f[0] = |
||
166 | dst->f[1] = |
||
167 | dst->f[2] = |
||
168 | dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; |
||
169 | } |
||
170 | |||
171 | static void |
||
172 | micro_ddy(union tgsi_exec_channel *dst, |
||
173 | const union tgsi_exec_channel *src) |
||
174 | { |
||
175 | dst->f[0] = |
||
176 | dst->f[1] = |
||
177 | dst->f[2] = |
||
178 | dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; |
||
179 | } |
||
180 | |||
181 | static void |
||
182 | micro_exp2(union tgsi_exec_channel *dst, |
||
183 | const union tgsi_exec_channel *src) |
||
184 | { |
||
185 | #if FAST_MATH |
||
186 | dst->f[0] = util_fast_exp2(src->f[0]); |
||
187 | dst->f[1] = util_fast_exp2(src->f[1]); |
||
188 | dst->f[2] = util_fast_exp2(src->f[2]); |
||
189 | dst->f[3] = util_fast_exp2(src->f[3]); |
||
190 | #else |
||
191 | #if DEBUG |
||
192 | /* Inf is okay for this instruction, so clamp it to silence assertions. */ |
||
193 | uint i; |
||
194 | union tgsi_exec_channel clamped; |
||
195 | |||
196 | for (i = 0; i < 4; i++) { |
||
197 | if (src->f[i] > 127.99999f) { |
||
198 | clamped.f[i] = 127.99999f; |
||
199 | } else if (src->f[i] < -126.99999f) { |
||
200 | clamped.f[i] = -126.99999f; |
||
201 | } else { |
||
202 | clamped.f[i] = src->f[i]; |
||
203 | } |
||
204 | } |
||
205 | src = &clamped; |
||
206 | #endif /* DEBUG */ |
||
207 | |||
208 | dst->f[0] = powf(2.0f, src->f[0]); |
||
209 | dst->f[1] = powf(2.0f, src->f[1]); |
||
210 | dst->f[2] = powf(2.0f, src->f[2]); |
||
211 | dst->f[3] = powf(2.0f, src->f[3]); |
||
212 | #endif /* FAST_MATH */ |
||
213 | } |
||
214 | |||
215 | static void |
||
216 | micro_flr(union tgsi_exec_channel *dst, |
||
217 | const union tgsi_exec_channel *src) |
||
218 | { |
||
219 | dst->f[0] = floorf(src->f[0]); |
||
220 | dst->f[1] = floorf(src->f[1]); |
||
221 | dst->f[2] = floorf(src->f[2]); |
||
222 | dst->f[3] = floorf(src->f[3]); |
||
223 | } |
||
224 | |||
225 | static void |
||
226 | micro_frc(union tgsi_exec_channel *dst, |
||
227 | const union tgsi_exec_channel *src) |
||
228 | { |
||
229 | dst->f[0] = src->f[0] - floorf(src->f[0]); |
||
230 | dst->f[1] = src->f[1] - floorf(src->f[1]); |
||
231 | dst->f[2] = src->f[2] - floorf(src->f[2]); |
||
232 | dst->f[3] = src->f[3] - floorf(src->f[3]); |
||
233 | } |
||
234 | |||
235 | static void |
||
236 | micro_iabs(union tgsi_exec_channel *dst, |
||
237 | const union tgsi_exec_channel *src) |
||
238 | { |
||
239 | dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; |
||
240 | dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; |
||
241 | dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; |
||
242 | dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; |
||
243 | } |
||
244 | |||
245 | static void |
||
246 | micro_ineg(union tgsi_exec_channel *dst, |
||
247 | const union tgsi_exec_channel *src) |
||
248 | { |
||
249 | dst->i[0] = -src->i[0]; |
||
250 | dst->i[1] = -src->i[1]; |
||
251 | dst->i[2] = -src->i[2]; |
||
252 | dst->i[3] = -src->i[3]; |
||
253 | } |
||
254 | |||
255 | static void |
||
256 | micro_lg2(union tgsi_exec_channel *dst, |
||
257 | const union tgsi_exec_channel *src) |
||
258 | { |
||
259 | #if FAST_MATH |
||
260 | dst->f[0] = util_fast_log2(src->f[0]); |
||
261 | dst->f[1] = util_fast_log2(src->f[1]); |
||
262 | dst->f[2] = util_fast_log2(src->f[2]); |
||
263 | dst->f[3] = util_fast_log2(src->f[3]); |
||
264 | #else |
||
265 | dst->f[0] = logf(src->f[0]) * 1.442695f; |
||
266 | dst->f[1] = logf(src->f[1]) * 1.442695f; |
||
267 | dst->f[2] = logf(src->f[2]) * 1.442695f; |
||
268 | dst->f[3] = logf(src->f[3]) * 1.442695f; |
||
269 | #endif |
||
270 | } |
||
271 | |||
272 | static void |
||
273 | micro_lrp(union tgsi_exec_channel *dst, |
||
274 | const union tgsi_exec_channel *src0, |
||
275 | const union tgsi_exec_channel *src1, |
||
276 | const union tgsi_exec_channel *src2) |
||
277 | { |
||
278 | dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; |
||
279 | dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; |
||
280 | dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; |
||
281 | dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; |
||
282 | } |
||
283 | |||
284 | static void |
||
285 | micro_mad(union tgsi_exec_channel *dst, |
||
286 | const union tgsi_exec_channel *src0, |
||
287 | const union tgsi_exec_channel *src1, |
||
288 | const union tgsi_exec_channel *src2) |
||
289 | { |
||
290 | dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; |
||
291 | dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; |
||
292 | dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; |
||
293 | dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; |
||
294 | } |
||
295 | |||
296 | static void |
||
297 | micro_mov(union tgsi_exec_channel *dst, |
||
298 | const union tgsi_exec_channel *src) |
||
299 | { |
||
300 | dst->u[0] = src->u[0]; |
||
301 | dst->u[1] = src->u[1]; |
||
302 | dst->u[2] = src->u[2]; |
||
303 | dst->u[3] = src->u[3]; |
||
304 | } |
||
305 | |||
306 | static void |
||
307 | micro_rcp(union tgsi_exec_channel *dst, |
||
308 | const union tgsi_exec_channel *src) |
||
309 | { |
||
310 | #if 0 /* for debugging */ |
||
311 | assert(src->f[0] != 0.0f); |
||
312 | assert(src->f[1] != 0.0f); |
||
313 | assert(src->f[2] != 0.0f); |
||
314 | assert(src->f[3] != 0.0f); |
||
315 | #endif |
||
316 | dst->f[0] = 1.0f / src->f[0]; |
||
317 | dst->f[1] = 1.0f / src->f[1]; |
||
318 | dst->f[2] = 1.0f / src->f[2]; |
||
319 | dst->f[3] = 1.0f / src->f[3]; |
||
320 | } |
||
321 | |||
322 | static void |
||
323 | micro_rnd(union tgsi_exec_channel *dst, |
||
324 | const union tgsi_exec_channel *src) |
||
325 | { |
||
326 | dst->f[0] = floorf(src->f[0] + 0.5f); |
||
327 | dst->f[1] = floorf(src->f[1] + 0.5f); |
||
328 | dst->f[2] = floorf(src->f[2] + 0.5f); |
||
329 | dst->f[3] = floorf(src->f[3] + 0.5f); |
||
330 | } |
||
331 | |||
332 | static void |
||
333 | micro_rsq(union tgsi_exec_channel *dst, |
||
334 | const union tgsi_exec_channel *src) |
||
335 | { |
||
336 | #if 0 /* for debugging */ |
||
337 | assert(src->f[0] != 0.0f); |
||
338 | assert(src->f[1] != 0.0f); |
||
339 | assert(src->f[2] != 0.0f); |
||
340 | assert(src->f[3] != 0.0f); |
||
341 | #endif |
||
342 | dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); |
||
343 | dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); |
||
344 | dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); |
||
345 | dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); |
||
346 | } |
||
347 | |||
348 | static void |
||
349 | micro_sqrt(union tgsi_exec_channel *dst, |
||
350 | const union tgsi_exec_channel *src) |
||
351 | { |
||
352 | dst->f[0] = sqrtf(fabsf(src->f[0])); |
||
353 | dst->f[1] = sqrtf(fabsf(src->f[1])); |
||
354 | dst->f[2] = sqrtf(fabsf(src->f[2])); |
||
355 | dst->f[3] = sqrtf(fabsf(src->f[3])); |
||
356 | } |
||
357 | |||
358 | static void |
||
359 | micro_seq(union tgsi_exec_channel *dst, |
||
360 | const union tgsi_exec_channel *src0, |
||
361 | const union tgsi_exec_channel *src1) |
||
362 | { |
||
363 | dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; |
||
364 | dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; |
||
365 | dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; |
||
366 | dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; |
||
367 | } |
||
368 | |||
369 | static void |
||
370 | micro_sge(union tgsi_exec_channel *dst, |
||
371 | const union tgsi_exec_channel *src0, |
||
372 | const union tgsi_exec_channel *src1) |
||
373 | { |
||
374 | dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; |
||
375 | dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; |
||
376 | dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; |
||
377 | dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; |
||
378 | } |
||
379 | |||
380 | static void |
||
381 | micro_sgn(union tgsi_exec_channel *dst, |
||
382 | const union tgsi_exec_channel *src) |
||
383 | { |
||
384 | dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; |
||
385 | dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; |
||
386 | dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; |
||
387 | dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; |
||
388 | } |
||
389 | |||
390 | static void |
||
391 | micro_isgn(union tgsi_exec_channel *dst, |
||
392 | const union tgsi_exec_channel *src) |
||
393 | { |
||
394 | dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; |
||
395 | dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; |
||
396 | dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; |
||
397 | dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; |
||
398 | } |
||
399 | |||
400 | static void |
||
401 | micro_sgt(union tgsi_exec_channel *dst, |
||
402 | const union tgsi_exec_channel *src0, |
||
403 | const union tgsi_exec_channel *src1) |
||
404 | { |
||
405 | dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; |
||
406 | dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; |
||
407 | dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; |
||
408 | dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; |
||
409 | } |
||
410 | |||
411 | static void |
||
412 | micro_sin(union tgsi_exec_channel *dst, |
||
413 | const union tgsi_exec_channel *src) |
||
414 | { |
||
415 | dst->f[0] = sinf(src->f[0]); |
||
416 | dst->f[1] = sinf(src->f[1]); |
||
417 | dst->f[2] = sinf(src->f[2]); |
||
418 | dst->f[3] = sinf(src->f[3]); |
||
419 | } |
||
420 | |||
421 | static void |
||
422 | micro_sle(union tgsi_exec_channel *dst, |
||
423 | const union tgsi_exec_channel *src0, |
||
424 | const union tgsi_exec_channel *src1) |
||
425 | { |
||
426 | dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; |
||
427 | dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; |
||
428 | dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; |
||
429 | dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; |
||
430 | } |
||
431 | |||
432 | static void |
||
433 | micro_slt(union tgsi_exec_channel *dst, |
||
434 | const union tgsi_exec_channel *src0, |
||
435 | const union tgsi_exec_channel *src1) |
||
436 | { |
||
437 | dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; |
||
438 | dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; |
||
439 | dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; |
||
440 | dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; |
||
441 | } |
||
442 | |||
443 | static void |
||
444 | micro_sne(union tgsi_exec_channel *dst, |
||
445 | const union tgsi_exec_channel *src0, |
||
446 | const union tgsi_exec_channel *src1) |
||
447 | { |
||
448 | dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; |
||
449 | dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; |
||
450 | dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; |
||
451 | dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; |
||
452 | } |
||
453 | |||
454 | static void |
||
455 | micro_sfl(union tgsi_exec_channel *dst) |
||
456 | { |
||
457 | dst->f[0] = 0.0f; |
||
458 | dst->f[1] = 0.0f; |
||
459 | dst->f[2] = 0.0f; |
||
460 | dst->f[3] = 0.0f; |
||
461 | } |
||
462 | |||
463 | static void |
||
464 | micro_str(union tgsi_exec_channel *dst) |
||
465 | { |
||
466 | dst->f[0] = 1.0f; |
||
467 | dst->f[1] = 1.0f; |
||
468 | dst->f[2] = 1.0f; |
||
469 | dst->f[3] = 1.0f; |
||
470 | } |
||
471 | |||
472 | static void |
||
473 | micro_trunc(union tgsi_exec_channel *dst, |
||
474 | const union tgsi_exec_channel *src) |
||
475 | { |
||
476 | dst->f[0] = (float)(int)src->f[0]; |
||
477 | dst->f[1] = (float)(int)src->f[1]; |
||
478 | dst->f[2] = (float)(int)src->f[2]; |
||
479 | dst->f[3] = (float)(int)src->f[3]; |
||
480 | } |
||
481 | |||
482 | |||
/**
 * Data type tags; the names distinguish float, signed integer and unsigned
 * integer data.  The numeric values are spelled out but are the same ones
 * the compiler would assign implicitly.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
||
488 | |||
489 | /* |
||
490 | * Shorthand locations of various utility registers (_I = Index, _C = Channel) |
||
491 | */ |
||
492 | #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I |
||
493 | #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C |
||
494 | #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I |
||
495 | #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C |
||
496 | #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I |
||
497 | #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C |
||
498 | |||
499 | |||
/**
 * Recompute the execution mask of machine MACH as the bitwise AND of its
 * conditional, loop, continue, switch and function masks.
 *
 * The argument and the whole expansion are parenthesized so the macro is
 * safe with any pointer expression and in any expression context.  MACH is
 * evaluated several times, so it must not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
||
503 | |||
504 | |||
505 | static const union tgsi_exec_channel ZeroVec = |
||
506 | { { 0.0, 0.0, 0.0, 0.0 } }; |
||
507 | |||
508 | static const union tgsi_exec_channel OneVec = { |
||
509 | {1.0f, 1.0f, 1.0f, 1.0f} |
||
510 | }; |
||
511 | |||
512 | static const union tgsi_exec_channel P128Vec = { |
||
513 | {128.0f, 128.0f, 128.0f, 128.0f} |
||
514 | }; |
||
515 | |||
516 | static const union tgsi_exec_channel M128Vec = { |
||
517 | {-128.0f, -128.0f, -128.0f, -128.0f} |
||
518 | }; |
||
519 | |||
520 | |||
521 | /** |
||
522 | * Assert that none of the float values in 'chan' are infinite or NaN. |
||
523 | * NaN and Inf may occur normally during program execution and should |
||
524 | * not lead to crashes, etc. But when debugging, it's helpful to catch |
||
525 | * them. |
||
526 | */ |
||
527 | static INLINE void |
||
528 | check_inf_or_nan(const union tgsi_exec_channel *chan) |
||
529 | { |
||
530 | assert(!util_is_inf_or_nan((chan)->f[0])); |
||
531 | assert(!util_is_inf_or_nan((chan)->f[1])); |
||
532 | assert(!util_is_inf_or_nan((chan)->f[2])); |
||
533 | assert(!util_is_inf_or_nan((chan)->f[3])); |
||
534 | } |
||
535 | |||
536 | |||
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of 'chan', prefixed by 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
||
545 | |||
546 | |||
#ifdef DEBUG
/**
 * Debug helper: print temporary register 'index' of 'mach', one line per
 * X/Y/Z/W component with its four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char componentNames[4] = { 'X', 'Y', 'Z', 'W' };
   const struct tgsi_exec_vector *tmp = &mach->Temps[index];
   int comp;

   debug_printf("Temp[%u] =\n", index);
   for (comp = 0; comp < 4; comp++) {
      const float *lanes = tmp->xyzw[comp].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   componentNames[comp],
                   lanes[0], lanes[1], lanes[2], lanes[3]);
   }
}
#endif
||
564 | |||
565 | |||
566 | void |
||
567 | tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, |
||
568 | unsigned num_bufs, |
||
569 | const void **bufs, |
||
570 | const unsigned *buf_sizes) |
||
571 | { |
||
572 | unsigned i; |
||
573 | |||
574 | for (i = 0; i < num_bufs; i++) { |
||
575 | mach->Consts[i] = bufs[i]; |
||
576 | mach->ConstsSize[i] = buf_sizes[i]; |
||
577 | } |
||
578 | } |
||
579 | |||
580 | |||
581 | /** |
||
582 | * Check if there's a potential src/dst register data dependency when |
||
583 | * using SOA execution. |
||
584 | * Example: |
||
585 | * MOV T, T.yxwz; |
||
586 | * This would expand into: |
||
587 | * MOV t0, t1; |
||
588 | * MOV t1, t0; |
||
589 | * MOV t2, t3; |
||
590 | * MOV t3, t2; |
||
591 | * The second instruction will have the wrong value for t0 if executed as-is. |
||
592 | */ |
||
593 | boolean |
||
594 | tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) |
||
595 | { |
||
596 | uint i, chan; |
||
597 | |||
598 | uint writemask = inst->Dst[0].Register.WriteMask; |
||
599 | if (writemask == TGSI_WRITEMASK_X || |
||
600 | writemask == TGSI_WRITEMASK_Y || |
||
601 | writemask == TGSI_WRITEMASK_Z || |
||
602 | writemask == TGSI_WRITEMASK_W || |
||
603 | writemask == TGSI_WRITEMASK_NONE) { |
||
604 | /* no chance of data dependency */ |
||
605 | return FALSE; |
||
606 | } |
||
607 | |||
608 | /* loop over src regs */ |
||
609 | for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { |
||
610 | if ((inst->Src[i].Register.File == |
||
611 | inst->Dst[0].Register.File) && |
||
612 | ((inst->Src[i].Register.Index == |
||
613 | inst->Dst[0].Register.Index) || |
||
614 | inst->Src[i].Register.Indirect || |
||
615 | inst->Dst[0].Register.Indirect)) { |
||
616 | /* loop over dest channels */ |
||
617 | uint channelsWritten = 0x0; |
||
618 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
619 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
620 | /* check if we're reading a channel that's been written */ |
||
621 | uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); |
||
622 | if (channelsWritten & (1 << swizzle)) { |
||
623 | return TRUE; |
||
624 | } |
||
625 | |||
626 | channelsWritten |= (1 << chan); |
||
627 | } |
||
628 | } |
||
629 | } |
||
630 | } |
||
631 | return FALSE; |
||
632 | } |
||
633 | |||
634 | |||
635 | /** |
||
636 | * Initialize machine state by expanding tokens to full instructions, |
||
637 | * allocating temporary storage, setting up constants, etc. |
||
638 | * After this, we can call tgsi_exec_machine_run() many times. |
||
639 | */ |
||
640 | void |
||
641 | tgsi_exec_machine_bind_shader( |
||
642 | struct tgsi_exec_machine *mach, |
||
643 | const struct tgsi_token *tokens, |
||
644 | struct tgsi_sampler *sampler) |
||
645 | { |
||
646 | uint k; |
||
647 | struct tgsi_parse_context parse; |
||
648 | struct tgsi_full_instruction *instructions; |
||
649 | struct tgsi_full_declaration *declarations; |
||
650 | uint maxInstructions = 10, numInstructions = 0; |
||
651 | uint maxDeclarations = 10, numDeclarations = 0; |
||
652 | |||
653 | #if 0 |
||
654 | tgsi_dump(tokens, 0); |
||
655 | #endif |
||
656 | |||
657 | util_init_math(); |
||
658 | |||
659 | |||
660 | mach->Tokens = tokens; |
||
661 | mach->Sampler = sampler; |
||
662 | |||
663 | if (!tokens) { |
||
664 | /* unbind and free all */ |
||
665 | FREE(mach->Declarations); |
||
666 | mach->Declarations = NULL; |
||
667 | mach->NumDeclarations = 0; |
||
668 | |||
669 | FREE(mach->Instructions); |
||
670 | mach->Instructions = NULL; |
||
671 | mach->NumInstructions = 0; |
||
672 | |||
673 | return; |
||
674 | } |
||
675 | |||
676 | k = tgsi_parse_init (&parse, mach->Tokens); |
||
677 | if (k != TGSI_PARSE_OK) { |
||
678 | debug_printf( "Problem parsing!\n" ); |
||
679 | return; |
||
680 | } |
||
681 | |||
682 | mach->Processor = parse.FullHeader.Processor.Processor; |
||
683 | mach->ImmLimit = 0; |
||
684 | mach->NumOutputs = 0; |
||
685 | |||
686 | if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && |
||
687 | !mach->UsedGeometryShader) { |
||
688 | struct tgsi_exec_vector *inputs; |
||
689 | struct tgsi_exec_vector *outputs; |
||
690 | |||
691 | inputs = align_malloc(sizeof(struct tgsi_exec_vector) * |
||
692 | TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS, |
||
693 | 16); |
||
694 | |||
695 | if (!inputs) |
||
696 | return; |
||
697 | |||
698 | outputs = align_malloc(sizeof(struct tgsi_exec_vector) * |
||
699 | TGSI_MAX_TOTAL_VERTICES, 16); |
||
700 | |||
701 | if (!outputs) { |
||
702 | align_free(inputs); |
||
703 | return; |
||
704 | } |
||
705 | |||
706 | align_free(mach->Inputs); |
||
707 | align_free(mach->Outputs); |
||
708 | |||
709 | mach->Inputs = inputs; |
||
710 | mach->Outputs = outputs; |
||
711 | mach->UsedGeometryShader = TRUE; |
||
712 | } |
||
713 | |||
714 | declarations = (struct tgsi_full_declaration *) |
||
715 | MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); |
||
716 | |||
717 | if (!declarations) { |
||
718 | return; |
||
719 | } |
||
720 | |||
721 | instructions = (struct tgsi_full_instruction *) |
||
722 | MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); |
||
723 | |||
724 | if (!instructions) { |
||
725 | FREE( declarations ); |
||
726 | return; |
||
727 | } |
||
728 | |||
729 | while( !tgsi_parse_end_of_tokens( &parse ) ) { |
||
730 | uint i; |
||
731 | |||
732 | tgsi_parse_token( &parse ); |
||
733 | switch( parse.FullToken.Token.Type ) { |
||
734 | case TGSI_TOKEN_TYPE_DECLARATION: |
||
735 | /* save expanded declaration */ |
||
736 | if (numDeclarations == maxDeclarations) { |
||
737 | declarations = REALLOC(declarations, |
||
738 | maxDeclarations |
||
739 | * sizeof(struct tgsi_full_declaration), |
||
740 | (maxDeclarations + 10) |
||
741 | * sizeof(struct tgsi_full_declaration)); |
||
742 | maxDeclarations += 10; |
||
743 | } |
||
744 | if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { |
||
745 | unsigned reg; |
||
746 | for (reg = parse.FullToken.FullDeclaration.Range.First; |
||
747 | reg <= parse.FullToken.FullDeclaration.Range.Last; |
||
748 | ++reg) { |
||
749 | ++mach->NumOutputs; |
||
750 | } |
||
751 | } |
||
752 | memcpy(declarations + numDeclarations, |
||
753 | &parse.FullToken.FullDeclaration, |
||
754 | sizeof(declarations[0])); |
||
755 | numDeclarations++; |
||
756 | break; |
||
757 | |||
758 | case TGSI_TOKEN_TYPE_IMMEDIATE: |
||
759 | { |
||
760 | uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; |
||
761 | assert( size <= 4 ); |
||
762 | assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); |
||
763 | |||
764 | for( i = 0; i < size; i++ ) { |
||
765 | mach->Imms[mach->ImmLimit][i] = |
||
766 | parse.FullToken.FullImmediate.u[i].Float; |
||
767 | } |
||
768 | mach->ImmLimit += 1; |
||
769 | } |
||
770 | break; |
||
771 | |||
772 | case TGSI_TOKEN_TYPE_INSTRUCTION: |
||
773 | |||
774 | /* save expanded instruction */ |
||
775 | if (numInstructions == maxInstructions) { |
||
776 | instructions = REALLOC(instructions, |
||
777 | maxInstructions |
||
778 | * sizeof(struct tgsi_full_instruction), |
||
779 | (maxInstructions + 10) |
||
780 | * sizeof(struct tgsi_full_instruction)); |
||
781 | maxInstructions += 10; |
||
782 | } |
||
783 | |||
784 | memcpy(instructions + numInstructions, |
||
785 | &parse.FullToken.FullInstruction, |
||
786 | sizeof(instructions[0])); |
||
787 | |||
788 | numInstructions++; |
||
789 | break; |
||
790 | |||
791 | case TGSI_TOKEN_TYPE_PROPERTY: |
||
792 | break; |
||
793 | |||
794 | default: |
||
795 | assert( 0 ); |
||
796 | } |
||
797 | } |
||
798 | tgsi_parse_free (&parse); |
||
799 | |||
800 | FREE(mach->Declarations); |
||
801 | mach->Declarations = declarations; |
||
802 | mach->NumDeclarations = numDeclarations; |
||
803 | |||
804 | FREE(mach->Instructions); |
||
805 | mach->Instructions = instructions; |
||
806 | mach->NumInstructions = numInstructions; |
||
807 | } |
||
808 | |||
809 | |||
810 | struct tgsi_exec_machine * |
||
811 | tgsi_exec_machine_create( void ) |
||
812 | { |
||
813 | struct tgsi_exec_machine *mach; |
||
814 | uint i; |
||
815 | |||
816 | mach = align_malloc( sizeof *mach, 16 ); |
||
817 | if (!mach) |
||
818 | goto fail; |
||
819 | |||
820 | memset(mach, 0, sizeof(*mach)); |
||
821 | |||
822 | mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; |
||
823 | mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; |
||
824 | mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; |
||
825 | |||
826 | mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); |
||
827 | mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); |
||
828 | if (!mach->Inputs || !mach->Outputs) |
||
829 | goto fail; |
||
830 | |||
831 | /* Setup constants needed by the SSE2 executor. */ |
||
832 | for( i = 0; i < 4; i++ ) { |
||
833 | mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; |
||
834 | mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; |
||
835 | mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; |
||
836 | mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ |
||
837 | mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; |
||
838 | mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ |
||
839 | mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; |
||
840 | mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; |
||
841 | mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; |
||
842 | mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; |
||
843 | } |
||
844 | |||
845 | #ifdef DEBUG |
||
846 | /* silence warnings */ |
||
847 | (void) print_chan; |
||
848 | (void) print_temp; |
||
849 | #endif |
||
850 | |||
851 | return mach; |
||
852 | |||
853 | fail: |
||
854 | if (mach) { |
||
855 | align_free(mach->Inputs); |
||
856 | align_free(mach->Outputs); |
||
857 | align_free(mach); |
||
858 | } |
||
859 | return NULL; |
||
860 | } |
||
861 | |||
862 | |||
863 | void |
||
864 | tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) |
||
865 | { |
||
866 | if (mach) { |
||
867 | FREE(mach->Instructions); |
||
868 | FREE(mach->Declarations); |
||
869 | |||
870 | align_free(mach->Inputs); |
||
871 | align_free(mach->Outputs); |
||
872 | |||
873 | align_free(mach); |
||
874 | } |
||
875 | } |
||
876 | |||
877 | static void |
||
878 | micro_add(union tgsi_exec_channel *dst, |
||
879 | const union tgsi_exec_channel *src0, |
||
880 | const union tgsi_exec_channel *src1) |
||
881 | { |
||
882 | dst->f[0] = src0->f[0] + src1->f[0]; |
||
883 | dst->f[1] = src0->f[1] + src1->f[1]; |
||
884 | dst->f[2] = src0->f[2] + src1->f[2]; |
||
885 | dst->f[3] = src0->f[3] + src1->f[3]; |
||
886 | } |
||
887 | |||
888 | static void |
||
889 | micro_div( |
||
890 | union tgsi_exec_channel *dst, |
||
891 | const union tgsi_exec_channel *src0, |
||
892 | const union tgsi_exec_channel *src1 ) |
||
893 | { |
||
894 | if (src1->f[0] != 0) { |
||
895 | dst->f[0] = src0->f[0] / src1->f[0]; |
||
896 | } |
||
897 | if (src1->f[1] != 0) { |
||
898 | dst->f[1] = src0->f[1] / src1->f[1]; |
||
899 | } |
||
900 | if (src1->f[2] != 0) { |
||
901 | dst->f[2] = src0->f[2] / src1->f[2]; |
||
902 | } |
||
903 | if (src1->f[3] != 0) { |
||
904 | dst->f[3] = src0->f[3] / src1->f[3]; |
||
905 | } |
||
906 | } |
||
907 | |||
908 | static void |
||
909 | micro_rcc(union tgsi_exec_channel *dst, |
||
910 | const union tgsi_exec_channel *src) |
||
911 | { |
||
912 | uint i; |
||
913 | |||
914 | for (i = 0; i < 4; i++) { |
||
915 | float recip = 1.0f / src->f[i]; |
||
916 | |||
917 | if (recip > 0.0f) { |
||
918 | if (recip > 1.884467e+019f) { |
||
919 | dst->f[i] = 1.884467e+019f; |
||
920 | } |
||
921 | else if (recip < 5.42101e-020f) { |
||
922 | dst->f[i] = 5.42101e-020f; |
||
923 | } |
||
924 | else { |
||
925 | dst->f[i] = recip; |
||
926 | } |
||
927 | } |
||
928 | else { |
||
929 | if (recip < -1.884467e+019f) { |
||
930 | dst->f[i] = -1.884467e+019f; |
||
931 | } |
||
932 | else if (recip > -5.42101e-020f) { |
||
933 | dst->f[i] = -5.42101e-020f; |
||
934 | } |
||
935 | else { |
||
936 | dst->f[i] = recip; |
||
937 | } |
||
938 | } |
||
939 | } |
||
940 | } |
||
941 | |||
942 | static void |
||
943 | micro_lt( |
||
944 | union tgsi_exec_channel *dst, |
||
945 | const union tgsi_exec_channel *src0, |
||
946 | const union tgsi_exec_channel *src1, |
||
947 | const union tgsi_exec_channel *src2, |
||
948 | const union tgsi_exec_channel *src3 ) |
||
949 | { |
||
950 | dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; |
||
951 | dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; |
||
952 | dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; |
||
953 | dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; |
||
954 | } |
||
955 | |||
956 | static void |
||
957 | micro_max(union tgsi_exec_channel *dst, |
||
958 | const union tgsi_exec_channel *src0, |
||
959 | const union tgsi_exec_channel *src1) |
||
960 | { |
||
961 | dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; |
||
962 | dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; |
||
963 | dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; |
||
964 | dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; |
||
965 | } |
||
966 | |||
967 | static void |
||
968 | micro_min(union tgsi_exec_channel *dst, |
||
969 | const union tgsi_exec_channel *src0, |
||
970 | const union tgsi_exec_channel *src1) |
||
971 | { |
||
972 | dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; |
||
973 | dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; |
||
974 | dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; |
||
975 | dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; |
||
976 | } |
||
977 | |||
978 | static void |
||
979 | micro_mul(union tgsi_exec_channel *dst, |
||
980 | const union tgsi_exec_channel *src0, |
||
981 | const union tgsi_exec_channel *src1) |
||
982 | { |
||
983 | dst->f[0] = src0->f[0] * src1->f[0]; |
||
984 | dst->f[1] = src0->f[1] * src1->f[1]; |
||
985 | dst->f[2] = src0->f[2] * src1->f[2]; |
||
986 | dst->f[3] = src0->f[3] * src1->f[3]; |
||
987 | } |
||
988 | |||
989 | static void |
||
990 | micro_neg( |
||
991 | union tgsi_exec_channel *dst, |
||
992 | const union tgsi_exec_channel *src ) |
||
993 | { |
||
994 | dst->f[0] = -src->f[0]; |
||
995 | dst->f[1] = -src->f[1]; |
||
996 | dst->f[2] = -src->f[2]; |
||
997 | dst->f[3] = -src->f[3]; |
||
998 | } |
||
999 | |||
1000 | static void |
||
1001 | micro_pow( |
||
1002 | union tgsi_exec_channel *dst, |
||
1003 | const union tgsi_exec_channel *src0, |
||
1004 | const union tgsi_exec_channel *src1 ) |
||
1005 | { |
||
1006 | #if FAST_MATH |
||
1007 | dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); |
||
1008 | dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); |
||
1009 | dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); |
||
1010 | dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); |
||
1011 | #else |
||
1012 | dst->f[0] = powf( src0->f[0], src1->f[0] ); |
||
1013 | dst->f[1] = powf( src0->f[1], src1->f[1] ); |
||
1014 | dst->f[2] = powf( src0->f[2], src1->f[2] ); |
||
1015 | dst->f[3] = powf( src0->f[3], src1->f[3] ); |
||
1016 | #endif |
||
1017 | } |
||
1018 | |||
1019 | static void |
||
1020 | micro_sub(union tgsi_exec_channel *dst, |
||
1021 | const union tgsi_exec_channel *src0, |
||
1022 | const union tgsi_exec_channel *src1) |
||
1023 | { |
||
1024 | dst->f[0] = src0->f[0] - src1->f[0]; |
||
1025 | dst->f[1] = src0->f[1] - src1->f[1]; |
||
1026 | dst->f[2] = src0->f[2] - src1->f[2]; |
||
1027 | dst->f[3] = src0->f[3] - src1->f[3]; |
||
1028 | } |
||
1029 | |||
1030 | static void |
||
1031 | fetch_src_file_channel(const struct tgsi_exec_machine *mach, |
||
1032 | const uint chan_index, |
||
1033 | const uint file, |
||
1034 | const uint swizzle, |
||
1035 | const union tgsi_exec_channel *index, |
||
1036 | const union tgsi_exec_channel *index2D, |
||
1037 | union tgsi_exec_channel *chan) |
||
1038 | { |
||
1039 | uint i; |
||
1040 | |||
1041 | assert(swizzle < 4); |
||
1042 | |||
1043 | switch (file) { |
||
1044 | case TGSI_FILE_CONSTANT: |
||
1045 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1046 | assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); |
||
1047 | assert(mach->Consts[index2D->i[i]]); |
||
1048 | |||
1049 | if (index->i[i] < 0) { |
||
1050 | chan->u[i] = 0; |
||
1051 | } else { |
||
1052 | /* NOTE: copying the const value as a uint instead of float */ |
||
1053 | const uint constbuf = index2D->i[i]; |
||
1054 | const uint *buf = (const uint *)mach->Consts[constbuf]; |
||
1055 | const int pos = index->i[i] * 4 + swizzle; |
||
1056 | /* const buffer bounds check */ |
||
1057 | if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) { |
||
1058 | if (0) { |
||
1059 | /* Debug: print warning */ |
||
1060 | static int count = 0; |
||
1061 | if (count++ < 100) |
||
1062 | debug_printf("TGSI Exec: const buffer index %d" |
||
1063 | " out of bounds\n", pos); |
||
1064 | } |
||
1065 | chan->u[i] = 0; |
||
1066 | } |
||
1067 | else |
||
1068 | chan->u[i] = buf[pos]; |
||
1069 | } |
||
1070 | } |
||
1071 | break; |
||
1072 | |||
1073 | case TGSI_FILE_INPUT: |
||
1074 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1075 | /* |
||
1076 | if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { |
||
1077 | debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", |
||
1078 | index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], |
||
1079 | index2D->i[i], index->i[i]); |
||
1080 | }*/ |
||
1081 | int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; |
||
1082 | assert(pos >= 0); |
||
1083 | assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); |
||
1084 | chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; |
||
1085 | } |
||
1086 | break; |
||
1087 | |||
1088 | case TGSI_FILE_SYSTEM_VALUE: |
||
1089 | /* XXX no swizzling at this point. Will be needed if we put |
||
1090 | * gl_FragCoord, for example, in a sys value register. |
||
1091 | */ |
||
1092 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1093 | chan->u[i] = mach->SystemValue[index->i[i]].u[i]; |
||
1094 | } |
||
1095 | break; |
||
1096 | |||
1097 | case TGSI_FILE_TEMPORARY: |
||
1098 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1099 | assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); |
||
1100 | assert(index2D->i[i] == 0); |
||
1101 | |||
1102 | chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; |
||
1103 | } |
||
1104 | break; |
||
1105 | |||
1106 | case TGSI_FILE_IMMEDIATE: |
||
1107 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1108 | assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); |
||
1109 | assert(index2D->i[i] == 0); |
||
1110 | |||
1111 | chan->f[i] = mach->Imms[index->i[i]][swizzle]; |
||
1112 | } |
||
1113 | break; |
||
1114 | |||
1115 | case TGSI_FILE_ADDRESS: |
||
1116 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1117 | assert(index->i[i] >= 0); |
||
1118 | assert(index2D->i[i] == 0); |
||
1119 | |||
1120 | chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; |
||
1121 | } |
||
1122 | break; |
||
1123 | |||
1124 | case TGSI_FILE_PREDICATE: |
||
1125 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1126 | assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); |
||
1127 | assert(index2D->i[i] == 0); |
||
1128 | |||
1129 | chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; |
||
1130 | } |
||
1131 | break; |
||
1132 | |||
1133 | case TGSI_FILE_OUTPUT: |
||
1134 | /* vertex/fragment output vars can be read too */ |
||
1135 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1136 | assert(index->i[i] >= 0); |
||
1137 | assert(index2D->i[i] == 0); |
||
1138 | |||
1139 | chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; |
||
1140 | } |
||
1141 | break; |
||
1142 | |||
1143 | default: |
||
1144 | assert(0); |
||
1145 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1146 | chan->u[i] = 0; |
||
1147 | } |
||
1148 | } |
||
1149 | } |
||
1150 | |||
1151 | static void |
||
1152 | fetch_source(const struct tgsi_exec_machine *mach, |
||
1153 | union tgsi_exec_channel *chan, |
||
1154 | const struct tgsi_full_src_register *reg, |
||
1155 | const uint chan_index, |
||
1156 | enum tgsi_exec_datatype src_datatype) |
||
1157 | { |
||
1158 | union tgsi_exec_channel index; |
||
1159 | union tgsi_exec_channel index2D; |
||
1160 | uint swizzle; |
||
1161 | |||
1162 | /* We start with a direct index into a register file. |
||
1163 | * |
||
1164 | * file[1], |
||
1165 | * where: |
||
1166 | * file = Register.File |
||
1167 | * [1] = Register.Index |
||
1168 | */ |
||
1169 | index.i[0] = |
||
1170 | index.i[1] = |
||
1171 | index.i[2] = |
||
1172 | index.i[3] = reg->Register.Index; |
||
1173 | |||
1174 | /* There is an extra source register that indirectly subscripts |
||
1175 | * a register file. The direct index now becomes an offset |
||
1176 | * that is being added to the indirect register. |
||
1177 | * |
||
1178 | * file[ind[2].x+1], |
||
1179 | * where: |
||
1180 | * ind = Indirect.File |
||
1181 | * [2] = Indirect.Index |
||
1182 | * .x = Indirect.SwizzleX |
||
1183 | */ |
||
1184 | if (reg->Register.Indirect) { |
||
1185 | union tgsi_exec_channel index2; |
||
1186 | union tgsi_exec_channel indir_index; |
||
1187 | const uint execmask = mach->ExecMask; |
||
1188 | uint i; |
||
1189 | |||
1190 | /* which address register (always zero now) */ |
||
1191 | index2.i[0] = |
||
1192 | index2.i[1] = |
||
1193 | index2.i[2] = |
||
1194 | index2.i[3] = reg->Indirect.Index; |
||
1195 | /* get current value of address register[swizzle] */ |
||
1196 | swizzle = reg->Indirect.Swizzle; |
||
1197 | fetch_src_file_channel(mach, |
||
1198 | chan_index, |
||
1199 | reg->Indirect.File, |
||
1200 | swizzle, |
||
1201 | &index2, |
||
1202 | &ZeroVec, |
||
1203 | &indir_index); |
||
1204 | |||
1205 | /* add value of address register to the offset */ |
||
1206 | index.i[0] += indir_index.i[0]; |
||
1207 | index.i[1] += indir_index.i[1]; |
||
1208 | index.i[2] += indir_index.i[2]; |
||
1209 | index.i[3] += indir_index.i[3]; |
||
1210 | |||
1211 | /* for disabled execution channels, zero-out the index to |
||
1212 | * avoid using a potential garbage value. |
||
1213 | */ |
||
1214 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1215 | if ((execmask & (1 << i)) == 0) |
||
1216 | index.i[i] = 0; |
||
1217 | } |
||
1218 | } |
||
1219 | |||
1220 | /* There is an extra source register that is a second |
||
1221 | * subscript to a register file. Effectively it means that |
||
1222 | * the register file is actually a 2D array of registers. |
||
1223 | * |
||
1224 | * file[3][1], |
||
1225 | * where: |
||
1226 | * [3] = Dimension.Index |
||
1227 | */ |
||
1228 | if (reg->Register.Dimension) { |
||
1229 | index2D.i[0] = |
||
1230 | index2D.i[1] = |
||
1231 | index2D.i[2] = |
||
1232 | index2D.i[3] = reg->Dimension.Index; |
||
1233 | |||
1234 | /* Again, the second subscript index can be addressed indirectly |
||
1235 | * identically to the first one. |
||
1236 | * Nothing stops us from indirectly addressing the indirect register, |
||
1237 | * but there is no need for that, so we won't exercise it. |
||
1238 | * |
||
1239 | * file[ind[4].y+3][1], |
||
1240 | * where: |
||
1241 | * ind = DimIndirect.File |
||
1242 | * [4] = DimIndirect.Index |
||
1243 | * .y = DimIndirect.SwizzleX |
||
1244 | */ |
||
1245 | if (reg->Dimension.Indirect) { |
||
1246 | union tgsi_exec_channel index2; |
||
1247 | union tgsi_exec_channel indir_index; |
||
1248 | const uint execmask = mach->ExecMask; |
||
1249 | uint i; |
||
1250 | |||
1251 | index2.i[0] = |
||
1252 | index2.i[1] = |
||
1253 | index2.i[2] = |
||
1254 | index2.i[3] = reg->DimIndirect.Index; |
||
1255 | |||
1256 | swizzle = reg->DimIndirect.Swizzle; |
||
1257 | fetch_src_file_channel(mach, |
||
1258 | chan_index, |
||
1259 | reg->DimIndirect.File, |
||
1260 | swizzle, |
||
1261 | &index2, |
||
1262 | &ZeroVec, |
||
1263 | &indir_index); |
||
1264 | |||
1265 | index2D.i[0] += indir_index.i[0]; |
||
1266 | index2D.i[1] += indir_index.i[1]; |
||
1267 | index2D.i[2] += indir_index.i[2]; |
||
1268 | index2D.i[3] += indir_index.i[3]; |
||
1269 | |||
1270 | /* for disabled execution channels, zero-out the index to |
||
1271 | * avoid using a potential garbage value. |
||
1272 | */ |
||
1273 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1274 | if ((execmask & (1 << i)) == 0) { |
||
1275 | index2D.i[i] = 0; |
||
1276 | } |
||
1277 | } |
||
1278 | } |
||
1279 | |||
1280 | /* If by any chance there was a need for a 3D array of register |
||
1281 | * files, we would have to check whether Dimension is followed |
||
1282 | * by a dimension register and continue the saga. |
||
1283 | */ |
||
1284 | } else { |
||
1285 | index2D.i[0] = |
||
1286 | index2D.i[1] = |
||
1287 | index2D.i[2] = |
||
1288 | index2D.i[3] = 0; |
||
1289 | } |
||
1290 | |||
1291 | swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); |
||
1292 | fetch_src_file_channel(mach, |
||
1293 | chan_index, |
||
1294 | reg->Register.File, |
||
1295 | swizzle, |
||
1296 | &index, |
||
1297 | &index2D, |
||
1298 | chan); |
||
1299 | |||
1300 | if (reg->Register.Absolute) { |
||
1301 | if (src_datatype == TGSI_EXEC_DATA_FLOAT) { |
||
1302 | micro_abs(chan, chan); |
||
1303 | } else { |
||
1304 | micro_iabs(chan, chan); |
||
1305 | } |
||
1306 | } |
||
1307 | |||
1308 | if (reg->Register.Negate) { |
||
1309 | if (src_datatype == TGSI_EXEC_DATA_FLOAT) { |
||
1310 | micro_neg(chan, chan); |
||
1311 | } else { |
||
1312 | micro_ineg(chan, chan); |
||
1313 | } |
||
1314 | } |
||
1315 | } |
||
1316 | |||
1317 | static void |
||
1318 | store_dest(struct tgsi_exec_machine *mach, |
||
1319 | const union tgsi_exec_channel *chan, |
||
1320 | const struct tgsi_full_dst_register *reg, |
||
1321 | const struct tgsi_full_instruction *inst, |
||
1322 | uint chan_index, |
||
1323 | enum tgsi_exec_datatype dst_datatype) |
||
1324 | { |
||
1325 | uint i; |
||
1326 | union tgsi_exec_channel null; |
||
1327 | union tgsi_exec_channel *dst; |
||
1328 | union tgsi_exec_channel index2D; |
||
1329 | uint execmask = mach->ExecMask; |
||
1330 | int offset = 0; /* indirection offset */ |
||
1331 | int index; |
||
1332 | |||
1333 | /* for debugging */ |
||
1334 | if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { |
||
1335 | check_inf_or_nan(chan); |
||
1336 | } |
||
1337 | |||
1338 | /* There is an extra source register that indirectly subscripts |
||
1339 | * a register file. The direct index now becomes an offset |
||
1340 | * that is being added to the indirect register. |
||
1341 | * |
||
1342 | * file[ind[2].x+1], |
||
1343 | * where: |
||
1344 | * ind = Indirect.File |
||
1345 | * [2] = Indirect.Index |
||
1346 | * .x = Indirect.SwizzleX |
||
1347 | */ |
||
1348 | if (reg->Register.Indirect) { |
||
1349 | union tgsi_exec_channel index; |
||
1350 | union tgsi_exec_channel indir_index; |
||
1351 | uint swizzle; |
||
1352 | |||
1353 | /* which address register (always zero for now) */ |
||
1354 | index.i[0] = |
||
1355 | index.i[1] = |
||
1356 | index.i[2] = |
||
1357 | index.i[3] = reg->Indirect.Index; |
||
1358 | |||
1359 | /* get current value of address register[swizzle] */ |
||
1360 | swizzle = reg->Indirect.Swizzle; |
||
1361 | |||
1362 | /* fetch values from the address/indirection register */ |
||
1363 | fetch_src_file_channel(mach, |
||
1364 | chan_index, |
||
1365 | reg->Indirect.File, |
||
1366 | swizzle, |
||
1367 | &index, |
||
1368 | &ZeroVec, |
||
1369 | &indir_index); |
||
1370 | |||
1371 | /* save indirection offset */ |
||
1372 | offset = indir_index.i[0]; |
||
1373 | } |
||
1374 | |||
1375 | /* There is an extra source register that is a second |
||
1376 | * subscript to a register file. Effectively it means that |
||
1377 | * the register file is actually a 2D array of registers. |
||
1378 | * |
||
1379 | * file[3][1], |
||
1380 | * where: |
||
1381 | * [3] = Dimension.Index |
||
1382 | */ |
||
1383 | if (reg->Register.Dimension) { |
||
1384 | index2D.i[0] = |
||
1385 | index2D.i[1] = |
||
1386 | index2D.i[2] = |
||
1387 | index2D.i[3] = reg->Dimension.Index; |
||
1388 | |||
1389 | /* Again, the second subscript index can be addressed indirectly |
||
1390 | * identically to the first one. |
||
1391 | * Nothing stops us from indirectly addressing the indirect register, |
||
1392 | * but there is no need for that, so we won't exercise it. |
||
1393 | * |
||
1394 | * file[ind[4].y+3][1], |
||
1395 | * where: |
||
1396 | * ind = DimIndirect.File |
||
1397 | * [4] = DimIndirect.Index |
||
1398 | * .y = DimIndirect.SwizzleX |
||
1399 | */ |
||
1400 | if (reg->Dimension.Indirect) { |
||
1401 | union tgsi_exec_channel index2; |
||
1402 | union tgsi_exec_channel indir_index; |
||
1403 | const uint execmask = mach->ExecMask; |
||
1404 | unsigned swizzle; |
||
1405 | uint i; |
||
1406 | |||
1407 | index2.i[0] = |
||
1408 | index2.i[1] = |
||
1409 | index2.i[2] = |
||
1410 | index2.i[3] = reg->DimIndirect.Index; |
||
1411 | |||
1412 | swizzle = reg->DimIndirect.Swizzle; |
||
1413 | fetch_src_file_channel(mach, |
||
1414 | chan_index, |
||
1415 | reg->DimIndirect.File, |
||
1416 | swizzle, |
||
1417 | &index2, |
||
1418 | &ZeroVec, |
||
1419 | &indir_index); |
||
1420 | |||
1421 | index2D.i[0] += indir_index.i[0]; |
||
1422 | index2D.i[1] += indir_index.i[1]; |
||
1423 | index2D.i[2] += indir_index.i[2]; |
||
1424 | index2D.i[3] += indir_index.i[3]; |
||
1425 | |||
1426 | /* for disabled execution channels, zero-out the index to |
||
1427 | * avoid using a potential garbage value. |
||
1428 | */ |
||
1429 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1430 | if ((execmask & (1 << i)) == 0) { |
||
1431 | index2D.i[i] = 0; |
||
1432 | } |
||
1433 | } |
||
1434 | } |
||
1435 | |||
1436 | /* If by any chance there was a need for a 3D array of register |
||
1437 | * files, we would have to check whether Dimension is followed |
||
1438 | * by a dimension register and continue the saga. |
||
1439 | */ |
||
1440 | } else { |
||
1441 | index2D.i[0] = |
||
1442 | index2D.i[1] = |
||
1443 | index2D.i[2] = |
||
1444 | index2D.i[3] = 0; |
||
1445 | } |
||
1446 | |||
1447 | switch (reg->Register.File) { |
||
1448 | case TGSI_FILE_NULL: |
||
1449 | dst = &null; |
||
1450 | break; |
||
1451 | |||
1452 | case TGSI_FILE_OUTPUT: |
||
1453 | index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] |
||
1454 | + reg->Register.Index; |
||
1455 | dst = &mach->Outputs[offset + index].xyzw[chan_index]; |
||
1456 | #if 0 |
||
1457 | debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", |
||
1458 | mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], |
||
1459 | reg->Register.Index); |
||
1460 | if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { |
||
1461 | debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); |
||
1462 | for (i = 0; i < TGSI_QUAD_SIZE; i++) |
||
1463 | if (execmask & (1 << i)) |
||
1464 | debug_printf("%f, ", chan->f[i]); |
||
1465 | debug_printf(")\n"); |
||
1466 | } |
||
1467 | #endif |
||
1468 | break; |
||
1469 | |||
1470 | case TGSI_FILE_TEMPORARY: |
||
1471 | index = reg->Register.Index; |
||
1472 | assert( index < TGSI_EXEC_NUM_TEMPS ); |
||
1473 | dst = &mach->Temps[offset + index].xyzw[chan_index]; |
||
1474 | break; |
||
1475 | |||
1476 | case TGSI_FILE_ADDRESS: |
||
1477 | index = reg->Register.Index; |
||
1478 | dst = &mach->Addrs[index].xyzw[chan_index]; |
||
1479 | break; |
||
1480 | |||
1481 | case TGSI_FILE_PREDICATE: |
||
1482 | index = reg->Register.Index; |
||
1483 | assert(index < TGSI_EXEC_NUM_PREDS); |
||
1484 | dst = &mach->Predicates[index].xyzw[chan_index]; |
||
1485 | break; |
||
1486 | |||
1487 | default: |
||
1488 | assert( 0 ); |
||
1489 | return; |
||
1490 | } |
||
1491 | |||
1492 | if (inst->Instruction.Predicate) { |
||
1493 | uint swizzle; |
||
1494 | union tgsi_exec_channel *pred; |
||
1495 | |||
1496 | switch (chan_index) { |
||
1497 | case TGSI_CHAN_X: |
||
1498 | swizzle = inst->Predicate.SwizzleX; |
||
1499 | break; |
||
1500 | case TGSI_CHAN_Y: |
||
1501 | swizzle = inst->Predicate.SwizzleY; |
||
1502 | break; |
||
1503 | case TGSI_CHAN_Z: |
||
1504 | swizzle = inst->Predicate.SwizzleZ; |
||
1505 | break; |
||
1506 | case TGSI_CHAN_W: |
||
1507 | swizzle = inst->Predicate.SwizzleW; |
||
1508 | break; |
||
1509 | default: |
||
1510 | assert(0); |
||
1511 | return; |
||
1512 | } |
||
1513 | |||
1514 | assert(inst->Predicate.Index == 0); |
||
1515 | |||
1516 | pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; |
||
1517 | |||
1518 | if (inst->Predicate.Negate) { |
||
1519 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1520 | if (pred->u[i]) { |
||
1521 | execmask &= ~(1 << i); |
||
1522 | } |
||
1523 | } |
||
1524 | } else { |
||
1525 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1526 | if (!pred->u[i]) { |
||
1527 | execmask &= ~(1 << i); |
||
1528 | } |
||
1529 | } |
||
1530 | } |
||
1531 | } |
||
1532 | |||
1533 | switch (inst->Instruction.Saturate) { |
||
1534 | case TGSI_SAT_NONE: |
||
1535 | for (i = 0; i < TGSI_QUAD_SIZE; i++) |
||
1536 | if (execmask & (1 << i)) |
||
1537 | dst->i[i] = chan->i[i]; |
||
1538 | break; |
||
1539 | |||
1540 | case TGSI_SAT_ZERO_ONE: |
||
1541 | for (i = 0; i < TGSI_QUAD_SIZE; i++) |
||
1542 | if (execmask & (1 << i)) { |
||
1543 | if (chan->f[i] < 0.0f) |
||
1544 | dst->f[i] = 0.0f; |
||
1545 | else if (chan->f[i] > 1.0f) |
||
1546 | dst->f[i] = 1.0f; |
||
1547 | else |
||
1548 | dst->i[i] = chan->i[i]; |
||
1549 | } |
||
1550 | break; |
||
1551 | |||
1552 | case TGSI_SAT_MINUS_PLUS_ONE: |
||
1553 | for (i = 0; i < TGSI_QUAD_SIZE; i++) |
||
1554 | if (execmask & (1 << i)) { |
||
1555 | if (chan->f[i] < -1.0f) |
||
1556 | dst->f[i] = -1.0f; |
||
1557 | else if (chan->f[i] > 1.0f) |
||
1558 | dst->f[i] = 1.0f; |
||
1559 | else |
||
1560 | dst->i[i] = chan->i[i]; |
||
1561 | } |
||
1562 | break; |
||
1563 | |||
1564 | default: |
||
1565 | assert( 0 ); |
||
1566 | } |
||
1567 | } |
||
1568 | |||
/**
 * Fetch channel CHAN of source operand INDEX of the current instruction
 * into VAL, treating the data as float.  Expects 'mach' and 'inst' to be
 * in scope at the point of use.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_FLOAT)

/**
 * Same as FETCH, but source modifiers are applied as integer operations.
 */
#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_INT)
||
1574 | |||
1575 | |||
1576 | /** |
||
1577 | * Execute ARB-style KIL which is predicated by a src register. |
||
1578 | * Kill fragment if any of the four values is less than zero. |
||
1579 | */ |
||
1580 | static void |
||
1581 | exec_kil(struct tgsi_exec_machine *mach, |
||
1582 | const struct tgsi_full_instruction *inst) |
||
1583 | { |
||
1584 | uint uniquemask; |
||
1585 | uint chan_index; |
||
1586 | uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ |
||
1587 | union tgsi_exec_channel r[1]; |
||
1588 | |||
1589 | /* This mask stores component bits that were already tested. */ |
||
1590 | uniquemask = 0; |
||
1591 | |||
1592 | for (chan_index = 0; chan_index < 4; chan_index++) |
||
1593 | { |
||
1594 | uint swizzle; |
||
1595 | uint i; |
||
1596 | |||
1597 | /* unswizzle channel */ |
||
1598 | swizzle = tgsi_util_get_full_src_register_swizzle ( |
||
1599 | &inst->Src[0], |
||
1600 | chan_index); |
||
1601 | |||
1602 | /* check if the component has not been already tested */ |
||
1603 | if (uniquemask & (1 << swizzle)) |
||
1604 | continue; |
||
1605 | uniquemask |= 1 << swizzle; |
||
1606 | |||
1607 | FETCH(&r[0], 0, chan_index); |
||
1608 | for (i = 0; i < 4; i++) |
||
1609 | if (r[0].f[i] < 0.0f) |
||
1610 | kilmask |= 1 << i; |
||
1611 | } |
||
1612 | |||
1613 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; |
||
1614 | } |
||
1615 | |||
1616 | /** |
||
1617 | * Execute NVIDIA-style KIL which is predicated by a condition code. |
||
1618 | * Kill fragment if the condition code is TRUE. |
||
1619 | */ |
||
1620 | static void |
||
1621 | exec_kilp(struct tgsi_exec_machine *mach, |
||
1622 | const struct tgsi_full_instruction *inst) |
||
1623 | { |
||
1624 | uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ |
||
1625 | |||
1626 | /* "unconditional" kil */ |
||
1627 | kilmask = mach->ExecMask; |
||
1628 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; |
||
1629 | } |
||
1630 | |||
1631 | static void |
||
1632 | emit_vertex(struct tgsi_exec_machine *mach) |
||
1633 | { |
||
1634 | /* FIXME: check for exec mask correctly |
||
1635 | unsigned i; |
||
1636 | for (i = 0; i < TGSI_QUAD_SIZE; ++i) { |
||
1637 | if ((mach->ExecMask & (1 << i))) |
||
1638 | */ |
||
1639 | if (mach->ExecMask) { |
||
1640 | mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; |
||
1641 | mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; |
||
1642 | } |
||
1643 | } |
||
1644 | |||
1645 | static void |
||
1646 | emit_primitive(struct tgsi_exec_machine *mach) |
||
1647 | { |
||
1648 | unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; |
||
1649 | /* FIXME: check for exec mask correctly |
||
1650 | unsigned i; |
||
1651 | for (i = 0; i < TGSI_QUAD_SIZE; ++i) { |
||
1652 | if ((mach->ExecMask & (1 << i))) |
||
1653 | */ |
||
1654 | if (mach->ExecMask) { |
||
1655 | ++(*prim_count); |
||
1656 | debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); |
||
1657 | mach->Primitives[*prim_count] = 0; |
||
1658 | } |
||
1659 | } |
||
1660 | |||
1661 | static void |
||
1662 | conditional_emit_primitive(struct tgsi_exec_machine *mach) |
||
1663 | { |
||
1664 | if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { |
||
1665 | int emitted_verts = |
||
1666 | mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; |
||
1667 | if (emitted_verts) { |
||
1668 | emit_primitive(mach); |
||
1669 | } |
||
1670 | } |
||
1671 | } |
||
1672 | |||
1673 | |||
1674 | /* |
||
1675 | * Fetch four texture samples using STR texture coordinates. |
||
1676 | */ |
||
1677 | static void |
||
1678 | fetch_texel( struct tgsi_sampler *sampler, |
||
1679 | const unsigned sview_idx, |
||
1680 | const unsigned sampler_idx, |
||
1681 | const union tgsi_exec_channel *s, |
||
1682 | const union tgsi_exec_channel *t, |
||
1683 | const union tgsi_exec_channel *p, |
||
1684 | const union tgsi_exec_channel *c0, |
||
1685 | const union tgsi_exec_channel *c1, |
||
1686 | float derivs[3][2][TGSI_QUAD_SIZE], |
||
1687 | const int8_t offset[3], |
||
1688 | enum tgsi_sampler_control control, |
||
1689 | union tgsi_exec_channel *r, |
||
1690 | union tgsi_exec_channel *g, |
||
1691 | union tgsi_exec_channel *b, |
||
1692 | union tgsi_exec_channel *a ) |
||
1693 | { |
||
1694 | uint j; |
||
1695 | float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; |
||
1696 | |||
1697 | /* FIXME: handle explicit derivs, offsets */ |
||
1698 | sampler->get_samples(sampler, sview_idx, sampler_idx, |
||
1699 | s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); |
||
1700 | |||
1701 | for (j = 0; j < 4; j++) { |
||
1702 | r->f[j] = rgba[0][j]; |
||
1703 | g->f[j] = rgba[1][j]; |
||
1704 | b->f[j] = rgba[2][j]; |
||
1705 | a->f[j] = rgba[3][j]; |
||
1706 | } |
||
1707 | } |
||
1708 | |||
1709 | |||
/* Texture instruction variants, passed as the 'modifier' of exec_tex(). */
#define TEX_MODIFIER_NONE           0  /* no extra modifier argument */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided by the modifier */
#define TEX_MODIFIER_LOD_BIAS       2  /* sampled with tgsi_sampler_lod_bias */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* sampled with tgsi_sampler_lod_explicit */
#define TEX_MODIFIER_LEVEL_ZERO     4  /* not accepted by exec_tex() */
||
1715 | |||
1716 | |||
1717 | /* |
||
1718 | * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. |
||
1719 | */ |
||
1720 | static void |
||
1721 | fetch_texel_offsets(struct tgsi_exec_machine *mach, |
||
1722 | const struct tgsi_full_instruction *inst, |
||
1723 | int8_t offsets[3]) |
||
1724 | { |
||
1725 | if (inst->Texture.NumOffsets == 1) { |
||
1726 | union tgsi_exec_channel index; |
||
1727 | union tgsi_exec_channel offset[3]; |
||
1728 | index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; |
||
1729 | fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, |
||
1730 | inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); |
||
1731 | fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, |
||
1732 | inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); |
||
1733 | fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, |
||
1734 | inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); |
||
1735 | offsets[0] = offset[0].i[0]; |
||
1736 | offsets[1] = offset[1].i[0]; |
||
1737 | offsets[2] = offset[2].i[0]; |
||
1738 | } else { |
||
1739 | assert(inst->Texture.NumOffsets == 0); |
||
1740 | offsets[0] = offsets[1] = offsets[2] = 0; |
||
1741 | } |
||
1742 | } |
||
1743 | |||
1744 | |||
1745 | /* |
||
1746 | * Fetch dx and dy values for one channel (s, t or r). |
||
1747 | * Put dx values into one float array, dy values into another. |
||
1748 | */ |
||
1749 | static void |
||
1750 | fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, |
||
1751 | const struct tgsi_full_instruction *inst, |
||
1752 | unsigned regdsrcx, |
||
1753 | unsigned chan, |
||
1754 | float derivs[2][TGSI_QUAD_SIZE]) |
||
1755 | { |
||
1756 | union tgsi_exec_channel d; |
||
1757 | FETCH(&d, regdsrcx, chan); |
||
1758 | derivs[0][0] = d.f[0]; |
||
1759 | derivs[0][1] = d.f[1]; |
||
1760 | derivs[0][2] = d.f[2]; |
||
1761 | derivs[0][3] = d.f[3]; |
||
1762 | FETCH(&d, regdsrcx + 1, chan); |
||
1763 | derivs[1][0] = d.f[0]; |
||
1764 | derivs[1][1] = d.f[1]; |
||
1765 | derivs[1][2] = d.f[2]; |
||
1766 | derivs[1][3] = d.f[3]; |
||
1767 | } |
||
1768 | |||
1769 | |||
1770 | /* |
||
1771 | * execute a texture instruction. |
||
1772 | * |
||
1773 | * modifier is used to control the channel routing for the\ |
||
1774 | * instruction variants like proj, lod, and texture with lod bias. |
||
1775 | * sampler indicates which src register the sampler is contained in. |
||
1776 | */ |
||
1777 | static void |
||
1778 | exec_tex(struct tgsi_exec_machine *mach, |
||
1779 | const struct tgsi_full_instruction *inst, |
||
1780 | uint modifier, uint sampler) |
||
1781 | { |
||
1782 | const uint unit = inst->Src[sampler].Register.Index; |
||
1783 | const union tgsi_exec_channel *args[5], *proj = NULL; |
||
1784 | union tgsi_exec_channel r[5]; |
||
1785 | enum tgsi_sampler_control control = tgsi_sampler_lod_none; |
||
1786 | uint chan; |
||
1787 | int8_t offsets[3]; |
||
1788 | int dim, shadow_ref, i; |
||
1789 | |||
1790 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
1791 | fetch_texel_offsets(mach, inst, offsets); |
||
1792 | |||
1793 | assert(modifier != TEX_MODIFIER_LEVEL_ZERO); |
||
1794 | assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); |
||
1795 | |||
1796 | dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref); |
||
1797 | |||
1798 | assert(dim <= 4); |
||
1799 | if (shadow_ref >= 0) |
||
1800 | assert(shadow_ref >= dim && shadow_ref < Elements(args)); |
||
1801 | |||
1802 | /* fetch modifier to the last argument */ |
||
1803 | if (modifier != TEX_MODIFIER_NONE) { |
||
1804 | const int last = Elements(args) - 1; |
||
1805 | |||
1806 | /* fetch modifier from src0.w or src1.x */ |
||
1807 | if (sampler == 1) { |
||
1808 | assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); |
||
1809 | FETCH(&r[last], 0, TGSI_CHAN_W); |
||
1810 | } |
||
1811 | else { |
||
1812 | assert(shadow_ref != 4); |
||
1813 | FETCH(&r[last], 1, TGSI_CHAN_X); |
||
1814 | } |
||
1815 | |||
1816 | if (modifier != TEX_MODIFIER_PROJECTED) { |
||
1817 | args[last] = &r[last]; |
||
1818 | } |
||
1819 | else { |
||
1820 | proj = &r[last]; |
||
1821 | args[last] = &ZeroVec; |
||
1822 | } |
||
1823 | |||
1824 | /* point unused arguments to zero vector */ |
||
1825 | for (i = dim; i < last; i++) |
||
1826 | args[i] = &ZeroVec; |
||
1827 | |||
1828 | if (modifier == TEX_MODIFIER_EXPLICIT_LOD) |
||
1829 | control = tgsi_sampler_lod_explicit; |
||
1830 | else if (modifier == TEX_MODIFIER_LOD_BIAS) |
||
1831 | control = tgsi_sampler_lod_bias; |
||
1832 | } |
||
1833 | else { |
||
1834 | for (i = dim; i < Elements(args); i++) |
||
1835 | args[i] = &ZeroVec; |
||
1836 | } |
||
1837 | |||
1838 | /* fetch coordinates */ |
||
1839 | for (i = 0; i < dim; i++) { |
||
1840 | FETCH(&r[i], 0, TGSI_CHAN_X + i); |
||
1841 | |||
1842 | if (proj) |
||
1843 | micro_div(&r[i], &r[i], proj); |
||
1844 | |||
1845 | args[i] = &r[i]; |
||
1846 | } |
||
1847 | |||
1848 | /* fetch reference value */ |
||
1849 | if (shadow_ref >= 0) { |
||
1850 | FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); |
||
1851 | |||
1852 | if (proj) |
||
1853 | micro_div(&r[shadow_ref], &r[shadow_ref], proj); |
||
1854 | |||
1855 | args[shadow_ref] = &r[shadow_ref]; |
||
1856 | } |
||
1857 | |||
1858 | fetch_texel(mach->Sampler, unit, unit, |
||
1859 | args[0], args[1], args[2], args[3], args[4], |
||
1860 | NULL, offsets, control, |
||
1861 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
1862 | |||
1863 | #if 0 |
||
1864 | debug_printf("fetch r: %g %g %g %g\n", |
||
1865 | r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); |
||
1866 | debug_printf("fetch g: %g %g %g %g\n", |
||
1867 | r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); |
||
1868 | debug_printf("fetch b: %g %g %g %g\n", |
||
1869 | r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); |
||
1870 | debug_printf("fetch a: %g %g %g %g\n", |
||
1871 | r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); |
||
1872 | #endif |
||
1873 | |||
1874 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
1875 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
1876 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
1877 | } |
||
1878 | } |
||
1879 | } |
||
1880 | |||
1881 | |||
1882 | static void |
||
1883 | exec_txd(struct tgsi_exec_machine *mach, |
||
1884 | const struct tgsi_full_instruction *inst) |
||
1885 | { |
||
1886 | const uint unit = inst->Src[3].Register.Index; |
||
1887 | union tgsi_exec_channel r[4]; |
||
1888 | float derivs[3][2][TGSI_QUAD_SIZE]; |
||
1889 | uint chan; |
||
1890 | int8_t offsets[3]; |
||
1891 | |||
1892 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
1893 | fetch_texel_offsets(mach, inst, offsets); |
||
1894 | |||
1895 | switch (inst->Texture.Texture) { |
||
1896 | case TGSI_TEXTURE_1D: |
||
1897 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1898 | |||
1899 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1900 | |||
1901 | fetch_texel(mach->Sampler, unit, unit, |
||
1902 | &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ |
||
1903 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1904 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
1905 | break; |
||
1906 | |||
1907 | case TGSI_TEXTURE_SHADOW1D: |
||
1908 | case TGSI_TEXTURE_1D_ARRAY: |
||
1909 | case TGSI_TEXTURE_SHADOW1D_ARRAY: |
||
1910 | /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ |
||
1911 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1912 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
1913 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
1914 | |||
1915 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1916 | |||
1917 | fetch_texel(mach->Sampler, unit, unit, |
||
1918 | &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ |
||
1919 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1920 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
1921 | break; |
||
1922 | |||
1923 | case TGSI_TEXTURE_2D: |
||
1924 | case TGSI_TEXTURE_RECT: |
||
1925 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1926 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
1927 | |||
1928 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1929 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); |
||
1930 | |||
1931 | fetch_texel(mach->Sampler, unit, unit, |
||
1932 | &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ |
||
1933 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1934 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
1935 | break; |
||
1936 | |||
1937 | |||
1938 | case TGSI_TEXTURE_SHADOW2D: |
||
1939 | case TGSI_TEXTURE_SHADOWRECT: |
||
1940 | case TGSI_TEXTURE_2D_ARRAY: |
||
1941 | case TGSI_TEXTURE_SHADOW2D_ARRAY: |
||
1942 | /* only SHADOW2D_ARRAY actually needs W */ |
||
1943 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1944 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
1945 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
1946 | FETCH(&r[3], 0, TGSI_CHAN_W); |
||
1947 | |||
1948 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1949 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); |
||
1950 | |||
1951 | fetch_texel(mach->Sampler, unit, unit, |
||
1952 | &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ |
||
1953 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1954 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
1955 | break; |
||
1956 | |||
1957 | case TGSI_TEXTURE_3D: |
||
1958 | case TGSI_TEXTURE_CUBE: |
||
1959 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
1960 | /* only TEXTURE_CUBE_ARRAY actually needs W */ |
||
1961 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1962 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
1963 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
1964 | FETCH(&r[3], 0, TGSI_CHAN_W); |
||
1965 | |||
1966 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1967 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); |
||
1968 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); |
||
1969 | |||
1970 | fetch_texel(mach->Sampler, unit, unit, |
||
1971 | &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ |
||
1972 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1973 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
1974 | break; |
||
1975 | |||
1976 | default: |
||
1977 | assert(0); |
||
1978 | } |
||
1979 | |||
1980 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
1981 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
1982 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
1983 | } |
||
1984 | } |
||
1985 | } |
||
1986 | |||
1987 | |||
1988 | static void |
||
1989 | exec_txf(struct tgsi_exec_machine *mach, |
||
1990 | const struct tgsi_full_instruction *inst) |
||
1991 | { |
||
1992 | const uint unit = inst->Src[1].Register.Index; |
||
1993 | union tgsi_exec_channel r[4]; |
||
1994 | uint chan; |
||
1995 | float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; |
||
1996 | int j; |
||
1997 | int8_t offsets[3]; |
||
1998 | unsigned target; |
||
1999 | |||
2000 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
2001 | fetch_texel_offsets(mach, inst, offsets); |
||
2002 | |||
2003 | IFETCH(&r[3], 0, TGSI_CHAN_W); |
||
2004 | |||
2005 | if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { |
||
2006 | target = mach->SamplerViews[unit].Resource; |
||
2007 | } |
||
2008 | else { |
||
2009 | target = inst->Texture.Texture; |
||
2010 | } |
||
2011 | switch(target) { |
||
2012 | case TGSI_TEXTURE_3D: |
||
2013 | case TGSI_TEXTURE_2D_ARRAY: |
||
2014 | case TGSI_TEXTURE_SHADOW2D_ARRAY: |
||
2015 | IFETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2016 | /* fallthrough */ |
||
2017 | case TGSI_TEXTURE_2D: |
||
2018 | case TGSI_TEXTURE_RECT: |
||
2019 | case TGSI_TEXTURE_SHADOW1D_ARRAY: |
||
2020 | case TGSI_TEXTURE_SHADOW2D: |
||
2021 | case TGSI_TEXTURE_SHADOWRECT: |
||
2022 | case TGSI_TEXTURE_1D_ARRAY: |
||
2023 | IFETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2024 | /* fallthrough */ |
||
2025 | case TGSI_TEXTURE_BUFFER: |
||
2026 | case TGSI_TEXTURE_1D: |
||
2027 | case TGSI_TEXTURE_SHADOW1D: |
||
2028 | IFETCH(&r[0], 0, TGSI_CHAN_X); |
||
2029 | break; |
||
2030 | default: |
||
2031 | assert(0); |
||
2032 | break; |
||
2033 | } |
||
2034 | |||
2035 | mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, |
||
2036 | offsets, rgba); |
||
2037 | |||
2038 | for (j = 0; j < TGSI_QUAD_SIZE; j++) { |
||
2039 | r[0].f[j] = rgba[0][j]; |
||
2040 | r[1].f[j] = rgba[1][j]; |
||
2041 | r[2].f[j] = rgba[2][j]; |
||
2042 | r[3].f[j] = rgba[3][j]; |
||
2043 | } |
||
2044 | |||
2045 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2046 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2047 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2048 | } |
||
2049 | } |
||
2050 | } |
||
2051 | |||
2052 | static void |
||
2053 | exec_txq(struct tgsi_exec_machine *mach, |
||
2054 | const struct tgsi_full_instruction *inst) |
||
2055 | { |
||
2056 | const uint unit = inst->Src[1].Register.Index; |
||
2057 | int result[4]; |
||
2058 | union tgsi_exec_channel r[4], src; |
||
2059 | uint chan; |
||
2060 | int i,j; |
||
2061 | |||
2062 | fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); |
||
2063 | |||
2064 | mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); |
||
2065 | |||
2066 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
2067 | for (j = 0; j < 4; j++) { |
||
2068 | r[j].i[i] = result[j]; |
||
2069 | } |
||
2070 | } |
||
2071 | |||
2072 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2073 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2074 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, |
||
2075 | TGSI_EXEC_DATA_INT); |
||
2076 | } |
||
2077 | } |
||
2078 | } |
||
2079 | |||
2080 | static void |
||
2081 | exec_sample(struct tgsi_exec_machine *mach, |
||
2082 | const struct tgsi_full_instruction *inst, |
||
2083 | uint modifier, boolean compare) |
||
2084 | { |
||
2085 | const uint resource_unit = inst->Src[1].Register.Index; |
||
2086 | const uint sampler_unit = inst->Src[2].Register.Index; |
||
2087 | union tgsi_exec_channel r[4], c1; |
||
2088 | const union tgsi_exec_channel *lod = &ZeroVec; |
||
2089 | enum tgsi_sampler_control control = tgsi_sampler_lod_none; |
||
2090 | uint chan; |
||
2091 | int8_t offsets[3]; |
||
2092 | |||
2093 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
2094 | fetch_texel_offsets(mach, inst, offsets); |
||
2095 | |||
2096 | assert(modifier != TEX_MODIFIER_PROJECTED); |
||
2097 | |||
2098 | if (modifier != TEX_MODIFIER_NONE) { |
||
2099 | if (modifier == TEX_MODIFIER_LOD_BIAS) { |
||
2100 | FETCH(&c1, 3, TGSI_CHAN_X); |
||
2101 | lod = &c1; |
||
2102 | control = tgsi_sampler_lod_bias; |
||
2103 | } |
||
2104 | else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { |
||
2105 | FETCH(&c1, 3, TGSI_CHAN_X); |
||
2106 | lod = &c1; |
||
2107 | control = tgsi_sampler_lod_explicit; |
||
2108 | } |
||
2109 | else { |
||
2110 | assert(modifier == TEX_MODIFIER_LEVEL_ZERO); |
||
2111 | control = tgsi_sampler_lod_zero; |
||
2112 | } |
||
2113 | } |
||
2114 | |||
2115 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
2116 | |||
2117 | switch (mach->SamplerViews[resource_unit].Resource) { |
||
2118 | case TGSI_TEXTURE_1D: |
||
2119 | if (compare) { |
||
2120 | FETCH(&r[2], 3, TGSI_CHAN_X); |
||
2121 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2122 | &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ |
||
2123 | NULL, offsets, control, |
||
2124 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
2125 | } |
||
2126 | else { |
||
2127 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2128 | &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ |
||
2129 | NULL, offsets, control, |
||
2130 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
2131 | } |
||
2132 | break; |
||
2133 | |||
2134 | case TGSI_TEXTURE_1D_ARRAY: |
||
2135 | case TGSI_TEXTURE_2D: |
||
2136 | case TGSI_TEXTURE_RECT: |
||
2137 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2138 | if (compare) { |
||
2139 | FETCH(&r[2], 3, TGSI_CHAN_X); |
||
2140 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2141 | &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ |
||
2142 | NULL, offsets, control, |
||
2143 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
2144 | } |
||
2145 | else { |
||
2146 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2147 | &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ |
||
2148 | NULL, offsets, control, |
||
2149 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
2150 | } |
||
2151 | break; |
||
2152 | |||
2153 | case TGSI_TEXTURE_2D_ARRAY: |
||
2154 | case TGSI_TEXTURE_3D: |
||
2155 | case TGSI_TEXTURE_CUBE: |
||
2156 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2157 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2158 | if(compare) { |
||
2159 | FETCH(&r[3], 3, TGSI_CHAN_X); |
||
2160 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2161 | &r[0], &r[1], &r[2], &r[3], lod, |
||
2162 | NULL, offsets, control, |
||
2163 | &r[0], &r[1], &r[2], &r[3]); |
||
2164 | } |
||
2165 | else { |
||
2166 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2167 | &r[0], &r[1], &r[2], &ZeroVec, lod, |
||
2168 | NULL, offsets, control, |
||
2169 | &r[0], &r[1], &r[2], &r[3]); |
||
2170 | } |
||
2171 | break; |
||
2172 | |||
2173 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
2174 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2175 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2176 | FETCH(&r[3], 0, TGSI_CHAN_W); |
||
2177 | if(compare) { |
||
2178 | FETCH(&r[4], 3, TGSI_CHAN_X); |
||
2179 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2180 | &r[0], &r[1], &r[2], &r[3], &r[4], |
||
2181 | NULL, offsets, control, |
||
2182 | &r[0], &r[1], &r[2], &r[3]); |
||
2183 | } |
||
2184 | else { |
||
2185 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2186 | &r[0], &r[1], &r[2], &r[3], lod, |
||
2187 | NULL, offsets, control, |
||
2188 | &r[0], &r[1], &r[2], &r[3]); |
||
2189 | } |
||
2190 | break; |
||
2191 | |||
2192 | |||
2193 | default: |
||
2194 | assert(0); |
||
2195 | } |
||
2196 | |||
2197 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2198 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2199 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2200 | } |
||
2201 | } |
||
2202 | } |
||
2203 | |||
2204 | static void |
||
2205 | exec_sample_d(struct tgsi_exec_machine *mach, |
||
2206 | const struct tgsi_full_instruction *inst) |
||
2207 | { |
||
2208 | const uint resource_unit = inst->Src[1].Register.Index; |
||
2209 | const uint sampler_unit = inst->Src[2].Register.Index; |
||
2210 | union tgsi_exec_channel r[4]; |
||
2211 | float derivs[3][2][TGSI_QUAD_SIZE]; |
||
2212 | uint chan; |
||
2213 | int8_t offsets[3]; |
||
2214 | |||
2215 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
2216 | fetch_texel_offsets(mach, inst, offsets); |
||
2217 | |||
2218 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
2219 | |||
2220 | switch (mach->SamplerViews[resource_unit].Resource) { |
||
2221 | case TGSI_TEXTURE_1D: |
||
2222 | case TGSI_TEXTURE_1D_ARRAY: |
||
2223 | /* only 1D array actually needs Y */ |
||
2224 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2225 | |||
2226 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); |
||
2227 | |||
2228 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2229 | &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ |
||
2230 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
2231 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
2232 | break; |
||
2233 | |||
2234 | case TGSI_TEXTURE_2D: |
||
2235 | case TGSI_TEXTURE_RECT: |
||
2236 | case TGSI_TEXTURE_2D_ARRAY: |
||
2237 | /* only 2D array actually needs Z */ |
||
2238 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2239 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2240 | |||
2241 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); |
||
2242 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); |
||
2243 | |||
2244 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2245 | &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ |
||
2246 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
2247 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
2248 | break; |
||
2249 | |||
2250 | case TGSI_TEXTURE_3D: |
||
2251 | case TGSI_TEXTURE_CUBE: |
||
2252 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
2253 | /* only cube array actually needs W */ |
||
2254 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2255 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2256 | FETCH(&r[3], 0, TGSI_CHAN_W); |
||
2257 | |||
2258 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); |
||
2259 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); |
||
2260 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); |
||
2261 | |||
2262 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2263 | &r[0], &r[1], &r[2], &r[3], &ZeroVec, |
||
2264 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
2265 | &r[0], &r[1], &r[2], &r[3]); |
||
2266 | break; |
||
2267 | |||
2268 | default: |
||
2269 | assert(0); |
||
2270 | } |
||
2271 | |||
2272 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2273 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2274 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2275 | } |
||
2276 | } |
||
2277 | } |
||
2278 | |||
2279 | |||
2280 | /** |
||
2281 | * Evaluate a constant-valued coefficient at the position of the |
||
2282 | * current quad. |
||
2283 | */ |
||
2284 | static void |
||
2285 | eval_constant_coef( |
||
2286 | struct tgsi_exec_machine *mach, |
||
2287 | unsigned attrib, |
||
2288 | unsigned chan ) |
||
2289 | { |
||
2290 | unsigned i; |
||
2291 | |||
2292 | for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { |
||
2293 | mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; |
||
2294 | } |
||
2295 | } |
||
2296 | |||
2297 | /** |
||
2298 | * Evaluate a linear-valued coefficient at the position of the |
||
2299 | * current quad. |
||
2300 | */ |
||
2301 | static void |
||
2302 | eval_linear_coef( |
||
2303 | struct tgsi_exec_machine *mach, |
||
2304 | unsigned attrib, |
||
2305 | unsigned chan ) |
||
2306 | { |
||
2307 | const float x = mach->QuadPos.xyzw[0].f[0]; |
||
2308 | const float y = mach->QuadPos.xyzw[1].f[0]; |
||
2309 | const float dadx = mach->InterpCoefs[attrib].dadx[chan]; |
||
2310 | const float dady = mach->InterpCoefs[attrib].dady[chan]; |
||
2311 | const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; |
||
2312 | mach->Inputs[attrib].xyzw[chan].f[0] = a0; |
||
2313 | mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; |
||
2314 | mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; |
||
2315 | mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; |
||
2316 | } |
||
2317 | |||
2318 | /** |
||
2319 | * Evaluate a perspective-valued coefficient at the position of the |
||
2320 | * current quad. |
||
2321 | */ |
||
2322 | static void |
||
2323 | eval_perspective_coef( |
||
2324 | struct tgsi_exec_machine *mach, |
||
2325 | unsigned attrib, |
||
2326 | unsigned chan ) |
||
2327 | { |
||
2328 | const float x = mach->QuadPos.xyzw[0].f[0]; |
||
2329 | const float y = mach->QuadPos.xyzw[1].f[0]; |
||
2330 | const float dadx = mach->InterpCoefs[attrib].dadx[chan]; |
||
2331 | const float dady = mach->InterpCoefs[attrib].dady[chan]; |
||
2332 | const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; |
||
2333 | const float *w = mach->QuadPos.xyzw[3].f; |
||
2334 | /* divide by W here */ |
||
2335 | mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; |
||
2336 | mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; |
||
2337 | mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; |
||
2338 | mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; |
||
2339 | } |
||
2340 | |||
2341 | |||
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * 'chan' of input attribute 'attrib' for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
||
2346 | |||
2347 | static void |
||
2348 | exec_declaration(struct tgsi_exec_machine *mach, |
||
2349 | const struct tgsi_full_declaration *decl) |
||
2350 | { |
||
2351 | if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { |
||
2352 | mach->SamplerViews[decl->Range.First] = decl->SamplerView; |
||
2353 | return; |
||
2354 | } |
||
2355 | |||
2356 | if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { |
||
2357 | if (decl->Declaration.File == TGSI_FILE_INPUT) { |
||
2358 | uint first, last, mask; |
||
2359 | |||
2360 | first = decl->Range.First; |
||
2361 | last = decl->Range.Last; |
||
2362 | mask = decl->Declaration.UsageMask; |
||
2363 | |||
2364 | /* XXX we could remove this special-case code since |
||
2365 | * mach->InterpCoefs[first].a0 should already have the |
||
2366 | * front/back-face value. But we should first update the |
||
2367 | * ureg code to emit the right UsageMask value (WRITEMASK_X). |
||
2368 | * Then, we could remove the tgsi_exec_machine::Face field. |
||
2369 | */ |
||
2370 | /* XXX make FACE a system value */ |
||
2371 | if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { |
||
2372 | uint i; |
||
2373 | |||
2374 | assert(decl->Semantic.Index == 0); |
||
2375 | assert(first == last); |
||
2376 | |||
2377 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
2378 | mach->Inputs[first].xyzw[0].f[i] = mach->Face; |
||
2379 | } |
||
2380 | } else { |
||
2381 | eval_coef_func eval; |
||
2382 | uint i, j; |
||
2383 | |||
2384 | switch (decl->Interp.Interpolate) { |
||
2385 | case TGSI_INTERPOLATE_CONSTANT: |
||
2386 | eval = eval_constant_coef; |
||
2387 | break; |
||
2388 | |||
2389 | case TGSI_INTERPOLATE_LINEAR: |
||
2390 | eval = eval_linear_coef; |
||
2391 | break; |
||
2392 | |||
2393 | case TGSI_INTERPOLATE_PERSPECTIVE: |
||
2394 | eval = eval_perspective_coef; |
||
2395 | break; |
||
2396 | |||
2397 | case TGSI_INTERPOLATE_COLOR: |
||
2398 | eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; |
||
2399 | break; |
||
2400 | |||
2401 | default: |
||
2402 | assert(0); |
||
2403 | return; |
||
2404 | } |
||
2405 | |||
2406 | for (j = 0; j < TGSI_NUM_CHANNELS; j++) { |
||
2407 | if (mask & (1 << j)) { |
||
2408 | for (i = first; i <= last; i++) { |
||
2409 | eval(mach, i, j); |
||
2410 | } |
||
2411 | } |
||
2412 | } |
||
2413 | } |
||
2414 | |||
2415 | if (DEBUG_EXECUTION) { |
||
2416 | uint i, j; |
||
2417 | for (i = first; i <= last; ++i) { |
||
2418 | debug_printf("IN[%2u] = ", i); |
||
2419 | for (j = 0; j < TGSI_NUM_CHANNELS; j++) { |
||
2420 | if (j > 0) { |
||
2421 | debug_printf(" "); |
||
2422 | } |
||
2423 | debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", |
||
2424 | mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], |
||
2425 | mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], |
||
2426 | mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], |
||
2427 | mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); |
||
2428 | } |
||
2429 | } |
||
2430 | } |
||
2431 | } |
||
2432 | } |
||
2433 | |||
2434 | if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { |
||
2435 | mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; |
||
2436 | } |
||
2437 | } |
||
2438 | |||
2439 | |||
/** Micro operation that takes no source operands and only produces dst. */
typedef void (*micro_op)(union tgsi_exec_channel *dst);
||
2441 | |||
2442 | static void |
||
2443 | exec_vector(struct tgsi_exec_machine *mach, |
||
2444 | const struct tgsi_full_instruction *inst, |
||
2445 | micro_op op, |
||
2446 | enum tgsi_exec_datatype dst_datatype) |
||
2447 | { |
||
2448 | unsigned int chan; |
||
2449 | |||
2450 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2451 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2452 | union tgsi_exec_channel dst; |
||
2453 | |||
2454 | op(&dst); |
||
2455 | store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); |
||
2456 | } |
||
2457 | } |
||
2458 | } |
||
2459 | |||
/** Micro operation computing dst from a single source operand. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
||
2462 | |||
2463 | static void |
||
2464 | exec_scalar_unary(struct tgsi_exec_machine *mach, |
||
2465 | const struct tgsi_full_instruction *inst, |
||
2466 | micro_unary_op op, |
||
2467 | enum tgsi_exec_datatype dst_datatype, |
||
2468 | enum tgsi_exec_datatype src_datatype) |
||
2469 | { |
||
2470 | unsigned int chan; |
||
2471 | union tgsi_exec_channel src; |
||
2472 | union tgsi_exec_channel dst; |
||
2473 | |||
2474 | fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); |
||
2475 | op(&dst, &src); |
||
2476 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2477 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2478 | store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); |
||
2479 | } |
||
2480 | } |
||
2481 | } |
||
2482 | |||
2483 | static void |
||
2484 | exec_vector_unary(struct tgsi_exec_machine *mach, |
||
2485 | const struct tgsi_full_instruction *inst, |
||
2486 | micro_unary_op op, |
||
2487 | enum tgsi_exec_datatype dst_datatype, |
||
2488 | enum tgsi_exec_datatype src_datatype) |
||
2489 | { |
||
2490 | unsigned int chan; |
||
2491 | struct tgsi_exec_vector dst; |
||
2492 | |||
2493 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2494 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2495 | union tgsi_exec_channel src; |
||
2496 | |||
2497 | fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); |
||
2498 | op(&dst.xyzw[chan], &src); |
||
2499 | } |
||
2500 | } |
||
2501 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2502 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2503 | store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); |
||
2504 | } |
||
2505 | } |
||
2506 | } |
||
2507 | |||
/** Micro operation computing dst from two source operands. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
||
2511 | |||
2512 | static void |
||
2513 | exec_scalar_binary(struct tgsi_exec_machine *mach, |
||
2514 | const struct tgsi_full_instruction *inst, |
||
2515 | micro_binary_op op, |
||
2516 | enum tgsi_exec_datatype dst_datatype, |
||
2517 | enum tgsi_exec_datatype src_datatype) |
||
2518 | { |
||
2519 | unsigned int chan; |
||
2520 | union tgsi_exec_channel src[2]; |
||
2521 | union tgsi_exec_channel dst; |
||
2522 | |||
2523 | fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); |
||
2524 | fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_Y, src_datatype); |
||
2525 | op(&dst, &src[0], &src[1]); |
||
2526 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2527 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2528 | store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); |
||
2529 | } |
||
2530 | } |
||
2531 | } |
||
2532 | |||
2533 | static void |
||
2534 | exec_vector_binary(struct tgsi_exec_machine *mach, |
||
2535 | const struct tgsi_full_instruction *inst, |
||
2536 | micro_binary_op op, |
||
2537 | enum tgsi_exec_datatype dst_datatype, |
||
2538 | enum tgsi_exec_datatype src_datatype) |
||
2539 | { |
||
2540 | unsigned int chan; |
||
2541 | struct tgsi_exec_vector dst; |
||
2542 | |||
2543 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2544 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2545 | union tgsi_exec_channel src[2]; |
||
2546 | |||
2547 | fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); |
||
2548 | fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); |
||
2549 | op(&dst.xyzw[chan], &src[0], &src[1]); |
||
2550 | } |
||
2551 | } |
||
2552 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2553 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2554 | store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); |
||
2555 | } |
||
2556 | } |
||
2557 | } |
||
2558 | |||
/** Micro operation computing dst from three source operands. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
||
2563 | |||
2564 | static void |
||
2565 | exec_vector_trinary(struct tgsi_exec_machine *mach, |
||
2566 | const struct tgsi_full_instruction *inst, |
||
2567 | micro_trinary_op op, |
||
2568 | enum tgsi_exec_datatype dst_datatype, |
||
2569 | enum tgsi_exec_datatype src_datatype) |
||
2570 | { |
||
2571 | unsigned int chan; |
||
2572 | struct tgsi_exec_vector dst; |
||
2573 | |||
2574 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2575 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2576 | union tgsi_exec_channel src[3]; |
||
2577 | |||
2578 | fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); |
||
2579 | fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); |
||
2580 | fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); |
||
2581 | op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); |
||
2582 | } |
||
2583 | } |
||
2584 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2585 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2586 | store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); |
||
2587 | } |
||
2588 | } |
||
2589 | } |
||
2590 | |||
2591 | static void |
||
2592 | exec_dp3(struct tgsi_exec_machine *mach, |
||
2593 | const struct tgsi_full_instruction *inst) |
||
2594 | { |
||
2595 | unsigned int chan; |
||
2596 | union tgsi_exec_channel arg[3]; |
||
2597 | |||
2598 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2599 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2600 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2601 | |||
2602 | for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { |
||
2603 | fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); |
||
2604 | fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); |
||
2605 | micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); |
||
2606 | } |
||
2607 | |||
2608 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2609 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2610 | store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2611 | } |
||
2612 | } |
||
2613 | } |
||
2614 | |||
2615 | static void |
||
2616 | exec_dp4(struct tgsi_exec_machine *mach, |
||
2617 | const struct tgsi_full_instruction *inst) |
||
2618 | { |
||
2619 | unsigned int chan; |
||
2620 | union tgsi_exec_channel arg[3]; |
||
2621 | |||
2622 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2623 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2624 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2625 | |||
2626 | for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { |
||
2627 | fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); |
||
2628 | fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); |
||
2629 | micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); |
||
2630 | } |
||
2631 | |||
2632 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2633 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2634 | store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2635 | } |
||
2636 | } |
||
2637 | } |
||
2638 | |||
2639 | static void |
||
2640 | exec_dp2a(struct tgsi_exec_machine *mach, |
||
2641 | const struct tgsi_full_instruction *inst) |
||
2642 | { |
||
2643 | unsigned int chan; |
||
2644 | union tgsi_exec_channel arg[3]; |
||
2645 | |||
2646 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2647 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2648 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2649 | |||
2650 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2651 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2652 | micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); |
||
2653 | |||
2654 | fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2655 | micro_add(&arg[0], &arg[0], &arg[1]); |
||
2656 | |||
2657 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2658 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2659 | store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2660 | } |
||
2661 | } |
||
2662 | } |
||
2663 | |||
2664 | static void |
||
2665 | exec_dph(struct tgsi_exec_machine *mach, |
||
2666 | const struct tgsi_full_instruction *inst) |
||
2667 | { |
||
2668 | unsigned int chan; |
||
2669 | union tgsi_exec_channel arg[3]; |
||
2670 | |||
2671 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2672 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2673 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2674 | |||
2675 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2676 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2677 | micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); |
||
2678 | |||
2679 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2680 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2681 | micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); |
||
2682 | |||
2683 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2684 | micro_add(&arg[0], &arg[0], &arg[1]); |
||
2685 | |||
2686 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2687 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2688 | store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2689 | } |
||
2690 | } |
||
2691 | } |
||
2692 | |||
2693 | static void |
||
2694 | exec_dp2(struct tgsi_exec_machine *mach, |
||
2695 | const struct tgsi_full_instruction *inst) |
||
2696 | { |
||
2697 | unsigned int chan; |
||
2698 | union tgsi_exec_channel arg[3]; |
||
2699 | |||
2700 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2701 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2702 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2703 | |||
2704 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2705 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2706 | micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); |
||
2707 | |||
2708 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2709 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2710 | store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2711 | } |
||
2712 | } |
||
2713 | } |
||
2714 | |||
2715 | static void |
||
2716 | exec_nrm4(struct tgsi_exec_machine *mach, |
||
2717 | const struct tgsi_full_instruction *inst) |
||
2718 | { |
||
2719 | unsigned int chan; |
||
2720 | union tgsi_exec_channel arg[4]; |
||
2721 | union tgsi_exec_channel scale; |
||
2722 | |||
2723 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2724 | micro_mul(&scale, &arg[0], &arg[0]); |
||
2725 | |||
2726 | for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { |
||
2727 | union tgsi_exec_channel product; |
||
2728 | |||
2729 | fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); |
||
2730 | micro_mul(&product, &arg[chan], &arg[chan]); |
||
2731 | micro_add(&scale, &scale, &product); |
||
2732 | } |
||
2733 | |||
2734 | micro_rsq(&scale, &scale); |
||
2735 | |||
2736 | for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_W; chan++) { |
||
2737 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2738 | micro_mul(&arg[chan], &arg[chan], &scale); |
||
2739 | store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2740 | } |
||
2741 | } |
||
2742 | } |
||
2743 | |||
2744 | static void |
||
2745 | exec_nrm3(struct tgsi_exec_machine *mach, |
||
2746 | const struct tgsi_full_instruction *inst) |
||
2747 | { |
||
2748 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { |
||
2749 | unsigned int chan; |
||
2750 | union tgsi_exec_channel arg[3]; |
||
2751 | union tgsi_exec_channel scale; |
||
2752 | |||
2753 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2754 | micro_mul(&scale, &arg[0], &arg[0]); |
||
2755 | |||
2756 | for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { |
||
2757 | union tgsi_exec_channel product; |
||
2758 | |||
2759 | fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); |
||
2760 | micro_mul(&product, &arg[chan], &arg[chan]); |
||
2761 | micro_add(&scale, &scale, &product); |
||
2762 | } |
||
2763 | |||
2764 | micro_rsq(&scale, &scale); |
||
2765 | |||
2766 | for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_Z; chan++) { |
||
2767 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2768 | micro_mul(&arg[chan], &arg[chan], &scale); |
||
2769 | store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2770 | } |
||
2771 | } |
||
2772 | } |
||
2773 | |||
2774 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2775 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2776 | } |
||
2777 | } |
||
2778 | |||
2779 | static void |
||
2780 | exec_scs(struct tgsi_exec_machine *mach, |
||
2781 | const struct tgsi_full_instruction *inst) |
||
2782 | { |
||
2783 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { |
||
2784 | union tgsi_exec_channel arg; |
||
2785 | union tgsi_exec_channel result; |
||
2786 | |||
2787 | fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2788 | |||
2789 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2790 | micro_cos(&result, &arg); |
||
2791 | store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2792 | } |
||
2793 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2794 | micro_sin(&result, &arg); |
||
2795 | store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2796 | } |
||
2797 | } |
||
2798 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2799 | store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2800 | } |
||
2801 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2802 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2803 | } |
||
2804 | } |
||
2805 | |||
2806 | static void |
||
2807 | exec_x2d(struct tgsi_exec_machine *mach, |
||
2808 | const struct tgsi_full_instruction *inst) |
||
2809 | { |
||
2810 | union tgsi_exec_channel r[4]; |
||
2811 | union tgsi_exec_channel d[2]; |
||
2812 | |||
2813 | fetch_source(mach, &r[0], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2814 | fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2815 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) { |
||
2816 | fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2817 | micro_mul(&r[2], &r[2], &r[0]); |
||
2818 | fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2819 | micro_mul(&r[3], &r[3], &r[1]); |
||
2820 | micro_add(&r[2], &r[2], &r[3]); |
||
2821 | fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2822 | micro_add(&d[0], &r[2], &r[3]); |
||
2823 | } |
||
2824 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) { |
||
2825 | fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2826 | micro_mul(&r[2], &r[2], &r[0]); |
||
2827 | fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2828 | micro_mul(&r[3], &r[3], &r[1]); |
||
2829 | micro_add(&r[2], &r[2], &r[3]); |
||
2830 | fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2831 | micro_add(&d[1], &r[2], &r[3]); |
||
2832 | } |
||
2833 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2834 | store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2835 | } |
||
2836 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2837 | store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2838 | } |
||
2839 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2840 | store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2841 | } |
||
2842 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2843 | store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2844 | } |
||
2845 | } |
||
2846 | |||
2847 | static void |
||
2848 | exec_rfl(struct tgsi_exec_machine *mach, |
||
2849 | const struct tgsi_full_instruction *inst) |
||
2850 | { |
||
2851 | union tgsi_exec_channel r[9]; |
||
2852 | |||
2853 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { |
||
2854 | /* r0 = dp3(src0, src0) */ |
||
2855 | fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2856 | micro_mul(&r[0], &r[2], &r[2]); |
||
2857 | fetch_source(mach, &r[4], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2858 | micro_mul(&r[8], &r[4], &r[4]); |
||
2859 | micro_add(&r[0], &r[0], &r[8]); |
||
2860 | fetch_source(mach, &r[6], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2861 | micro_mul(&r[8], &r[6], &r[6]); |
||
2862 | micro_add(&r[0], &r[0], &r[8]); |
||
2863 | |||
2864 | /* r1 = dp3(src0, src1) */ |
||
2865 | fetch_source(mach, &r[3], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2866 | micro_mul(&r[1], &r[2], &r[3]); |
||
2867 | fetch_source(mach, &r[5], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2868 | micro_mul(&r[8], &r[4], &r[5]); |
||
2869 | micro_add(&r[1], &r[1], &r[8]); |
||
2870 | fetch_source(mach, &r[7], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2871 | micro_mul(&r[8], &r[6], &r[7]); |
||
2872 | micro_add(&r[1], &r[1], &r[8]); |
||
2873 | |||
2874 | /* r1 = 2 * r1 / r0 */ |
||
2875 | micro_add(&r[1], &r[1], &r[1]); |
||
2876 | micro_div(&r[1], &r[1], &r[0]); |
||
2877 | |||
2878 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2879 | micro_mul(&r[2], &r[2], &r[1]); |
||
2880 | micro_sub(&r[2], &r[2], &r[3]); |
||
2881 | store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2882 | } |
||
2883 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2884 | micro_mul(&r[4], &r[4], &r[1]); |
||
2885 | micro_sub(&r[4], &r[4], &r[5]); |
||
2886 | store_dest(mach, &r[4], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2887 | } |
||
2888 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2889 | micro_mul(&r[6], &r[6], &r[1]); |
||
2890 | micro_sub(&r[6], &r[6], &r[7]); |
||
2891 | store_dest(mach, &r[6], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2892 | } |
||
2893 | } |
||
2894 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2895 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2896 | } |
||
2897 | } |
||
2898 | |||
2899 | static void |
||
2900 | exec_xpd(struct tgsi_exec_machine *mach, |
||
2901 | const struct tgsi_full_instruction *inst) |
||
2902 | { |
||
2903 | union tgsi_exec_channel r[6]; |
||
2904 | union tgsi_exec_channel d[3]; |
||
2905 | |||
2906 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2907 | fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2908 | |||
2909 | micro_mul(&r[2], &r[0], &r[1]); |
||
2910 | |||
2911 | fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2912 | fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2913 | |||
2914 | micro_mul(&r[5], &r[3], &r[4] ); |
||
2915 | micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]); |
||
2916 | |||
2917 | fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2918 | |||
2919 | micro_mul(&r[3], &r[3], &r[2]); |
||
2920 | |||
2921 | fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2922 | |||
2923 | micro_mul(&r[1], &r[1], &r[5]); |
||
2924 | micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]); |
||
2925 | |||
2926 | micro_mul(&r[5], &r[5], &r[4]); |
||
2927 | micro_mul(&r[0], &r[0], &r[2]); |
||
2928 | micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]); |
||
2929 | |||
2930 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2931 | store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2932 | } |
||
2933 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2934 | store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2935 | } |
||
2936 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2937 | store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2938 | } |
||
2939 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2940 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2941 | } |
||
2942 | } |
||
2943 | |||
2944 | static void |
||
2945 | exec_dst(struct tgsi_exec_machine *mach, |
||
2946 | const struct tgsi_full_instruction *inst) |
||
2947 | { |
||
2948 | union tgsi_exec_channel r[2]; |
||
2949 | union tgsi_exec_channel d[4]; |
||
2950 | |||
2951 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2952 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2953 | fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2954 | micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); |
||
2955 | } |
||
2956 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2957 | fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2958 | } |
||
2959 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2960 | fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2961 | } |
||
2962 | |||
2963 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2964 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2965 | } |
||
2966 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2967 | store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2968 | } |
||
2969 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2970 | store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2971 | } |
||
2972 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2973 | store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2974 | } |
||
2975 | } |
||
2976 | |||
2977 | static void |
||
2978 | exec_log(struct tgsi_exec_machine *mach, |
||
2979 | const struct tgsi_full_instruction *inst) |
||
2980 | { |
||
2981 | union tgsi_exec_channel r[3]; |
||
2982 | |||
2983 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2984 | micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ |
||
2985 | micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ |
||
2986 | micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ |
||
2987 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2988 | store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2989 | } |
||
2990 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2991 | micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ |
||
2992 | micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ |
||
2993 | store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2994 | } |
||
2995 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2996 | store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2997 | } |
||
2998 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2999 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
3000 | } |
||
3001 | } |
||
3002 | |||
3003 | static void |
||
3004 | exec_exp(struct tgsi_exec_machine *mach, |
||
3005 | const struct tgsi_full_instruction *inst) |
||
3006 | { |
||
3007 | union tgsi_exec_channel r[3]; |
||
3008 | |||
3009 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
3010 | micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ |
||
3011 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
3012 | micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ |
||
3013 | store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
3014 | } |
||
3015 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
3016 | micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ |
||
3017 | store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
3018 | } |
||
3019 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
3020 | micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ |
||
3021 | store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
3022 | } |
||
3023 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
3024 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
3025 | } |
||
3026 | } |
||
3027 | |||
3028 | static void |
||
3029 | exec_lit(struct tgsi_exec_machine *mach, |
||
3030 | const struct tgsi_full_instruction *inst) |
||
3031 | { |
||
3032 | union tgsi_exec_channel r[3]; |
||
3033 | union tgsi_exec_channel d[3]; |
||
3034 | |||
3035 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { |
||
3036 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
3037 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
3038 | fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
3039 | micro_max(&r[1], &r[1], &ZeroVec); |
||
3040 | |||
3041 | fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
3042 | micro_min(&r[2], &r[2], &P128Vec); |
||
3043 | micro_max(&r[2], &r[2], &M128Vec); |
||
3044 | micro_pow(&r[1], &r[1], &r[2]); |
||
3045 | micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); |
||
3046 | store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
3047 | } |
||
3048 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
3049 | micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); |
||
3050 | store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
3051 | } |
||
3052 | } |
||
3053 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
3054 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
3055 | } |
||
3056 | |||
3057 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
3058 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
3059 | } |
||
3060 | } |
||
3061 | |||
3062 | static void |
||
3063 | exec_break(struct tgsi_exec_machine *mach) |
||
3064 | { |
||
3065 | if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { |
||
3066 | /* turn off loop channels for each enabled exec channel */ |
||
3067 | mach->LoopMask &= ~mach->ExecMask; |
||
3068 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ |
||
3069 | UPDATE_EXEC_MASK(mach); |
||
3070 | } else { |
||
3071 | assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); |
||
3072 | |||
3073 | mach->Switch.mask = 0x0; |
||
3074 | |||
3075 | UPDATE_EXEC_MASK(mach); |
||
3076 | } |
||
3077 | } |
||
3078 | |||
3079 | static void |
||
3080 | exec_switch(struct tgsi_exec_machine *mach, |
||
3081 | const struct tgsi_full_instruction *inst) |
||
3082 | { |
||
3083 | assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); |
||
3084 | assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); |
||
3085 | |||
3086 | mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; |
||
3087 | fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); |
||
3088 | mach->Switch.mask = 0x0; |
||
3089 | mach->Switch.defaultMask = 0x0; |
||
3090 | |||
3091 | mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; |
||
3092 | mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; |
||
3093 | |||
3094 | UPDATE_EXEC_MASK(mach); |
||
3095 | } |
||
3096 | |||
3097 | static void |
||
3098 | exec_case(struct tgsi_exec_machine *mach, |
||
3099 | const struct tgsi_full_instruction *inst) |
||
3100 | { |
||
3101 | uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; |
||
3102 | union tgsi_exec_channel src; |
||
3103 | uint mask = 0; |
||
3104 | |||
3105 | fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); |
||
3106 | |||
3107 | if (mach->Switch.selector.u[0] == src.u[0]) { |
||
3108 | mask |= 0x1; |
||
3109 | } |
||
3110 | if (mach->Switch.selector.u[1] == src.u[1]) { |
||
3111 | mask |= 0x2; |
||
3112 | } |
||
3113 | if (mach->Switch.selector.u[2] == src.u[2]) { |
||
3114 | mask |= 0x4; |
||
3115 | } |
||
3116 | if (mach->Switch.selector.u[3] == src.u[3]) { |
||
3117 | mask |= 0x8; |
||
3118 | } |
||
3119 | |||
3120 | mach->Switch.defaultMask |= mask; |
||
3121 | |||
3122 | mach->Switch.mask |= mask & prevMask; |
||
3123 | |||
3124 | UPDATE_EXEC_MASK(mach); |
||
3125 | } |
||
3126 | |||
3127 | /* FIXME: this will only work if default is last */ |
||
3128 | static void |
||
3129 | exec_default(struct tgsi_exec_machine *mach) |
||
3130 | { |
||
3131 | uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; |
||
3132 | |||
3133 | mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; |
||
3134 | |||
3135 | UPDATE_EXEC_MASK(mach); |
||
3136 | } |
||
3137 | |||
3138 | static void |
||
3139 | exec_endswitch(struct tgsi_exec_machine *mach) |
||
3140 | { |
||
3141 | mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; |
||
3142 | mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; |
||
3143 | |||
3144 | UPDATE_EXEC_MASK(mach); |
||
3145 | } |
||
3146 | |||
3147 | static void |
||
3148 | micro_i2f(union tgsi_exec_channel *dst, |
||
3149 | const union tgsi_exec_channel *src) |
||
3150 | { |
||
3151 | dst->f[0] = (float)src->i[0]; |
||
3152 | dst->f[1] = (float)src->i[1]; |
||
3153 | dst->f[2] = (float)src->i[2]; |
||
3154 | dst->f[3] = (float)src->i[3]; |
||
3155 | } |
||
3156 | |||
3157 | static void |
||
3158 | micro_not(union tgsi_exec_channel *dst, |
||
3159 | const union tgsi_exec_channel *src) |
||
3160 | { |
||
3161 | dst->u[0] = ~src->u[0]; |
||
3162 | dst->u[1] = ~src->u[1]; |
||
3163 | dst->u[2] = ~src->u[2]; |
||
3164 | dst->u[3] = ~src->u[3]; |
||
3165 | } |
||
3166 | |||
3167 | static void |
||
3168 | micro_shl(union tgsi_exec_channel *dst, |
||
3169 | const union tgsi_exec_channel *src0, |
||
3170 | const union tgsi_exec_channel *src1) |
||
3171 | { |
||
3172 | dst->u[0] = src0->u[0] << src1->u[0]; |
||
3173 | dst->u[1] = src0->u[1] << src1->u[1]; |
||
3174 | dst->u[2] = src0->u[2] << src1->u[2]; |
||
3175 | dst->u[3] = src0->u[3] << src1->u[3]; |
||
3176 | } |
||
3177 | |||
3178 | static void |
||
3179 | micro_and(union tgsi_exec_channel *dst, |
||
3180 | const union tgsi_exec_channel *src0, |
||
3181 | const union tgsi_exec_channel *src1) |
||
3182 | { |
||
3183 | dst->u[0] = src0->u[0] & src1->u[0]; |
||
3184 | dst->u[1] = src0->u[1] & src1->u[1]; |
||
3185 | dst->u[2] = src0->u[2] & src1->u[2]; |
||
3186 | dst->u[3] = src0->u[3] & src1->u[3]; |
||
3187 | } |
||
3188 | |||
3189 | static void |
||
3190 | micro_or(union tgsi_exec_channel *dst, |
||
3191 | const union tgsi_exec_channel *src0, |
||
3192 | const union tgsi_exec_channel *src1) |
||
3193 | { |
||
3194 | dst->u[0] = src0->u[0] | src1->u[0]; |
||
3195 | dst->u[1] = src0->u[1] | src1->u[1]; |
||
3196 | dst->u[2] = src0->u[2] | src1->u[2]; |
||
3197 | dst->u[3] = src0->u[3] | src1->u[3]; |
||
3198 | } |
||
3199 | |||
3200 | static void |
||
3201 | micro_xor(union tgsi_exec_channel *dst, |
||
3202 | const union tgsi_exec_channel *src0, |
||
3203 | const union tgsi_exec_channel *src1) |
||
3204 | { |
||
3205 | dst->u[0] = src0->u[0] ^ src1->u[0]; |
||
3206 | dst->u[1] = src0->u[1] ^ src1->u[1]; |
||
3207 | dst->u[2] = src0->u[2] ^ src1->u[2]; |
||
3208 | dst->u[3] = src0->u[3] ^ src1->u[3]; |
||
3209 | } |
||
3210 | |||
3211 | static void |
||
3212 | micro_mod(union tgsi_exec_channel *dst, |
||
3213 | const union tgsi_exec_channel *src0, |
||
3214 | const union tgsi_exec_channel *src1) |
||
3215 | { |
||
3216 | dst->i[0] = src0->i[0] % src1->i[0]; |
||
3217 | dst->i[1] = src0->i[1] % src1->i[1]; |
||
3218 | dst->i[2] = src0->i[2] % src1->i[2]; |
||
3219 | dst->i[3] = src0->i[3] % src1->i[3]; |
||
3220 | } |
||
3221 | |||
3222 | static void |
||
3223 | micro_f2i(union tgsi_exec_channel *dst, |
||
3224 | const union tgsi_exec_channel *src) |
||
3225 | { |
||
3226 | dst->i[0] = (int)src->f[0]; |
||
3227 | dst->i[1] = (int)src->f[1]; |
||
3228 | dst->i[2] = (int)src->f[2]; |
||
3229 | dst->i[3] = (int)src->f[3]; |
||
3230 | } |
||
3231 | |||
3232 | static void |
||
3233 | micro_idiv(union tgsi_exec_channel *dst, |
||
3234 | const union tgsi_exec_channel *src0, |
||
3235 | const union tgsi_exec_channel *src1) |
||
3236 | { |
||
3237 | dst->i[0] = src0->i[0] / src1->i[0]; |
||
3238 | dst->i[1] = src0->i[1] / src1->i[1]; |
||
3239 | dst->i[2] = src0->i[2] / src1->i[2]; |
||
3240 | dst->i[3] = src0->i[3] / src1->i[3]; |
||
3241 | } |
||
3242 | |||
3243 | static void |
||
3244 | micro_imax(union tgsi_exec_channel *dst, |
||
3245 | const union tgsi_exec_channel *src0, |
||
3246 | const union tgsi_exec_channel *src1) |
||
3247 | { |
||
3248 | dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; |
||
3249 | dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; |
||
3250 | dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; |
||
3251 | dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; |
||
3252 | } |
||
3253 | |||
3254 | static void |
||
3255 | micro_imin(union tgsi_exec_channel *dst, |
||
3256 | const union tgsi_exec_channel *src0, |
||
3257 | const union tgsi_exec_channel *src1) |
||
3258 | { |
||
3259 | dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; |
||
3260 | dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; |
||
3261 | dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; |
||
3262 | dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; |
||
3263 | } |
||
3264 | |||
3265 | static void |
||
3266 | micro_isge(union tgsi_exec_channel *dst, |
||
3267 | const union tgsi_exec_channel *src0, |
||
3268 | const union tgsi_exec_channel *src1) |
||
3269 | { |
||
3270 | dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; |
||
3271 | dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; |
||
3272 | dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; |
||
3273 | dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; |
||
3274 | } |
||
3275 | |||
3276 | static void |
||
3277 | micro_ishr(union tgsi_exec_channel *dst, |
||
3278 | const union tgsi_exec_channel *src0, |
||
3279 | const union tgsi_exec_channel *src1) |
||
3280 | { |
||
3281 | dst->i[0] = src0->i[0] >> src1->i[0]; |
||
3282 | dst->i[1] = src0->i[1] >> src1->i[1]; |
||
3283 | dst->i[2] = src0->i[2] >> src1->i[2]; |
||
3284 | dst->i[3] = src0->i[3] >> src1->i[3]; |
||
3285 | } |
||
3286 | |||
3287 | static void |
||
3288 | micro_islt(union tgsi_exec_channel *dst, |
||
3289 | const union tgsi_exec_channel *src0, |
||
3290 | const union tgsi_exec_channel *src1) |
||
3291 | { |
||
3292 | dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; |
||
3293 | dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; |
||
3294 | dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; |
||
3295 | dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; |
||
3296 | } |
||
3297 | |||
3298 | static void |
||
3299 | micro_f2u(union tgsi_exec_channel *dst, |
||
3300 | const union tgsi_exec_channel *src) |
||
3301 | { |
||
3302 | dst->u[0] = (uint)src->f[0]; |
||
3303 | dst->u[1] = (uint)src->f[1]; |
||
3304 | dst->u[2] = (uint)src->f[2]; |
||
3305 | dst->u[3] = (uint)src->f[3]; |
||
3306 | } |
||
3307 | |||
3308 | static void |
||
3309 | micro_u2f(union tgsi_exec_channel *dst, |
||
3310 | const union tgsi_exec_channel *src) |
||
3311 | { |
||
3312 | dst->f[0] = (float)src->u[0]; |
||
3313 | dst->f[1] = (float)src->u[1]; |
||
3314 | dst->f[2] = (float)src->u[2]; |
||
3315 | dst->f[3] = (float)src->u[3]; |
||
3316 | } |
||
3317 | |||
3318 | static void |
||
3319 | micro_uadd(union tgsi_exec_channel *dst, |
||
3320 | const union tgsi_exec_channel *src0, |
||
3321 | const union tgsi_exec_channel *src1) |
||
3322 | { |
||
3323 | dst->u[0] = src0->u[0] + src1->u[0]; |
||
3324 | dst->u[1] = src0->u[1] + src1->u[1]; |
||
3325 | dst->u[2] = src0->u[2] + src1->u[2]; |
||
3326 | dst->u[3] = src0->u[3] + src1->u[3]; |
||
3327 | } |
||
3328 | |||
3329 | static void |
||
3330 | micro_udiv(union tgsi_exec_channel *dst, |
||
3331 | const union tgsi_exec_channel *src0, |
||
3332 | const union tgsi_exec_channel *src1) |
||
3333 | { |
||
3334 | dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; |
||
3335 | dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; |
||
3336 | dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; |
||
3337 | dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; |
||
3338 | } |
||
3339 | |||
3340 | static void |
||
3341 | micro_umad(union tgsi_exec_channel *dst, |
||
3342 | const union tgsi_exec_channel *src0, |
||
3343 | const union tgsi_exec_channel *src1, |
||
3344 | const union tgsi_exec_channel *src2) |
||
3345 | { |
||
3346 | dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; |
||
3347 | dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; |
||
3348 | dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; |
||
3349 | dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; |
||
3350 | } |
||
3351 | |||
3352 | static void |
||
3353 | micro_umax(union tgsi_exec_channel *dst, |
||
3354 | const union tgsi_exec_channel *src0, |
||
3355 | const union tgsi_exec_channel *src1) |
||
3356 | { |
||
3357 | dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; |
||
3358 | dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; |
||
3359 | dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; |
||
3360 | dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; |
||
3361 | } |
||
3362 | |||
3363 | static void |
||
3364 | micro_umin(union tgsi_exec_channel *dst, |
||
3365 | const union tgsi_exec_channel *src0, |
||
3366 | const union tgsi_exec_channel *src1) |
||
3367 | { |
||
3368 | dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; |
||
3369 | dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; |
||
3370 | dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; |
||
3371 | dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; |
||
3372 | } |
||
3373 | |||
3374 | static void |
||
3375 | micro_umod(union tgsi_exec_channel *dst, |
||
3376 | const union tgsi_exec_channel *src0, |
||
3377 | const union tgsi_exec_channel *src1) |
||
3378 | { |
||
3379 | dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; |
||
3380 | dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; |
||
3381 | dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; |
||
3382 | dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; |
||
3383 | } |
||
3384 | |||
3385 | static void |
||
3386 | micro_umul(union tgsi_exec_channel *dst, |
||
3387 | const union tgsi_exec_channel *src0, |
||
3388 | const union tgsi_exec_channel *src1) |
||
3389 | { |
||
3390 | dst->u[0] = src0->u[0] * src1->u[0]; |
||
3391 | dst->u[1] = src0->u[1] * src1->u[1]; |
||
3392 | dst->u[2] = src0->u[2] * src1->u[2]; |
||
3393 | dst->u[3] = src0->u[3] * src1->u[3]; |
||
3394 | } |
||
3395 | |||
3396 | static void |
||
3397 | micro_useq(union tgsi_exec_channel *dst, |
||
3398 | const union tgsi_exec_channel *src0, |
||
3399 | const union tgsi_exec_channel *src1) |
||
3400 | { |
||
3401 | dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; |
||
3402 | dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; |
||
3403 | dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; |
||
3404 | dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; |
||
3405 | } |
||
3406 | |||
3407 | static void |
||
3408 | micro_usge(union tgsi_exec_channel *dst, |
||
3409 | const union tgsi_exec_channel *src0, |
||
3410 | const union tgsi_exec_channel *src1) |
||
3411 | { |
||
3412 | dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; |
||
3413 | dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; |
||
3414 | dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; |
||
3415 | dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; |
||
3416 | } |
||
3417 | |||
3418 | static void |
||
3419 | micro_ushr(union tgsi_exec_channel *dst, |
||
3420 | const union tgsi_exec_channel *src0, |
||
3421 | const union tgsi_exec_channel *src1) |
||
3422 | { |
||
3423 | dst->u[0] = src0->u[0] >> src1->u[0]; |
||
3424 | dst->u[1] = src0->u[1] >> src1->u[1]; |
||
3425 | dst->u[2] = src0->u[2] >> src1->u[2]; |
||
3426 | dst->u[3] = src0->u[3] >> src1->u[3]; |
||
3427 | } |
||
3428 | |||
3429 | static void |
||
3430 | micro_uslt(union tgsi_exec_channel *dst, |
||
3431 | const union tgsi_exec_channel *src0, |
||
3432 | const union tgsi_exec_channel *src1) |
||
3433 | { |
||
3434 | dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; |
||
3435 | dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; |
||
3436 | dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; |
||
3437 | dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; |
||
3438 | } |
||
3439 | |||
3440 | static void |
||
3441 | micro_usne(union tgsi_exec_channel *dst, |
||
3442 | const union tgsi_exec_channel *src0, |
||
3443 | const union tgsi_exec_channel *src1) |
||
3444 | { |
||
3445 | dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; |
||
3446 | dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; |
||
3447 | dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; |
||
3448 | dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; |
||
3449 | } |
||
3450 | |||
3451 | static void |
||
3452 | micro_uarl(union tgsi_exec_channel *dst, |
||
3453 | const union tgsi_exec_channel *src) |
||
3454 | { |
||
3455 | dst->i[0] = src->u[0]; |
||
3456 | dst->i[1] = src->u[1]; |
||
3457 | dst->i[2] = src->u[2]; |
||
3458 | dst->i[3] = src->u[3]; |
||
3459 | } |
||
3460 | |||
3461 | static void |
||
3462 | micro_ucmp(union tgsi_exec_channel *dst, |
||
3463 | const union tgsi_exec_channel *src0, |
||
3464 | const union tgsi_exec_channel *src1, |
||
3465 | const union tgsi_exec_channel *src2) |
||
3466 | { |
||
3467 | dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; |
||
3468 | dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; |
||
3469 | dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; |
||
3470 | dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; |
||
3471 | } |
||
3472 | |||
3473 | static void |
||
3474 | exec_instruction( |
||
3475 | struct tgsi_exec_machine *mach, |
||
3476 | const struct tgsi_full_instruction *inst, |
||
3477 | int *pc ) |
||
3478 | { |
||
3479 | union tgsi_exec_channel r[10]; |
||
3480 | |||
3481 | (*pc)++; |
||
3482 | |||
3483 | switch (inst->Instruction.Opcode) { |
||
3484 | case TGSI_OPCODE_ARL: |
||
3485 | exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); |
||
3486 | break; |
||
3487 | |||
3488 | case TGSI_OPCODE_MOV: |
||
3489 | exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); |
||
3490 | break; |
||
3491 | |||
3492 | case TGSI_OPCODE_LIT: |
||
3493 | exec_lit(mach, inst); |
||
3494 | break; |
||
3495 | |||
3496 | case TGSI_OPCODE_RCP: |
||
3497 | exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3498 | break; |
||
3499 | |||
3500 | case TGSI_OPCODE_RSQ: |
||
3501 | exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3502 | break; |
||
3503 | |||
3504 | case TGSI_OPCODE_EXP: |
||
3505 | exec_exp(mach, inst); |
||
3506 | break; |
||
3507 | |||
3508 | case TGSI_OPCODE_LOG: |
||
3509 | exec_log(mach, inst); |
||
3510 | break; |
||
3511 | |||
3512 | case TGSI_OPCODE_MUL: |
||
3513 | exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3514 | break; |
||
3515 | |||
3516 | case TGSI_OPCODE_ADD: |
||
3517 | exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3518 | break; |
||
3519 | |||
3520 | case TGSI_OPCODE_DP3: |
||
3521 | exec_dp3(mach, inst); |
||
3522 | break; |
||
3523 | |||
3524 | case TGSI_OPCODE_DP4: |
||
3525 | exec_dp4(mach, inst); |
||
3526 | break; |
||
3527 | |||
3528 | case TGSI_OPCODE_DST: |
||
3529 | exec_dst(mach, inst); |
||
3530 | break; |
||
3531 | |||
3532 | case TGSI_OPCODE_MIN: |
||
3533 | exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3534 | break; |
||
3535 | |||
3536 | case TGSI_OPCODE_MAX: |
||
3537 | exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3538 | break; |
||
3539 | |||
3540 | case TGSI_OPCODE_SLT: |
||
3541 | exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3542 | break; |
||
3543 | |||
3544 | case TGSI_OPCODE_SGE: |
||
3545 | exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3546 | break; |
||
3547 | |||
3548 | case TGSI_OPCODE_MAD: |
||
3549 | exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3550 | break; |
||
3551 | |||
3552 | case TGSI_OPCODE_SUB: |
||
3553 | exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3554 | break; |
||
3555 | |||
3556 | case TGSI_OPCODE_LRP: |
||
3557 | exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3558 | break; |
||
3559 | |||
3560 | case TGSI_OPCODE_CND: |
||
3561 | exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3562 | break; |
||
3563 | |||
3564 | case TGSI_OPCODE_SQRT: |
||
3565 | exec_vector_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3566 | break; |
||
3567 | |||
3568 | case TGSI_OPCODE_DP2A: |
||
3569 | exec_dp2a(mach, inst); |
||
3570 | break; |
||
3571 | |||
3572 | case TGSI_OPCODE_FRC: |
||
3573 | exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3574 | break; |
||
3575 | |||
3576 | case TGSI_OPCODE_CLAMP: |
||
3577 | exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3578 | break; |
||
3579 | |||
3580 | case TGSI_OPCODE_FLR: |
||
3581 | exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3582 | break; |
||
3583 | |||
3584 | case TGSI_OPCODE_ROUND: |
||
3585 | exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3586 | break; |
||
3587 | |||
3588 | case TGSI_OPCODE_EX2: |
||
3589 | exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3590 | break; |
||
3591 | |||
3592 | case TGSI_OPCODE_LG2: |
||
3593 | exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3594 | break; |
||
3595 | |||
3596 | case TGSI_OPCODE_POW: |
||
3597 | exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3598 | break; |
||
3599 | |||
3600 | case TGSI_OPCODE_XPD: |
||
3601 | exec_xpd(mach, inst); |
||
3602 | break; |
||
3603 | |||
3604 | case TGSI_OPCODE_ABS: |
||
3605 | exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3606 | break; |
||
3607 | |||
3608 | case TGSI_OPCODE_RCC: |
||
3609 | exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3610 | break; |
||
3611 | |||
3612 | case TGSI_OPCODE_DPH: |
||
3613 | exec_dph(mach, inst); |
||
3614 | break; |
||
3615 | |||
3616 | case TGSI_OPCODE_COS: |
||
3617 | exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3618 | break; |
||
3619 | |||
3620 | case TGSI_OPCODE_DDX: |
||
3621 | exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3622 | break; |
||
3623 | |||
3624 | case TGSI_OPCODE_DDY: |
||
3625 | exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3626 | break; |
||
3627 | |||
3628 | case TGSI_OPCODE_KILP: |
||
3629 | exec_kilp (mach, inst); |
||
3630 | break; |
||
3631 | |||
3632 | case TGSI_OPCODE_KIL: |
||
3633 | exec_kil (mach, inst); |
||
3634 | break; |
||
3635 | |||
3636 | case TGSI_OPCODE_PK2H: |
||
3637 | assert (0); |
||
3638 | break; |
||
3639 | |||
3640 | case TGSI_OPCODE_PK2US: |
||
3641 | assert (0); |
||
3642 | break; |
||
3643 | |||
3644 | case TGSI_OPCODE_PK4B: |
||
3645 | assert (0); |
||
3646 | break; |
||
3647 | |||
3648 | case TGSI_OPCODE_PK4UB: |
||
3649 | assert (0); |
||
3650 | break; |
||
3651 | |||
3652 | case TGSI_OPCODE_RFL: |
||
3653 | exec_rfl(mach, inst); |
||
3654 | break; |
||
3655 | |||
3656 | case TGSI_OPCODE_SEQ: |
||
3657 | exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3658 | break; |
||
3659 | |||
3660 | case TGSI_OPCODE_SFL: |
||
3661 | exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT); |
||
3662 | break; |
||
3663 | |||
3664 | case TGSI_OPCODE_SGT: |
||
3665 | exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3666 | break; |
||
3667 | |||
3668 | case TGSI_OPCODE_SIN: |
||
3669 | exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3670 | break; |
||
3671 | |||
3672 | case TGSI_OPCODE_SLE: |
||
3673 | exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3674 | break; |
||
3675 | |||
3676 | case TGSI_OPCODE_SNE: |
||
3677 | exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3678 | break; |
||
3679 | |||
3680 | case TGSI_OPCODE_STR: |
||
3681 | exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT); |
||
3682 | break; |
||
3683 | |||
3684 | case TGSI_OPCODE_TEX: |
||
3685 | /* simple texture lookup */ |
||
3686 | /* src[0] = texcoord */ |
||
3687 | /* src[1] = sampler unit */ |
||
3688 | exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); |
||
3689 | break; |
||
3690 | |||
3691 | case TGSI_OPCODE_TXB: |
||
3692 | /* Texture lookup with lod bias */ |
||
3693 | /* src[0] = texcoord (src[0].w = LOD bias) */ |
||
3694 | /* src[1] = sampler unit */ |
||
3695 | exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); |
||
3696 | break; |
||
3697 | |||
3698 | case TGSI_OPCODE_TXD: |
||
3699 | /* Texture lookup with explict partial derivatives */ |
||
3700 | /* src[0] = texcoord */ |
||
3701 | /* src[1] = d[strq]/dx */ |
||
3702 | /* src[2] = d[strq]/dy */ |
||
3703 | /* src[3] = sampler unit */ |
||
3704 | exec_txd(mach, inst); |
||
3705 | break; |
||
3706 | |||
3707 | case TGSI_OPCODE_TXL: |
||
3708 | /* Texture lookup with explit LOD */ |
||
3709 | /* src[0] = texcoord (src[0].w = LOD) */ |
||
3710 | /* src[1] = sampler unit */ |
||
3711 | exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); |
||
3712 | break; |
||
3713 | |||
3714 | case TGSI_OPCODE_TXP: |
||
3715 | /* Texture lookup with projection */ |
||
3716 | /* src[0] = texcoord (src[0].w = projection) */ |
||
3717 | /* src[1] = sampler unit */ |
||
3718 | exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); |
||
3719 | break; |
||
3720 | |||
3721 | case TGSI_OPCODE_UP2H: |
||
3722 | assert (0); |
||
3723 | break; |
||
3724 | |||
3725 | case TGSI_OPCODE_UP2US: |
||
3726 | assert (0); |
||
3727 | break; |
||
3728 | |||
3729 | case TGSI_OPCODE_UP4B: |
||
3730 | assert (0); |
||
3731 | break; |
||
3732 | |||
3733 | case TGSI_OPCODE_UP4UB: |
||
3734 | assert (0); |
||
3735 | break; |
||
3736 | |||
3737 | case TGSI_OPCODE_X2D: |
||
3738 | exec_x2d(mach, inst); |
||
3739 | break; |
||
3740 | |||
3741 | case TGSI_OPCODE_ARA: |
||
3742 | assert (0); |
||
3743 | break; |
||
3744 | |||
3745 | case TGSI_OPCODE_ARR: |
||
3746 | exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); |
||
3747 | break; |
||
3748 | |||
3749 | case TGSI_OPCODE_BRA: |
||
3750 | assert (0); |
||
3751 | break; |
||
3752 | |||
3753 | case TGSI_OPCODE_CAL: |
||
3754 | /* skip the call if no execution channels are enabled */ |
||
3755 | if (mach->ExecMask) { |
||
3756 | /* do the call */ |
||
3757 | |||
3758 | /* First, record the depths of the execution stacks. |
||
3759 | * This is important for deeply nested/looped return statements. |
||
3760 | * We have to unwind the stacks by the correct amount. For a |
||
3761 | * real code generator, we could determine the number of entries |
||
3762 | * to pop off each stack with simple static analysis and avoid |
||
3763 | * implementing this data structure at run time. |
||
3764 | */ |
||
3765 | mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; |
||
3766 | mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; |
||
3767 | mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; |
||
3768 | mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; |
||
3769 | mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; |
||
3770 | /* note that PC was already incremented above */ |
||
3771 | mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; |
||
3772 | |||
3773 | mach->CallStackTop++; |
||
3774 | |||
3775 | /* Second, push the Cond, Loop, Cont, Func stacks */ |
||
3776 | assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); |
||
3777 | assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
3778 | assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
3779 | assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); |
||
3780 | assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); |
||
3781 | assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); |
||
3782 | |||
3783 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; |
||
3784 | mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; |
||
3785 | mach->ContStack[mach->ContStackTop++] = mach->ContMask; |
||
3786 | mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; |
||
3787 | mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; |
||
3788 | mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; |
||
3789 | |||
3790 | /* Finally, jump to the subroutine */ |
||
3791 | *pc = inst->Label.Label; |
||
3792 | } |
||
3793 | break; |
||
3794 | |||
3795 | case TGSI_OPCODE_RET: |
||
3796 | mach->FuncMask &= ~mach->ExecMask; |
||
3797 | UPDATE_EXEC_MASK(mach); |
||
3798 | |||
3799 | if (mach->FuncMask == 0x0) { |
||
3800 | /* really return now (otherwise, keep executing */ |
||
3801 | |||
3802 | if (mach->CallStackTop == 0) { |
||
3803 | /* returning from main() */ |
||
3804 | mach->CondStackTop = 0; |
||
3805 | mach->LoopStackTop = 0; |
||
3806 | *pc = -1; |
||
3807 | return; |
||
3808 | } |
||
3809 | |||
3810 | assert(mach->CallStackTop > 0); |
||
3811 | mach->CallStackTop--; |
||
3812 | |||
3813 | mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; |
||
3814 | mach->CondMask = mach->CondStack[mach->CondStackTop]; |
||
3815 | |||
3816 | mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; |
||
3817 | mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; |
||
3818 | |||
3819 | mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; |
||
3820 | mach->ContMask = mach->ContStack[mach->ContStackTop]; |
||
3821 | |||
3822 | mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; |
||
3823 | mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; |
||
3824 | |||
3825 | mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; |
||
3826 | mach->BreakType = mach->BreakStack[mach->BreakStackTop]; |
||
3827 | |||
3828 | assert(mach->FuncStackTop > 0); |
||
3829 | mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; |
||
3830 | |||
3831 | *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; |
||
3832 | |||
3833 | UPDATE_EXEC_MASK(mach); |
||
3834 | } |
||
3835 | break; |
||
3836 | |||
3837 | case TGSI_OPCODE_SSG: |
||
3838 | exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3839 | break; |
||
3840 | |||
3841 | case TGSI_OPCODE_CMP: |
||
3842 | exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3843 | break; |
||
3844 | |||
3845 | case TGSI_OPCODE_SCS: |
||
3846 | exec_scs(mach, inst); |
||
3847 | break; |
||
3848 | |||
3849 | case TGSI_OPCODE_NRM: |
||
3850 | exec_nrm3(mach, inst); |
||
3851 | break; |
||
3852 | |||
3853 | case TGSI_OPCODE_NRM4: |
||
3854 | exec_nrm4(mach, inst); |
||
3855 | break; |
||
3856 | |||
3857 | case TGSI_OPCODE_DIV: |
||
3858 | exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3859 | break; |
||
3860 | |||
3861 | case TGSI_OPCODE_DP2: |
||
3862 | exec_dp2(mach, inst); |
||
3863 | break; |
||
3864 | |||
3865 | case TGSI_OPCODE_IF: |
||
3866 | /* push CondMask */ |
||
3867 | assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); |
||
3868 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; |
||
3869 | FETCH( &r[0], 0, TGSI_CHAN_X ); |
||
3870 | /* update CondMask */ |
||
3871 | if( ! r[0].f[0] ) { |
||
3872 | mach->CondMask &= ~0x1; |
||
3873 | } |
||
3874 | if( ! r[0].f[1] ) { |
||
3875 | mach->CondMask &= ~0x2; |
||
3876 | } |
||
3877 | if( ! r[0].f[2] ) { |
||
3878 | mach->CondMask &= ~0x4; |
||
3879 | } |
||
3880 | if( ! r[0].f[3] ) { |
||
3881 | mach->CondMask &= ~0x8; |
||
3882 | } |
||
3883 | UPDATE_EXEC_MASK(mach); |
||
3884 | /* Todo: If CondMask==0, jump to ELSE */ |
||
3885 | break; |
||
3886 | |||
3887 | case TGSI_OPCODE_UIF: |
||
3888 | /* push CondMask */ |
||
3889 | assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); |
||
3890 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; |
||
3891 | IFETCH( &r[0], 0, TGSI_CHAN_X ); |
||
3892 | /* update CondMask */ |
||
3893 | if( ! r[0].u[0] ) { |
||
3894 | mach->CondMask &= ~0x1; |
||
3895 | } |
||
3896 | if( ! r[0].u[1] ) { |
||
3897 | mach->CondMask &= ~0x2; |
||
3898 | } |
||
3899 | if( ! r[0].u[2] ) { |
||
3900 | mach->CondMask &= ~0x4; |
||
3901 | } |
||
3902 | if( ! r[0].u[3] ) { |
||
3903 | mach->CondMask &= ~0x8; |
||
3904 | } |
||
3905 | UPDATE_EXEC_MASK(mach); |
||
3906 | /* Todo: If CondMask==0, jump to ELSE */ |
||
3907 | break; |
||
3908 | |||
3909 | case TGSI_OPCODE_ELSE: |
||
3910 | /* invert CondMask wrt previous mask */ |
||
3911 | { |
||
3912 | uint prevMask; |
||
3913 | assert(mach->CondStackTop > 0); |
||
3914 | prevMask = mach->CondStack[mach->CondStackTop - 1]; |
||
3915 | mach->CondMask = ~mach->CondMask & prevMask; |
||
3916 | UPDATE_EXEC_MASK(mach); |
||
3917 | /* Todo: If CondMask==0, jump to ENDIF */ |
||
3918 | } |
||
3919 | break; |
||
3920 | |||
3921 | case TGSI_OPCODE_ENDIF: |
||
3922 | /* pop CondMask */ |
||
3923 | assert(mach->CondStackTop > 0); |
||
3924 | mach->CondMask = mach->CondStack[--mach->CondStackTop]; |
||
3925 | UPDATE_EXEC_MASK(mach); |
||
3926 | break; |
||
3927 | |||
3928 | case TGSI_OPCODE_END: |
||
3929 | /* make sure we end primitives which haven't |
||
3930 | * been explicitly emitted */ |
||
3931 | conditional_emit_primitive(mach); |
||
3932 | /* halt execution */ |
||
3933 | *pc = -1; |
||
3934 | break; |
||
3935 | |||
3936 | case TGSI_OPCODE_PUSHA: |
||
3937 | assert (0); |
||
3938 | break; |
||
3939 | |||
3940 | case TGSI_OPCODE_POPA: |
||
3941 | assert (0); |
||
3942 | break; |
||
3943 | |||
3944 | case TGSI_OPCODE_CEIL: |
||
3945 | exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3946 | break; |
||
3947 | |||
3948 | case TGSI_OPCODE_I2F: |
||
3949 | exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); |
||
3950 | break; |
||
3951 | |||
3952 | case TGSI_OPCODE_NOT: |
||
3953 | exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3954 | break; |
||
3955 | |||
3956 | case TGSI_OPCODE_TRUNC: |
||
3957 | exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3958 | break; |
||
3959 | |||
3960 | case TGSI_OPCODE_SHL: |
||
3961 | exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3962 | break; |
||
3963 | |||
3964 | case TGSI_OPCODE_AND: |
||
3965 | exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3966 | break; |
||
3967 | |||
3968 | case TGSI_OPCODE_OR: |
||
3969 | exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3970 | break; |
||
3971 | |||
3972 | case TGSI_OPCODE_MOD: |
||
3973 | exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
3974 | break; |
||
3975 | |||
3976 | case TGSI_OPCODE_XOR: |
||
3977 | exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3978 | break; |
||
3979 | |||
3980 | case TGSI_OPCODE_SAD: |
||
3981 | assert (0); |
||
3982 | break; |
||
3983 | |||
3984 | case TGSI_OPCODE_TXF: |
||
3985 | exec_txf(mach, inst); |
||
3986 | break; |
||
3987 | |||
3988 | case TGSI_OPCODE_TXQ: |
||
3989 | exec_txq(mach, inst); |
||
3990 | break; |
||
3991 | |||
3992 | case TGSI_OPCODE_EMIT: |
||
3993 | emit_vertex(mach); |
||
3994 | break; |
||
3995 | |||
3996 | case TGSI_OPCODE_ENDPRIM: |
||
3997 | emit_primitive(mach); |
||
3998 | break; |
||
3999 | |||
4000 | case TGSI_OPCODE_BGNLOOP: |
||
4001 | /* push LoopMask and ContMasks */ |
||
4002 | assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
4003 | assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
4004 | assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
4005 | assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); |
||
4006 | |||
4007 | mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; |
||
4008 | mach->ContStack[mach->ContStackTop++] = mach->ContMask; |
||
4009 | mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; |
||
4010 | mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; |
||
4011 | mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; |
||
4012 | break; |
||
4013 | |||
4014 | case TGSI_OPCODE_ENDLOOP: |
||
4015 | /* Restore ContMask, but don't pop */ |
||
4016 | assert(mach->ContStackTop > 0); |
||
4017 | mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; |
||
4018 | UPDATE_EXEC_MASK(mach); |
||
4019 | if (mach->ExecMask) { |
||
4020 | /* repeat loop: jump to instruction just past BGNLOOP */ |
||
4021 | assert(mach->LoopLabelStackTop > 0); |
||
4022 | *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; |
||
4023 | } |
||
4024 | else { |
||
4025 | /* exit loop: pop LoopMask */ |
||
4026 | assert(mach->LoopStackTop > 0); |
||
4027 | mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; |
||
4028 | /* pop ContMask */ |
||
4029 | assert(mach->ContStackTop > 0); |
||
4030 | mach->ContMask = mach->ContStack[--mach->ContStackTop]; |
||
4031 | assert(mach->LoopLabelStackTop > 0); |
||
4032 | --mach->LoopLabelStackTop; |
||
4033 | |||
4034 | mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; |
||
4035 | } |
||
4036 | UPDATE_EXEC_MASK(mach); |
||
4037 | break; |
||
4038 | |||
4039 | case TGSI_OPCODE_BRK: |
||
4040 | exec_break(mach); |
||
4041 | break; |
||
4042 | |||
4043 | case TGSI_OPCODE_CONT: |
||
4044 | /* turn off cont channels for each enabled exec channel */ |
||
4045 | mach->ContMask &= ~mach->ExecMask; |
||
4046 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ |
||
4047 | UPDATE_EXEC_MASK(mach); |
||
4048 | break; |
||
4049 | |||
4050 | case TGSI_OPCODE_BGNSUB: |
||
4051 | /* no-op */ |
||
4052 | break; |
||
4053 | |||
4054 | case TGSI_OPCODE_ENDSUB: |
||
4055 | /* |
||
4056 | * XXX: This really should be a no-op. We should never reach this opcode. |
||
4057 | */ |
||
4058 | |||
4059 | assert(mach->CallStackTop > 0); |
||
4060 | mach->CallStackTop--; |
||
4061 | |||
4062 | mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; |
||
4063 | mach->CondMask = mach->CondStack[mach->CondStackTop]; |
||
4064 | |||
4065 | mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; |
||
4066 | mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; |
||
4067 | |||
4068 | mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; |
||
4069 | mach->ContMask = mach->ContStack[mach->ContStackTop]; |
||
4070 | |||
4071 | mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; |
||
4072 | mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; |
||
4073 | |||
4074 | mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; |
||
4075 | mach->BreakType = mach->BreakStack[mach->BreakStackTop]; |
||
4076 | |||
4077 | assert(mach->FuncStackTop > 0); |
||
4078 | mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; |
||
4079 | |||
4080 | *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; |
||
4081 | |||
4082 | UPDATE_EXEC_MASK(mach); |
||
4083 | break; |
||
4084 | |||
4085 | case TGSI_OPCODE_NOP: |
||
4086 | break; |
||
4087 | |||
4088 | case TGSI_OPCODE_BREAKC: |
||
4089 | IFETCH(&r[0], 0, TGSI_CHAN_X); |
||
4090 | /* update CondMask */ |
||
4091 | if (r[0].u[0] && (mach->ExecMask & 0x1)) { |
||
4092 | mach->LoopMask &= ~0x1; |
||
4093 | } |
||
4094 | if (r[0].u[1] && (mach->ExecMask & 0x2)) { |
||
4095 | mach->LoopMask &= ~0x2; |
||
4096 | } |
||
4097 | if (r[0].u[2] && (mach->ExecMask & 0x4)) { |
||
4098 | mach->LoopMask &= ~0x4; |
||
4099 | } |
||
4100 | if (r[0].u[3] && (mach->ExecMask & 0x8)) { |
||
4101 | mach->LoopMask &= ~0x8; |
||
4102 | } |
||
4103 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ |
||
4104 | UPDATE_EXEC_MASK(mach); |
||
4105 | break; |
||
4106 | |||
4107 | case TGSI_OPCODE_F2I: |
||
4108 | exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); |
||
4109 | break; |
||
4110 | |||
4111 | case TGSI_OPCODE_IDIV: |
||
4112 | exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4113 | break; |
||
4114 | |||
4115 | case TGSI_OPCODE_IMAX: |
||
4116 | exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4117 | break; |
||
4118 | |||
4119 | case TGSI_OPCODE_IMIN: |
||
4120 | exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4121 | break; |
||
4122 | |||
4123 | case TGSI_OPCODE_INEG: |
||
4124 | exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4125 | break; |
||
4126 | |||
4127 | case TGSI_OPCODE_ISGE: |
||
4128 | exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4129 | break; |
||
4130 | |||
4131 | case TGSI_OPCODE_ISHR: |
||
4132 | exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4133 | break; |
||
4134 | |||
4135 | case TGSI_OPCODE_ISLT: |
||
4136 | exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4137 | break; |
||
4138 | |||
4139 | case TGSI_OPCODE_F2U: |
||
4140 | exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); |
||
4141 | break; |
||
4142 | |||
4143 | case TGSI_OPCODE_U2F: |
||
4144 | exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); |
||
4145 | break; |
||
4146 | |||
4147 | case TGSI_OPCODE_UADD: |
||
4148 | exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4149 | break; |
||
4150 | |||
4151 | case TGSI_OPCODE_UDIV: |
||
4152 | exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4153 | break; |
||
4154 | |||
4155 | case TGSI_OPCODE_UMAD: |
||
4156 | exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4157 | break; |
||
4158 | |||
4159 | case TGSI_OPCODE_UMAX: |
||
4160 | exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4161 | break; |
||
4162 | |||
4163 | case TGSI_OPCODE_UMIN: |
||
4164 | exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4165 | break; |
||
4166 | |||
4167 | case TGSI_OPCODE_UMOD: |
||
4168 | exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4169 | break; |
||
4170 | |||
4171 | case TGSI_OPCODE_UMUL: |
||
4172 | exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4173 | break; |
||
4174 | |||
4175 | case TGSI_OPCODE_USEQ: |
||
4176 | exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4177 | break; |
||
4178 | |||
4179 | case TGSI_OPCODE_USGE: |
||
4180 | exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4181 | break; |
||
4182 | |||
4183 | case TGSI_OPCODE_USHR: |
||
4184 | exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4185 | break; |
||
4186 | |||
4187 | case TGSI_OPCODE_USLT: |
||
4188 | exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4189 | break; |
||
4190 | |||
4191 | case TGSI_OPCODE_USNE: |
||
4192 | exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4193 | break; |
||
4194 | |||
4195 | case TGSI_OPCODE_SWITCH: |
||
4196 | exec_switch(mach, inst); |
||
4197 | break; |
||
4198 | |||
4199 | case TGSI_OPCODE_CASE: |
||
4200 | exec_case(mach, inst); |
||
4201 | break; |
||
4202 | |||
4203 | case TGSI_OPCODE_DEFAULT: |
||
4204 | exec_default(mach); |
||
4205 | break; |
||
4206 | |||
4207 | case TGSI_OPCODE_ENDSWITCH: |
||
4208 | exec_endswitch(mach); |
||
4209 | break; |
||
4210 | |||
4211 | case TGSI_OPCODE_SAMPLE_I: |
||
4212 | exec_txf(mach, inst); |
||
4213 | break; |
||
4214 | |||
4215 | case TGSI_OPCODE_SAMPLE_I_MS: |
||
4216 | assert(0); |
||
4217 | break; |
||
4218 | |||
4219 | case TGSI_OPCODE_SAMPLE: |
||
4220 | exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); |
||
4221 | break; |
||
4222 | |||
4223 | case TGSI_OPCODE_SAMPLE_B: |
||
4224 | exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); |
||
4225 | break; |
||
4226 | |||
4227 | case TGSI_OPCODE_SAMPLE_C: |
||
4228 | exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); |
||
4229 | break; |
||
4230 | |||
4231 | case TGSI_OPCODE_SAMPLE_C_LZ: |
||
4232 | exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); |
||
4233 | break; |
||
4234 | |||
4235 | case TGSI_OPCODE_SAMPLE_D: |
||
4236 | exec_sample_d(mach, inst); |
||
4237 | break; |
||
4238 | |||
4239 | case TGSI_OPCODE_SAMPLE_L: |
||
4240 | exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); |
||
4241 | break; |
||
4242 | |||
4243 | case TGSI_OPCODE_GATHER4: |
||
4244 | assert(0); |
||
4245 | break; |
||
4246 | |||
4247 | case TGSI_OPCODE_SVIEWINFO: |
||
4248 | exec_txq(mach, inst); |
||
4249 | break; |
||
4250 | |||
4251 | case TGSI_OPCODE_SAMPLE_POS: |
||
4252 | assert(0); |
||
4253 | break; |
||
4254 | |||
4255 | case TGSI_OPCODE_SAMPLE_INFO: |
||
4256 | assert(0); |
||
4257 | break; |
||
4258 | |||
4259 | case TGSI_OPCODE_UARL: |
||
4260 | exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); |
||
4261 | break; |
||
4262 | |||
4263 | case TGSI_OPCODE_UCMP: |
||
4264 | exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4265 | break; |
||
4266 | |||
4267 | case TGSI_OPCODE_IABS: |
||
4268 | exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4269 | break; |
||
4270 | |||
4271 | case TGSI_OPCODE_ISSG: |
||
4272 | exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4273 | break; |
||
4274 | |||
4275 | case TGSI_OPCODE_TEX2: |
||
4276 | /* simple texture lookup */ |
||
4277 | /* src[0] = texcoord */ |
||
4278 | /* src[1] = compare */ |
||
4279 | /* src[2] = sampler unit */ |
||
4280 | exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); |
||
4281 | break; |
||
4282 | case TGSI_OPCODE_TXB2: |
||
4283 | /* simple texture lookup */ |
||
4284 | /* src[0] = texcoord */ |
||
4285 | /* src[1] = bias */ |
||
4286 | /* src[2] = sampler unit */ |
||
4287 | exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); |
||
4288 | break; |
||
4289 | case TGSI_OPCODE_TXL2: |
||
4290 | /* simple texture lookup */ |
||
4291 | /* src[0] = texcoord */ |
||
4292 | /* src[1] = lod */ |
||
4293 | /* src[2] = sampler unit */ |
||
4294 | exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); |
||
4295 | break; |
||
4296 | default: |
||
4297 | assert( 0 ); |
||
4298 | } |
||
4299 | } |
||
4300 | |||
4301 | |||
4302 | /** |
||
4303 | * Run TGSI interpreter. |
||
4304 | * \return bitmask of "alive" quad components |
||
4305 | */ |
||
4306 | uint |
||
4307 | tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) |
||
4308 | { |
||
4309 | uint i; |
||
4310 | int pc = 0; |
||
4311 | uint default_mask = 0xf; |
||
4312 | |||
4313 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; |
||
4314 | mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; |
||
4315 | |||
4316 | if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { |
||
4317 | mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; |
||
4318 | mach->Primitives[0] = 0; |
||
4319 | /* GS runs on a single primitive for now */ |
||
4320 | default_mask = 0x1; |
||
4321 | } |
||
4322 | |||
4323 | mach->CondMask = default_mask; |
||
4324 | mach->LoopMask = default_mask; |
||
4325 | mach->ContMask = default_mask; |
||
4326 | mach->FuncMask = default_mask; |
||
4327 | mach->ExecMask = default_mask; |
||
4328 | |||
4329 | mach->Switch.mask = default_mask; |
||
4330 | |||
4331 | assert(mach->CondStackTop == 0); |
||
4332 | assert(mach->LoopStackTop == 0); |
||
4333 | assert(mach->ContStackTop == 0); |
||
4334 | assert(mach->SwitchStackTop == 0); |
||
4335 | assert(mach->BreakStackTop == 0); |
||
4336 | assert(mach->CallStackTop == 0); |
||
4337 | |||
4338 | |||
4339 | /* execute declarations (interpolants) */ |
||
4340 | for (i = 0; i < mach->NumDeclarations; i++) { |
||
4341 | exec_declaration( mach, mach->Declarations+i ); |
||
4342 | } |
||
4343 | |||
4344 | { |
||
4345 | #if DEBUG_EXECUTION |
||
4346 | struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; |
||
4347 | struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; |
||
4348 | uint inst = 1; |
||
4349 | |||
4350 | memset(mach->Temps, 0, sizeof(temps)); |
||
4351 | memset(mach->Outputs, 0, sizeof(outputs)); |
||
4352 | memset(temps, 0, sizeof(temps)); |
||
4353 | memset(outputs, 0, sizeof(outputs)); |
||
4354 | #endif |
||
4355 | |||
4356 | /* execute instructions, until pc is set to -1 */ |
||
4357 | while (pc != -1) { |
||
4358 | |||
4359 | #if DEBUG_EXECUTION |
||
4360 | uint i; |
||
4361 | |||
4362 | tgsi_dump_instruction(&mach->Instructions[pc], inst++); |
||
4363 | #endif |
||
4364 | |||
4365 | assert(pc < (int) mach->NumInstructions); |
||
4366 | exec_instruction(mach, mach->Instructions + pc, &pc); |
||
4367 | |||
4368 | #if DEBUG_EXECUTION |
||
4369 | for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { |
||
4370 | if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { |
||
4371 | uint j; |
||
4372 | |||
4373 | memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); |
||
4374 | debug_printf("TEMP[%2u] = ", i); |
||
4375 | for (j = 0; j < 4; j++) { |
||
4376 | if (j > 0) { |
||
4377 | debug_printf(" "); |
||
4378 | } |
||
4379 | debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", |
||
4380 | temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], |
||
4381 | temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], |
||
4382 | temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], |
||
4383 | temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); |
||
4384 | } |
||
4385 | } |
||
4386 | } |
||
4387 | for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { |
||
4388 | if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { |
||
4389 | uint j; |
||
4390 | |||
4391 | memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); |
||
4392 | debug_printf("OUT[%2u] = ", i); |
||
4393 | for (j = 0; j < 4; j++) { |
||
4394 | if (j > 0) { |
||
4395 | debug_printf(" "); |
||
4396 | } |
||
4397 | debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", |
||
4398 | outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], |
||
4399 | outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], |
||
4400 | outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], |
||
4401 | outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); |
||
4402 | } |
||
4403 | } |
||
4404 | } |
||
4405 | #endif |
||
4406 | } |
||
4407 | } |
||
4408 | |||
4409 | #if 0 |
||
4410 | /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ |
||
4411 | if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { |
||
4412 | /* |
||
4413 | * Scale back depth component. |
||
4414 | */ |
||
4415 | for (i = 0; i < 4; i++) |
||
4416 | mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; |
||
4417 | } |
||
4418 | #endif |
||
4419 | |||
4420 | /* Strictly speaking, these assertions aren't really needed but they |
||
4421 | * can potentially catch some bugs in the control flow code. |
||
4422 | */ |
||
4423 | assert(mach->CondStackTop == 0); |
||
4424 | assert(mach->LoopStackTop == 0); |
||
4425 | assert(mach->ContStackTop == 0); |
||
4426 | assert(mach->SwitchStackTop == 0); |
||
4427 | assert(mach->BreakStackTop == 0); |
||
4428 | assert(mach->CallStackTop == 0); |
||
4429 | |||
4430 | return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; |
||
4431 | }>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><>><>><>><>>>><>=>=>><>=>=>><>>><>>><>>><>>=>><>>=>><>>><>>><>>><>>><>>><>>><>>><>>><>>>=>=>><>>>>><>>><>>><>>>>><>>>><>>><>>>>>=>>=>>>><>>><>>><>>>><>><>>>><>>>><>>><>>><>>><>>>>><>>><>>><>>><>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>=>=>=>><>><>><>>>>>>>>>=>=>=>=>>>>>>>>>>>>>>>>>>> |