Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
||
4 | * All Rights Reserved. |
||
5 | * Copyright 2009-2010 VMware, Inc. All rights Reserved. |
||
6 | * |
||
7 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
8 | * copy of this software and associated documentation files (the |
||
9 | * "Software"), to deal in the Software without restriction, including |
||
10 | * without limitation the rights to use, copy, modify, merge, publish, |
||
11 | * distribute, sub license, and/or sell copies of the Software, and to |
||
12 | * permit persons to whom the Software is furnished to do so, subject to |
||
13 | * the following conditions: |
||
14 | * |
||
15 | * The above copyright notice and this permission notice (including the |
||
16 | * next paragraph) shall be included in all copies or substantial portions |
||
17 | * of the Software. |
||
18 | * |
||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
22 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
23 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
24 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
25 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
26 | * |
||
27 | **************************************************************************/ |
||
28 | |||
29 | /** |
||
30 | * TGSI interpreter/executor. |
||
31 | * |
||
32 | * Flow control information: |
||
33 | * |
||
34 | * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) |
||
35 | * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special |
||
36 | * care since a condition may be true for some quad components but false |
||
37 | * for other components. |
||
38 | * |
||
39 | * We basically execute all statements (even if they're in the part of |
||
40 | * an IF/ELSE clause that's "not taken") and use a special mask to |
||
41 | * control writing to destination registers. This is the ExecMask. |
||
42 | * See store_dest(). |
||
43 | * |
||
44 | * The ExecMask is computed from other masks (CondMask, LoopMask, ContMask, |
||
45 | * Switch.mask and FuncMask; see UPDATE_EXEC_MASK) which are controlled by |
||
46 | * the flow control instructions (e.g. IF/ELSE/ENDIF, LOOP/ENDLOOP, CONT). |
||
47 | * |
||
48 | * |
||
49 | * Authors: |
||
50 | * Michal Krol |
||
51 | * Brian Paul |
||
52 | */ |
||
53 | |||
54 | #include "pipe/p_compiler.h" |
||
55 | #include "pipe/p_state.h" |
||
56 | #include "pipe/p_shader_tokens.h" |
||
57 | #include "tgsi/tgsi_dump.h" |
||
58 | #include "tgsi/tgsi_parse.h" |
||
59 | #include "tgsi/tgsi_util.h" |
||
60 | #include "tgsi_exec.h" |
||
61 | #include "util/u_memory.h" |
||
62 | #include "util/u_math.h" |
||
63 | |||
64 | |||
/* Debug switch; its consumers are outside this part of the file.
 * NOTE(review): presumably enables execution tracing - confirm at the use sites.
 */
#define DEBUG_EXECUTION 0


/* When non-zero, micro_exp2() and micro_lg2() call the util_fast_exp2() /
 * util_fast_log2() helpers instead of powf() / logf().
 */
#define FAST_MATH 0
||
69 | |||
/* Lane indices of the four elements of a quad inside one channel.
 * micro_ddx() and micro_ddy() subtract these lanes to form derivatives.
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
||
74 | |||
75 | static void |
||
76 | micro_abs(union tgsi_exec_channel *dst, |
||
77 | const union tgsi_exec_channel *src) |
||
78 | { |
||
79 | dst->f[0] = fabsf(src->f[0]); |
||
80 | dst->f[1] = fabsf(src->f[1]); |
||
81 | dst->f[2] = fabsf(src->f[2]); |
||
82 | dst->f[3] = fabsf(src->f[3]); |
||
83 | } |
||
84 | |||
85 | static void |
||
86 | micro_arl(union tgsi_exec_channel *dst, |
||
87 | const union tgsi_exec_channel *src) |
||
88 | { |
||
89 | dst->i[0] = (int)floorf(src->f[0]); |
||
90 | dst->i[1] = (int)floorf(src->f[1]); |
||
91 | dst->i[2] = (int)floorf(src->f[2]); |
||
92 | dst->i[3] = (int)floorf(src->f[3]); |
||
93 | } |
||
94 | |||
95 | static void |
||
96 | micro_arr(union tgsi_exec_channel *dst, |
||
97 | const union tgsi_exec_channel *src) |
||
98 | { |
||
99 | dst->i[0] = (int)floorf(src->f[0] + 0.5f); |
||
100 | dst->i[1] = (int)floorf(src->f[1] + 0.5f); |
||
101 | dst->i[2] = (int)floorf(src->f[2] + 0.5f); |
||
102 | dst->i[3] = (int)floorf(src->f[3] + 0.5f); |
||
103 | } |
||
104 | |||
105 | static void |
||
106 | micro_ceil(union tgsi_exec_channel *dst, |
||
107 | const union tgsi_exec_channel *src) |
||
108 | { |
||
109 | dst->f[0] = ceilf(src->f[0]); |
||
110 | dst->f[1] = ceilf(src->f[1]); |
||
111 | dst->f[2] = ceilf(src->f[2]); |
||
112 | dst->f[3] = ceilf(src->f[3]); |
||
113 | } |
||
114 | |||
115 | static void |
||
116 | micro_clamp(union tgsi_exec_channel *dst, |
||
117 | const union tgsi_exec_channel *src0, |
||
118 | const union tgsi_exec_channel *src1, |
||
119 | const union tgsi_exec_channel *src2) |
||
120 | { |
||
121 | dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; |
||
122 | dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; |
||
123 | dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; |
||
124 | dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; |
||
125 | } |
||
126 | |||
127 | static void |
||
128 | micro_cmp(union tgsi_exec_channel *dst, |
||
129 | const union tgsi_exec_channel *src0, |
||
130 | const union tgsi_exec_channel *src1, |
||
131 | const union tgsi_exec_channel *src2) |
||
132 | { |
||
133 | dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; |
||
134 | dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; |
||
135 | dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; |
||
136 | dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; |
||
137 | } |
||
138 | |||
139 | static void |
||
140 | micro_cnd(union tgsi_exec_channel *dst, |
||
141 | const union tgsi_exec_channel *src0, |
||
142 | const union tgsi_exec_channel *src1, |
||
143 | const union tgsi_exec_channel *src2) |
||
144 | { |
||
145 | dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; |
||
146 | dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; |
||
147 | dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; |
||
148 | dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3]; |
||
149 | } |
||
150 | |||
151 | static void |
||
152 | micro_cos(union tgsi_exec_channel *dst, |
||
153 | const union tgsi_exec_channel *src) |
||
154 | { |
||
155 | dst->f[0] = cosf(src->f[0]); |
||
156 | dst->f[1] = cosf(src->f[1]); |
||
157 | dst->f[2] = cosf(src->f[2]); |
||
158 | dst->f[3] = cosf(src->f[3]); |
||
159 | } |
||
160 | |||
161 | static void |
||
162 | micro_ddx(union tgsi_exec_channel *dst, |
||
163 | const union tgsi_exec_channel *src) |
||
164 | { |
||
165 | dst->f[0] = |
||
166 | dst->f[1] = |
||
167 | dst->f[2] = |
||
168 | dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; |
||
169 | } |
||
170 | |||
171 | static void |
||
172 | micro_ddy(union tgsi_exec_channel *dst, |
||
173 | const union tgsi_exec_channel *src) |
||
174 | { |
||
175 | dst->f[0] = |
||
176 | dst->f[1] = |
||
177 | dst->f[2] = |
||
178 | dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; |
||
179 | } |
||
180 | |||
181 | static void |
||
182 | micro_exp2(union tgsi_exec_channel *dst, |
||
183 | const union tgsi_exec_channel *src) |
||
184 | { |
||
185 | #if FAST_MATH |
||
186 | dst->f[0] = util_fast_exp2(src->f[0]); |
||
187 | dst->f[1] = util_fast_exp2(src->f[1]); |
||
188 | dst->f[2] = util_fast_exp2(src->f[2]); |
||
189 | dst->f[3] = util_fast_exp2(src->f[3]); |
||
190 | #else |
||
191 | #if DEBUG |
||
192 | /* Inf is okay for this instruction, so clamp it to silence assertions. */ |
||
193 | uint i; |
||
194 | union tgsi_exec_channel clamped; |
||
195 | |||
196 | for (i = 0; i < 4; i++) { |
||
197 | if (src->f[i] > 127.99999f) { |
||
198 | clamped.f[i] = 127.99999f; |
||
199 | } else if (src->f[i] < -126.99999f) { |
||
200 | clamped.f[i] = -126.99999f; |
||
201 | } else { |
||
202 | clamped.f[i] = src->f[i]; |
||
203 | } |
||
204 | } |
||
205 | src = &clamped; |
||
206 | #endif /* DEBUG */ |
||
207 | |||
208 | dst->f[0] = powf(2.0f, src->f[0]); |
||
209 | dst->f[1] = powf(2.0f, src->f[1]); |
||
210 | dst->f[2] = powf(2.0f, src->f[2]); |
||
211 | dst->f[3] = powf(2.0f, src->f[3]); |
||
212 | #endif /* FAST_MATH */ |
||
213 | } |
||
214 | |||
215 | static void |
||
216 | micro_flr(union tgsi_exec_channel *dst, |
||
217 | const union tgsi_exec_channel *src) |
||
218 | { |
||
219 | dst->f[0] = floorf(src->f[0]); |
||
220 | dst->f[1] = floorf(src->f[1]); |
||
221 | dst->f[2] = floorf(src->f[2]); |
||
222 | dst->f[3] = floorf(src->f[3]); |
||
223 | } |
||
224 | |||
225 | static void |
||
226 | micro_frc(union tgsi_exec_channel *dst, |
||
227 | const union tgsi_exec_channel *src) |
||
228 | { |
||
229 | dst->f[0] = src->f[0] - floorf(src->f[0]); |
||
230 | dst->f[1] = src->f[1] - floorf(src->f[1]); |
||
231 | dst->f[2] = src->f[2] - floorf(src->f[2]); |
||
232 | dst->f[3] = src->f[3] - floorf(src->f[3]); |
||
233 | } |
||
234 | |||
235 | static void |
||
236 | micro_iabs(union tgsi_exec_channel *dst, |
||
237 | const union tgsi_exec_channel *src) |
||
238 | { |
||
239 | dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; |
||
240 | dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; |
||
241 | dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; |
||
242 | dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; |
||
243 | } |
||
244 | |||
245 | static void |
||
246 | micro_ineg(union tgsi_exec_channel *dst, |
||
247 | const union tgsi_exec_channel *src) |
||
248 | { |
||
249 | dst->i[0] = -src->i[0]; |
||
250 | dst->i[1] = -src->i[1]; |
||
251 | dst->i[2] = -src->i[2]; |
||
252 | dst->i[3] = -src->i[3]; |
||
253 | } |
||
254 | |||
255 | static void |
||
256 | micro_lg2(union tgsi_exec_channel *dst, |
||
257 | const union tgsi_exec_channel *src) |
||
258 | { |
||
259 | #if FAST_MATH |
||
260 | dst->f[0] = util_fast_log2(src->f[0]); |
||
261 | dst->f[1] = util_fast_log2(src->f[1]); |
||
262 | dst->f[2] = util_fast_log2(src->f[2]); |
||
263 | dst->f[3] = util_fast_log2(src->f[3]); |
||
264 | #else |
||
265 | dst->f[0] = logf(src->f[0]) * 1.442695f; |
||
266 | dst->f[1] = logf(src->f[1]) * 1.442695f; |
||
267 | dst->f[2] = logf(src->f[2]) * 1.442695f; |
||
268 | dst->f[3] = logf(src->f[3]) * 1.442695f; |
||
269 | #endif |
||
270 | } |
||
271 | |||
272 | static void |
||
273 | micro_lrp(union tgsi_exec_channel *dst, |
||
274 | const union tgsi_exec_channel *src0, |
||
275 | const union tgsi_exec_channel *src1, |
||
276 | const union tgsi_exec_channel *src2) |
||
277 | { |
||
278 | dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; |
||
279 | dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; |
||
280 | dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; |
||
281 | dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; |
||
282 | } |
||
283 | |||
284 | static void |
||
285 | micro_mad(union tgsi_exec_channel *dst, |
||
286 | const union tgsi_exec_channel *src0, |
||
287 | const union tgsi_exec_channel *src1, |
||
288 | const union tgsi_exec_channel *src2) |
||
289 | { |
||
290 | dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; |
||
291 | dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; |
||
292 | dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; |
||
293 | dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; |
||
294 | } |
||
295 | |||
296 | static void |
||
297 | micro_mov(union tgsi_exec_channel *dst, |
||
298 | const union tgsi_exec_channel *src) |
||
299 | { |
||
300 | dst->u[0] = src->u[0]; |
||
301 | dst->u[1] = src->u[1]; |
||
302 | dst->u[2] = src->u[2]; |
||
303 | dst->u[3] = src->u[3]; |
||
304 | } |
||
305 | |||
306 | static void |
||
307 | micro_rcp(union tgsi_exec_channel *dst, |
||
308 | const union tgsi_exec_channel *src) |
||
309 | { |
||
310 | #if 0 /* for debugging */ |
||
311 | assert(src->f[0] != 0.0f); |
||
312 | assert(src->f[1] != 0.0f); |
||
313 | assert(src->f[2] != 0.0f); |
||
314 | assert(src->f[3] != 0.0f); |
||
315 | #endif |
||
316 | dst->f[0] = 1.0f / src->f[0]; |
||
317 | dst->f[1] = 1.0f / src->f[1]; |
||
318 | dst->f[2] = 1.0f / src->f[2]; |
||
319 | dst->f[3] = 1.0f / src->f[3]; |
||
320 | } |
||
321 | |||
322 | static void |
||
323 | micro_rnd(union tgsi_exec_channel *dst, |
||
324 | const union tgsi_exec_channel *src) |
||
325 | { |
||
326 | dst->f[0] = floorf(src->f[0] + 0.5f); |
||
327 | dst->f[1] = floorf(src->f[1] + 0.5f); |
||
328 | dst->f[2] = floorf(src->f[2] + 0.5f); |
||
329 | dst->f[3] = floorf(src->f[3] + 0.5f); |
||
330 | } |
||
331 | |||
332 | static void |
||
333 | micro_rsq(union tgsi_exec_channel *dst, |
||
334 | const union tgsi_exec_channel *src) |
||
335 | { |
||
336 | #if 0 /* for debugging */ |
||
337 | assert(src->f[0] != 0.0f); |
||
338 | assert(src->f[1] != 0.0f); |
||
339 | assert(src->f[2] != 0.0f); |
||
340 | assert(src->f[3] != 0.0f); |
||
341 | #endif |
||
342 | dst->f[0] = 1.0f / sqrtf(src->f[0]); |
||
343 | dst->f[1] = 1.0f / sqrtf(src->f[1]); |
||
344 | dst->f[2] = 1.0f / sqrtf(src->f[2]); |
||
345 | dst->f[3] = 1.0f / sqrtf(src->f[3]); |
||
346 | } |
||
347 | |||
348 | static void |
||
349 | micro_sqrt(union tgsi_exec_channel *dst, |
||
350 | const union tgsi_exec_channel *src) |
||
351 | { |
||
352 | dst->f[0] = sqrtf(src->f[0]); |
||
353 | dst->f[1] = sqrtf(src->f[1]); |
||
354 | dst->f[2] = sqrtf(src->f[2]); |
||
355 | dst->f[3] = sqrtf(src->f[3]); |
||
356 | } |
||
357 | |||
358 | static void |
||
359 | micro_seq(union tgsi_exec_channel *dst, |
||
360 | const union tgsi_exec_channel *src0, |
||
361 | const union tgsi_exec_channel *src1) |
||
362 | { |
||
363 | dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; |
||
364 | dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; |
||
365 | dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; |
||
366 | dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; |
||
367 | } |
||
368 | |||
369 | static void |
||
370 | micro_sge(union tgsi_exec_channel *dst, |
||
371 | const union tgsi_exec_channel *src0, |
||
372 | const union tgsi_exec_channel *src1) |
||
373 | { |
||
374 | dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; |
||
375 | dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; |
||
376 | dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; |
||
377 | dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; |
||
378 | } |
||
379 | |||
380 | static void |
||
381 | micro_sgn(union tgsi_exec_channel *dst, |
||
382 | const union tgsi_exec_channel *src) |
||
383 | { |
||
384 | dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; |
||
385 | dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; |
||
386 | dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; |
||
387 | dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; |
||
388 | } |
||
389 | |||
390 | static void |
||
391 | micro_isgn(union tgsi_exec_channel *dst, |
||
392 | const union tgsi_exec_channel *src) |
||
393 | { |
||
394 | dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; |
||
395 | dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; |
||
396 | dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; |
||
397 | dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; |
||
398 | } |
||
399 | |||
400 | static void |
||
401 | micro_sgt(union tgsi_exec_channel *dst, |
||
402 | const union tgsi_exec_channel *src0, |
||
403 | const union tgsi_exec_channel *src1) |
||
404 | { |
||
405 | dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; |
||
406 | dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; |
||
407 | dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; |
||
408 | dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; |
||
409 | } |
||
410 | |||
411 | static void |
||
412 | micro_sin(union tgsi_exec_channel *dst, |
||
413 | const union tgsi_exec_channel *src) |
||
414 | { |
||
415 | dst->f[0] = sinf(src->f[0]); |
||
416 | dst->f[1] = sinf(src->f[1]); |
||
417 | dst->f[2] = sinf(src->f[2]); |
||
418 | dst->f[3] = sinf(src->f[3]); |
||
419 | } |
||
420 | |||
421 | static void |
||
422 | micro_sle(union tgsi_exec_channel *dst, |
||
423 | const union tgsi_exec_channel *src0, |
||
424 | const union tgsi_exec_channel *src1) |
||
425 | { |
||
426 | dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; |
||
427 | dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; |
||
428 | dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; |
||
429 | dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; |
||
430 | } |
||
431 | |||
432 | static void |
||
433 | micro_slt(union tgsi_exec_channel *dst, |
||
434 | const union tgsi_exec_channel *src0, |
||
435 | const union tgsi_exec_channel *src1) |
||
436 | { |
||
437 | dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; |
||
438 | dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; |
||
439 | dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; |
||
440 | dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; |
||
441 | } |
||
442 | |||
443 | static void |
||
444 | micro_sne(union tgsi_exec_channel *dst, |
||
445 | const union tgsi_exec_channel *src0, |
||
446 | const union tgsi_exec_channel *src1) |
||
447 | { |
||
448 | dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; |
||
449 | dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; |
||
450 | dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; |
||
451 | dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; |
||
452 | } |
||
453 | |||
454 | static void |
||
455 | micro_sfl(union tgsi_exec_channel *dst) |
||
456 | { |
||
457 | dst->f[0] = 0.0f; |
||
458 | dst->f[1] = 0.0f; |
||
459 | dst->f[2] = 0.0f; |
||
460 | dst->f[3] = 0.0f; |
||
461 | } |
||
462 | |||
463 | static void |
||
464 | micro_str(union tgsi_exec_channel *dst) |
||
465 | { |
||
466 | dst->f[0] = 1.0f; |
||
467 | dst->f[1] = 1.0f; |
||
468 | dst->f[2] = 1.0f; |
||
469 | dst->f[3] = 1.0f; |
||
470 | } |
||
471 | |||
472 | static void |
||
473 | micro_trunc(union tgsi_exec_channel *dst, |
||
474 | const union tgsi_exec_channel *src) |
||
475 | { |
||
476 | dst->f[0] = (float)(int)src->f[0]; |
||
477 | dst->f[1] = (float)(int)src->f[1]; |
||
478 | dst->f[2] = (float)(int)src->f[2]; |
||
479 | dst->f[3] = (float)(int)src->f[3]; |
||
480 | } |
||
481 | |||
482 | |||
/**
 * Data type tags used by the executor.
 * NOTE(review): presumably they select which view of a channel (float,
 * signed or unsigned integer) an operand uses - the consumers are not
 * visible in this part of the file.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,
   TGSI_EXEC_DATA_INT   = 1,
   TGSI_EXEC_DATA_UINT  = 2
};
||
488 | |||
/*
 * Shorthand locations of various utility registers (_I = Index, _C = Channel).
 * Each name is a plain alias for the corresponding TGSI_EXEC_TEMP_* constant.
 */
#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
||
498 | |||
499 | |||
/**
 * Recompute the execution mask of a machine: a lane stays enabled only if it
 * is enabled in every one of the condition, loop, continue, switch and
 * function masks.
 *
 * MACH is a pointer expression; it is expanded several times, so it must not
 * have side effects. The argument and the whole expansion are parenthesized
 * so that expressions such as "p + 1" expand correctly.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->Switch.mask & \
                          (MACH)->FuncMask)
||
503 | |||
504 | |||
505 | static const union tgsi_exec_channel ZeroVec = |
||
506 | { { 0.0, 0.0, 0.0, 0.0 } }; |
||
507 | |||
508 | static const union tgsi_exec_channel OneVec = { |
||
509 | {1.0f, 1.0f, 1.0f, 1.0f} |
||
510 | }; |
||
511 | |||
512 | static const union tgsi_exec_channel P128Vec = { |
||
513 | {128.0f, 128.0f, 128.0f, 128.0f} |
||
514 | }; |
||
515 | |||
516 | static const union tgsi_exec_channel M128Vec = { |
||
517 | {-128.0f, -128.0f, -128.0f, -128.0f} |
||
518 | }; |
||
519 | |||
520 | |||
521 | /** |
||
522 | * Assert that none of the float values in 'chan' are infinite or NaN. |
||
523 | * NaN and Inf may occur normally during program execution and should |
||
524 | * not lead to crashes, etc. But when debugging, it's helpful to catch |
||
525 | * them. |
||
526 | */ |
||
527 | static INLINE void |
||
528 | check_inf_or_nan(const union tgsi_exec_channel *chan) |
||
529 | { |
||
530 | assert(!util_is_inf_or_nan((chan)->f[0])); |
||
531 | assert(!util_is_inf_or_nan((chan)->f[1])); |
||
532 | assert(!util_is_inf_or_nan((chan)->f[2])); |
||
533 | assert(!util_is_inf_or_nan((chan)->f[3])); |
||
534 | } |
||
535 | |||
536 | |||
#ifdef DEBUG
/** Debug helper: print the four float lanes of 'chan', labelled with 'msg'. */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg,
                chan->f[0],
                chan->f[1],
                chan->f[2],
                chan->f[3]);
}
#endif
||
545 | |||
546 | |||
#ifdef DEBUG
/**
 * Debug helper: dump temporary register 'index' of the machine, one line per
 * X/Y/Z/W component with its four float lanes.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char componentNames[] = "XYZW";
   const struct tgsi_exec_vector *tmp = &mach->Temps[index];
   int comp = 0;

   debug_printf("Temp[%u] =\n", index);
   while (comp < 4) {
      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   componentNames[comp],
                   tmp->xyzw[comp].f[0],
                   tmp->xyzw[comp].f[1],
                   tmp->xyzw[comp].f[2],
                   tmp->xyzw[comp].f[3]);
      comp++;
   }
}
#endif
||
564 | |||
565 | |||
566 | void |
||
567 | tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, |
||
568 | unsigned num_bufs, |
||
569 | const void **bufs, |
||
570 | const unsigned *buf_sizes) |
||
571 | { |
||
572 | unsigned i; |
||
573 | |||
574 | for (i = 0; i < num_bufs; i++) { |
||
575 | mach->Consts[i] = bufs[i]; |
||
576 | mach->ConstsSize[i] = buf_sizes[i]; |
||
577 | } |
||
578 | } |
||
579 | |||
580 | |||
581 | /** |
||
582 | * Check if there's a potential src/dst register data dependency when |
||
583 | * using SOA execution. |
||
584 | * Example: |
||
585 | * MOV T, T.yxwz; |
||
586 | * This would expand into: |
||
587 | * MOV t0, t1; |
||
588 | * MOV t1, t0; |
||
589 | * MOV t2, t3; |
||
590 | * MOV t3, t2; |
||
591 | * The second instruction will have the wrong value for t0 if executed as-is. |
||
592 | */ |
||
593 | boolean |
||
594 | tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) |
||
595 | { |
||
596 | uint i, chan; |
||
597 | |||
598 | uint writemask = inst->Dst[0].Register.WriteMask; |
||
599 | if (writemask == TGSI_WRITEMASK_X || |
||
600 | writemask == TGSI_WRITEMASK_Y || |
||
601 | writemask == TGSI_WRITEMASK_Z || |
||
602 | writemask == TGSI_WRITEMASK_W || |
||
603 | writemask == TGSI_WRITEMASK_NONE) { |
||
604 | /* no chance of data dependency */ |
||
605 | return FALSE; |
||
606 | } |
||
607 | |||
608 | /* loop over src regs */ |
||
609 | for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { |
||
610 | if ((inst->Src[i].Register.File == |
||
611 | inst->Dst[0].Register.File) && |
||
612 | ((inst->Src[i].Register.Index == |
||
613 | inst->Dst[0].Register.Index) || |
||
614 | inst->Src[i].Register.Indirect || |
||
615 | inst->Dst[0].Register.Indirect)) { |
||
616 | /* loop over dest channels */ |
||
617 | uint channelsWritten = 0x0; |
||
618 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
619 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
620 | /* check if we're reading a channel that's been written */ |
||
621 | uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); |
||
622 | if (channelsWritten & (1 << swizzle)) { |
||
623 | return TRUE; |
||
624 | } |
||
625 | |||
626 | channelsWritten |= (1 << chan); |
||
627 | } |
||
628 | } |
||
629 | } |
||
630 | } |
||
631 | return FALSE; |
||
632 | } |
||
633 | |||
634 | |||
635 | /** |
||
636 | * Initialize machine state by expanding tokens to full instructions, |
||
637 | * allocating temporary storage, setting up constants, etc. |
||
638 | * After this, we can call tgsi_exec_machine_run() many times. |
||
639 | */ |
||
640 | void |
||
641 | tgsi_exec_machine_bind_shader( |
||
642 | struct tgsi_exec_machine *mach, |
||
643 | const struct tgsi_token *tokens, |
||
644 | struct tgsi_sampler *sampler) |
||
645 | { |
||
646 | uint k; |
||
647 | struct tgsi_parse_context parse; |
||
648 | struct tgsi_full_instruction *instructions; |
||
649 | struct tgsi_full_declaration *declarations; |
||
650 | uint maxInstructions = 10, numInstructions = 0; |
||
651 | uint maxDeclarations = 10, numDeclarations = 0; |
||
652 | |||
653 | #if 0 |
||
654 | tgsi_dump(tokens, 0); |
||
655 | #endif |
||
656 | |||
657 | util_init_math(); |
||
658 | |||
659 | |||
660 | mach->Tokens = tokens; |
||
661 | mach->Sampler = sampler; |
||
662 | |||
663 | if (!tokens) { |
||
664 | /* unbind and free all */ |
||
665 | FREE(mach->Declarations); |
||
666 | mach->Declarations = NULL; |
||
667 | mach->NumDeclarations = 0; |
||
668 | |||
669 | FREE(mach->Instructions); |
||
670 | mach->Instructions = NULL; |
||
671 | mach->NumInstructions = 0; |
||
672 | |||
673 | return; |
||
674 | } |
||
675 | |||
676 | k = tgsi_parse_init (&parse, mach->Tokens); |
||
677 | if (k != TGSI_PARSE_OK) { |
||
678 | debug_printf( "Problem parsing!\n" ); |
||
679 | return; |
||
680 | } |
||
681 | |||
682 | mach->Processor = parse.FullHeader.Processor.Processor; |
||
683 | mach->ImmLimit = 0; |
||
684 | mach->NumOutputs = 0; |
||
685 | |||
686 | if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && |
||
687 | !mach->UsedGeometryShader) { |
||
688 | struct tgsi_exec_vector *inputs; |
||
689 | struct tgsi_exec_vector *outputs; |
||
690 | |||
691 | inputs = align_malloc(sizeof(struct tgsi_exec_vector) * |
||
692 | TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS, |
||
693 | 16); |
||
694 | |||
695 | if (!inputs) |
||
696 | return; |
||
697 | |||
698 | outputs = align_malloc(sizeof(struct tgsi_exec_vector) * |
||
699 | TGSI_MAX_TOTAL_VERTICES, 16); |
||
700 | |||
701 | if (!outputs) { |
||
702 | align_free(inputs); |
||
703 | return; |
||
704 | } |
||
705 | |||
706 | align_free(mach->Inputs); |
||
707 | align_free(mach->Outputs); |
||
708 | |||
709 | mach->Inputs = inputs; |
||
710 | mach->Outputs = outputs; |
||
711 | mach->UsedGeometryShader = TRUE; |
||
712 | } |
||
713 | |||
714 | declarations = (struct tgsi_full_declaration *) |
||
715 | MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); |
||
716 | |||
717 | if (!declarations) { |
||
718 | return; |
||
719 | } |
||
720 | |||
721 | instructions = (struct tgsi_full_instruction *) |
||
722 | MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); |
||
723 | |||
724 | if (!instructions) { |
||
725 | FREE( declarations ); |
||
726 | return; |
||
727 | } |
||
728 | |||
729 | while( !tgsi_parse_end_of_tokens( &parse ) ) { |
||
730 | uint i; |
||
731 | |||
732 | tgsi_parse_token( &parse ); |
||
733 | switch( parse.FullToken.Token.Type ) { |
||
734 | case TGSI_TOKEN_TYPE_DECLARATION: |
||
735 | /* save expanded declaration */ |
||
736 | if (numDeclarations == maxDeclarations) { |
||
737 | declarations = REALLOC(declarations, |
||
738 | maxDeclarations |
||
739 | * sizeof(struct tgsi_full_declaration), |
||
740 | (maxDeclarations + 10) |
||
741 | * sizeof(struct tgsi_full_declaration)); |
||
742 | maxDeclarations += 10; |
||
743 | } |
||
744 | if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { |
||
745 | unsigned reg; |
||
746 | for (reg = parse.FullToken.FullDeclaration.Range.First; |
||
747 | reg <= parse.FullToken.FullDeclaration.Range.Last; |
||
748 | ++reg) { |
||
749 | ++mach->NumOutputs; |
||
750 | } |
||
751 | } |
||
752 | memcpy(declarations + numDeclarations, |
||
753 | &parse.FullToken.FullDeclaration, |
||
754 | sizeof(declarations[0])); |
||
755 | numDeclarations++; |
||
756 | break; |
||
757 | |||
758 | case TGSI_TOKEN_TYPE_IMMEDIATE: |
||
759 | { |
||
760 | uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; |
||
761 | assert( size <= 4 ); |
||
762 | assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); |
||
763 | |||
764 | for( i = 0; i < size; i++ ) { |
||
765 | mach->Imms[mach->ImmLimit][i] = |
||
766 | parse.FullToken.FullImmediate.u[i].Float; |
||
767 | } |
||
768 | mach->ImmLimit += 1; |
||
769 | } |
||
770 | break; |
||
771 | |||
772 | case TGSI_TOKEN_TYPE_INSTRUCTION: |
||
773 | |||
774 | /* save expanded instruction */ |
||
775 | if (numInstructions == maxInstructions) { |
||
776 | instructions = REALLOC(instructions, |
||
777 | maxInstructions |
||
778 | * sizeof(struct tgsi_full_instruction), |
||
779 | (maxInstructions + 10) |
||
780 | * sizeof(struct tgsi_full_instruction)); |
||
781 | maxInstructions += 10; |
||
782 | } |
||
783 | |||
784 | memcpy(instructions + numInstructions, |
||
785 | &parse.FullToken.FullInstruction, |
||
786 | sizeof(instructions[0])); |
||
787 | |||
788 | numInstructions++; |
||
789 | break; |
||
790 | |||
791 | case TGSI_TOKEN_TYPE_PROPERTY: |
||
792 | break; |
||
793 | |||
794 | default: |
||
795 | assert( 0 ); |
||
796 | } |
||
797 | } |
||
798 | tgsi_parse_free (&parse); |
||
799 | |||
800 | FREE(mach->Declarations); |
||
801 | mach->Declarations = declarations; |
||
802 | mach->NumDeclarations = numDeclarations; |
||
803 | |||
804 | FREE(mach->Instructions); |
||
805 | mach->Instructions = instructions; |
||
806 | mach->NumInstructions = numInstructions; |
||
807 | } |
||
808 | |||
809 | |||
810 | struct tgsi_exec_machine * |
||
811 | tgsi_exec_machine_create( void ) |
||
812 | { |
||
813 | struct tgsi_exec_machine *mach; |
||
814 | uint i; |
||
815 | |||
816 | mach = align_malloc( sizeof *mach, 16 ); |
||
817 | if (!mach) |
||
818 | goto fail; |
||
819 | |||
820 | memset(mach, 0, sizeof(*mach)); |
||
821 | |||
822 | mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; |
||
823 | mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; |
||
824 | mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; |
||
825 | |||
826 | mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); |
||
827 | mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); |
||
828 | if (!mach->Inputs || !mach->Outputs) |
||
829 | goto fail; |
||
830 | |||
831 | /* Setup constants needed by the SSE2 executor. */ |
||
832 | for( i = 0; i < 4; i++ ) { |
||
833 | mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; |
||
834 | mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; |
||
835 | mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; |
||
836 | mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ |
||
837 | mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; |
||
838 | mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ |
||
839 | mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; |
||
840 | mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; |
||
841 | mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; |
||
842 | mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; |
||
843 | } |
||
844 | |||
845 | #ifdef DEBUG |
||
846 | /* silence warnings */ |
||
847 | (void) print_chan; |
||
848 | (void) print_temp; |
||
849 | #endif |
||
850 | |||
851 | return mach; |
||
852 | |||
853 | fail: |
||
854 | if (mach) { |
||
855 | align_free(mach->Inputs); |
||
856 | align_free(mach->Outputs); |
||
857 | align_free(mach); |
||
858 | } |
||
859 | return NULL; |
||
860 | } |
||
861 | |||
862 | |||
863 | void |
||
864 | tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) |
||
865 | { |
||
866 | if (mach) { |
||
867 | FREE(mach->Instructions); |
||
868 | FREE(mach->Declarations); |
||
869 | |||
870 | align_free(mach->Inputs); |
||
871 | align_free(mach->Outputs); |
||
872 | |||
873 | align_free(mach); |
||
874 | } |
||
875 | } |
||
876 | |||
877 | static void |
||
878 | micro_add(union tgsi_exec_channel *dst, |
||
879 | const union tgsi_exec_channel *src0, |
||
880 | const union tgsi_exec_channel *src1) |
||
881 | { |
||
882 | dst->f[0] = src0->f[0] + src1->f[0]; |
||
883 | dst->f[1] = src0->f[1] + src1->f[1]; |
||
884 | dst->f[2] = src0->f[2] + src1->f[2]; |
||
885 | dst->f[3] = src0->f[3] + src1->f[3]; |
||
886 | } |
||
887 | |||
888 | static void |
||
889 | micro_div( |
||
890 | union tgsi_exec_channel *dst, |
||
891 | const union tgsi_exec_channel *src0, |
||
892 | const union tgsi_exec_channel *src1 ) |
||
893 | { |
||
894 | if (src1->f[0] != 0) { |
||
895 | dst->f[0] = src0->f[0] / src1->f[0]; |
||
896 | } |
||
897 | if (src1->f[1] != 0) { |
||
898 | dst->f[1] = src0->f[1] / src1->f[1]; |
||
899 | } |
||
900 | if (src1->f[2] != 0) { |
||
901 | dst->f[2] = src0->f[2] / src1->f[2]; |
||
902 | } |
||
903 | if (src1->f[3] != 0) { |
||
904 | dst->f[3] = src0->f[3] / src1->f[3]; |
||
905 | } |
||
906 | } |
||
907 | |||
908 | static void |
||
909 | micro_rcc(union tgsi_exec_channel *dst, |
||
910 | const union tgsi_exec_channel *src) |
||
911 | { |
||
912 | uint i; |
||
913 | |||
914 | for (i = 0; i < 4; i++) { |
||
915 | float recip = 1.0f / src->f[i]; |
||
916 | |||
917 | if (recip > 0.0f) { |
||
918 | if (recip > 1.884467e+019f) { |
||
919 | dst->f[i] = 1.884467e+019f; |
||
920 | } |
||
921 | else if (recip < 5.42101e-020f) { |
||
922 | dst->f[i] = 5.42101e-020f; |
||
923 | } |
||
924 | else { |
||
925 | dst->f[i] = recip; |
||
926 | } |
||
927 | } |
||
928 | else { |
||
929 | if (recip < -1.884467e+019f) { |
||
930 | dst->f[i] = -1.884467e+019f; |
||
931 | } |
||
932 | else if (recip > -5.42101e-020f) { |
||
933 | dst->f[i] = -5.42101e-020f; |
||
934 | } |
||
935 | else { |
||
936 | dst->f[i] = recip; |
||
937 | } |
||
938 | } |
||
939 | } |
||
940 | } |
||
941 | |||
942 | static void |
||
943 | micro_lt( |
||
944 | union tgsi_exec_channel *dst, |
||
945 | const union tgsi_exec_channel *src0, |
||
946 | const union tgsi_exec_channel *src1, |
||
947 | const union tgsi_exec_channel *src2, |
||
948 | const union tgsi_exec_channel *src3 ) |
||
949 | { |
||
950 | dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; |
||
951 | dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; |
||
952 | dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; |
||
953 | dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; |
||
954 | } |
||
955 | |||
956 | static void |
||
957 | micro_max(union tgsi_exec_channel *dst, |
||
958 | const union tgsi_exec_channel *src0, |
||
959 | const union tgsi_exec_channel *src1) |
||
960 | { |
||
961 | dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; |
||
962 | dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; |
||
963 | dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; |
||
964 | dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; |
||
965 | } |
||
966 | |||
967 | static void |
||
968 | micro_min(union tgsi_exec_channel *dst, |
||
969 | const union tgsi_exec_channel *src0, |
||
970 | const union tgsi_exec_channel *src1) |
||
971 | { |
||
972 | dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; |
||
973 | dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; |
||
974 | dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; |
||
975 | dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; |
||
976 | } |
||
977 | |||
978 | static void |
||
979 | micro_mul(union tgsi_exec_channel *dst, |
||
980 | const union tgsi_exec_channel *src0, |
||
981 | const union tgsi_exec_channel *src1) |
||
982 | { |
||
983 | dst->f[0] = src0->f[0] * src1->f[0]; |
||
984 | dst->f[1] = src0->f[1] * src1->f[1]; |
||
985 | dst->f[2] = src0->f[2] * src1->f[2]; |
||
986 | dst->f[3] = src0->f[3] * src1->f[3]; |
||
987 | } |
||
988 | |||
989 | static void |
||
990 | micro_neg( |
||
991 | union tgsi_exec_channel *dst, |
||
992 | const union tgsi_exec_channel *src ) |
||
993 | { |
||
994 | dst->f[0] = -src->f[0]; |
||
995 | dst->f[1] = -src->f[1]; |
||
996 | dst->f[2] = -src->f[2]; |
||
997 | dst->f[3] = -src->f[3]; |
||
998 | } |
||
999 | |||
1000 | static void |
||
1001 | micro_pow( |
||
1002 | union tgsi_exec_channel *dst, |
||
1003 | const union tgsi_exec_channel *src0, |
||
1004 | const union tgsi_exec_channel *src1 ) |
||
1005 | { |
||
1006 | #if FAST_MATH |
||
1007 | dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); |
||
1008 | dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); |
||
1009 | dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); |
||
1010 | dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); |
||
1011 | #else |
||
1012 | dst->f[0] = powf( src0->f[0], src1->f[0] ); |
||
1013 | dst->f[1] = powf( src0->f[1], src1->f[1] ); |
||
1014 | dst->f[2] = powf( src0->f[2], src1->f[2] ); |
||
1015 | dst->f[3] = powf( src0->f[3], src1->f[3] ); |
||
1016 | #endif |
||
1017 | } |
||
1018 | |||
1019 | static void |
||
1020 | micro_sub(union tgsi_exec_channel *dst, |
||
1021 | const union tgsi_exec_channel *src0, |
||
1022 | const union tgsi_exec_channel *src1) |
||
1023 | { |
||
1024 | dst->f[0] = src0->f[0] - src1->f[0]; |
||
1025 | dst->f[1] = src0->f[1] - src1->f[1]; |
||
1026 | dst->f[2] = src0->f[2] - src1->f[2]; |
||
1027 | dst->f[3] = src0->f[3] - src1->f[3]; |
||
1028 | } |
||
1029 | |||
1030 | static void |
||
1031 | fetch_src_file_channel(const struct tgsi_exec_machine *mach, |
||
1032 | const uint chan_index, |
||
1033 | const uint file, |
||
1034 | const uint swizzle, |
||
1035 | const union tgsi_exec_channel *index, |
||
1036 | const union tgsi_exec_channel *index2D, |
||
1037 | union tgsi_exec_channel *chan) |
||
1038 | { |
||
1039 | uint i; |
||
1040 | |||
1041 | assert(swizzle < 4); |
||
1042 | |||
1043 | switch (file) { |
||
1044 | case TGSI_FILE_CONSTANT: |
||
1045 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1046 | assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); |
||
1047 | assert(mach->Consts[index2D->i[i]]); |
||
1048 | |||
1049 | if (index->i[i] < 0) { |
||
1050 | chan->u[i] = 0; |
||
1051 | } else { |
||
1052 | /* NOTE: copying the const value as a uint instead of float */ |
||
1053 | const uint constbuf = index2D->i[i]; |
||
1054 | const uint *buf = (const uint *)mach->Consts[constbuf]; |
||
1055 | const int pos = index->i[i] * 4 + swizzle; |
||
1056 | /* const buffer bounds check */ |
||
1057 | if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) { |
||
1058 | if (0) { |
||
1059 | /* Debug: print warning */ |
||
1060 | static int count = 0; |
||
1061 | if (count++ < 100) |
||
1062 | debug_printf("TGSI Exec: const buffer index %d" |
||
1063 | " out of bounds\n", pos); |
||
1064 | } |
||
1065 | chan->u[i] = 0; |
||
1066 | } |
||
1067 | else |
||
1068 | chan->u[i] = buf[pos]; |
||
1069 | } |
||
1070 | } |
||
1071 | break; |
||
1072 | |||
1073 | case TGSI_FILE_INPUT: |
||
1074 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1075 | /* |
||
1076 | if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { |
||
1077 | debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", |
||
1078 | index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], |
||
1079 | index2D->i[i], index->i[i]); |
||
1080 | }*/ |
||
1081 | int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; |
||
1082 | assert(pos >= 0); |
||
1083 | assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); |
||
1084 | chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; |
||
1085 | } |
||
1086 | break; |
||
1087 | |||
1088 | case TGSI_FILE_SYSTEM_VALUE: |
||
1089 | /* XXX no swizzling at this point. Will be needed if we put |
||
1090 | * gl_FragCoord, for example, in a sys value register. |
||
1091 | */ |
||
1092 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1093 | chan->u[i] = mach->SystemValue[index->i[i]].u[i]; |
||
1094 | } |
||
1095 | break; |
||
1096 | |||
1097 | case TGSI_FILE_TEMPORARY: |
||
1098 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1099 | assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); |
||
1100 | assert(index2D->i[i] == 0); |
||
1101 | |||
1102 | chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; |
||
1103 | } |
||
1104 | break; |
||
1105 | |||
1106 | case TGSI_FILE_IMMEDIATE: |
||
1107 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1108 | assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); |
||
1109 | assert(index2D->i[i] == 0); |
||
1110 | |||
1111 | chan->f[i] = mach->Imms[index->i[i]][swizzle]; |
||
1112 | } |
||
1113 | break; |
||
1114 | |||
1115 | case TGSI_FILE_ADDRESS: |
||
1116 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1117 | assert(index->i[i] >= 0); |
||
1118 | assert(index2D->i[i] == 0); |
||
1119 | |||
1120 | chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; |
||
1121 | } |
||
1122 | break; |
||
1123 | |||
1124 | case TGSI_FILE_PREDICATE: |
||
1125 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1126 | assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); |
||
1127 | assert(index2D->i[i] == 0); |
||
1128 | |||
1129 | chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; |
||
1130 | } |
||
1131 | break; |
||
1132 | |||
1133 | case TGSI_FILE_OUTPUT: |
||
1134 | /* vertex/fragment output vars can be read too */ |
||
1135 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1136 | assert(index->i[i] >= 0); |
||
1137 | assert(index2D->i[i] == 0); |
||
1138 | |||
1139 | chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; |
||
1140 | } |
||
1141 | break; |
||
1142 | |||
1143 | default: |
||
1144 | assert(0); |
||
1145 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1146 | chan->u[i] = 0; |
||
1147 | } |
||
1148 | } |
||
1149 | } |
||
1150 | |||
1151 | static void |
||
1152 | fetch_source(const struct tgsi_exec_machine *mach, |
||
1153 | union tgsi_exec_channel *chan, |
||
1154 | const struct tgsi_full_src_register *reg, |
||
1155 | const uint chan_index, |
||
1156 | enum tgsi_exec_datatype src_datatype) |
||
1157 | { |
||
1158 | union tgsi_exec_channel index; |
||
1159 | union tgsi_exec_channel index2D; |
||
1160 | uint swizzle; |
||
1161 | |||
1162 | /* We start with a direct index into a register file. |
||
1163 | * |
||
1164 | * file[1], |
||
1165 | * where: |
||
1166 | * file = Register.File |
||
1167 | * [1] = Register.Index |
||
1168 | */ |
||
1169 | index.i[0] = |
||
1170 | index.i[1] = |
||
1171 | index.i[2] = |
||
1172 | index.i[3] = reg->Register.Index; |
||
1173 | |||
1174 | /* There is an extra source register that indirectly subscripts |
||
1175 | * a register file. The direct index now becomes an offset |
||
1176 | * that is being added to the indirect register. |
||
1177 | * |
||
1178 | * file[ind[2].x+1], |
||
1179 | * where: |
||
1180 | * ind = Indirect.File |
||
1181 | * [2] = Indirect.Index |
||
1182 | * .x = Indirect.SwizzleX |
||
1183 | */ |
||
1184 | if (reg->Register.Indirect) { |
||
1185 | union tgsi_exec_channel index2; |
||
1186 | union tgsi_exec_channel indir_index; |
||
1187 | const uint execmask = mach->ExecMask; |
||
1188 | uint i; |
||
1189 | |||
1190 | /* which address register (always zero now) */ |
||
1191 | index2.i[0] = |
||
1192 | index2.i[1] = |
||
1193 | index2.i[2] = |
||
1194 | index2.i[3] = reg->Indirect.Index; |
||
1195 | /* get current value of address register[swizzle] */ |
||
1196 | swizzle = reg->Indirect.Swizzle; |
||
1197 | fetch_src_file_channel(mach, |
||
1198 | chan_index, |
||
1199 | reg->Indirect.File, |
||
1200 | swizzle, |
||
1201 | &index2, |
||
1202 | &ZeroVec, |
||
1203 | &indir_index); |
||
1204 | |||
1205 | /* add value of address register to the offset */ |
||
1206 | index.i[0] += indir_index.i[0]; |
||
1207 | index.i[1] += indir_index.i[1]; |
||
1208 | index.i[2] += indir_index.i[2]; |
||
1209 | index.i[3] += indir_index.i[3]; |
||
1210 | |||
1211 | /* for disabled execution channels, zero-out the index to |
||
1212 | * avoid using a potential garbage value. |
||
1213 | */ |
||
1214 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1215 | if ((execmask & (1 << i)) == 0) |
||
1216 | index.i[i] = 0; |
||
1217 | } |
||
1218 | } |
||
1219 | |||
1220 | /* There is an extra source register that is a second |
||
1221 | * subscript to a register file. Effectively it means that |
||
1222 | * the register file is actually a 2D array of registers. |
||
1223 | * |
||
1224 | * file[3][1], |
||
1225 | * where: |
||
1226 | * [3] = Dimension.Index |
||
1227 | */ |
||
1228 | if (reg->Register.Dimension) { |
||
1229 | index2D.i[0] = |
||
1230 | index2D.i[1] = |
||
1231 | index2D.i[2] = |
||
1232 | index2D.i[3] = reg->Dimension.Index; |
||
1233 | |||
1234 | /* Again, the second subscript index can be addressed indirectly |
||
1235 | * identically to the first one. |
||
1236 | * Nothing stops us from indirectly addressing the indirect register, |
||
1237 | * but there is no need for that, so we won't exercise it. |
||
1238 | * |
||
1239 | * file[ind[4].y+3][1], |
||
1240 | * where: |
||
1241 | * ind = DimIndirect.File |
||
1242 | * [4] = DimIndirect.Index |
||
1243 | * .y = DimIndirect.SwizzleX |
||
1244 | */ |
||
1245 | if (reg->Dimension.Indirect) { |
||
1246 | union tgsi_exec_channel index2; |
||
1247 | union tgsi_exec_channel indir_index; |
||
1248 | const uint execmask = mach->ExecMask; |
||
1249 | uint i; |
||
1250 | |||
1251 | index2.i[0] = |
||
1252 | index2.i[1] = |
||
1253 | index2.i[2] = |
||
1254 | index2.i[3] = reg->DimIndirect.Index; |
||
1255 | |||
1256 | swizzle = reg->DimIndirect.Swizzle; |
||
1257 | fetch_src_file_channel(mach, |
||
1258 | chan_index, |
||
1259 | reg->DimIndirect.File, |
||
1260 | swizzle, |
||
1261 | &index2, |
||
1262 | &ZeroVec, |
||
1263 | &indir_index); |
||
1264 | |||
1265 | index2D.i[0] += indir_index.i[0]; |
||
1266 | index2D.i[1] += indir_index.i[1]; |
||
1267 | index2D.i[2] += indir_index.i[2]; |
||
1268 | index2D.i[3] += indir_index.i[3]; |
||
1269 | |||
1270 | /* for disabled execution channels, zero-out the index to |
||
1271 | * avoid using a potential garbage value. |
||
1272 | */ |
||
1273 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1274 | if ((execmask & (1 << i)) == 0) { |
||
1275 | index2D.i[i] = 0; |
||
1276 | } |
||
1277 | } |
||
1278 | } |
||
1279 | |||
1280 | /* If by any chance there was a need for a 3D array of register |
||
1281 | * files, we would have to check whether Dimension is followed |
||
1282 | * by a dimension register and continue the saga. |
||
1283 | */ |
||
1284 | } else { |
||
1285 | index2D.i[0] = |
||
1286 | index2D.i[1] = |
||
1287 | index2D.i[2] = |
||
1288 | index2D.i[3] = 0; |
||
1289 | } |
||
1290 | |||
1291 | swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); |
||
1292 | fetch_src_file_channel(mach, |
||
1293 | chan_index, |
||
1294 | reg->Register.File, |
||
1295 | swizzle, |
||
1296 | &index, |
||
1297 | &index2D, |
||
1298 | chan); |
||
1299 | |||
1300 | if (reg->Register.Absolute) { |
||
1301 | if (src_datatype == TGSI_EXEC_DATA_FLOAT) { |
||
1302 | micro_abs(chan, chan); |
||
1303 | } else { |
||
1304 | micro_iabs(chan, chan); |
||
1305 | } |
||
1306 | } |
||
1307 | |||
1308 | if (reg->Register.Negate) { |
||
1309 | if (src_datatype == TGSI_EXEC_DATA_FLOAT) { |
||
1310 | micro_neg(chan, chan); |
||
1311 | } else { |
||
1312 | micro_ineg(chan, chan); |
||
1313 | } |
||
1314 | } |
||
1315 | } |
||
1316 | |||
1317 | static void |
||
1318 | store_dest(struct tgsi_exec_machine *mach, |
||
1319 | const union tgsi_exec_channel *chan, |
||
1320 | const struct tgsi_full_dst_register *reg, |
||
1321 | const struct tgsi_full_instruction *inst, |
||
1322 | uint chan_index, |
||
1323 | enum tgsi_exec_datatype dst_datatype) |
||
1324 | { |
||
1325 | uint i; |
||
1326 | union tgsi_exec_channel null; |
||
1327 | union tgsi_exec_channel *dst; |
||
1328 | union tgsi_exec_channel index2D; |
||
1329 | uint execmask = mach->ExecMask; |
||
1330 | int offset = 0; /* indirection offset */ |
||
1331 | int index; |
||
1332 | |||
1333 | /* for debugging */ |
||
1334 | if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { |
||
1335 | check_inf_or_nan(chan); |
||
1336 | } |
||
1337 | |||
1338 | /* There is an extra source register that indirectly subscripts |
||
1339 | * a register file. The direct index now becomes an offset |
||
1340 | * that is being added to the indirect register. |
||
1341 | * |
||
1342 | * file[ind[2].x+1], |
||
1343 | * where: |
||
1344 | * ind = Indirect.File |
||
1345 | * [2] = Indirect.Index |
||
1346 | * .x = Indirect.SwizzleX |
||
1347 | */ |
||
1348 | if (reg->Register.Indirect) { |
||
1349 | union tgsi_exec_channel index; |
||
1350 | union tgsi_exec_channel indir_index; |
||
1351 | uint swizzle; |
||
1352 | |||
1353 | /* which address register (always zero for now) */ |
||
1354 | index.i[0] = |
||
1355 | index.i[1] = |
||
1356 | index.i[2] = |
||
1357 | index.i[3] = reg->Indirect.Index; |
||
1358 | |||
1359 | /* get current value of address register[swizzle] */ |
||
1360 | swizzle = reg->Indirect.Swizzle; |
||
1361 | |||
1362 | /* fetch values from the address/indirection register */ |
||
1363 | fetch_src_file_channel(mach, |
||
1364 | chan_index, |
||
1365 | reg->Indirect.File, |
||
1366 | swizzle, |
||
1367 | &index, |
||
1368 | &ZeroVec, |
||
1369 | &indir_index); |
||
1370 | |||
1371 | /* save indirection offset */ |
||
1372 | offset = indir_index.i[0]; |
||
1373 | } |
||
1374 | |||
1375 | /* There is an extra source register that is a second |
||
1376 | * subscript to a register file. Effectively it means that |
||
1377 | * the register file is actually a 2D array of registers. |
||
1378 | * |
||
1379 | * file[3][1], |
||
1380 | * where: |
||
1381 | * [3] = Dimension.Index |
||
1382 | */ |
||
1383 | if (reg->Register.Dimension) { |
||
1384 | index2D.i[0] = |
||
1385 | index2D.i[1] = |
||
1386 | index2D.i[2] = |
||
1387 | index2D.i[3] = reg->Dimension.Index; |
||
1388 | |||
1389 | /* Again, the second subscript index can be addressed indirectly |
||
1390 | * identically to the first one. |
||
1391 | * Nothing stops us from indirectly addressing the indirect register, |
||
1392 | * but there is no need for that, so we won't exercise it. |
||
1393 | * |
||
1394 | * file[ind[4].y+3][1], |
||
1395 | * where: |
||
1396 | * ind = DimIndirect.File |
||
1397 | * [4] = DimIndirect.Index |
||
1398 | * .y = DimIndirect.SwizzleX |
||
1399 | */ |
||
1400 | if (reg->Dimension.Indirect) { |
||
1401 | union tgsi_exec_channel index2; |
||
1402 | union tgsi_exec_channel indir_index; |
||
1403 | const uint execmask = mach->ExecMask; |
||
1404 | unsigned swizzle; |
||
1405 | uint i; |
||
1406 | |||
1407 | index2.i[0] = |
||
1408 | index2.i[1] = |
||
1409 | index2.i[2] = |
||
1410 | index2.i[3] = reg->DimIndirect.Index; |
||
1411 | |||
1412 | swizzle = reg->DimIndirect.Swizzle; |
||
1413 | fetch_src_file_channel(mach, |
||
1414 | chan_index, |
||
1415 | reg->DimIndirect.File, |
||
1416 | swizzle, |
||
1417 | &index2, |
||
1418 | &ZeroVec, |
||
1419 | &indir_index); |
||
1420 | |||
1421 | index2D.i[0] += indir_index.i[0]; |
||
1422 | index2D.i[1] += indir_index.i[1]; |
||
1423 | index2D.i[2] += indir_index.i[2]; |
||
1424 | index2D.i[3] += indir_index.i[3]; |
||
1425 | |||
1426 | /* for disabled execution channels, zero-out the index to |
||
1427 | * avoid using a potential garbage value. |
||
1428 | */ |
||
1429 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1430 | if ((execmask & (1 << i)) == 0) { |
||
1431 | index2D.i[i] = 0; |
||
1432 | } |
||
1433 | } |
||
1434 | } |
||
1435 | |||
1436 | /* If by any chance there was a need for a 3D array of register |
||
1437 | * files, we would have to check whether Dimension is followed |
||
1438 | * by a dimension register and continue the saga. |
||
1439 | */ |
||
1440 | } else { |
||
1441 | index2D.i[0] = |
||
1442 | index2D.i[1] = |
||
1443 | index2D.i[2] = |
||
1444 | index2D.i[3] = 0; |
||
1445 | } |
||
1446 | |||
1447 | switch (reg->Register.File) { |
||
1448 | case TGSI_FILE_NULL: |
||
1449 | dst = &null; |
||
1450 | break; |
||
1451 | |||
1452 | case TGSI_FILE_OUTPUT: |
||
1453 | index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] |
||
1454 | + reg->Register.Index; |
||
1455 | dst = &mach->Outputs[offset + index].xyzw[chan_index]; |
||
1456 | #if 0 |
||
1457 | debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", |
||
1458 | mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], |
||
1459 | reg->Register.Index); |
||
1460 | if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { |
||
1461 | debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); |
||
1462 | for (i = 0; i < TGSI_QUAD_SIZE; i++) |
||
1463 | if (execmask & (1 << i)) |
||
1464 | debug_printf("%f, ", chan->f[i]); |
||
1465 | debug_printf(")\n"); |
||
1466 | } |
||
1467 | #endif |
||
1468 | break; |
||
1469 | |||
1470 | case TGSI_FILE_TEMPORARY: |
||
1471 | index = reg->Register.Index; |
||
1472 | assert( index < TGSI_EXEC_NUM_TEMPS ); |
||
1473 | dst = &mach->Temps[offset + index].xyzw[chan_index]; |
||
1474 | break; |
||
1475 | |||
1476 | case TGSI_FILE_ADDRESS: |
||
1477 | index = reg->Register.Index; |
||
1478 | dst = &mach->Addrs[index].xyzw[chan_index]; |
||
1479 | break; |
||
1480 | |||
1481 | case TGSI_FILE_PREDICATE: |
||
1482 | index = reg->Register.Index; |
||
1483 | assert(index < TGSI_EXEC_NUM_PREDS); |
||
1484 | dst = &mach->Predicates[index].xyzw[chan_index]; |
||
1485 | break; |
||
1486 | |||
1487 | default: |
||
1488 | assert( 0 ); |
||
1489 | return; |
||
1490 | } |
||
1491 | |||
1492 | if (inst->Instruction.Predicate) { |
||
1493 | uint swizzle; |
||
1494 | union tgsi_exec_channel *pred; |
||
1495 | |||
1496 | switch (chan_index) { |
||
1497 | case TGSI_CHAN_X: |
||
1498 | swizzle = inst->Predicate.SwizzleX; |
||
1499 | break; |
||
1500 | case TGSI_CHAN_Y: |
||
1501 | swizzle = inst->Predicate.SwizzleY; |
||
1502 | break; |
||
1503 | case TGSI_CHAN_Z: |
||
1504 | swizzle = inst->Predicate.SwizzleZ; |
||
1505 | break; |
||
1506 | case TGSI_CHAN_W: |
||
1507 | swizzle = inst->Predicate.SwizzleW; |
||
1508 | break; |
||
1509 | default: |
||
1510 | assert(0); |
||
1511 | return; |
||
1512 | } |
||
1513 | |||
1514 | assert(inst->Predicate.Index == 0); |
||
1515 | |||
1516 | pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; |
||
1517 | |||
1518 | if (inst->Predicate.Negate) { |
||
1519 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1520 | if (pred->u[i]) { |
||
1521 | execmask &= ~(1 << i); |
||
1522 | } |
||
1523 | } |
||
1524 | } else { |
||
1525 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
1526 | if (!pred->u[i]) { |
||
1527 | execmask &= ~(1 << i); |
||
1528 | } |
||
1529 | } |
||
1530 | } |
||
1531 | } |
||
1532 | |||
1533 | switch (inst->Instruction.Saturate) { |
||
1534 | case TGSI_SAT_NONE: |
||
1535 | for (i = 0; i < TGSI_QUAD_SIZE; i++) |
||
1536 | if (execmask & (1 << i)) |
||
1537 | dst->i[i] = chan->i[i]; |
||
1538 | break; |
||
1539 | |||
1540 | case TGSI_SAT_ZERO_ONE: |
||
1541 | for (i = 0; i < TGSI_QUAD_SIZE; i++) |
||
1542 | if (execmask & (1 << i)) { |
||
1543 | if (chan->f[i] < 0.0f) |
||
1544 | dst->f[i] = 0.0f; |
||
1545 | else if (chan->f[i] > 1.0f) |
||
1546 | dst->f[i] = 1.0f; |
||
1547 | else |
||
1548 | dst->i[i] = chan->i[i]; |
||
1549 | } |
||
1550 | break; |
||
1551 | |||
1552 | case TGSI_SAT_MINUS_PLUS_ONE: |
||
1553 | for (i = 0; i < TGSI_QUAD_SIZE; i++) |
||
1554 | if (execmask & (1 << i)) { |
||
1555 | if (chan->f[i] < -1.0f) |
||
1556 | dst->f[i] = -1.0f; |
||
1557 | else if (chan->f[i] > 1.0f) |
||
1558 | dst->f[i] = 1.0f; |
||
1559 | else |
||
1560 | dst->i[i] = chan->i[i]; |
||
1561 | } |
||
1562 | break; |
||
1563 | |||
1564 | default: |
||
1565 | assert( 0 ); |
||
1566 | } |
||
1567 | } |
||
1568 | |||
/**
 * Shorthands for fetch_source() on source operand number INDEX of the
 * current instruction.  Both expect variables named `mach` and `inst` to be
 * in scope at the point of use.
 *
 * FETCH  treats the operand as float data,
 * IFETCH treats the operand as integer data.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
||
1574 | |||
1575 | |||
1576 | /** |
||
1577 | * Execute ARB-style KIL which is predicated by a src register. |
||
1578 | * Kill fragment if any of the four values is less than zero. |
||
1579 | */ |
||
1580 | static void |
||
1581 | exec_kill_if(struct tgsi_exec_machine *mach, |
||
1582 | const struct tgsi_full_instruction *inst) |
||
1583 | { |
||
1584 | uint uniquemask; |
||
1585 | uint chan_index; |
||
1586 | uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ |
||
1587 | union tgsi_exec_channel r[1]; |
||
1588 | |||
1589 | /* This mask stores component bits that were already tested. */ |
||
1590 | uniquemask = 0; |
||
1591 | |||
1592 | for (chan_index = 0; chan_index < 4; chan_index++) |
||
1593 | { |
||
1594 | uint swizzle; |
||
1595 | uint i; |
||
1596 | |||
1597 | /* unswizzle channel */ |
||
1598 | swizzle = tgsi_util_get_full_src_register_swizzle ( |
||
1599 | &inst->Src[0], |
||
1600 | chan_index); |
||
1601 | |||
1602 | /* check if the component has not been already tested */ |
||
1603 | if (uniquemask & (1 << swizzle)) |
||
1604 | continue; |
||
1605 | uniquemask |= 1 << swizzle; |
||
1606 | |||
1607 | FETCH(&r[0], 0, chan_index); |
||
1608 | for (i = 0; i < 4; i++) |
||
1609 | if (r[0].f[i] < 0.0f) |
||
1610 | kilmask |= 1 << i; |
||
1611 | } |
||
1612 | |||
1613 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; |
||
1614 | } |
||
1615 | |||
1616 | /** |
||
1617 | * Unconditional fragment kill/discard. |
||
1618 | */ |
||
1619 | static void |
||
1620 | exec_kill(struct tgsi_exec_machine *mach, |
||
1621 | const struct tgsi_full_instruction *inst) |
||
1622 | { |
||
1623 | uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ |
||
1624 | |||
1625 | /* kill fragment for all fragments currently executing */ |
||
1626 | kilmask = mach->ExecMask; |
||
1627 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; |
||
1628 | } |
||
1629 | |||
1630 | static void |
||
1631 | emit_vertex(struct tgsi_exec_machine *mach) |
||
1632 | { |
||
1633 | /* FIXME: check for exec mask correctly |
||
1634 | unsigned i; |
||
1635 | for (i = 0; i < TGSI_QUAD_SIZE; ++i) { |
||
1636 | if ((mach->ExecMask & (1 << i))) |
||
1637 | */ |
||
1638 | if (mach->ExecMask) { |
||
1639 | mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; |
||
1640 | mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; |
||
1641 | } |
||
1642 | } |
||
1643 | |||
1644 | static void |
||
1645 | emit_primitive(struct tgsi_exec_machine *mach) |
||
1646 | { |
||
1647 | unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; |
||
1648 | /* FIXME: check for exec mask correctly |
||
1649 | unsigned i; |
||
1650 | for (i = 0; i < TGSI_QUAD_SIZE; ++i) { |
||
1651 | if ((mach->ExecMask & (1 << i))) |
||
1652 | */ |
||
1653 | if (mach->ExecMask) { |
||
1654 | ++(*prim_count); |
||
1655 | debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); |
||
1656 | mach->Primitives[*prim_count] = 0; |
||
1657 | } |
||
1658 | } |
||
1659 | |||
1660 | static void |
||
1661 | conditional_emit_primitive(struct tgsi_exec_machine *mach) |
||
1662 | { |
||
1663 | if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { |
||
1664 | int emitted_verts = |
||
1665 | mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; |
||
1666 | if (emitted_verts) { |
||
1667 | emit_primitive(mach); |
||
1668 | } |
||
1669 | } |
||
1670 | } |
||
1671 | |||
1672 | |||
1673 | /* |
||
1674 | * Fetch four texture samples using STR texture coordinates. |
||
1675 | */ |
||
1676 | static void |
||
1677 | fetch_texel( struct tgsi_sampler *sampler, |
||
1678 | const unsigned sview_idx, |
||
1679 | const unsigned sampler_idx, |
||
1680 | const union tgsi_exec_channel *s, |
||
1681 | const union tgsi_exec_channel *t, |
||
1682 | const union tgsi_exec_channel *p, |
||
1683 | const union tgsi_exec_channel *c0, |
||
1684 | const union tgsi_exec_channel *c1, |
||
1685 | float derivs[3][2][TGSI_QUAD_SIZE], |
||
1686 | const int8_t offset[3], |
||
1687 | enum tgsi_sampler_control control, |
||
1688 | union tgsi_exec_channel *r, |
||
1689 | union tgsi_exec_channel *g, |
||
1690 | union tgsi_exec_channel *b, |
||
1691 | union tgsi_exec_channel *a ) |
||
1692 | { |
||
1693 | uint j; |
||
1694 | float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; |
||
1695 | |||
1696 | /* FIXME: handle explicit derivs, offsets */ |
||
1697 | sampler->get_samples(sampler, sview_idx, sampler_idx, |
||
1698 | s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); |
||
1699 | |||
1700 | for (j = 0; j < 4; j++) { |
||
1701 | r->f[j] = rgba[0][j]; |
||
1702 | g->f[j] = rgba[1][j]; |
||
1703 | b->f[j] = rgba[2][j]; |
||
1704 | a->f[j] = rgba[3][j]; |
||
1705 | } |
||
1706 | } |
||
1707 | |||
1708 | |||
/*
 * Values for the 'modifier' argument of the texture execution functions.
 */
#define TEX_MODIFIER_NONE           0  /* plain lookup, no extra argument */
#define TEX_MODIFIER_PROJECTED      1  /* coordinates are divided by the extra argument */
#define TEX_MODIFIER_LOD_BIAS       2  /* extra argument is used as lod bias */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* extra argument is used as explicit lod */
#define TEX_MODIFIER_LEVEL_ZERO     4  /* NOTE(review): presumably forces mip level 0 -- confirm at the users */
||
1714 | |||
1715 | |||
1716 | /* |
||
1717 | * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. |
||
1718 | */ |
||
1719 | static void |
||
1720 | fetch_texel_offsets(struct tgsi_exec_machine *mach, |
||
1721 | const struct tgsi_full_instruction *inst, |
||
1722 | int8_t offsets[3]) |
||
1723 | { |
||
1724 | if (inst->Texture.NumOffsets == 1) { |
||
1725 | union tgsi_exec_channel index; |
||
1726 | union tgsi_exec_channel offset[3]; |
||
1727 | index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; |
||
1728 | fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, |
||
1729 | inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); |
||
1730 | fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, |
||
1731 | inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); |
||
1732 | fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, |
||
1733 | inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); |
||
1734 | offsets[0] = offset[0].i[0]; |
||
1735 | offsets[1] = offset[1].i[0]; |
||
1736 | offsets[2] = offset[2].i[0]; |
||
1737 | } else { |
||
1738 | assert(inst->Texture.NumOffsets == 0); |
||
1739 | offsets[0] = offsets[1] = offsets[2] = 0; |
||
1740 | } |
||
1741 | } |
||
1742 | |||
1743 | |||
1744 | /* |
||
1745 | * Fetch dx and dy values for one channel (s, t or r). |
||
1746 | * Put dx values into one float array, dy values into another. |
||
1747 | */ |
||
1748 | static void |
||
1749 | fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, |
||
1750 | const struct tgsi_full_instruction *inst, |
||
1751 | unsigned regdsrcx, |
||
1752 | unsigned chan, |
||
1753 | float derivs[2][TGSI_QUAD_SIZE]) |
||
1754 | { |
||
1755 | union tgsi_exec_channel d; |
||
1756 | FETCH(&d, regdsrcx, chan); |
||
1757 | derivs[0][0] = d.f[0]; |
||
1758 | derivs[0][1] = d.f[1]; |
||
1759 | derivs[0][2] = d.f[2]; |
||
1760 | derivs[0][3] = d.f[3]; |
||
1761 | FETCH(&d, regdsrcx + 1, chan); |
||
1762 | derivs[1][0] = d.f[0]; |
||
1763 | derivs[1][1] = d.f[1]; |
||
1764 | derivs[1][2] = d.f[2]; |
||
1765 | derivs[1][3] = d.f[3]; |
||
1766 | } |
||
1767 | |||
1768 | |||
/*
 * Execute a texture instruction.
 *
 * modifier is used to control the channel routing for the
 * instruction variants like proj, lod, and texture with lod bias.
 * sampler indicates which src register the sampler is contained in.
 */
||
1776 | static void |
||
1777 | exec_tex(struct tgsi_exec_machine *mach, |
||
1778 | const struct tgsi_full_instruction *inst, |
||
1779 | uint modifier, uint sampler) |
||
1780 | { |
||
1781 | const uint unit = inst->Src[sampler].Register.Index; |
||
1782 | const union tgsi_exec_channel *args[5], *proj = NULL; |
||
1783 | union tgsi_exec_channel r[5]; |
||
1784 | enum tgsi_sampler_control control = tgsi_sampler_lod_none; |
||
1785 | uint chan; |
||
1786 | int8_t offsets[3]; |
||
1787 | int dim, shadow_ref, i; |
||
1788 | |||
1789 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
1790 | fetch_texel_offsets(mach, inst, offsets); |
||
1791 | |||
1792 | assert(modifier != TEX_MODIFIER_LEVEL_ZERO); |
||
1793 | assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); |
||
1794 | |||
1795 | dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref); |
||
1796 | |||
1797 | assert(dim <= 4); |
||
1798 | if (shadow_ref >= 0) |
||
1799 | assert(shadow_ref >= dim && shadow_ref < Elements(args)); |
||
1800 | |||
1801 | /* fetch modifier to the last argument */ |
||
1802 | if (modifier != TEX_MODIFIER_NONE) { |
||
1803 | const int last = Elements(args) - 1; |
||
1804 | |||
1805 | /* fetch modifier from src0.w or src1.x */ |
||
1806 | if (sampler == 1) { |
||
1807 | assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); |
||
1808 | FETCH(&r[last], 0, TGSI_CHAN_W); |
||
1809 | } |
||
1810 | else { |
||
1811 | assert(shadow_ref != 4); |
||
1812 | FETCH(&r[last], 1, TGSI_CHAN_X); |
||
1813 | } |
||
1814 | |||
1815 | if (modifier != TEX_MODIFIER_PROJECTED) { |
||
1816 | args[last] = &r[last]; |
||
1817 | } |
||
1818 | else { |
||
1819 | proj = &r[last]; |
||
1820 | args[last] = &ZeroVec; |
||
1821 | } |
||
1822 | |||
1823 | /* point unused arguments to zero vector */ |
||
1824 | for (i = dim; i < last; i++) |
||
1825 | args[i] = &ZeroVec; |
||
1826 | |||
1827 | if (modifier == TEX_MODIFIER_EXPLICIT_LOD) |
||
1828 | control = tgsi_sampler_lod_explicit; |
||
1829 | else if (modifier == TEX_MODIFIER_LOD_BIAS) |
||
1830 | control = tgsi_sampler_lod_bias; |
||
1831 | } |
||
1832 | else { |
||
1833 | for (i = dim; i < Elements(args); i++) |
||
1834 | args[i] = &ZeroVec; |
||
1835 | } |
||
1836 | |||
1837 | /* fetch coordinates */ |
||
1838 | for (i = 0; i < dim; i++) { |
||
1839 | FETCH(&r[i], 0, TGSI_CHAN_X + i); |
||
1840 | |||
1841 | if (proj) |
||
1842 | micro_div(&r[i], &r[i], proj); |
||
1843 | |||
1844 | args[i] = &r[i]; |
||
1845 | } |
||
1846 | |||
1847 | /* fetch reference value */ |
||
1848 | if (shadow_ref >= 0) { |
||
1849 | FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); |
||
1850 | |||
1851 | if (proj) |
||
1852 | micro_div(&r[shadow_ref], &r[shadow_ref], proj); |
||
1853 | |||
1854 | args[shadow_ref] = &r[shadow_ref]; |
||
1855 | } |
||
1856 | |||
1857 | fetch_texel(mach->Sampler, unit, unit, |
||
1858 | args[0], args[1], args[2], args[3], args[4], |
||
1859 | NULL, offsets, control, |
||
1860 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
1861 | |||
1862 | #if 0 |
||
1863 | debug_printf("fetch r: %g %g %g %g\n", |
||
1864 | r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); |
||
1865 | debug_printf("fetch g: %g %g %g %g\n", |
||
1866 | r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); |
||
1867 | debug_printf("fetch b: %g %g %g %g\n", |
||
1868 | r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); |
||
1869 | debug_printf("fetch a: %g %g %g %g\n", |
||
1870 | r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); |
||
1871 | #endif |
||
1872 | |||
1873 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
1874 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
1875 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
1876 | } |
||
1877 | } |
||
1878 | } |
||
1879 | |||
1880 | |||
1881 | static void |
||
1882 | exec_txd(struct tgsi_exec_machine *mach, |
||
1883 | const struct tgsi_full_instruction *inst) |
||
1884 | { |
||
1885 | const uint unit = inst->Src[3].Register.Index; |
||
1886 | union tgsi_exec_channel r[4]; |
||
1887 | float derivs[3][2][TGSI_QUAD_SIZE]; |
||
1888 | uint chan; |
||
1889 | int8_t offsets[3]; |
||
1890 | |||
1891 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
1892 | fetch_texel_offsets(mach, inst, offsets); |
||
1893 | |||
1894 | switch (inst->Texture.Texture) { |
||
1895 | case TGSI_TEXTURE_1D: |
||
1896 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1897 | |||
1898 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1899 | |||
1900 | fetch_texel(mach->Sampler, unit, unit, |
||
1901 | &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ |
||
1902 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1903 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
1904 | break; |
||
1905 | |||
1906 | case TGSI_TEXTURE_SHADOW1D: |
||
1907 | case TGSI_TEXTURE_1D_ARRAY: |
||
1908 | case TGSI_TEXTURE_SHADOW1D_ARRAY: |
||
1909 | /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ |
||
1910 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1911 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
1912 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
1913 | |||
1914 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1915 | |||
1916 | fetch_texel(mach->Sampler, unit, unit, |
||
1917 | &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ |
||
1918 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1919 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
1920 | break; |
||
1921 | |||
1922 | case TGSI_TEXTURE_2D: |
||
1923 | case TGSI_TEXTURE_RECT: |
||
1924 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1925 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
1926 | |||
1927 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1928 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); |
||
1929 | |||
1930 | fetch_texel(mach->Sampler, unit, unit, |
||
1931 | &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ |
||
1932 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1933 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
1934 | break; |
||
1935 | |||
1936 | |||
1937 | case TGSI_TEXTURE_SHADOW2D: |
||
1938 | case TGSI_TEXTURE_SHADOWRECT: |
||
1939 | case TGSI_TEXTURE_2D_ARRAY: |
||
1940 | case TGSI_TEXTURE_SHADOW2D_ARRAY: |
||
1941 | /* only SHADOW2D_ARRAY actually needs W */ |
||
1942 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1943 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
1944 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
1945 | FETCH(&r[3], 0, TGSI_CHAN_W); |
||
1946 | |||
1947 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1948 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); |
||
1949 | |||
1950 | fetch_texel(mach->Sampler, unit, unit, |
||
1951 | &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ |
||
1952 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1953 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
1954 | break; |
||
1955 | |||
1956 | case TGSI_TEXTURE_3D: |
||
1957 | case TGSI_TEXTURE_CUBE: |
||
1958 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
1959 | /* only TEXTURE_CUBE_ARRAY actually needs W */ |
||
1960 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
1961 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
1962 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
1963 | FETCH(&r[3], 0, TGSI_CHAN_W); |
||
1964 | |||
1965 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); |
||
1966 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); |
||
1967 | fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); |
||
1968 | |||
1969 | fetch_texel(mach->Sampler, unit, unit, |
||
1970 | &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ |
||
1971 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
1972 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
1973 | break; |
||
1974 | |||
1975 | default: |
||
1976 | assert(0); |
||
1977 | } |
||
1978 | |||
1979 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
1980 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
1981 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
1982 | } |
||
1983 | } |
||
1984 | } |
||
1985 | |||
1986 | |||
1987 | static void |
||
1988 | exec_txf(struct tgsi_exec_machine *mach, |
||
1989 | const struct tgsi_full_instruction *inst) |
||
1990 | { |
||
1991 | const uint unit = inst->Src[1].Register.Index; |
||
1992 | union tgsi_exec_channel r[4]; |
||
1993 | uint chan; |
||
1994 | float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; |
||
1995 | int j; |
||
1996 | int8_t offsets[3]; |
||
1997 | unsigned target; |
||
1998 | |||
1999 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
2000 | fetch_texel_offsets(mach, inst, offsets); |
||
2001 | |||
2002 | IFETCH(&r[3], 0, TGSI_CHAN_W); |
||
2003 | |||
2004 | if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { |
||
2005 | target = mach->SamplerViews[unit].Resource; |
||
2006 | } |
||
2007 | else { |
||
2008 | target = inst->Texture.Texture; |
||
2009 | } |
||
2010 | switch(target) { |
||
2011 | case TGSI_TEXTURE_3D: |
||
2012 | case TGSI_TEXTURE_2D_ARRAY: |
||
2013 | case TGSI_TEXTURE_SHADOW2D_ARRAY: |
||
2014 | IFETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2015 | /* fallthrough */ |
||
2016 | case TGSI_TEXTURE_2D: |
||
2017 | case TGSI_TEXTURE_RECT: |
||
2018 | case TGSI_TEXTURE_SHADOW1D_ARRAY: |
||
2019 | case TGSI_TEXTURE_SHADOW2D: |
||
2020 | case TGSI_TEXTURE_SHADOWRECT: |
||
2021 | case TGSI_TEXTURE_1D_ARRAY: |
||
2022 | IFETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2023 | /* fallthrough */ |
||
2024 | case TGSI_TEXTURE_BUFFER: |
||
2025 | case TGSI_TEXTURE_1D: |
||
2026 | case TGSI_TEXTURE_SHADOW1D: |
||
2027 | IFETCH(&r[0], 0, TGSI_CHAN_X); |
||
2028 | break; |
||
2029 | default: |
||
2030 | assert(0); |
||
2031 | break; |
||
2032 | } |
||
2033 | |||
2034 | mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, |
||
2035 | offsets, rgba); |
||
2036 | |||
2037 | for (j = 0; j < TGSI_QUAD_SIZE; j++) { |
||
2038 | r[0].f[j] = rgba[0][j]; |
||
2039 | r[1].f[j] = rgba[1][j]; |
||
2040 | r[2].f[j] = rgba[2][j]; |
||
2041 | r[3].f[j] = rgba[3][j]; |
||
2042 | } |
||
2043 | |||
2044 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2045 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2046 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2047 | } |
||
2048 | } |
||
2049 | } |
||
2050 | |||
2051 | static void |
||
2052 | exec_txq(struct tgsi_exec_machine *mach, |
||
2053 | const struct tgsi_full_instruction *inst) |
||
2054 | { |
||
2055 | const uint unit = inst->Src[1].Register.Index; |
||
2056 | int result[4]; |
||
2057 | union tgsi_exec_channel r[4], src; |
||
2058 | uint chan; |
||
2059 | int i,j; |
||
2060 | |||
2061 | fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); |
||
2062 | |||
2063 | mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); |
||
2064 | |||
2065 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
2066 | for (j = 0; j < 4; j++) { |
||
2067 | r[j].i[i] = result[j]; |
||
2068 | } |
||
2069 | } |
||
2070 | |||
2071 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2072 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2073 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, |
||
2074 | TGSI_EXEC_DATA_INT); |
||
2075 | } |
||
2076 | } |
||
2077 | } |
||
2078 | |||
2079 | static void |
||
2080 | exec_sample(struct tgsi_exec_machine *mach, |
||
2081 | const struct tgsi_full_instruction *inst, |
||
2082 | uint modifier, boolean compare) |
||
2083 | { |
||
2084 | const uint resource_unit = inst->Src[1].Register.Index; |
||
2085 | const uint sampler_unit = inst->Src[2].Register.Index; |
||
2086 | union tgsi_exec_channel r[4], c1; |
||
2087 | const union tgsi_exec_channel *lod = &ZeroVec; |
||
2088 | enum tgsi_sampler_control control = tgsi_sampler_lod_none; |
||
2089 | uint chan; |
||
2090 | int8_t offsets[3]; |
||
2091 | |||
2092 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
2093 | fetch_texel_offsets(mach, inst, offsets); |
||
2094 | |||
2095 | assert(modifier != TEX_MODIFIER_PROJECTED); |
||
2096 | |||
2097 | if (modifier != TEX_MODIFIER_NONE) { |
||
2098 | if (modifier == TEX_MODIFIER_LOD_BIAS) { |
||
2099 | FETCH(&c1, 3, TGSI_CHAN_X); |
||
2100 | lod = &c1; |
||
2101 | control = tgsi_sampler_lod_bias; |
||
2102 | } |
||
2103 | else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { |
||
2104 | FETCH(&c1, 3, TGSI_CHAN_X); |
||
2105 | lod = &c1; |
||
2106 | control = tgsi_sampler_lod_explicit; |
||
2107 | } |
||
2108 | else { |
||
2109 | assert(modifier == TEX_MODIFIER_LEVEL_ZERO); |
||
2110 | control = tgsi_sampler_lod_zero; |
||
2111 | } |
||
2112 | } |
||
2113 | |||
2114 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
2115 | |||
2116 | switch (mach->SamplerViews[resource_unit].Resource) { |
||
2117 | case TGSI_TEXTURE_1D: |
||
2118 | if (compare) { |
||
2119 | FETCH(&r[2], 3, TGSI_CHAN_X); |
||
2120 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2121 | &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ |
||
2122 | NULL, offsets, control, |
||
2123 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
2124 | } |
||
2125 | else { |
||
2126 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2127 | &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ |
||
2128 | NULL, offsets, control, |
||
2129 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
2130 | } |
||
2131 | break; |
||
2132 | |||
2133 | case TGSI_TEXTURE_1D_ARRAY: |
||
2134 | case TGSI_TEXTURE_2D: |
||
2135 | case TGSI_TEXTURE_RECT: |
||
2136 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2137 | if (compare) { |
||
2138 | FETCH(&r[2], 3, TGSI_CHAN_X); |
||
2139 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2140 | &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ |
||
2141 | NULL, offsets, control, |
||
2142 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
2143 | } |
||
2144 | else { |
||
2145 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2146 | &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ |
||
2147 | NULL, offsets, control, |
||
2148 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
2149 | } |
||
2150 | break; |
||
2151 | |||
2152 | case TGSI_TEXTURE_2D_ARRAY: |
||
2153 | case TGSI_TEXTURE_3D: |
||
2154 | case TGSI_TEXTURE_CUBE: |
||
2155 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2156 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2157 | if(compare) { |
||
2158 | FETCH(&r[3], 3, TGSI_CHAN_X); |
||
2159 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2160 | &r[0], &r[1], &r[2], &r[3], lod, |
||
2161 | NULL, offsets, control, |
||
2162 | &r[0], &r[1], &r[2], &r[3]); |
||
2163 | } |
||
2164 | else { |
||
2165 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2166 | &r[0], &r[1], &r[2], &ZeroVec, lod, |
||
2167 | NULL, offsets, control, |
||
2168 | &r[0], &r[1], &r[2], &r[3]); |
||
2169 | } |
||
2170 | break; |
||
2171 | |||
2172 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
2173 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2174 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2175 | FETCH(&r[3], 0, TGSI_CHAN_W); |
||
2176 | if(compare) { |
||
2177 | FETCH(&r[4], 3, TGSI_CHAN_X); |
||
2178 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2179 | &r[0], &r[1], &r[2], &r[3], &r[4], |
||
2180 | NULL, offsets, control, |
||
2181 | &r[0], &r[1], &r[2], &r[3]); |
||
2182 | } |
||
2183 | else { |
||
2184 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2185 | &r[0], &r[1], &r[2], &r[3], lod, |
||
2186 | NULL, offsets, control, |
||
2187 | &r[0], &r[1], &r[2], &r[3]); |
||
2188 | } |
||
2189 | break; |
||
2190 | |||
2191 | |||
2192 | default: |
||
2193 | assert(0); |
||
2194 | } |
||
2195 | |||
2196 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2197 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2198 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2199 | } |
||
2200 | } |
||
2201 | } |
||
2202 | |||
2203 | static void |
||
2204 | exec_sample_d(struct tgsi_exec_machine *mach, |
||
2205 | const struct tgsi_full_instruction *inst) |
||
2206 | { |
||
2207 | const uint resource_unit = inst->Src[1].Register.Index; |
||
2208 | const uint sampler_unit = inst->Src[2].Register.Index; |
||
2209 | union tgsi_exec_channel r[4]; |
||
2210 | float derivs[3][2][TGSI_QUAD_SIZE]; |
||
2211 | uint chan; |
||
2212 | int8_t offsets[3]; |
||
2213 | |||
2214 | /* always fetch all 3 offsets, overkill but keeps code simple */ |
||
2215 | fetch_texel_offsets(mach, inst, offsets); |
||
2216 | |||
2217 | FETCH(&r[0], 0, TGSI_CHAN_X); |
||
2218 | |||
2219 | switch (mach->SamplerViews[resource_unit].Resource) { |
||
2220 | case TGSI_TEXTURE_1D: |
||
2221 | case TGSI_TEXTURE_1D_ARRAY: |
||
2222 | /* only 1D array actually needs Y */ |
||
2223 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2224 | |||
2225 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); |
||
2226 | |||
2227 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2228 | &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ |
||
2229 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
2230 | &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ |
||
2231 | break; |
||
2232 | |||
2233 | case TGSI_TEXTURE_2D: |
||
2234 | case TGSI_TEXTURE_RECT: |
||
2235 | case TGSI_TEXTURE_2D_ARRAY: |
||
2236 | /* only 2D array actually needs Z */ |
||
2237 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2238 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2239 | |||
2240 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); |
||
2241 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); |
||
2242 | |||
2243 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2244 | &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ |
||
2245 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
2246 | &r[0], &r[1], &r[2], &r[3]); /* outputs */ |
||
2247 | break; |
||
2248 | |||
2249 | case TGSI_TEXTURE_3D: |
||
2250 | case TGSI_TEXTURE_CUBE: |
||
2251 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
2252 | /* only cube array actually needs W */ |
||
2253 | FETCH(&r[1], 0, TGSI_CHAN_Y); |
||
2254 | FETCH(&r[2], 0, TGSI_CHAN_Z); |
||
2255 | FETCH(&r[3], 0, TGSI_CHAN_W); |
||
2256 | |||
2257 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); |
||
2258 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); |
||
2259 | fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); |
||
2260 | |||
2261 | fetch_texel(mach->Sampler, resource_unit, sampler_unit, |
||
2262 | &r[0], &r[1], &r[2], &r[3], &ZeroVec, |
||
2263 | derivs, offsets, tgsi_sampler_derivs_explicit, |
||
2264 | &r[0], &r[1], &r[2], &r[3]); |
||
2265 | break; |
||
2266 | |||
2267 | default: |
||
2268 | assert(0); |
||
2269 | } |
||
2270 | |||
2271 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2272 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2273 | store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2274 | } |
||
2275 | } |
||
2276 | } |
||
2277 | |||
2278 | |||
2279 | /** |
||
2280 | * Evaluate a constant-valued coefficient at the position of the |
||
2281 | * current quad. |
||
2282 | */ |
||
2283 | static void |
||
2284 | eval_constant_coef( |
||
2285 | struct tgsi_exec_machine *mach, |
||
2286 | unsigned attrib, |
||
2287 | unsigned chan ) |
||
2288 | { |
||
2289 | unsigned i; |
||
2290 | |||
2291 | for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { |
||
2292 | mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; |
||
2293 | } |
||
2294 | } |
||
2295 | |||
2296 | /** |
||
2297 | * Evaluate a linear-valued coefficient at the position of the |
||
2298 | * current quad. |
||
2299 | */ |
||
2300 | static void |
||
2301 | eval_linear_coef( |
||
2302 | struct tgsi_exec_machine *mach, |
||
2303 | unsigned attrib, |
||
2304 | unsigned chan ) |
||
2305 | { |
||
2306 | const float x = mach->QuadPos.xyzw[0].f[0]; |
||
2307 | const float y = mach->QuadPos.xyzw[1].f[0]; |
||
2308 | const float dadx = mach->InterpCoefs[attrib].dadx[chan]; |
||
2309 | const float dady = mach->InterpCoefs[attrib].dady[chan]; |
||
2310 | const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; |
||
2311 | mach->Inputs[attrib].xyzw[chan].f[0] = a0; |
||
2312 | mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; |
||
2313 | mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; |
||
2314 | mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; |
||
2315 | } |
||
2316 | |||
2317 | /** |
||
2318 | * Evaluate a perspective-valued coefficient at the position of the |
||
2319 | * current quad. |
||
2320 | */ |
||
2321 | static void |
||
2322 | eval_perspective_coef( |
||
2323 | struct tgsi_exec_machine *mach, |
||
2324 | unsigned attrib, |
||
2325 | unsigned chan ) |
||
2326 | { |
||
2327 | const float x = mach->QuadPos.xyzw[0].f[0]; |
||
2328 | const float y = mach->QuadPos.xyzw[1].f[0]; |
||
2329 | const float dadx = mach->InterpCoefs[attrib].dadx[chan]; |
||
2330 | const float dady = mach->InterpCoefs[attrib].dady[chan]; |
||
2331 | const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; |
||
2332 | const float *w = mach->QuadPos.xyzw[3].f; |
||
2333 | /* divide by W here */ |
||
2334 | mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; |
||
2335 | mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; |
||
2336 | mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; |
||
2337 | mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; |
||
2338 | } |
||
2339 | |||
2340 | |||
/** Signature of the eval_*_coef() interpolation helpers. */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
||
2345 | |||
2346 | static void |
||
2347 | exec_declaration(struct tgsi_exec_machine *mach, |
||
2348 | const struct tgsi_full_declaration *decl) |
||
2349 | { |
||
2350 | if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { |
||
2351 | mach->SamplerViews[decl->Range.First] = decl->SamplerView; |
||
2352 | return; |
||
2353 | } |
||
2354 | |||
2355 | if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { |
||
2356 | if (decl->Declaration.File == TGSI_FILE_INPUT) { |
||
2357 | uint first, last, mask; |
||
2358 | |||
2359 | first = decl->Range.First; |
||
2360 | last = decl->Range.Last; |
||
2361 | mask = decl->Declaration.UsageMask; |
||
2362 | |||
2363 | /* XXX we could remove this special-case code since |
||
2364 | * mach->InterpCoefs[first].a0 should already have the |
||
2365 | * front/back-face value. But we should first update the |
||
2366 | * ureg code to emit the right UsageMask value (WRITEMASK_X). |
||
2367 | * Then, we could remove the tgsi_exec_machine::Face field. |
||
2368 | */ |
||
2369 | /* XXX make FACE a system value */ |
||
2370 | if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { |
||
2371 | uint i; |
||
2372 | |||
2373 | assert(decl->Semantic.Index == 0); |
||
2374 | assert(first == last); |
||
2375 | |||
2376 | for (i = 0; i < TGSI_QUAD_SIZE; i++) { |
||
2377 | mach->Inputs[first].xyzw[0].f[i] = mach->Face; |
||
2378 | } |
||
2379 | } else { |
||
2380 | eval_coef_func eval; |
||
2381 | uint i, j; |
||
2382 | |||
2383 | switch (decl->Interp.Interpolate) { |
||
2384 | case TGSI_INTERPOLATE_CONSTANT: |
||
2385 | eval = eval_constant_coef; |
||
2386 | break; |
||
2387 | |||
2388 | case TGSI_INTERPOLATE_LINEAR: |
||
2389 | eval = eval_linear_coef; |
||
2390 | break; |
||
2391 | |||
2392 | case TGSI_INTERPOLATE_PERSPECTIVE: |
||
2393 | eval = eval_perspective_coef; |
||
2394 | break; |
||
2395 | |||
2396 | case TGSI_INTERPOLATE_COLOR: |
||
2397 | eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; |
||
2398 | break; |
||
2399 | |||
2400 | default: |
||
2401 | assert(0); |
||
2402 | return; |
||
2403 | } |
||
2404 | |||
2405 | for (j = 0; j < TGSI_NUM_CHANNELS; j++) { |
||
2406 | if (mask & (1 << j)) { |
||
2407 | for (i = first; i <= last; i++) { |
||
2408 | eval(mach, i, j); |
||
2409 | } |
||
2410 | } |
||
2411 | } |
||
2412 | } |
||
2413 | |||
2414 | if (DEBUG_EXECUTION) { |
||
2415 | uint i, j; |
||
2416 | for (i = first; i <= last; ++i) { |
||
2417 | debug_printf("IN[%2u] = ", i); |
||
2418 | for (j = 0; j < TGSI_NUM_CHANNELS; j++) { |
||
2419 | if (j > 0) { |
||
2420 | debug_printf(" "); |
||
2421 | } |
||
2422 | debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", |
||
2423 | mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], |
||
2424 | mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], |
||
2425 | mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], |
||
2426 | mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); |
||
2427 | } |
||
2428 | } |
||
2429 | } |
||
2430 | } |
||
2431 | } |
||
2432 | |||
2433 | if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { |
||
2434 | mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; |
||
2435 | } |
||
2436 | } |
||
2437 | |||
2438 | |||
/** Micro-op that takes no source operands and only produces a result. */
typedef void (*micro_op)(union tgsi_exec_channel *dst);
||
2440 | |||
2441 | static void |
||
2442 | exec_vector(struct tgsi_exec_machine *mach, |
||
2443 | const struct tgsi_full_instruction *inst, |
||
2444 | micro_op op, |
||
2445 | enum tgsi_exec_datatype dst_datatype) |
||
2446 | { |
||
2447 | unsigned int chan; |
||
2448 | |||
2449 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2450 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2451 | union tgsi_exec_channel dst; |
||
2452 | |||
2453 | op(&dst); |
||
2454 | store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); |
||
2455 | } |
||
2456 | } |
||
2457 | } |
||
2458 | |||
/** Micro-op computing one result channel from one source channel. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
||
2461 | |||
2462 | static void |
||
2463 | exec_scalar_unary(struct tgsi_exec_machine *mach, |
||
2464 | const struct tgsi_full_instruction *inst, |
||
2465 | micro_unary_op op, |
||
2466 | enum tgsi_exec_datatype dst_datatype, |
||
2467 | enum tgsi_exec_datatype src_datatype) |
||
2468 | { |
||
2469 | unsigned int chan; |
||
2470 | union tgsi_exec_channel src; |
||
2471 | union tgsi_exec_channel dst; |
||
2472 | |||
2473 | fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); |
||
2474 | op(&dst, &src); |
||
2475 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2476 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2477 | store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); |
||
2478 | } |
||
2479 | } |
||
2480 | } |
||
2481 | |||
2482 | static void |
||
2483 | exec_vector_unary(struct tgsi_exec_machine *mach, |
||
2484 | const struct tgsi_full_instruction *inst, |
||
2485 | micro_unary_op op, |
||
2486 | enum tgsi_exec_datatype dst_datatype, |
||
2487 | enum tgsi_exec_datatype src_datatype) |
||
2488 | { |
||
2489 | unsigned int chan; |
||
2490 | struct tgsi_exec_vector dst; |
||
2491 | |||
2492 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2493 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2494 | union tgsi_exec_channel src; |
||
2495 | |||
2496 | fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); |
||
2497 | op(&dst.xyzw[chan], &src); |
||
2498 | } |
||
2499 | } |
||
2500 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2501 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2502 | store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); |
||
2503 | } |
||
2504 | } |
||
2505 | } |
||
2506 | |||
/** Micro-op computing one result channel from two source channels. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
||
2510 | |||
2511 | static void |
||
2512 | exec_scalar_binary(struct tgsi_exec_machine *mach, |
||
2513 | const struct tgsi_full_instruction *inst, |
||
2514 | micro_binary_op op, |
||
2515 | enum tgsi_exec_datatype dst_datatype, |
||
2516 | enum tgsi_exec_datatype src_datatype) |
||
2517 | { |
||
2518 | unsigned int chan; |
||
2519 | union tgsi_exec_channel src[2]; |
||
2520 | union tgsi_exec_channel dst; |
||
2521 | |||
2522 | fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); |
||
2523 | fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); |
||
2524 | op(&dst, &src[0], &src[1]); |
||
2525 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2526 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2527 | store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); |
||
2528 | } |
||
2529 | } |
||
2530 | } |
||
2531 | |||
2532 | static void |
||
2533 | exec_vector_binary(struct tgsi_exec_machine *mach, |
||
2534 | const struct tgsi_full_instruction *inst, |
||
2535 | micro_binary_op op, |
||
2536 | enum tgsi_exec_datatype dst_datatype, |
||
2537 | enum tgsi_exec_datatype src_datatype) |
||
2538 | { |
||
2539 | unsigned int chan; |
||
2540 | struct tgsi_exec_vector dst; |
||
2541 | |||
2542 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2543 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2544 | union tgsi_exec_channel src[2]; |
||
2545 | |||
2546 | fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); |
||
2547 | fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); |
||
2548 | op(&dst.xyzw[chan], &src[0], &src[1]); |
||
2549 | } |
||
2550 | } |
||
2551 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2552 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2553 | store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); |
||
2554 | } |
||
2555 | } |
||
2556 | } |
||
2557 | |||
/** Micro-op computing one result channel from three source channels. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
||
2562 | |||
2563 | static void |
||
2564 | exec_vector_trinary(struct tgsi_exec_machine *mach, |
||
2565 | const struct tgsi_full_instruction *inst, |
||
2566 | micro_trinary_op op, |
||
2567 | enum tgsi_exec_datatype dst_datatype, |
||
2568 | enum tgsi_exec_datatype src_datatype) |
||
2569 | { |
||
2570 | unsigned int chan; |
||
2571 | struct tgsi_exec_vector dst; |
||
2572 | |||
2573 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2574 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2575 | union tgsi_exec_channel src[3]; |
||
2576 | |||
2577 | fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); |
||
2578 | fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); |
||
2579 | fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); |
||
2580 | op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); |
||
2581 | } |
||
2582 | } |
||
2583 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2584 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2585 | store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); |
||
2586 | } |
||
2587 | } |
||
2588 | } |
||
2589 | |||
2590 | static void |
||
2591 | exec_dp3(struct tgsi_exec_machine *mach, |
||
2592 | const struct tgsi_full_instruction *inst) |
||
2593 | { |
||
2594 | unsigned int chan; |
||
2595 | union tgsi_exec_channel arg[3]; |
||
2596 | |||
2597 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2598 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2599 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2600 | |||
2601 | for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { |
||
2602 | fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); |
||
2603 | fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); |
||
2604 | micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); |
||
2605 | } |
||
2606 | |||
2607 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2608 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2609 | store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2610 | } |
||
2611 | } |
||
2612 | } |
||
2613 | |||
2614 | static void |
||
2615 | exec_dp4(struct tgsi_exec_machine *mach, |
||
2616 | const struct tgsi_full_instruction *inst) |
||
2617 | { |
||
2618 | unsigned int chan; |
||
2619 | union tgsi_exec_channel arg[3]; |
||
2620 | |||
2621 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2622 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2623 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2624 | |||
2625 | for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { |
||
2626 | fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); |
||
2627 | fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); |
||
2628 | micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); |
||
2629 | } |
||
2630 | |||
2631 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2632 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2633 | store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2634 | } |
||
2635 | } |
||
2636 | } |
||
2637 | |||
2638 | static void |
||
2639 | exec_dp2a(struct tgsi_exec_machine *mach, |
||
2640 | const struct tgsi_full_instruction *inst) |
||
2641 | { |
||
2642 | unsigned int chan; |
||
2643 | union tgsi_exec_channel arg[3]; |
||
2644 | |||
2645 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2646 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2647 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2648 | |||
2649 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2650 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2651 | micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); |
||
2652 | |||
2653 | fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2654 | micro_add(&arg[0], &arg[0], &arg[1]); |
||
2655 | |||
2656 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2657 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2658 | store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2659 | } |
||
2660 | } |
||
2661 | } |
||
2662 | |||
2663 | static void |
||
2664 | exec_dph(struct tgsi_exec_machine *mach, |
||
2665 | const struct tgsi_full_instruction *inst) |
||
2666 | { |
||
2667 | unsigned int chan; |
||
2668 | union tgsi_exec_channel arg[3]; |
||
2669 | |||
2670 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2671 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2672 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2673 | |||
2674 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2675 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2676 | micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); |
||
2677 | |||
2678 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2679 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2680 | micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); |
||
2681 | |||
2682 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2683 | micro_add(&arg[0], &arg[0], &arg[1]); |
||
2684 | |||
2685 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2686 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2687 | store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2688 | } |
||
2689 | } |
||
2690 | } |
||
2691 | |||
2692 | static void |
||
2693 | exec_dp2(struct tgsi_exec_machine *mach, |
||
2694 | const struct tgsi_full_instruction *inst) |
||
2695 | { |
||
2696 | unsigned int chan; |
||
2697 | union tgsi_exec_channel arg[3]; |
||
2698 | |||
2699 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2700 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2701 | micro_mul(&arg[2], &arg[0], &arg[1]); |
||
2702 | |||
2703 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2704 | fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2705 | micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); |
||
2706 | |||
2707 | for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { |
||
2708 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2709 | store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2710 | } |
||
2711 | } |
||
2712 | } |
||
2713 | |||
2714 | static void |
||
2715 | exec_nrm4(struct tgsi_exec_machine *mach, |
||
2716 | const struct tgsi_full_instruction *inst) |
||
2717 | { |
||
2718 | unsigned int chan; |
||
2719 | union tgsi_exec_channel arg[4]; |
||
2720 | union tgsi_exec_channel scale; |
||
2721 | |||
2722 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2723 | micro_mul(&scale, &arg[0], &arg[0]); |
||
2724 | |||
2725 | for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { |
||
2726 | union tgsi_exec_channel product; |
||
2727 | |||
2728 | fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); |
||
2729 | micro_mul(&product, &arg[chan], &arg[chan]); |
||
2730 | micro_add(&scale, &scale, &product); |
||
2731 | } |
||
2732 | |||
2733 | micro_rsq(&scale, &scale); |
||
2734 | |||
2735 | for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_W; chan++) { |
||
2736 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2737 | micro_mul(&arg[chan], &arg[chan], &scale); |
||
2738 | store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2739 | } |
||
2740 | } |
||
2741 | } |
||
2742 | |||
2743 | static void |
||
2744 | exec_nrm3(struct tgsi_exec_machine *mach, |
||
2745 | const struct tgsi_full_instruction *inst) |
||
2746 | { |
||
2747 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { |
||
2748 | unsigned int chan; |
||
2749 | union tgsi_exec_channel arg[3]; |
||
2750 | union tgsi_exec_channel scale; |
||
2751 | |||
2752 | fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2753 | micro_mul(&scale, &arg[0], &arg[0]); |
||
2754 | |||
2755 | for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { |
||
2756 | union tgsi_exec_channel product; |
||
2757 | |||
2758 | fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); |
||
2759 | micro_mul(&product, &arg[chan], &arg[chan]); |
||
2760 | micro_add(&scale, &scale, &product); |
||
2761 | } |
||
2762 | |||
2763 | micro_rsq(&scale, &scale); |
||
2764 | |||
2765 | for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_Z; chan++) { |
||
2766 | if (inst->Dst[0].Register.WriteMask & (1 << chan)) { |
||
2767 | micro_mul(&arg[chan], &arg[chan], &scale); |
||
2768 | store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); |
||
2769 | } |
||
2770 | } |
||
2771 | } |
||
2772 | |||
2773 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2774 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2775 | } |
||
2776 | } |
||
2777 | |||
2778 | static void |
||
2779 | exec_scs(struct tgsi_exec_machine *mach, |
||
2780 | const struct tgsi_full_instruction *inst) |
||
2781 | { |
||
2782 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { |
||
2783 | union tgsi_exec_channel arg; |
||
2784 | union tgsi_exec_channel result; |
||
2785 | |||
2786 | fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2787 | |||
2788 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2789 | micro_cos(&result, &arg); |
||
2790 | store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2791 | } |
||
2792 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2793 | micro_sin(&result, &arg); |
||
2794 | store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2795 | } |
||
2796 | } |
||
2797 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2798 | store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2799 | } |
||
2800 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2801 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2802 | } |
||
2803 | } |
||
2804 | |||
2805 | static void |
||
2806 | exec_x2d(struct tgsi_exec_machine *mach, |
||
2807 | const struct tgsi_full_instruction *inst) |
||
2808 | { |
||
2809 | union tgsi_exec_channel r[4]; |
||
2810 | union tgsi_exec_channel d[2]; |
||
2811 | |||
2812 | fetch_source(mach, &r[0], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2813 | fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2814 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) { |
||
2815 | fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2816 | micro_mul(&r[2], &r[2], &r[0]); |
||
2817 | fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2818 | micro_mul(&r[3], &r[3], &r[1]); |
||
2819 | micro_add(&r[2], &r[2], &r[3]); |
||
2820 | fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2821 | micro_add(&d[0], &r[2], &r[3]); |
||
2822 | } |
||
2823 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) { |
||
2824 | fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2825 | micro_mul(&r[2], &r[2], &r[0]); |
||
2826 | fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2827 | micro_mul(&r[3], &r[3], &r[1]); |
||
2828 | micro_add(&r[2], &r[2], &r[3]); |
||
2829 | fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2830 | micro_add(&d[1], &r[2], &r[3]); |
||
2831 | } |
||
2832 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2833 | store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2834 | } |
||
2835 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2836 | store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2837 | } |
||
2838 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2839 | store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2840 | } |
||
2841 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2842 | store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2843 | } |
||
2844 | } |
||
2845 | |||
2846 | static void |
||
2847 | exec_rfl(struct tgsi_exec_machine *mach, |
||
2848 | const struct tgsi_full_instruction *inst) |
||
2849 | { |
||
2850 | union tgsi_exec_channel r[9]; |
||
2851 | |||
2852 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { |
||
2853 | /* r0 = dp3(src0, src0) */ |
||
2854 | fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2855 | micro_mul(&r[0], &r[2], &r[2]); |
||
2856 | fetch_source(mach, &r[4], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2857 | micro_mul(&r[8], &r[4], &r[4]); |
||
2858 | micro_add(&r[0], &r[0], &r[8]); |
||
2859 | fetch_source(mach, &r[6], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2860 | micro_mul(&r[8], &r[6], &r[6]); |
||
2861 | micro_add(&r[0], &r[0], &r[8]); |
||
2862 | |||
2863 | /* r1 = dp3(src0, src1) */ |
||
2864 | fetch_source(mach, &r[3], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2865 | micro_mul(&r[1], &r[2], &r[3]); |
||
2866 | fetch_source(mach, &r[5], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2867 | micro_mul(&r[8], &r[4], &r[5]); |
||
2868 | micro_add(&r[1], &r[1], &r[8]); |
||
2869 | fetch_source(mach, &r[7], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2870 | micro_mul(&r[8], &r[6], &r[7]); |
||
2871 | micro_add(&r[1], &r[1], &r[8]); |
||
2872 | |||
2873 | /* r1 = 2 * r1 / r0 */ |
||
2874 | micro_add(&r[1], &r[1], &r[1]); |
||
2875 | micro_div(&r[1], &r[1], &r[0]); |
||
2876 | |||
2877 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2878 | micro_mul(&r[2], &r[2], &r[1]); |
||
2879 | micro_sub(&r[2], &r[2], &r[3]); |
||
2880 | store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2881 | } |
||
2882 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2883 | micro_mul(&r[4], &r[4], &r[1]); |
||
2884 | micro_sub(&r[4], &r[4], &r[5]); |
||
2885 | store_dest(mach, &r[4], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2886 | } |
||
2887 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2888 | micro_mul(&r[6], &r[6], &r[1]); |
||
2889 | micro_sub(&r[6], &r[6], &r[7]); |
||
2890 | store_dest(mach, &r[6], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2891 | } |
||
2892 | } |
||
2893 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2894 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2895 | } |
||
2896 | } |
||
2897 | |||
2898 | static void |
||
2899 | exec_xpd(struct tgsi_exec_machine *mach, |
||
2900 | const struct tgsi_full_instruction *inst) |
||
2901 | { |
||
2902 | union tgsi_exec_channel r[6]; |
||
2903 | union tgsi_exec_channel d[3]; |
||
2904 | |||
2905 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2906 | fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2907 | |||
2908 | micro_mul(&r[2], &r[0], &r[1]); |
||
2909 | |||
2910 | fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2911 | fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2912 | |||
2913 | micro_mul(&r[5], &r[3], &r[4] ); |
||
2914 | micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]); |
||
2915 | |||
2916 | fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2917 | |||
2918 | micro_mul(&r[3], &r[3], &r[2]); |
||
2919 | |||
2920 | fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2921 | |||
2922 | micro_mul(&r[1], &r[1], &r[5]); |
||
2923 | micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]); |
||
2924 | |||
2925 | micro_mul(&r[5], &r[5], &r[4]); |
||
2926 | micro_mul(&r[0], &r[0], &r[2]); |
||
2927 | micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]); |
||
2928 | |||
2929 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2930 | store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2931 | } |
||
2932 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2933 | store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2934 | } |
||
2935 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2936 | store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2937 | } |
||
2938 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2939 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2940 | } |
||
2941 | } |
||
2942 | |||
2943 | static void |
||
2944 | exec_dst(struct tgsi_exec_machine *mach, |
||
2945 | const struct tgsi_full_instruction *inst) |
||
2946 | { |
||
2947 | union tgsi_exec_channel r[2]; |
||
2948 | union tgsi_exec_channel d[4]; |
||
2949 | |||
2950 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2951 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2952 | fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2953 | micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); |
||
2954 | } |
||
2955 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2956 | fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2957 | } |
||
2958 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2959 | fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2960 | } |
||
2961 | |||
2962 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2963 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2964 | } |
||
2965 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2966 | store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2967 | } |
||
2968 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2969 | store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2970 | } |
||
2971 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2972 | store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2973 | } |
||
2974 | } |
||
2975 | |||
2976 | static void |
||
2977 | exec_log(struct tgsi_exec_machine *mach, |
||
2978 | const struct tgsi_full_instruction *inst) |
||
2979 | { |
||
2980 | union tgsi_exec_channel r[3]; |
||
2981 | |||
2982 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2983 | micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ |
||
2984 | micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ |
||
2985 | micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ |
||
2986 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
2987 | store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
2988 | } |
||
2989 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
2990 | micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ |
||
2991 | micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ |
||
2992 | store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
2993 | } |
||
2994 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
2995 | store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
2996 | } |
||
2997 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
2998 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
2999 | } |
||
3000 | } |
||
3001 | |||
3002 | static void |
||
3003 | exec_exp(struct tgsi_exec_machine *mach, |
||
3004 | const struct tgsi_full_instruction *inst) |
||
3005 | { |
||
3006 | union tgsi_exec_channel r[3]; |
||
3007 | |||
3008 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
3009 | micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ |
||
3010 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
3011 | micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ |
||
3012 | store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
3013 | } |
||
3014 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
3015 | micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ |
||
3016 | store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
3017 | } |
||
3018 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
3019 | micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ |
||
3020 | store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
3021 | } |
||
3022 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
3023 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
3024 | } |
||
3025 | } |
||
3026 | |||
3027 | static void |
||
3028 | exec_lit(struct tgsi_exec_machine *mach, |
||
3029 | const struct tgsi_full_instruction *inst) |
||
3030 | { |
||
3031 | union tgsi_exec_channel r[3]; |
||
3032 | union tgsi_exec_channel d[3]; |
||
3033 | |||
3034 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { |
||
3035 | fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
3036 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { |
||
3037 | fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
3038 | micro_max(&r[1], &r[1], &ZeroVec); |
||
3039 | |||
3040 | fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
3041 | micro_min(&r[2], &r[2], &P128Vec); |
||
3042 | micro_max(&r[2], &r[2], &M128Vec); |
||
3043 | micro_pow(&r[1], &r[1], &r[2]); |
||
3044 | micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); |
||
3045 | store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); |
||
3046 | } |
||
3047 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { |
||
3048 | micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); |
||
3049 | store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); |
||
3050 | } |
||
3051 | } |
||
3052 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { |
||
3053 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); |
||
3054 | } |
||
3055 | |||
3056 | if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { |
||
3057 | store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); |
||
3058 | } |
||
3059 | } |
||
3060 | |||
3061 | static void |
||
3062 | exec_break(struct tgsi_exec_machine *mach) |
||
3063 | { |
||
3064 | if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { |
||
3065 | /* turn off loop channels for each enabled exec channel */ |
||
3066 | mach->LoopMask &= ~mach->ExecMask; |
||
3067 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ |
||
3068 | UPDATE_EXEC_MASK(mach); |
||
3069 | } else { |
||
3070 | assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); |
||
3071 | |||
3072 | mach->Switch.mask = 0x0; |
||
3073 | |||
3074 | UPDATE_EXEC_MASK(mach); |
||
3075 | } |
||
3076 | } |
||
3077 | |||
3078 | static void |
||
3079 | exec_switch(struct tgsi_exec_machine *mach, |
||
3080 | const struct tgsi_full_instruction *inst) |
||
3081 | { |
||
3082 | assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); |
||
3083 | assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); |
||
3084 | |||
3085 | mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; |
||
3086 | fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); |
||
3087 | mach->Switch.mask = 0x0; |
||
3088 | mach->Switch.defaultMask = 0x0; |
||
3089 | |||
3090 | mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; |
||
3091 | mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; |
||
3092 | |||
3093 | UPDATE_EXEC_MASK(mach); |
||
3094 | } |
||
3095 | |||
3096 | static void |
||
3097 | exec_case(struct tgsi_exec_machine *mach, |
||
3098 | const struct tgsi_full_instruction *inst) |
||
3099 | { |
||
3100 | uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; |
||
3101 | union tgsi_exec_channel src; |
||
3102 | uint mask = 0; |
||
3103 | |||
3104 | fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); |
||
3105 | |||
3106 | if (mach->Switch.selector.u[0] == src.u[0]) { |
||
3107 | mask |= 0x1; |
||
3108 | } |
||
3109 | if (mach->Switch.selector.u[1] == src.u[1]) { |
||
3110 | mask |= 0x2; |
||
3111 | } |
||
3112 | if (mach->Switch.selector.u[2] == src.u[2]) { |
||
3113 | mask |= 0x4; |
||
3114 | } |
||
3115 | if (mach->Switch.selector.u[3] == src.u[3]) { |
||
3116 | mask |= 0x8; |
||
3117 | } |
||
3118 | |||
3119 | mach->Switch.defaultMask |= mask; |
||
3120 | |||
3121 | mach->Switch.mask |= mask & prevMask; |
||
3122 | |||
3123 | UPDATE_EXEC_MASK(mach); |
||
3124 | } |
||
3125 | |||
3126 | /* FIXME: this will only work if default is last */ |
||
3127 | static void |
||
3128 | exec_default(struct tgsi_exec_machine *mach) |
||
3129 | { |
||
3130 | uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; |
||
3131 | |||
3132 | mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; |
||
3133 | |||
3134 | UPDATE_EXEC_MASK(mach); |
||
3135 | } |
||
3136 | |||
3137 | static void |
||
3138 | exec_endswitch(struct tgsi_exec_machine *mach) |
||
3139 | { |
||
3140 | mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; |
||
3141 | mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; |
||
3142 | |||
3143 | UPDATE_EXEC_MASK(mach); |
||
3144 | } |
||
3145 | |||
3146 | static void |
||
3147 | micro_i2f(union tgsi_exec_channel *dst, |
||
3148 | const union tgsi_exec_channel *src) |
||
3149 | { |
||
3150 | dst->f[0] = (float)src->i[0]; |
||
3151 | dst->f[1] = (float)src->i[1]; |
||
3152 | dst->f[2] = (float)src->i[2]; |
||
3153 | dst->f[3] = (float)src->i[3]; |
||
3154 | } |
||
3155 | |||
3156 | static void |
||
3157 | micro_not(union tgsi_exec_channel *dst, |
||
3158 | const union tgsi_exec_channel *src) |
||
3159 | { |
||
3160 | dst->u[0] = ~src->u[0]; |
||
3161 | dst->u[1] = ~src->u[1]; |
||
3162 | dst->u[2] = ~src->u[2]; |
||
3163 | dst->u[3] = ~src->u[3]; |
||
3164 | } |
||
3165 | |||
3166 | static void |
||
3167 | micro_shl(union tgsi_exec_channel *dst, |
||
3168 | const union tgsi_exec_channel *src0, |
||
3169 | const union tgsi_exec_channel *src1) |
||
3170 | { |
||
3171 | dst->u[0] = src0->u[0] << src1->u[0]; |
||
3172 | dst->u[1] = src0->u[1] << src1->u[1]; |
||
3173 | dst->u[2] = src0->u[2] << src1->u[2]; |
||
3174 | dst->u[3] = src0->u[3] << src1->u[3]; |
||
3175 | } |
||
3176 | |||
3177 | static void |
||
3178 | micro_and(union tgsi_exec_channel *dst, |
||
3179 | const union tgsi_exec_channel *src0, |
||
3180 | const union tgsi_exec_channel *src1) |
||
3181 | { |
||
3182 | dst->u[0] = src0->u[0] & src1->u[0]; |
||
3183 | dst->u[1] = src0->u[1] & src1->u[1]; |
||
3184 | dst->u[2] = src0->u[2] & src1->u[2]; |
||
3185 | dst->u[3] = src0->u[3] & src1->u[3]; |
||
3186 | } |
||
3187 | |||
3188 | static void |
||
3189 | micro_or(union tgsi_exec_channel *dst, |
||
3190 | const union tgsi_exec_channel *src0, |
||
3191 | const union tgsi_exec_channel *src1) |
||
3192 | { |
||
3193 | dst->u[0] = src0->u[0] | src1->u[0]; |
||
3194 | dst->u[1] = src0->u[1] | src1->u[1]; |
||
3195 | dst->u[2] = src0->u[2] | src1->u[2]; |
||
3196 | dst->u[3] = src0->u[3] | src1->u[3]; |
||
3197 | } |
||
3198 | |||
3199 | static void |
||
3200 | micro_xor(union tgsi_exec_channel *dst, |
||
3201 | const union tgsi_exec_channel *src0, |
||
3202 | const union tgsi_exec_channel *src1) |
||
3203 | { |
||
3204 | dst->u[0] = src0->u[0] ^ src1->u[0]; |
||
3205 | dst->u[1] = src0->u[1] ^ src1->u[1]; |
||
3206 | dst->u[2] = src0->u[2] ^ src1->u[2]; |
||
3207 | dst->u[3] = src0->u[3] ^ src1->u[3]; |
||
3208 | } |
||
3209 | |||
3210 | static void |
||
3211 | micro_mod(union tgsi_exec_channel *dst, |
||
3212 | const union tgsi_exec_channel *src0, |
||
3213 | const union tgsi_exec_channel *src1) |
||
3214 | { |
||
3215 | dst->i[0] = src0->i[0] % src1->i[0]; |
||
3216 | dst->i[1] = src0->i[1] % src1->i[1]; |
||
3217 | dst->i[2] = src0->i[2] % src1->i[2]; |
||
3218 | dst->i[3] = src0->i[3] % src1->i[3]; |
||
3219 | } |
||
3220 | |||
3221 | static void |
||
3222 | micro_f2i(union tgsi_exec_channel *dst, |
||
3223 | const union tgsi_exec_channel *src) |
||
3224 | { |
||
3225 | dst->i[0] = (int)src->f[0]; |
||
3226 | dst->i[1] = (int)src->f[1]; |
||
3227 | dst->i[2] = (int)src->f[2]; |
||
3228 | dst->i[3] = (int)src->f[3]; |
||
3229 | } |
||
3230 | |||
3231 | static void |
||
3232 | micro_idiv(union tgsi_exec_channel *dst, |
||
3233 | const union tgsi_exec_channel *src0, |
||
3234 | const union tgsi_exec_channel *src1) |
||
3235 | { |
||
3236 | dst->i[0] = src0->i[0] / src1->i[0]; |
||
3237 | dst->i[1] = src0->i[1] / src1->i[1]; |
||
3238 | dst->i[2] = src0->i[2] / src1->i[2]; |
||
3239 | dst->i[3] = src0->i[3] / src1->i[3]; |
||
3240 | } |
||
3241 | |||
3242 | static void |
||
3243 | micro_imax(union tgsi_exec_channel *dst, |
||
3244 | const union tgsi_exec_channel *src0, |
||
3245 | const union tgsi_exec_channel *src1) |
||
3246 | { |
||
3247 | dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; |
||
3248 | dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; |
||
3249 | dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; |
||
3250 | dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; |
||
3251 | } |
||
3252 | |||
3253 | static void |
||
3254 | micro_imin(union tgsi_exec_channel *dst, |
||
3255 | const union tgsi_exec_channel *src0, |
||
3256 | const union tgsi_exec_channel *src1) |
||
3257 | { |
||
3258 | dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; |
||
3259 | dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; |
||
3260 | dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; |
||
3261 | dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; |
||
3262 | } |
||
3263 | |||
3264 | static void |
||
3265 | micro_isge(union tgsi_exec_channel *dst, |
||
3266 | const union tgsi_exec_channel *src0, |
||
3267 | const union tgsi_exec_channel *src1) |
||
3268 | { |
||
3269 | dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; |
||
3270 | dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; |
||
3271 | dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; |
||
3272 | dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; |
||
3273 | } |
||
3274 | |||
3275 | static void |
||
3276 | micro_ishr(union tgsi_exec_channel *dst, |
||
3277 | const union tgsi_exec_channel *src0, |
||
3278 | const union tgsi_exec_channel *src1) |
||
3279 | { |
||
3280 | dst->i[0] = src0->i[0] >> src1->i[0]; |
||
3281 | dst->i[1] = src0->i[1] >> src1->i[1]; |
||
3282 | dst->i[2] = src0->i[2] >> src1->i[2]; |
||
3283 | dst->i[3] = src0->i[3] >> src1->i[3]; |
||
3284 | } |
||
3285 | |||
3286 | static void |
||
3287 | micro_islt(union tgsi_exec_channel *dst, |
||
3288 | const union tgsi_exec_channel *src0, |
||
3289 | const union tgsi_exec_channel *src1) |
||
3290 | { |
||
3291 | dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; |
||
3292 | dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; |
||
3293 | dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; |
||
3294 | dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; |
||
3295 | } |
||
3296 | |||
3297 | static void |
||
3298 | micro_f2u(union tgsi_exec_channel *dst, |
||
3299 | const union tgsi_exec_channel *src) |
||
3300 | { |
||
3301 | dst->u[0] = (uint)src->f[0]; |
||
3302 | dst->u[1] = (uint)src->f[1]; |
||
3303 | dst->u[2] = (uint)src->f[2]; |
||
3304 | dst->u[3] = (uint)src->f[3]; |
||
3305 | } |
||
3306 | |||
3307 | static void |
||
3308 | micro_u2f(union tgsi_exec_channel *dst, |
||
3309 | const union tgsi_exec_channel *src) |
||
3310 | { |
||
3311 | dst->f[0] = (float)src->u[0]; |
||
3312 | dst->f[1] = (float)src->u[1]; |
||
3313 | dst->f[2] = (float)src->u[2]; |
||
3314 | dst->f[3] = (float)src->u[3]; |
||
3315 | } |
||
3316 | |||
3317 | static void |
||
3318 | micro_uadd(union tgsi_exec_channel *dst, |
||
3319 | const union tgsi_exec_channel *src0, |
||
3320 | const union tgsi_exec_channel *src1) |
||
3321 | { |
||
3322 | dst->u[0] = src0->u[0] + src1->u[0]; |
||
3323 | dst->u[1] = src0->u[1] + src1->u[1]; |
||
3324 | dst->u[2] = src0->u[2] + src1->u[2]; |
||
3325 | dst->u[3] = src0->u[3] + src1->u[3]; |
||
3326 | } |
||
3327 | |||
3328 | static void |
||
3329 | micro_udiv(union tgsi_exec_channel *dst, |
||
3330 | const union tgsi_exec_channel *src0, |
||
3331 | const union tgsi_exec_channel *src1) |
||
3332 | { |
||
3333 | dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; |
||
3334 | dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; |
||
3335 | dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; |
||
3336 | dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; |
||
3337 | } |
||
3338 | |||
3339 | static void |
||
3340 | micro_umad(union tgsi_exec_channel *dst, |
||
3341 | const union tgsi_exec_channel *src0, |
||
3342 | const union tgsi_exec_channel *src1, |
||
3343 | const union tgsi_exec_channel *src2) |
||
3344 | { |
||
3345 | dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; |
||
3346 | dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; |
||
3347 | dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; |
||
3348 | dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; |
||
3349 | } |
||
3350 | |||
3351 | static void |
||
3352 | micro_umax(union tgsi_exec_channel *dst, |
||
3353 | const union tgsi_exec_channel *src0, |
||
3354 | const union tgsi_exec_channel *src1) |
||
3355 | { |
||
3356 | dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; |
||
3357 | dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; |
||
3358 | dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; |
||
3359 | dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; |
||
3360 | } |
||
3361 | |||
3362 | static void |
||
3363 | micro_umin(union tgsi_exec_channel *dst, |
||
3364 | const union tgsi_exec_channel *src0, |
||
3365 | const union tgsi_exec_channel *src1) |
||
3366 | { |
||
3367 | dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; |
||
3368 | dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; |
||
3369 | dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; |
||
3370 | dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; |
||
3371 | } |
||
3372 | |||
3373 | static void |
||
3374 | micro_umod(union tgsi_exec_channel *dst, |
||
3375 | const union tgsi_exec_channel *src0, |
||
3376 | const union tgsi_exec_channel *src1) |
||
3377 | { |
||
3378 | dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; |
||
3379 | dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; |
||
3380 | dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; |
||
3381 | dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; |
||
3382 | } |
||
3383 | |||
3384 | static void |
||
3385 | micro_umul(union tgsi_exec_channel *dst, |
||
3386 | const union tgsi_exec_channel *src0, |
||
3387 | const union tgsi_exec_channel *src1) |
||
3388 | { |
||
3389 | dst->u[0] = src0->u[0] * src1->u[0]; |
||
3390 | dst->u[1] = src0->u[1] * src1->u[1]; |
||
3391 | dst->u[2] = src0->u[2] * src1->u[2]; |
||
3392 | dst->u[3] = src0->u[3] * src1->u[3]; |
||
3393 | } |
||
3394 | |||
3395 | static void |
||
3396 | micro_useq(union tgsi_exec_channel *dst, |
||
3397 | const union tgsi_exec_channel *src0, |
||
3398 | const union tgsi_exec_channel *src1) |
||
3399 | { |
||
3400 | dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; |
||
3401 | dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; |
||
3402 | dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; |
||
3403 | dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; |
||
3404 | } |
||
3405 | |||
3406 | static void |
||
3407 | micro_usge(union tgsi_exec_channel *dst, |
||
3408 | const union tgsi_exec_channel *src0, |
||
3409 | const union tgsi_exec_channel *src1) |
||
3410 | { |
||
3411 | dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; |
||
3412 | dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; |
||
3413 | dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; |
||
3414 | dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; |
||
3415 | } |
||
3416 | |||
3417 | static void |
||
3418 | micro_ushr(union tgsi_exec_channel *dst, |
||
3419 | const union tgsi_exec_channel *src0, |
||
3420 | const union tgsi_exec_channel *src1) |
||
3421 | { |
||
3422 | dst->u[0] = src0->u[0] >> src1->u[0]; |
||
3423 | dst->u[1] = src0->u[1] >> src1->u[1]; |
||
3424 | dst->u[2] = src0->u[2] >> src1->u[2]; |
||
3425 | dst->u[3] = src0->u[3] >> src1->u[3]; |
||
3426 | } |
||
3427 | |||
3428 | static void |
||
3429 | micro_uslt(union tgsi_exec_channel *dst, |
||
3430 | const union tgsi_exec_channel *src0, |
||
3431 | const union tgsi_exec_channel *src1) |
||
3432 | { |
||
3433 | dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; |
||
3434 | dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; |
||
3435 | dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; |
||
3436 | dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; |
||
3437 | } |
||
3438 | |||
3439 | static void |
||
3440 | micro_usne(union tgsi_exec_channel *dst, |
||
3441 | const union tgsi_exec_channel *src0, |
||
3442 | const union tgsi_exec_channel *src1) |
||
3443 | { |
||
3444 | dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; |
||
3445 | dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; |
||
3446 | dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; |
||
3447 | dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; |
||
3448 | } |
||
3449 | |||
3450 | static void |
||
3451 | micro_uarl(union tgsi_exec_channel *dst, |
||
3452 | const union tgsi_exec_channel *src) |
||
3453 | { |
||
3454 | dst->i[0] = src->u[0]; |
||
3455 | dst->i[1] = src->u[1]; |
||
3456 | dst->i[2] = src->u[2]; |
||
3457 | dst->i[3] = src->u[3]; |
||
3458 | } |
||
3459 | |||
3460 | static void |
||
3461 | micro_ucmp(union tgsi_exec_channel *dst, |
||
3462 | const union tgsi_exec_channel *src0, |
||
3463 | const union tgsi_exec_channel *src1, |
||
3464 | const union tgsi_exec_channel *src2) |
||
3465 | { |
||
3466 | dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; |
||
3467 | dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; |
||
3468 | dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; |
||
3469 | dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; |
||
3470 | } |
||
3471 | |||
3472 | static void |
||
3473 | exec_instruction( |
||
3474 | struct tgsi_exec_machine *mach, |
||
3475 | const struct tgsi_full_instruction *inst, |
||
3476 | int *pc ) |
||
3477 | { |
||
3478 | union tgsi_exec_channel r[10]; |
||
3479 | |||
3480 | (*pc)++; |
||
3481 | |||
3482 | switch (inst->Instruction.Opcode) { |
||
3483 | case TGSI_OPCODE_ARL: |
||
3484 | exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); |
||
3485 | break; |
||
3486 | |||
3487 | case TGSI_OPCODE_MOV: |
||
3488 | exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); |
||
3489 | break; |
||
3490 | |||
3491 | case TGSI_OPCODE_LIT: |
||
3492 | exec_lit(mach, inst); |
||
3493 | break; |
||
3494 | |||
3495 | case TGSI_OPCODE_RCP: |
||
3496 | exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3497 | break; |
||
3498 | |||
3499 | case TGSI_OPCODE_RSQ: |
||
3500 | exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3501 | break; |
||
3502 | |||
3503 | case TGSI_OPCODE_EXP: |
||
3504 | exec_exp(mach, inst); |
||
3505 | break; |
||
3506 | |||
3507 | case TGSI_OPCODE_LOG: |
||
3508 | exec_log(mach, inst); |
||
3509 | break; |
||
3510 | |||
3511 | case TGSI_OPCODE_MUL: |
||
3512 | exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3513 | break; |
||
3514 | |||
3515 | case TGSI_OPCODE_ADD: |
||
3516 | exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3517 | break; |
||
3518 | |||
3519 | case TGSI_OPCODE_DP3: |
||
3520 | exec_dp3(mach, inst); |
||
3521 | break; |
||
3522 | |||
3523 | case TGSI_OPCODE_DP4: |
||
3524 | exec_dp4(mach, inst); |
||
3525 | break; |
||
3526 | |||
3527 | case TGSI_OPCODE_DST: |
||
3528 | exec_dst(mach, inst); |
||
3529 | break; |
||
3530 | |||
3531 | case TGSI_OPCODE_MIN: |
||
3532 | exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3533 | break; |
||
3534 | |||
3535 | case TGSI_OPCODE_MAX: |
||
3536 | exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3537 | break; |
||
3538 | |||
3539 | case TGSI_OPCODE_SLT: |
||
3540 | exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3541 | break; |
||
3542 | |||
3543 | case TGSI_OPCODE_SGE: |
||
3544 | exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3545 | break; |
||
3546 | |||
3547 | case TGSI_OPCODE_MAD: |
||
3548 | exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3549 | break; |
||
3550 | |||
3551 | case TGSI_OPCODE_SUB: |
||
3552 | exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3553 | break; |
||
3554 | |||
3555 | case TGSI_OPCODE_LRP: |
||
3556 | exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3557 | break; |
||
3558 | |||
3559 | case TGSI_OPCODE_CND: |
||
3560 | exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3561 | break; |
||
3562 | |||
3563 | case TGSI_OPCODE_SQRT: |
||
3564 | exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3565 | break; |
||
3566 | |||
3567 | case TGSI_OPCODE_DP2A: |
||
3568 | exec_dp2a(mach, inst); |
||
3569 | break; |
||
3570 | |||
3571 | case TGSI_OPCODE_FRC: |
||
3572 | exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3573 | break; |
||
3574 | |||
3575 | case TGSI_OPCODE_CLAMP: |
||
3576 | exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3577 | break; |
||
3578 | |||
3579 | case TGSI_OPCODE_FLR: |
||
3580 | exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3581 | break; |
||
3582 | |||
3583 | case TGSI_OPCODE_ROUND: |
||
3584 | exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3585 | break; |
||
3586 | |||
3587 | case TGSI_OPCODE_EX2: |
||
3588 | exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3589 | break; |
||
3590 | |||
3591 | case TGSI_OPCODE_LG2: |
||
3592 | exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3593 | break; |
||
3594 | |||
3595 | case TGSI_OPCODE_POW: |
||
3596 | exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3597 | break; |
||
3598 | |||
3599 | case TGSI_OPCODE_XPD: |
||
3600 | exec_xpd(mach, inst); |
||
3601 | break; |
||
3602 | |||
3603 | case TGSI_OPCODE_ABS: |
||
3604 | exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3605 | break; |
||
3606 | |||
3607 | case TGSI_OPCODE_RCC: |
||
3608 | exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3609 | break; |
||
3610 | |||
3611 | case TGSI_OPCODE_DPH: |
||
3612 | exec_dph(mach, inst); |
||
3613 | break; |
||
3614 | |||
3615 | case TGSI_OPCODE_COS: |
||
3616 | exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3617 | break; |
||
3618 | |||
3619 | case TGSI_OPCODE_DDX: |
||
3620 | exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3621 | break; |
||
3622 | |||
3623 | case TGSI_OPCODE_DDY: |
||
3624 | exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3625 | break; |
||
3626 | |||
3627 | case TGSI_OPCODE_KILL: |
||
3628 | exec_kill (mach, inst); |
||
3629 | break; |
||
3630 | |||
3631 | case TGSI_OPCODE_KILL_IF: |
||
3632 | exec_kill_if (mach, inst); |
||
3633 | break; |
||
3634 | |||
3635 | case TGSI_OPCODE_PK2H: |
||
3636 | assert (0); |
||
3637 | break; |
||
3638 | |||
3639 | case TGSI_OPCODE_PK2US: |
||
3640 | assert (0); |
||
3641 | break; |
||
3642 | |||
3643 | case TGSI_OPCODE_PK4B: |
||
3644 | assert (0); |
||
3645 | break; |
||
3646 | |||
3647 | case TGSI_OPCODE_PK4UB: |
||
3648 | assert (0); |
||
3649 | break; |
||
3650 | |||
3651 | case TGSI_OPCODE_RFL: |
||
3652 | exec_rfl(mach, inst); |
||
3653 | break; |
||
3654 | |||
3655 | case TGSI_OPCODE_SEQ: |
||
3656 | exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3657 | break; |
||
3658 | |||
3659 | case TGSI_OPCODE_SFL: |
||
3660 | exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT); |
||
3661 | break; |
||
3662 | |||
3663 | case TGSI_OPCODE_SGT: |
||
3664 | exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3665 | break; |
||
3666 | |||
3667 | case TGSI_OPCODE_SIN: |
||
3668 | exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3669 | break; |
||
3670 | |||
3671 | case TGSI_OPCODE_SLE: |
||
3672 | exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3673 | break; |
||
3674 | |||
3675 | case TGSI_OPCODE_SNE: |
||
3676 | exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3677 | break; |
||
3678 | |||
3679 | case TGSI_OPCODE_STR: |
||
3680 | exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT); |
||
3681 | break; |
||
3682 | |||
3683 | case TGSI_OPCODE_TEX: |
||
3684 | /* simple texture lookup */ |
||
3685 | /* src[0] = texcoord */ |
||
3686 | /* src[1] = sampler unit */ |
||
3687 | exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); |
||
3688 | break; |
||
3689 | |||
3690 | case TGSI_OPCODE_TXB: |
||
3691 | /* Texture lookup with lod bias */ |
||
3692 | /* src[0] = texcoord (src[0].w = LOD bias) */ |
||
3693 | /* src[1] = sampler unit */ |
||
3694 | exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); |
||
3695 | break; |
||
3696 | |||
3697 | case TGSI_OPCODE_TXD: |
||
3698 | /* Texture lookup with explicit partial derivatives */ |
||
3699 | /* src[0] = texcoord */ |
||
3700 | /* src[1] = d[strq]/dx */ |
||
3701 | /* src[2] = d[strq]/dy */ |
||
3702 | /* src[3] = sampler unit */ |
||
3703 | exec_txd(mach, inst); |
||
3704 | break; |
||
3705 | |||
3706 | case TGSI_OPCODE_TXL: |
||
3707 | /* Texture lookup with explicit LOD */ |
||
3708 | /* src[0] = texcoord (src[0].w = LOD) */ |
||
3709 | /* src[1] = sampler unit */ |
||
3710 | exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); |
||
3711 | break; |
||
3712 | |||
3713 | case TGSI_OPCODE_TXP: |
||
3714 | /* Texture lookup with projection */ |
||
3715 | /* src[0] = texcoord (src[0].w = projection) */ |
||
3716 | /* src[1] = sampler unit */ |
||
3717 | exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); |
||
3718 | break; |
||
3719 | |||
3720 | case TGSI_OPCODE_UP2H: |
||
3721 | assert (0); |
||
3722 | break; |
||
3723 | |||
3724 | case TGSI_OPCODE_UP2US: |
||
3725 | assert (0); |
||
3726 | break; |
||
3727 | |||
3728 | case TGSI_OPCODE_UP4B: |
||
3729 | assert (0); |
||
3730 | break; |
||
3731 | |||
3732 | case TGSI_OPCODE_UP4UB: |
||
3733 | assert (0); |
||
3734 | break; |
||
3735 | |||
3736 | case TGSI_OPCODE_X2D: |
||
3737 | exec_x2d(mach, inst); |
||
3738 | break; |
||
3739 | |||
3740 | case TGSI_OPCODE_ARA: |
||
3741 | assert (0); |
||
3742 | break; |
||
3743 | |||
3744 | case TGSI_OPCODE_ARR: |
||
3745 | exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); |
||
3746 | break; |
||
3747 | |||
3748 | case TGSI_OPCODE_BRA: |
||
3749 | assert (0); |
||
3750 | break; |
||
3751 | |||
3752 | case TGSI_OPCODE_CAL: |
||
3753 | /* skip the call if no execution channels are enabled */ |
||
3754 | if (mach->ExecMask) { |
||
3755 | /* do the call */ |
||
3756 | |||
3757 | /* First, record the depths of the execution stacks. |
||
3758 | * This is important for deeply nested/looped return statements. |
||
3759 | * We have to unwind the stacks by the correct amount. For a |
||
3760 | * real code generator, we could determine the number of entries |
||
3761 | * to pop off each stack with simple static analysis and avoid |
||
3762 | * implementing this data structure at run time. |
||
3763 | */ |
||
3764 | mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; |
||
3765 | mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; |
||
3766 | mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; |
||
3767 | mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; |
||
3768 | mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; |
||
3769 | /* note that PC was already incremented above */ |
||
3770 | mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; |
||
3771 | |||
3772 | mach->CallStackTop++; |
||
3773 | |||
3774 | /* Second, push the Cond, Loop, Cont, Func stacks */ |
||
3775 | assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); |
||
3776 | assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
3777 | assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
3778 | assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); |
||
3779 | assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); |
||
3780 | assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); |
||
3781 | |||
3782 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; |
||
3783 | mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; |
||
3784 | mach->ContStack[mach->ContStackTop++] = mach->ContMask; |
||
3785 | mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; |
||
3786 | mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; |
||
3787 | mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; |
||
3788 | |||
3789 | /* Finally, jump to the subroutine */ |
||
3790 | *pc = inst->Label.Label; |
||
3791 | } |
||
3792 | break; |
||
3793 | |||
3794 | case TGSI_OPCODE_RET: |
||
3795 | mach->FuncMask &= ~mach->ExecMask; |
||
3796 | UPDATE_EXEC_MASK(mach); |
||
3797 | |||
3798 | if (mach->FuncMask == 0x0) { |
||
3799 | /* really return now (otherwise, keep executing) */ |
||
3800 | |||
3801 | if (mach->CallStackTop == 0) { |
||
3802 | /* returning from main() */ |
||
3803 | mach->CondStackTop = 0; |
||
3804 | mach->LoopStackTop = 0; |
||
3805 | *pc = -1; |
||
3806 | return; |
||
3807 | } |
||
3808 | |||
3809 | assert(mach->CallStackTop > 0); |
||
3810 | mach->CallStackTop--; |
||
3811 | |||
3812 | mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; |
||
3813 | mach->CondMask = mach->CondStack[mach->CondStackTop]; |
||
3814 | |||
3815 | mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; |
||
3816 | mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; |
||
3817 | |||
3818 | mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; |
||
3819 | mach->ContMask = mach->ContStack[mach->ContStackTop]; |
||
3820 | |||
3821 | mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; |
||
3822 | mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; |
||
3823 | |||
3824 | mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; |
||
3825 | mach->BreakType = mach->BreakStack[mach->BreakStackTop]; |
||
3826 | |||
3827 | assert(mach->FuncStackTop > 0); |
||
3828 | mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; |
||
3829 | |||
3830 | *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; |
||
3831 | |||
3832 | UPDATE_EXEC_MASK(mach); |
||
3833 | } |
||
3834 | break; |
||
3835 | |||
3836 | case TGSI_OPCODE_SSG: |
||
3837 | exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3838 | break; |
||
3839 | |||
3840 | case TGSI_OPCODE_CMP: |
||
3841 | exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3842 | break; |
||
3843 | |||
3844 | case TGSI_OPCODE_SCS: |
||
3845 | exec_scs(mach, inst); |
||
3846 | break; |
||
3847 | |||
3848 | case TGSI_OPCODE_NRM: |
||
3849 | exec_nrm3(mach, inst); |
||
3850 | break; |
||
3851 | |||
3852 | case TGSI_OPCODE_NRM4: |
||
3853 | exec_nrm4(mach, inst); |
||
3854 | break; |
||
3855 | |||
3856 | case TGSI_OPCODE_DIV: |
||
3857 | exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3858 | break; |
||
3859 | |||
3860 | case TGSI_OPCODE_DP2: |
||
3861 | exec_dp2(mach, inst); |
||
3862 | break; |
||
3863 | |||
3864 | case TGSI_OPCODE_IF: |
||
3865 | /* push CondMask */ |
||
3866 | assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); |
||
3867 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; |
||
3868 | FETCH( &r[0], 0, TGSI_CHAN_X ); |
||
3869 | /* update CondMask */ |
||
3870 | if( ! r[0].f[0] ) { |
||
3871 | mach->CondMask &= ~0x1; |
||
3872 | } |
||
3873 | if( ! r[0].f[1] ) { |
||
3874 | mach->CondMask &= ~0x2; |
||
3875 | } |
||
3876 | if( ! r[0].f[2] ) { |
||
3877 | mach->CondMask &= ~0x4; |
||
3878 | } |
||
3879 | if( ! r[0].f[3] ) { |
||
3880 | mach->CondMask &= ~0x8; |
||
3881 | } |
||
3882 | UPDATE_EXEC_MASK(mach); |
||
3883 | /* Todo: If CondMask==0, jump to ELSE */ |
||
3884 | break; |
||
3885 | |||
3886 | case TGSI_OPCODE_UIF: |
||
3887 | /* push CondMask */ |
||
3888 | assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); |
||
3889 | mach->CondStack[mach->CondStackTop++] = mach->CondMask; |
||
3890 | IFETCH( &r[0], 0, TGSI_CHAN_X ); |
||
3891 | /* update CondMask */ |
||
3892 | if( ! r[0].u[0] ) { |
||
3893 | mach->CondMask &= ~0x1; |
||
3894 | } |
||
3895 | if( ! r[0].u[1] ) { |
||
3896 | mach->CondMask &= ~0x2; |
||
3897 | } |
||
3898 | if( ! r[0].u[2] ) { |
||
3899 | mach->CondMask &= ~0x4; |
||
3900 | } |
||
3901 | if( ! r[0].u[3] ) { |
||
3902 | mach->CondMask &= ~0x8; |
||
3903 | } |
||
3904 | UPDATE_EXEC_MASK(mach); |
||
3905 | /* Todo: If CondMask==0, jump to ELSE */ |
||
3906 | break; |
||
3907 | |||
3908 | case TGSI_OPCODE_ELSE: |
||
3909 | /* invert CondMask wrt previous mask */ |
||
3910 | { |
||
3911 | uint prevMask; |
||
3912 | assert(mach->CondStackTop > 0); |
||
3913 | prevMask = mach->CondStack[mach->CondStackTop - 1]; |
||
3914 | mach->CondMask = ~mach->CondMask & prevMask; |
||
3915 | UPDATE_EXEC_MASK(mach); |
||
3916 | /* Todo: If CondMask==0, jump to ENDIF */ |
||
3917 | } |
||
3918 | break; |
||
3919 | |||
3920 | case TGSI_OPCODE_ENDIF: |
||
3921 | /* pop CondMask */ |
||
3922 | assert(mach->CondStackTop > 0); |
||
3923 | mach->CondMask = mach->CondStack[--mach->CondStackTop]; |
||
3924 | UPDATE_EXEC_MASK(mach); |
||
3925 | break; |
||
3926 | |||
3927 | case TGSI_OPCODE_END: |
||
3928 | /* make sure we end primitives which haven't |
||
3929 | * been explicitly emitted */ |
||
3930 | conditional_emit_primitive(mach); |
||
3931 | /* halt execution */ |
||
3932 | *pc = -1; |
||
3933 | break; |
||
3934 | |||
3935 | case TGSI_OPCODE_PUSHA: |
||
3936 | assert (0); |
||
3937 | break; |
||
3938 | |||
3939 | case TGSI_OPCODE_POPA: |
||
3940 | assert (0); |
||
3941 | break; |
||
3942 | |||
3943 | case TGSI_OPCODE_CEIL: |
||
3944 | exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3945 | break; |
||
3946 | |||
3947 | case TGSI_OPCODE_I2F: |
||
3948 | exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); |
||
3949 | break; |
||
3950 | |||
3951 | case TGSI_OPCODE_NOT: |
||
3952 | exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3953 | break; |
||
3954 | |||
3955 | case TGSI_OPCODE_TRUNC: |
||
3956 | exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); |
||
3957 | break; |
||
3958 | |||
3959 | case TGSI_OPCODE_SHL: |
||
3960 | exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3961 | break; |
||
3962 | |||
3963 | case TGSI_OPCODE_AND: |
||
3964 | exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3965 | break; |
||
3966 | |||
3967 | case TGSI_OPCODE_OR: |
||
3968 | exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3969 | break; |
||
3970 | |||
3971 | case TGSI_OPCODE_MOD: |
||
3972 | exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
3973 | break; |
||
3974 | |||
3975 | case TGSI_OPCODE_XOR: |
||
3976 | exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
3977 | break; |
||
3978 | |||
3979 | case TGSI_OPCODE_SAD: |
||
3980 | assert (0); |
||
3981 | break; |
||
3982 | |||
3983 | case TGSI_OPCODE_TXF: |
||
3984 | exec_txf(mach, inst); |
||
3985 | break; |
||
3986 | |||
3987 | case TGSI_OPCODE_TXQ: |
||
3988 | exec_txq(mach, inst); |
||
3989 | break; |
||
3990 | |||
3991 | case TGSI_OPCODE_EMIT: |
||
3992 | emit_vertex(mach); |
||
3993 | break; |
||
3994 | |||
3995 | case TGSI_OPCODE_ENDPRIM: |
||
3996 | emit_primitive(mach); |
||
3997 | break; |
||
3998 | |||
3999 | case TGSI_OPCODE_BGNLOOP: |
||
4000 | /* push LoopMask and ContMasks */ |
||
4001 | assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
4002 | assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
4003 | assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); |
||
4004 | assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); |
||
4005 | |||
4006 | mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; |
||
4007 | mach->ContStack[mach->ContStackTop++] = mach->ContMask; |
||
4008 | mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; |
||
4009 | mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; |
||
4010 | mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; |
||
4011 | break; |
||
4012 | |||
4013 | case TGSI_OPCODE_ENDLOOP: |
||
4014 | /* Restore ContMask, but don't pop */ |
||
4015 | assert(mach->ContStackTop > 0); |
||
4016 | mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; |
||
4017 | UPDATE_EXEC_MASK(mach); |
||
4018 | if (mach->ExecMask) { |
||
4019 | /* repeat loop: jump to instruction just past BGNLOOP */ |
||
4020 | assert(mach->LoopLabelStackTop > 0); |
||
4021 | *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; |
||
4022 | } |
||
4023 | else { |
||
4024 | /* exit loop: pop LoopMask */ |
||
4025 | assert(mach->LoopStackTop > 0); |
||
4026 | mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; |
||
4027 | /* pop ContMask */ |
||
4028 | assert(mach->ContStackTop > 0); |
||
4029 | mach->ContMask = mach->ContStack[--mach->ContStackTop]; |
||
4030 | assert(mach->LoopLabelStackTop > 0); |
||
4031 | --mach->LoopLabelStackTop; |
||
4032 | |||
4033 | mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; |
||
4034 | } |
||
4035 | UPDATE_EXEC_MASK(mach); |
||
4036 | break; |
||
4037 | |||
4038 | case TGSI_OPCODE_BRK: |
||
4039 | exec_break(mach); |
||
4040 | break; |
||
4041 | |||
4042 | case TGSI_OPCODE_CONT: |
||
4043 | /* turn off cont channels for each enabled exec channel */ |
||
4044 | mach->ContMask &= ~mach->ExecMask; |
||
4045 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ |
||
4046 | UPDATE_EXEC_MASK(mach); |
||
4047 | break; |
||
4048 | |||
4049 | case TGSI_OPCODE_BGNSUB: |
||
4050 | /* no-op */ |
||
4051 | break; |
||
4052 | |||
4053 | case TGSI_OPCODE_ENDSUB: |
||
4054 | /* |
||
4055 | * XXX: This really should be a no-op. We should never reach this opcode. |
||
4056 | */ |
||
4057 | |||
4058 | assert(mach->CallStackTop > 0); |
||
4059 | mach->CallStackTop--; |
||
4060 | |||
4061 | mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; |
||
4062 | mach->CondMask = mach->CondStack[mach->CondStackTop]; |
||
4063 | |||
4064 | mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; |
||
4065 | mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; |
||
4066 | |||
4067 | mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; |
||
4068 | mach->ContMask = mach->ContStack[mach->ContStackTop]; |
||
4069 | |||
4070 | mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; |
||
4071 | mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; |
||
4072 | |||
4073 | mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; |
||
4074 | mach->BreakType = mach->BreakStack[mach->BreakStackTop]; |
||
4075 | |||
4076 | assert(mach->FuncStackTop > 0); |
||
4077 | mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; |
||
4078 | |||
4079 | *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; |
||
4080 | |||
4081 | UPDATE_EXEC_MASK(mach); |
||
4082 | break; |
||
4083 | |||
4084 | case TGSI_OPCODE_NOP: |
||
4085 | break; |
||
4086 | |||
4087 | case TGSI_OPCODE_BREAKC: |
||
4088 | IFETCH(&r[0], 0, TGSI_CHAN_X); |
||
4089 | /* update LoopMask */ |
||
4090 | if (r[0].u[0] && (mach->ExecMask & 0x1)) { |
||
4091 | mach->LoopMask &= ~0x1; |
||
4092 | } |
||
4093 | if (r[0].u[1] && (mach->ExecMask & 0x2)) { |
||
4094 | mach->LoopMask &= ~0x2; |
||
4095 | } |
||
4096 | if (r[0].u[2] && (mach->ExecMask & 0x4)) { |
||
4097 | mach->LoopMask &= ~0x4; |
||
4098 | } |
||
4099 | if (r[0].u[3] && (mach->ExecMask & 0x8)) { |
||
4100 | mach->LoopMask &= ~0x8; |
||
4101 | } |
||
4102 | /* Todo: if mach->LoopMask == 0, jump to end of loop */ |
||
4103 | UPDATE_EXEC_MASK(mach); |
||
4104 | break; |
||
4105 | |||
4106 | case TGSI_OPCODE_F2I: |
||
4107 | exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); |
||
4108 | break; |
||
4109 | |||
4110 | case TGSI_OPCODE_IDIV: |
||
4111 | exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4112 | break; |
||
4113 | |||
4114 | case TGSI_OPCODE_IMAX: |
||
4115 | exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4116 | break; |
||
4117 | |||
4118 | case TGSI_OPCODE_IMIN: |
||
4119 | exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4120 | break; |
||
4121 | |||
4122 | case TGSI_OPCODE_INEG: |
||
4123 | exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4124 | break; |
||
4125 | |||
4126 | case TGSI_OPCODE_ISGE: |
||
4127 | exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4128 | break; |
||
4129 | |||
4130 | case TGSI_OPCODE_ISHR: |
||
4131 | exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4132 | break; |
||
4133 | |||
4134 | case TGSI_OPCODE_ISLT: |
||
4135 | exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4136 | break; |
||
4137 | |||
4138 | case TGSI_OPCODE_F2U: |
||
4139 | exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); |
||
4140 | break; |
||
4141 | |||
4142 | case TGSI_OPCODE_U2F: |
||
4143 | exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); |
||
4144 | break; |
||
4145 | |||
4146 | case TGSI_OPCODE_UADD: |
||
4147 | exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4148 | break; |
||
4149 | |||
4150 | case TGSI_OPCODE_UDIV: |
||
4151 | exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4152 | break; |
||
4153 | |||
4154 | case TGSI_OPCODE_UMAD: |
||
4155 | exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4156 | break; |
||
4157 | |||
4158 | case TGSI_OPCODE_UMAX: |
||
4159 | exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4160 | break; |
||
4161 | |||
4162 | case TGSI_OPCODE_UMIN: |
||
4163 | exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4164 | break; |
||
4165 | |||
4166 | case TGSI_OPCODE_UMOD: |
||
4167 | exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4168 | break; |
||
4169 | |||
4170 | case TGSI_OPCODE_UMUL: |
||
4171 | exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4172 | break; |
||
4173 | |||
4174 | case TGSI_OPCODE_USEQ: |
||
4175 | exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4176 | break; |
||
4177 | |||
4178 | case TGSI_OPCODE_USGE: |
||
4179 | exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4180 | break; |
||
4181 | |||
4182 | case TGSI_OPCODE_USHR: |
||
4183 | exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4184 | break; |
||
4185 | |||
4186 | case TGSI_OPCODE_USLT: |
||
4187 | exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4188 | break; |
||
4189 | |||
4190 | case TGSI_OPCODE_USNE: |
||
4191 | exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4192 | break; |
||
4193 | |||
4194 | case TGSI_OPCODE_SWITCH: |
||
4195 | exec_switch(mach, inst); |
||
4196 | break; |
||
4197 | |||
4198 | case TGSI_OPCODE_CASE: |
||
4199 | exec_case(mach, inst); |
||
4200 | break; |
||
4201 | |||
4202 | case TGSI_OPCODE_DEFAULT: |
||
4203 | exec_default(mach); |
||
4204 | break; |
||
4205 | |||
4206 | case TGSI_OPCODE_ENDSWITCH: |
||
4207 | exec_endswitch(mach); |
||
4208 | break; |
||
4209 | |||
4210 | case TGSI_OPCODE_SAMPLE_I: |
||
4211 | exec_txf(mach, inst); |
||
4212 | break; |
||
4213 | |||
4214 | case TGSI_OPCODE_SAMPLE_I_MS: |
||
4215 | assert(0); |
||
4216 | break; |
||
4217 | |||
4218 | case TGSI_OPCODE_SAMPLE: |
||
4219 | exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); |
||
4220 | break; |
||
4221 | |||
4222 | case TGSI_OPCODE_SAMPLE_B: |
||
4223 | exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); |
||
4224 | break; |
||
4225 | |||
4226 | case TGSI_OPCODE_SAMPLE_C: |
||
4227 | exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); |
||
4228 | break; |
||
4229 | |||
4230 | case TGSI_OPCODE_SAMPLE_C_LZ: |
||
4231 | exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); |
||
4232 | break; |
||
4233 | |||
4234 | case TGSI_OPCODE_SAMPLE_D: |
||
4235 | exec_sample_d(mach, inst); |
||
4236 | break; |
||
4237 | |||
4238 | case TGSI_OPCODE_SAMPLE_L: |
||
4239 | exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); |
||
4240 | break; |
||
4241 | |||
4242 | case TGSI_OPCODE_GATHER4: |
||
4243 | assert(0); |
||
4244 | break; |
||
4245 | |||
4246 | case TGSI_OPCODE_SVIEWINFO: |
||
4247 | exec_txq(mach, inst); |
||
4248 | break; |
||
4249 | |||
4250 | case TGSI_OPCODE_SAMPLE_POS: |
||
4251 | assert(0); |
||
4252 | break; |
||
4253 | |||
4254 | case TGSI_OPCODE_SAMPLE_INFO: |
||
4255 | assert(0); |
||
4256 | break; |
||
4257 | |||
4258 | case TGSI_OPCODE_UARL: |
||
4259 | exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); |
||
4260 | break; |
||
4261 | |||
4262 | case TGSI_OPCODE_UCMP: |
||
4263 | exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); |
||
4264 | break; |
||
4265 | |||
4266 | case TGSI_OPCODE_IABS: |
||
4267 | exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4268 | break; |
||
4269 | |||
4270 | case TGSI_OPCODE_ISSG: |
||
4271 | exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); |
||
4272 | break; |
||
4273 | |||
4274 | case TGSI_OPCODE_TEX2: |
||
4275 | /* simple texture lookup */ |
||
4276 | /* src[0] = texcoord */ |
||
4277 | /* src[1] = compare */ |
||
4278 | /* src[2] = sampler unit */ |
||
4279 | exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); |
||
4280 | break; |
||
4281 | case TGSI_OPCODE_TXB2: |
||
4282 | /* simple texture lookup */ |
||
4283 | /* src[0] = texcoord */ |
||
4284 | /* src[1] = bias */ |
||
4285 | /* src[2] = sampler unit */ |
||
4286 | exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); |
||
4287 | break; |
||
4288 | case TGSI_OPCODE_TXL2: |
||
4289 | /* simple texture lookup */ |
||
4290 | /* src[0] = texcoord */ |
||
4291 | /* src[1] = lod */ |
||
4292 | /* src[2] = sampler unit */ |
||
4293 | exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); |
||
4294 | break; |
||
4295 | default: |
||
4296 | assert( 0 ); |
||
4297 | } |
||
4298 | } |
||
4299 | |||
4300 | |||
4301 | /** |
||
4302 | * Run TGSI interpreter. |
||
4303 | * \return bitmask of "alive" quad components |
||
4304 | */ |
||
4305 | uint |
||
4306 | tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) |
||
4307 | { |
||
4308 | uint i; |
||
4309 | int pc = 0; |
||
4310 | uint default_mask = 0xf; |
||
4311 | |||
4312 | mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; |
||
4313 | mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; |
||
4314 | |||
4315 | if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { |
||
4316 | mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; |
||
4317 | mach->Primitives[0] = 0; |
||
4318 | /* GS runs on a single primitive for now */ |
||
4319 | default_mask = 0x1; |
||
4320 | } |
||
4321 | |||
4322 | mach->CondMask = default_mask; |
||
4323 | mach->LoopMask = default_mask; |
||
4324 | mach->ContMask = default_mask; |
||
4325 | mach->FuncMask = default_mask; |
||
4326 | mach->ExecMask = default_mask; |
||
4327 | |||
4328 | mach->Switch.mask = default_mask; |
||
4329 | |||
4330 | assert(mach->CondStackTop == 0); |
||
4331 | assert(mach->LoopStackTop == 0); |
||
4332 | assert(mach->ContStackTop == 0); |
||
4333 | assert(mach->SwitchStackTop == 0); |
||
4334 | assert(mach->BreakStackTop == 0); |
||
4335 | assert(mach->CallStackTop == 0); |
||
4336 | |||
4337 | |||
4338 | /* execute declarations (interpolants) */ |
||
4339 | for (i = 0; i < mach->NumDeclarations; i++) { |
||
4340 | exec_declaration( mach, mach->Declarations+i ); |
||
4341 | } |
||
4342 | |||
4343 | { |
||
4344 | #if DEBUG_EXECUTION |
||
4345 | struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; |
||
4346 | struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; |
||
4347 | uint inst = 1; |
||
4348 | |||
4349 | memset(mach->Temps, 0, sizeof(temps)); |
||
4350 | memset(mach->Outputs, 0, sizeof(outputs)); |
||
4351 | memset(temps, 0, sizeof(temps)); |
||
4352 | memset(outputs, 0, sizeof(outputs)); |
||
4353 | #endif |
||
4354 | |||
4355 | /* execute instructions, until pc is set to -1 */ |
||
4356 | while (pc != -1) { |
||
4357 | |||
4358 | #if DEBUG_EXECUTION |
||
4359 | uint i; |
||
4360 | |||
4361 | tgsi_dump_instruction(&mach->Instructions[pc], inst++); |
||
4362 | #endif |
||
4363 | |||
4364 | assert(pc < (int) mach->NumInstructions); |
||
4365 | exec_instruction(mach, mach->Instructions + pc, &pc); |
||
4366 | |||
4367 | #if DEBUG_EXECUTION |
||
4368 | for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { |
||
4369 | if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { |
||
4370 | uint j; |
||
4371 | |||
4372 | memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); |
||
4373 | debug_printf("TEMP[%2u] = ", i); |
||
4374 | for (j = 0; j < 4; j++) { |
||
4375 | if (j > 0) { |
||
4376 | debug_printf(" "); |
||
4377 | } |
||
4378 | debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", |
||
4379 | temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], |
||
4380 | temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], |
||
4381 | temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], |
||
4382 | temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); |
||
4383 | } |
||
4384 | } |
||
4385 | } |
||
4386 | for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { |
||
4387 | if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { |
||
4388 | uint j; |
||
4389 | |||
4390 | memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); |
||
4391 | debug_printf("OUT[%2u] = ", i); |
||
4392 | for (j = 0; j < 4; j++) { |
||
4393 | if (j > 0) { |
||
4394 | debug_printf(" "); |
||
4395 | } |
||
4396 | debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", |
||
4397 | outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], |
||
4398 | outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], |
||
4399 | outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], |
||
4400 | outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); |
||
4401 | } |
||
4402 | } |
||
4403 | } |
||
4404 | #endif |
||
4405 | } |
||
4406 | } |
||
4407 | |||
4408 | #if 0 |
||
4409 | /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ |
||
4410 | if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { |
||
4411 | /* |
||
4412 | * Scale back depth component. |
||
4413 | */ |
||
4414 | for (i = 0; i < 4; i++) |
||
4415 | mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; |
||
4416 | } |
||
4417 | #endif |
||
4418 | |||
4419 | /* Strictly speaking, these assertions aren't really needed but they |
||
4420 | * can potentially catch some bugs in the control flow code. |
||
4421 | */ |
||
4422 | assert(mach->CondStackTop == 0); |
||
4423 | assert(mach->LoopStackTop == 0); |
||
4424 | assert(mach->ContStackTop == 0); |
||
4425 | assert(mach->SwitchStackTop == 0); |
||
4426 | assert(mach->BreakStackTop == 0); |
||
4427 | assert(mach->CallStackTop == 0); |
||
4428 | |||
4429 | return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; |
||
4430 | }>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><>><>><>><>>>><>=>=>><>=>=>><>>><>>><>>><>>=>><>>=>><>>><>>><>>><>>><>>><>>><>>><>>><>>>=>=>><>>>>><>>><>>><>>>>><>>>><>>><>>>>>=>>=>>>><>>><>>><>>>><>><>>>><>>>><>>><>>><>>><>>>>><>>><>>><>>><>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>=>=>=>><>><>><>>>>>>>>>=>=>=>=>>>>>>>>>>>>>>>>>>> |