Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | |||
29 | #include |
||
30 | |||
31 | #include "i915_reg.h" |
||
32 | #include "i915_context.h" |
||
33 | #include "i915_fpc.h" |
||
34 | |||
35 | #include "pipe/p_shader_tokens.h" |
||
36 | #include "util/u_math.h" |
||
37 | #include "util/u_memory.h" |
||
38 | #include "util/u_string.h" |
||
39 | #include "tgsi/tgsi_parse.h" |
||
40 | #include "tgsi/tgsi_dump.h" |
||
41 | |||
42 | #include "draw/draw_vertex.h" |
||
43 | |||
44 | #ifndef M_PI |
||
45 | #define M_PI 3.14159265358979323846 |
||
46 | #endif |
||
47 | |||
48 | /** |
||
49 | * Simple pass-through fragment shader to use when we don't have |
||
50 | * a real shader (or it fails to compile for some reason). |
||
51 | */ |
||
52 | static unsigned passthrough_decl[] = |
||
53 | { |
||
54 | _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), |
||
55 | |||
56 | /* declare input color: |
||
57 | */ |
||
58 | (D0_DCL | |
||
59 | (REG_TYPE_T << D0_TYPE_SHIFT) | |
||
60 | (T_DIFFUSE << D0_NR_SHIFT) | |
||
61 | D0_CHANNEL_ALL), |
||
62 | 0, |
||
63 | 0, |
||
64 | }; |
||
65 | |||
66 | static unsigned passthrough_program[] = |
||
67 | { |
||
68 | /* move to output color: |
||
69 | */ |
||
70 | (A0_MOV | |
||
71 | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | |
||
72 | A0_DEST_CHANNEL_ALL | |
||
73 | (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | |
||
74 | (T_DIFFUSE << A0_SRC0_NR_SHIFT)), |
||
75 | 0x01230000, /* .xyzw */ |
||
76 | |||
77 | }; |
||
78 | |||
79 | |||
/*
 * Polynomial coefficients used by the SCS translation for the sine term:
 * 1, -1/3!, 1/5!, -1/7!
 */
static const float scs_sin_constants[4] = {
   1.0,
   -1.0f / 6,      /* 3! */
   1.0f / 120,     /* 5! */
   -1.0f / 5040    /* 7! */
};
||
86 | |||
/*
 * Polynomial coefficients used by the SCS translation for the cosine term:
 * 1, -1/2!, 1/4!, -1/6!
 */
static const float scs_cos_constants[4] = {
   1.0,
   -1.0f / 2,     /* 2! */
   1.0f / 24,     /* 4! */
   -1.0f / 720    /* 6! */
};
||
93 | |||
94 | /* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */ |
||
95 | static const float sin_constants[4] = { 2.0 * M_PI, |
||
96 | -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1), |
||
97 | 32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1), |
||
98 | -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1) |
||
99 | }; |
||
100 | |||
101 | /* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */ |
||
102 | static const float cos_constants[4] = { 1.0, |
||
103 | -4.0f * M_PI * M_PI / (2 * 1), |
||
104 | 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1), |
||
105 | -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1) |
||
106 | }; |
||
107 | |||
108 | |||
109 | |||
110 | /** |
||
111 | * component-wise negation of ureg |
||
112 | */ |
||
113 | static INLINE int |
||
114 | negate(int reg, int x, int y, int z, int w) |
||
115 | { |
||
116 | /* Another neat thing about the UREG representation */ |
||
117 | return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | |
||
118 | ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | |
||
119 | ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | |
||
120 | ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); |
||
121 | } |
||
122 | |||
123 | |||
124 | /** |
||
125 | * In the event of a translation failure, we'll generate a simple color |
||
126 | * pass-through program. |
||
127 | */ |
||
128 | static void |
||
129 | i915_use_passthrough_shader(struct i915_fragment_shader *fs) |
||
130 | { |
||
131 | fs->program = (uint *) MALLOC(sizeof(passthrough_program)); |
||
132 | fs->decl = (uint *) MALLOC(sizeof(passthrough_decl)); |
||
133 | if (fs->program) { |
||
134 | memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); |
||
135 | memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl)); |
||
136 | fs->program_len = Elements(passthrough_program); |
||
137 | fs->decl_len = Elements(passthrough_decl); |
||
138 | } |
||
139 | fs->num_constants = 0; |
||
140 | } |
||
141 | |||
142 | |||
143 | void |
||
144 | i915_program_error(struct i915_fp_compile *p, const char *msg, ...) |
||
145 | { |
||
146 | va_list args; |
||
147 | char buffer[1024]; |
||
148 | |||
149 | debug_printf("i915_program_error: "); |
||
150 | va_start( args, msg ); |
||
151 | util_vsnprintf( buffer, sizeof(buffer), msg, args ); |
||
152 | va_end( args ); |
||
153 | debug_printf("%s", buffer); |
||
154 | debug_printf("\n"); |
||
155 | |||
156 | p->error = 1; |
||
157 | } |
||
158 | |||
159 | static uint get_mapping(struct i915_fragment_shader* fs, int unit) |
||
160 | { |
||
161 | int i; |
||
162 | for (i = 0; i < I915_TEX_UNITS; i++) |
||
163 | { |
||
164 | if (fs->generic_mapping[i] == -1) { |
||
165 | fs->generic_mapping[i] = unit; |
||
166 | return i; |
||
167 | } |
||
168 | if (fs->generic_mapping[i] == unit) |
||
169 | return i; |
||
170 | } |
||
171 | debug_printf("Exceeded max generics\n"); |
||
172 | return 0; |
||
173 | } |
||
174 | |||
175 | /** |
||
176 | * Construct a ureg for the given source register. Will emit |
||
177 | * constants, apply swizzling and negation as needed. |
||
178 | */ |
||
179 | static uint |
||
180 | src_vector(struct i915_fp_compile *p, |
||
181 | const struct i915_full_src_register *source, |
||
182 | struct i915_fragment_shader* fs) |
||
183 | { |
||
184 | uint index = source->Register.Index; |
||
185 | uint src = 0, sem_name, sem_ind; |
||
186 | |||
187 | switch (source->Register.File) { |
||
188 | case TGSI_FILE_TEMPORARY: |
||
189 | if (source->Register.Index >= I915_MAX_TEMPORARY) { |
||
190 | i915_program_error(p, "Exceeded max temporary reg"); |
||
191 | return 0; |
||
192 | } |
||
193 | src = UREG(REG_TYPE_R, index); |
||
194 | break; |
||
195 | case TGSI_FILE_INPUT: |
||
196 | /* XXX: Packing COL1, FOGC into a single attribute works for |
||
197 | * texenv programs, but will fail for real fragment programs |
||
198 | * that use these attributes and expect them to be a full 4 |
||
199 | * components wide. Could use a texcoord to pass these |
||
200 | * attributes if necessary, but that won't work in the general |
||
201 | * case. |
||
202 | * |
||
203 | * We also use a texture coordinate to pass wpos when possible. |
||
204 | */ |
||
205 | |||
206 | sem_name = p->shader->info.input_semantic_name[index]; |
||
207 | sem_ind = p->shader->info.input_semantic_index[index]; |
||
208 | |||
209 | switch (sem_name) { |
||
210 | case TGSI_SEMANTIC_POSITION: |
||
211 | { |
||
212 | /* for fragcoord */ |
||
213 | int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); |
||
214 | src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); |
||
215 | break; |
||
216 | } |
||
217 | case TGSI_SEMANTIC_COLOR: |
||
218 | if (sem_ind == 0) { |
||
219 | src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); |
||
220 | } |
||
221 | else { |
||
222 | /* secondary color */ |
||
223 | assert(sem_ind == 1); |
||
224 | src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); |
||
225 | src = swizzle(src, X, Y, Z, ONE); |
||
226 | } |
||
227 | break; |
||
228 | case TGSI_SEMANTIC_FOG: |
||
229 | src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); |
||
230 | src = swizzle(src, W, W, W, W); |
||
231 | break; |
||
232 | case TGSI_SEMANTIC_GENERIC: |
||
233 | { |
||
234 | int real_tex_unit = get_mapping(fs, sem_ind); |
||
235 | src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); |
||
236 | break; |
||
237 | } |
||
238 | case TGSI_SEMANTIC_FACE: |
||
239 | { |
||
240 | /* for back/front faces */ |
||
241 | int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); |
||
242 | src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); |
||
243 | break; |
||
244 | } |
||
245 | default: |
||
246 | i915_program_error(p, "Bad source->Index"); |
||
247 | return 0; |
||
248 | } |
||
249 | break; |
||
250 | |||
251 | case TGSI_FILE_IMMEDIATE: |
||
252 | assert(index < p->num_immediates); |
||
253 | index = p->immediates_map[index]; |
||
254 | /* fall-through */ |
||
255 | case TGSI_FILE_CONSTANT: |
||
256 | src = UREG(REG_TYPE_CONST, index); |
||
257 | break; |
||
258 | |||
259 | default: |
||
260 | i915_program_error(p, "Bad source->File"); |
||
261 | return 0; |
||
262 | } |
||
263 | |||
264 | src = swizzle(src, |
||
265 | source->Register.SwizzleX, |
||
266 | source->Register.SwizzleY, |
||
267 | source->Register.SwizzleZ, |
||
268 | source->Register.SwizzleW); |
||
269 | |||
270 | /* There's both negate-all-components and per-component negation. |
||
271 | * Try to handle both here. |
||
272 | */ |
||
273 | { |
||
274 | int n = source->Register.Negate; |
||
275 | src = negate(src, n, n, n, n); |
||
276 | } |
||
277 | |||
278 | /* no abs() */ |
||
279 | #if 0 |
||
280 | /* XXX assertions disabled to allow arbfplight.c to run */ |
||
281 | /* XXX enable these assertions, or fix things */ |
||
282 | assert(!source->Register.Absolute); |
||
283 | #endif |
||
284 | if (source->Register.Absolute) |
||
285 | debug_printf("Unhandled absolute value\n"); |
||
286 | |||
287 | return src; |
||
288 | } |
||
289 | |||
290 | |||
291 | /** |
||
292 | * Construct a ureg for a destination register. |
||
293 | */ |
||
294 | static uint |
||
295 | get_result_vector(struct i915_fp_compile *p, |
||
296 | const struct i915_full_dst_register *dest) |
||
297 | { |
||
298 | switch (dest->Register.File) { |
||
299 | case TGSI_FILE_OUTPUT: |
||
300 | { |
||
301 | uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; |
||
302 | switch (sem_name) { |
||
303 | case TGSI_SEMANTIC_POSITION: |
||
304 | return UREG(REG_TYPE_OD, 0); |
||
305 | case TGSI_SEMANTIC_COLOR: |
||
306 | return UREG(REG_TYPE_OC, 0); |
||
307 | default: |
||
308 | i915_program_error(p, "Bad inst->DstReg.Index/semantics"); |
||
309 | return 0; |
||
310 | } |
||
311 | } |
||
312 | case TGSI_FILE_TEMPORARY: |
||
313 | return UREG(REG_TYPE_R, dest->Register.Index); |
||
314 | default: |
||
315 | i915_program_error(p, "Bad inst->DstReg.File"); |
||
316 | return 0; |
||
317 | } |
||
318 | } |
||
319 | |||
320 | |||
321 | /** |
||
322 | * Compute flags for saturation and writemask. |
||
323 | */ |
||
324 | static uint |
||
325 | get_result_flags(const struct i915_full_instruction *inst) |
||
326 | { |
||
327 | const uint writeMask |
||
328 | = inst->Dst[0].Register.WriteMask; |
||
329 | uint flags = 0x0; |
||
330 | |||
331 | if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) |
||
332 | flags |= A0_DEST_SATURATE; |
||
333 | |||
334 | if (writeMask & TGSI_WRITEMASK_X) |
||
335 | flags |= A0_DEST_CHANNEL_X; |
||
336 | if (writeMask & TGSI_WRITEMASK_Y) |
||
337 | flags |= A0_DEST_CHANNEL_Y; |
||
338 | if (writeMask & TGSI_WRITEMASK_Z) |
||
339 | flags |= A0_DEST_CHANNEL_Z; |
||
340 | if (writeMask & TGSI_WRITEMASK_W) |
||
341 | flags |= A0_DEST_CHANNEL_W; |
||
342 | |||
343 | return flags; |
||
344 | } |
||
345 | |||
346 | |||
347 | /** |
||
348 | * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token |
||
349 | */ |
||
350 | static uint |
||
351 | translate_tex_src_target(struct i915_fp_compile *p, uint tex) |
||
352 | { |
||
353 | switch (tex) { |
||
354 | case TGSI_TEXTURE_SHADOW1D: |
||
355 | /* fall-through */ |
||
356 | case TGSI_TEXTURE_1D: |
||
357 | return D0_SAMPLE_TYPE_2D; |
||
358 | |||
359 | case TGSI_TEXTURE_SHADOW2D: |
||
360 | /* fall-through */ |
||
361 | case TGSI_TEXTURE_2D: |
||
362 | return D0_SAMPLE_TYPE_2D; |
||
363 | |||
364 | case TGSI_TEXTURE_SHADOWRECT: |
||
365 | /* fall-through */ |
||
366 | case TGSI_TEXTURE_RECT: |
||
367 | return D0_SAMPLE_TYPE_2D; |
||
368 | |||
369 | case TGSI_TEXTURE_3D: |
||
370 | return D0_SAMPLE_TYPE_VOLUME; |
||
371 | |||
372 | case TGSI_TEXTURE_CUBE: |
||
373 | return D0_SAMPLE_TYPE_CUBE; |
||
374 | |||
375 | default: |
||
376 | i915_program_error(p, "TexSrc type"); |
||
377 | return 0; |
||
378 | } |
||
379 | } |
||
380 | |||
381 | /** |
||
382 | * Return the number of coords needed to access a given TGSI_TEXTURE_* |
||
383 | */ |
||
384 | static uint |
||
385 | texture_num_coords(struct i915_fp_compile *p, uint tex) |
||
386 | { |
||
387 | switch (tex) { |
||
388 | case TGSI_TEXTURE_SHADOW1D: |
||
389 | case TGSI_TEXTURE_1D: |
||
390 | return 1; |
||
391 | |||
392 | case TGSI_TEXTURE_SHADOW2D: |
||
393 | case TGSI_TEXTURE_2D: |
||
394 | case TGSI_TEXTURE_SHADOWRECT: |
||
395 | case TGSI_TEXTURE_RECT: |
||
396 | return 2; |
||
397 | |||
398 | case TGSI_TEXTURE_3D: |
||
399 | case TGSI_TEXTURE_CUBE: |
||
400 | return 3; |
||
401 | |||
402 | default: |
||
403 | i915_program_error(p, "Num coords"); |
||
404 | return 2; |
||
405 | } |
||
406 | } |
||
407 | |||
408 | |||
409 | /** |
||
410 | * Generate texel lookup instruction. |
||
411 | */ |
||
412 | static void |
||
413 | emit_tex(struct i915_fp_compile *p, |
||
414 | const struct i915_full_instruction *inst, |
||
415 | uint opcode, |
||
416 | struct i915_fragment_shader* fs) |
||
417 | { |
||
418 | uint texture = inst->Texture.Texture; |
||
419 | uint unit = inst->Src[1].Register.Index; |
||
420 | uint tex = translate_tex_src_target( p, texture ); |
||
421 | uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); |
||
422 | uint coord = src_vector( p, &inst->Src[0], fs); |
||
423 | |||
424 | i915_emit_texld( p, |
||
425 | get_result_vector( p, &inst->Dst[0] ), |
||
426 | get_result_flags( inst ), |
||
427 | sampler, |
||
428 | coord, |
||
429 | opcode, |
||
430 | texture_num_coords(p, texture) ); |
||
431 | } |
||
432 | |||
433 | |||
434 | /** |
||
435 | * Generate a simple arithmetic instruction |
||
436 | * \param opcode the i915 opcode |
||
437 | * \param numArgs the number of input/src arguments |
||
438 | */ |
||
439 | static void |
||
440 | emit_simple_arith(struct i915_fp_compile *p, |
||
441 | const struct i915_full_instruction *inst, |
||
442 | uint opcode, uint numArgs, |
||
443 | struct i915_fragment_shader* fs) |
||
444 | { |
||
445 | uint arg1, arg2, arg3; |
||
446 | |||
447 | assert(numArgs <= 3); |
||
448 | |||
449 | arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); |
||
450 | arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); |
||
451 | arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs ); |
||
452 | |||
453 | i915_emit_arith( p, |
||
454 | opcode, |
||
455 | get_result_vector( p, &inst->Dst[0]), |
||
456 | get_result_flags( inst ), 0, |
||
457 | arg1, |
||
458 | arg2, |
||
459 | arg3 ); |
||
460 | } |
||
461 | |||
462 | |||
463 | /** As above, but swap the first two src regs */ |
||
464 | static void |
||
465 | emit_simple_arith_swap2(struct i915_fp_compile *p, |
||
466 | const struct i915_full_instruction *inst, |
||
467 | uint opcode, uint numArgs, |
||
468 | struct i915_fragment_shader* fs) |
||
469 | { |
||
470 | struct i915_full_instruction inst2; |
||
471 | |||
472 | assert(numArgs == 2); |
||
473 | |||
474 | /* transpose first two registers */ |
||
475 | inst2 = *inst; |
||
476 | inst2.Src[0] = inst->Src[1]; |
||
477 | inst2.Src[1] = inst->Src[0]; |
||
478 | |||
479 | emit_simple_arith(p, &inst2, opcode, numArgs, fs); |
||
480 | } |
||
481 | |||
482 | /* |
||
483 | * Translate TGSI instruction to i915 instruction. |
||
484 | * |
||
485 | * Possible concerns: |
||
486 | * |
||
487 | * DDX, DDY -- return 0 |
||
488 | * SIN, COS -- could use another taylor step? |
||
489 | * LIT -- results seem a little different to sw mesa |
||
490 | * LOG -- different to mesa on negative numbers, but this is conformant. |
||
491 | */ |
||
492 | static void |
||
493 | i915_translate_instruction(struct i915_fp_compile *p, |
||
494 | const struct i915_full_instruction *inst, |
||
495 | struct i915_fragment_shader *fs) |
||
496 | { |
||
497 | uint writemask; |
||
498 | uint src0, src1, src2, flags; |
||
499 | uint tmp = 0; |
||
500 | |||
501 | switch (inst->Instruction.Opcode) { |
||
502 | case TGSI_OPCODE_ABS: |
||
503 | src0 = src_vector(p, &inst->Src[0], fs); |
||
504 | i915_emit_arith(p, |
||
505 | A0_MAX, |
||
506 | get_result_vector(p, &inst->Dst[0]), |
||
507 | get_result_flags(inst), 0, |
||
508 | src0, negate(src0, 1, 1, 1, 1), 0); |
||
509 | break; |
||
510 | |||
511 | case TGSI_OPCODE_ADD: |
||
512 | emit_simple_arith(p, inst, A0_ADD, 2, fs); |
||
513 | break; |
||
514 | |||
515 | case TGSI_OPCODE_CEIL: |
||
516 | src0 = src_vector(p, &inst->Src[0], fs); |
||
517 | tmp = i915_get_utemp(p); |
||
518 | flags = get_result_flags(inst); |
||
519 | i915_emit_arith(p, |
||
520 | A0_FLR, |
||
521 | tmp, |
||
522 | flags & A0_DEST_CHANNEL_ALL, 0, |
||
523 | negate(src0, 1, 1, 1, 1), 0, 0); |
||
524 | i915_emit_arith(p, |
||
525 | A0_MOV, |
||
526 | get_result_vector(p, &inst->Dst[0]), |
||
527 | flags, 0, |
||
528 | negate(tmp, 1, 1, 1, 1), 0, 0); |
||
529 | break; |
||
530 | |||
531 | case TGSI_OPCODE_CMP: |
||
532 | src0 = src_vector(p, &inst->Src[0], fs); |
||
533 | src1 = src_vector(p, &inst->Src[1], fs); |
||
534 | src2 = src_vector(p, &inst->Src[2], fs); |
||
535 | i915_emit_arith(p, A0_CMP, |
||
536 | get_result_vector(p, &inst->Dst[0]), |
||
537 | get_result_flags(inst), |
||
538 | 0, src0, src2, src1); /* NOTE: order of src2, src1 */ |
||
539 | break; |
||
540 | |||
541 | case TGSI_OPCODE_COS: |
||
542 | src0 = src_vector(p, &inst->Src[0], fs); |
||
543 | tmp = i915_get_utemp(p); |
||
544 | |||
545 | i915_emit_arith(p, |
||
546 | A0_MUL, |
||
547 | tmp, A0_DEST_CHANNEL_X, 0, |
||
548 | src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); |
||
549 | |||
550 | i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); |
||
551 | |||
552 | /* |
||
553 | * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 |
||
554 | * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 |
||
555 | * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 |
||
556 | * result = DP4 t0, cos_constants |
||
557 | */ |
||
558 | i915_emit_arith(p, |
||
559 | A0_MUL, |
||
560 | tmp, A0_DEST_CHANNEL_XY, 0, |
||
561 | swizzle(tmp, X, X, ONE, ONE), |
||
562 | swizzle(tmp, X, ONE, ONE, ONE), 0); |
||
563 | |||
564 | i915_emit_arith(p, |
||
565 | A0_MUL, |
||
566 | tmp, A0_DEST_CHANNEL_XYZ, 0, |
||
567 | swizzle(tmp, X, Y, X, ONE), |
||
568 | swizzle(tmp, X, X, ONE, ONE), 0); |
||
569 | |||
570 | i915_emit_arith(p, |
||
571 | A0_MUL, |
||
572 | tmp, A0_DEST_CHANNEL_XYZ, 0, |
||
573 | swizzle(tmp, X, X, Z, ONE), |
||
574 | swizzle(tmp, Z, ONE, ONE, ONE), 0); |
||
575 | |||
576 | i915_emit_arith(p, |
||
577 | A0_DP4, |
||
578 | get_result_vector(p, &inst->Dst[0]), |
||
579 | get_result_flags(inst), 0, |
||
580 | swizzle(tmp, ONE, Z, Y, X), |
||
581 | i915_emit_const4fv(p, cos_constants), 0); |
||
582 | break; |
||
583 | |||
584 | case TGSI_OPCODE_DDX: |
||
585 | case TGSI_OPCODE_DDY: |
||
586 | /* XXX We just output 0 here */ |
||
587 | debug_printf("Punting DDX/DDX\n"); |
||
588 | src0 = get_result_vector(p, &inst->Dst[0]); |
||
589 | i915_emit_arith(p, |
||
590 | A0_MOV, |
||
591 | get_result_vector(p, &inst->Dst[0]), |
||
592 | get_result_flags(inst), 0, |
||
593 | swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); |
||
594 | break; |
||
595 | |||
596 | case TGSI_OPCODE_DP2: |
||
597 | src0 = src_vector(p, &inst->Src[0], fs); |
||
598 | src1 = src_vector(p, &inst->Src[1], fs); |
||
599 | |||
600 | i915_emit_arith(p, |
||
601 | A0_DP3, |
||
602 | get_result_vector(p, &inst->Dst[0]), |
||
603 | get_result_flags(inst), 0, |
||
604 | swizzle(src0, X, Y, ZERO, ZERO), src1, 0); |
||
605 | break; |
||
606 | |||
607 | case TGSI_OPCODE_DP3: |
||
608 | emit_simple_arith(p, inst, A0_DP3, 2, fs); |
||
609 | break; |
||
610 | |||
611 | case TGSI_OPCODE_DP4: |
||
612 | emit_simple_arith(p, inst, A0_DP4, 2, fs); |
||
613 | break; |
||
614 | |||
615 | case TGSI_OPCODE_DPH: |
||
616 | src0 = src_vector(p, &inst->Src[0], fs); |
||
617 | src1 = src_vector(p, &inst->Src[1], fs); |
||
618 | |||
619 | i915_emit_arith(p, |
||
620 | A0_DP4, |
||
621 | get_result_vector(p, &inst->Dst[0]), |
||
622 | get_result_flags(inst), 0, |
||
623 | swizzle(src0, X, Y, Z, ONE), src1, 0); |
||
624 | break; |
||
625 | |||
626 | case TGSI_OPCODE_DST: |
||
627 | src0 = src_vector(p, &inst->Src[0], fs); |
||
628 | src1 = src_vector(p, &inst->Src[1], fs); |
||
629 | |||
630 | /* result[0] = 1 * 1; |
||
631 | * result[1] = a[1] * b[1]; |
||
632 | * result[2] = a[2] * 1; |
||
633 | * result[3] = 1 * b[3]; |
||
634 | */ |
||
635 | i915_emit_arith(p, |
||
636 | A0_MUL, |
||
637 | get_result_vector(p, &inst->Dst[0]), |
||
638 | get_result_flags(inst), 0, |
||
639 | swizzle(src0, ONE, Y, Z, ONE), |
||
640 | swizzle(src1, ONE, Y, ONE, W), 0); |
||
641 | break; |
||
642 | |||
643 | case TGSI_OPCODE_END: |
||
644 | /* no-op */ |
||
645 | break; |
||
646 | |||
647 | case TGSI_OPCODE_EX2: |
||
648 | src0 = src_vector(p, &inst->Src[0], fs); |
||
649 | |||
650 | i915_emit_arith(p, |
||
651 | A0_EXP, |
||
652 | get_result_vector(p, &inst->Dst[0]), |
||
653 | get_result_flags(inst), 0, |
||
654 | swizzle(src0, X, X, X, X), 0, 0); |
||
655 | break; |
||
656 | |||
657 | case TGSI_OPCODE_FLR: |
||
658 | emit_simple_arith(p, inst, A0_FLR, 1, fs); |
||
659 | break; |
||
660 | |||
661 | case TGSI_OPCODE_FRC: |
||
662 | emit_simple_arith(p, inst, A0_FRC, 1, fs); |
||
663 | break; |
||
664 | |||
665 | case TGSI_OPCODE_KILL_IF: |
||
666 | /* kill if src[0].x < 0 || src[0].y < 0 ... */ |
||
667 | src0 = src_vector(p, &inst->Src[0], fs); |
||
668 | tmp = i915_get_utemp(p); |
||
669 | |||
670 | i915_emit_texld(p, |
||
671 | tmp, /* dest reg: a dummy reg */ |
||
672 | A0_DEST_CHANNEL_ALL, /* dest writemask */ |
||
673 | 0, /* sampler */ |
||
674 | src0, /* coord*/ |
||
675 | T0_TEXKILL, /* opcode */ |
||
676 | 1); /* num_coord */ |
||
677 | break; |
||
678 | |||
679 | case TGSI_OPCODE_KILL: |
||
680 | /* unconditional kill */ |
||
681 | tmp = i915_get_utemp(p); |
||
682 | |||
683 | i915_emit_texld(p, |
||
684 | tmp, /* dest reg: a dummy reg */ |
||
685 | A0_DEST_CHANNEL_ALL, /* dest writemask */ |
||
686 | 0, /* sampler */ |
||
687 | negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */ |
||
688 | T0_TEXKILL, /* opcode */ |
||
689 | 1); /* num_coord */ |
||
690 | break; |
||
691 | |||
692 | case TGSI_OPCODE_LG2: |
||
693 | src0 = src_vector(p, &inst->Src[0], fs); |
||
694 | |||
695 | i915_emit_arith(p, |
||
696 | A0_LOG, |
||
697 | get_result_vector(p, &inst->Dst[0]), |
||
698 | get_result_flags(inst), 0, |
||
699 | swizzle(src0, X, X, X, X), 0, 0); |
||
700 | break; |
||
701 | |||
702 | case TGSI_OPCODE_LIT: |
||
703 | src0 = src_vector(p, &inst->Src[0], fs); |
||
704 | tmp = i915_get_utemp(p); |
||
705 | |||
706 | /* tmp = max( a.xyzw, a.00zw ) |
||
707 | * XXX: Clamp tmp.w to -128..128 |
||
708 | * tmp.y = log(tmp.y) |
||
709 | * tmp.y = tmp.w * tmp.y |
||
710 | * tmp.y = exp(tmp.y) |
||
711 | * result = cmp (a.11-x1, a.1x01, a.1xy1 ) |
||
712 | */ |
||
713 | i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, |
||
714 | src0, swizzle(src0, ZERO, ZERO, Z, W), 0); |
||
715 | |||
716 | i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, |
||
717 | swizzle(tmp, Y, Y, Y, Y), 0, 0); |
||
718 | |||
719 | i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, |
||
720 | swizzle(tmp, ZERO, Y, ZERO, ZERO), |
||
721 | swizzle(tmp, ZERO, W, ZERO, ZERO), 0); |
||
722 | |||
723 | i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, |
||
724 | swizzle(tmp, Y, Y, Y, Y), 0, 0); |
||
725 | |||
726 | i915_emit_arith(p, A0_CMP, |
||
727 | get_result_vector(p, &inst->Dst[0]), |
||
728 | get_result_flags(inst), 0, |
||
729 | negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), |
||
730 | swizzle(tmp, ONE, X, ZERO, ONE), |
||
731 | swizzle(tmp, ONE, X, Y, ONE)); |
||
732 | |||
733 | break; |
||
734 | |||
735 | case TGSI_OPCODE_LRP: |
||
736 | src0 = src_vector(p, &inst->Src[0], fs); |
||
737 | src1 = src_vector(p, &inst->Src[1], fs); |
||
738 | src2 = src_vector(p, &inst->Src[2], fs); |
||
739 | flags = get_result_flags(inst); |
||
740 | tmp = i915_get_utemp(p); |
||
741 | |||
742 | /* b*a + c*(1-a) |
||
743 | * |
||
744 | * b*a + c - ca |
||
745 | * |
||
746 | * tmp = b*a + c, |
||
747 | * result = (-c)*a + tmp |
||
748 | */ |
||
749 | i915_emit_arith(p, A0_MAD, tmp, |
||
750 | flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); |
||
751 | |||
752 | i915_emit_arith(p, A0_MAD, |
||
753 | get_result_vector(p, &inst->Dst[0]), |
||
754 | flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); |
||
755 | break; |
||
756 | |||
757 | case TGSI_OPCODE_MAD: |
||
758 | emit_simple_arith(p, inst, A0_MAD, 3, fs); |
||
759 | break; |
||
760 | |||
761 | case TGSI_OPCODE_MAX: |
||
762 | emit_simple_arith(p, inst, A0_MAX, 2, fs); |
||
763 | break; |
||
764 | |||
765 | case TGSI_OPCODE_MIN: |
||
766 | src0 = src_vector(p, &inst->Src[0], fs); |
||
767 | src1 = src_vector(p, &inst->Src[1], fs); |
||
768 | tmp = i915_get_utemp(p); |
||
769 | flags = get_result_flags(inst); |
||
770 | |||
771 | i915_emit_arith(p, |
||
772 | A0_MAX, |
||
773 | tmp, flags & A0_DEST_CHANNEL_ALL, 0, |
||
774 | negate(src0, 1, 1, 1, 1), |
||
775 | negate(src1, 1, 1, 1, 1), 0); |
||
776 | |||
777 | i915_emit_arith(p, |
||
778 | A0_MOV, |
||
779 | get_result_vector(p, &inst->Dst[0]), |
||
780 | flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); |
||
781 | break; |
||
782 | |||
783 | case TGSI_OPCODE_MOV: |
||
784 | emit_simple_arith(p, inst, A0_MOV, 1, fs); |
||
785 | break; |
||
786 | |||
787 | case TGSI_OPCODE_MUL: |
||
788 | emit_simple_arith(p, inst, A0_MUL, 2, fs); |
||
789 | break; |
||
790 | |||
791 | case TGSI_OPCODE_NOP: |
||
792 | break; |
||
793 | |||
794 | case TGSI_OPCODE_POW: |
||
795 | src0 = src_vector(p, &inst->Src[0], fs); |
||
796 | src1 = src_vector(p, &inst->Src[1], fs); |
||
797 | tmp = i915_get_utemp(p); |
||
798 | flags = get_result_flags(inst); |
||
799 | |||
800 | /* XXX: masking on intermediate values, here and elsewhere. |
||
801 | */ |
||
802 | i915_emit_arith(p, |
||
803 | A0_LOG, |
||
804 | tmp, A0_DEST_CHANNEL_X, 0, |
||
805 | swizzle(src0, X, X, X, X), 0, 0); |
||
806 | |||
807 | i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); |
||
808 | |||
809 | i915_emit_arith(p, |
||
810 | A0_EXP, |
||
811 | get_result_vector(p, &inst->Dst[0]), |
||
812 | flags, 0, swizzle(tmp, X, X, X, X), 0, 0); |
||
813 | break; |
||
814 | |||
815 | case TGSI_OPCODE_RET: |
||
816 | /* XXX: no-op? */ |
||
817 | break; |
||
818 | |||
819 | case TGSI_OPCODE_RCP: |
||
820 | src0 = src_vector(p, &inst->Src[0], fs); |
||
821 | |||
822 | i915_emit_arith(p, |
||
823 | A0_RCP, |
||
824 | get_result_vector(p, &inst->Dst[0]), |
||
825 | get_result_flags(inst), 0, |
||
826 | swizzle(src0, X, X, X, X), 0, 0); |
||
827 | break; |
||
828 | |||
829 | case TGSI_OPCODE_RSQ: |
||
830 | src0 = src_vector(p, &inst->Src[0], fs); |
||
831 | |||
832 | i915_emit_arith(p, |
||
833 | A0_RSQ, |
||
834 | get_result_vector(p, &inst->Dst[0]), |
||
835 | get_result_flags(inst), 0, |
||
836 | swizzle(src0, X, X, X, X), 0, 0); |
||
837 | break; |
||
838 | |||
839 | case TGSI_OPCODE_SCS: |
||
840 | src0 = src_vector(p, &inst->Src[0], fs); |
||
841 | tmp = i915_get_utemp(p); |
||
842 | |||
843 | /* |
||
844 | * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 |
||
845 | * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x |
||
846 | * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x |
||
847 | * scs.x = DP4 t1, scs_sin_constants |
||
848 | * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 |
||
849 | * scs.y = DP4 t1, scs_cos_constants |
||
850 | */ |
||
851 | i915_emit_arith(p, |
||
852 | A0_MUL, |
||
853 | tmp, A0_DEST_CHANNEL_XY, 0, |
||
854 | swizzle(src0, X, X, ONE, ONE), |
||
855 | swizzle(src0, X, ONE, ONE, ONE), 0); |
||
856 | |||
857 | i915_emit_arith(p, |
||
858 | A0_MUL, |
||
859 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
860 | swizzle(tmp, X, Y, X, Y), |
||
861 | swizzle(tmp, X, X, ONE, ONE), 0); |
||
862 | |||
863 | writemask = inst->Dst[0].Register.WriteMask; |
||
864 | |||
865 | if (writemask & TGSI_WRITEMASK_Y) { |
||
866 | uint tmp1; |
||
867 | |||
868 | if (writemask & TGSI_WRITEMASK_X) |
||
869 | tmp1 = i915_get_utemp(p); |
||
870 | else |
||
871 | tmp1 = tmp; |
||
872 | |||
873 | i915_emit_arith(p, |
||
874 | A0_MUL, |
||
875 | tmp1, A0_DEST_CHANNEL_ALL, 0, |
||
876 | swizzle(tmp, X, Y, Y, W), |
||
877 | swizzle(tmp, X, Z, ONE, ONE), 0); |
||
878 | |||
879 | i915_emit_arith(p, |
||
880 | A0_DP4, |
||
881 | get_result_vector(p, &inst->Dst[0]), |
||
882 | A0_DEST_CHANNEL_Y, 0, |
||
883 | swizzle(tmp1, W, Z, Y, X), |
||
884 | i915_emit_const4fv(p, scs_sin_constants), 0); |
||
885 | } |
||
886 | |||
887 | if (writemask & TGSI_WRITEMASK_X) { |
||
888 | i915_emit_arith(p, |
||
889 | A0_MUL, |
||
890 | tmp, A0_DEST_CHANNEL_XYZ, 0, |
||
891 | swizzle(tmp, X, X, Z, ONE), |
||
892 | swizzle(tmp, Z, ONE, ONE, ONE), 0); |
||
893 | |||
894 | i915_emit_arith(p, |
||
895 | A0_DP4, |
||
896 | get_result_vector(p, &inst->Dst[0]), |
||
897 | A0_DEST_CHANNEL_X, 0, |
||
898 | swizzle(tmp, ONE, Z, Y, X), |
||
899 | i915_emit_const4fv(p, scs_cos_constants), 0); |
||
900 | } |
||
901 | break; |
||
902 | |||
903 | case TGSI_OPCODE_SEQ: |
||
904 | /* if we're both >= and <= then we're == */ |
||
905 | src0 = src_vector(p, &inst->Src[0], fs); |
||
906 | src1 = src_vector(p, &inst->Src[1], fs); |
||
907 | tmp = i915_get_utemp(p); |
||
908 | |||
909 | i915_emit_arith(p, |
||
910 | A0_SGE, |
||
911 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
912 | src0, |
||
913 | src1, 0); |
||
914 | |||
915 | i915_emit_arith(p, |
||
916 | A0_SGE, |
||
917 | get_result_vector(p, &inst->Dst[0]), |
||
918 | A0_DEST_CHANNEL_ALL, 0, |
||
919 | src1, |
||
920 | src0, 0); |
||
921 | |||
922 | i915_emit_arith(p, |
||
923 | A0_MUL, |
||
924 | get_result_vector(p, &inst->Dst[0]), |
||
925 | A0_DEST_CHANNEL_ALL, 0, |
||
926 | get_result_vector(p, &inst->Dst[0]), |
||
927 | tmp, 0); |
||
928 | |||
929 | break; |
||
930 | |||
931 | case TGSI_OPCODE_SGE: |
||
932 | emit_simple_arith(p, inst, A0_SGE, 2, fs); |
||
933 | break; |
||
934 | |||
935 | case TGSI_OPCODE_SIN: |
||
936 | src0 = src_vector(p, &inst->Src[0], fs); |
||
937 | tmp = i915_get_utemp(p); |
||
938 | |||
939 | i915_emit_arith(p, |
||
940 | A0_MUL, |
||
941 | tmp, A0_DEST_CHANNEL_X, 0, |
||
942 | src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); |
||
943 | |||
944 | i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); |
||
945 | |||
946 | /* |
||
947 | * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 |
||
948 | * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x |
||
949 | * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x |
||
950 | * result = DP4 t1.wzyx, sin_constants |
||
951 | */ |
||
952 | i915_emit_arith(p, |
||
953 | A0_MUL, |
||
954 | tmp, A0_DEST_CHANNEL_XY, 0, |
||
955 | swizzle(tmp, X, X, ONE, ONE), |
||
956 | swizzle(tmp, X, ONE, ONE, ONE), 0); |
||
957 | |||
958 | i915_emit_arith(p, |
||
959 | A0_MUL, |
||
960 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
961 | swizzle(tmp, X, Y, X, Y), |
||
962 | swizzle(tmp, X, X, ONE, ONE), 0); |
||
963 | |||
964 | i915_emit_arith(p, |
||
965 | A0_MUL, |
||
966 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
967 | swizzle(tmp, X, Y, Y, W), |
||
968 | swizzle(tmp, X, Z, ONE, ONE), 0); |
||
969 | |||
970 | i915_emit_arith(p, |
||
971 | A0_DP4, |
||
972 | get_result_vector(p, &inst->Dst[0]), |
||
973 | get_result_flags(inst), 0, |
||
974 | swizzle(tmp, W, Z, Y, X), |
||
975 | i915_emit_const4fv(p, sin_constants), 0); |
||
976 | break; |
||
977 | |||
978 | case TGSI_OPCODE_SLE: |
||
979 | /* like SGE, but swap reg0, reg1 */ |
||
980 | emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); |
||
981 | break; |
||
982 | |||
983 | case TGSI_OPCODE_SLT: |
||
984 | emit_simple_arith(p, inst, A0_SLT, 2, fs); |
||
985 | break; |
||
986 | |||
987 | case TGSI_OPCODE_SGT: |
||
988 | /* like SLT, but swap reg0, reg1 */ |
||
989 | emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); |
||
990 | break; |
||
991 | |||
992 | case TGSI_OPCODE_SNE: |
||
993 | /* if we're < or > then we're != */ |
||
994 | src0 = src_vector(p, &inst->Src[0], fs); |
||
995 | src1 = src_vector(p, &inst->Src[1], fs); |
||
996 | tmp = i915_get_utemp(p); |
||
997 | |||
998 | i915_emit_arith(p, |
||
999 | A0_SLT, |
||
1000 | tmp, |
||
1001 | A0_DEST_CHANNEL_ALL, 0, |
||
1002 | src0, |
||
1003 | src1, 0); |
||
1004 | |||
1005 | i915_emit_arith(p, |
||
1006 | A0_SLT, |
||
1007 | get_result_vector(p, &inst->Dst[0]), |
||
1008 | A0_DEST_CHANNEL_ALL, 0, |
||
1009 | src1, |
||
1010 | src0, 0); |
||
1011 | |||
1012 | i915_emit_arith(p, |
||
1013 | A0_ADD, |
||
1014 | get_result_vector(p, &inst->Dst[0]), |
||
1015 | A0_DEST_CHANNEL_ALL, 0, |
||
1016 | get_result_vector(p, &inst->Dst[0]), |
||
1017 | tmp, 0); |
||
1018 | break; |
||
1019 | |||
1020 | case TGSI_OPCODE_SSG: |
||
1021 | /* compute (src>0) - (src<0) */ |
||
1022 | src0 = src_vector(p, &inst->Src[0], fs); |
||
1023 | tmp = i915_get_utemp(p); |
||
1024 | |||
1025 | i915_emit_arith(p, |
||
1026 | A0_SLT, |
||
1027 | tmp, |
||
1028 | A0_DEST_CHANNEL_ALL, 0, |
||
1029 | src0, |
||
1030 | swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); |
||
1031 | |||
1032 | i915_emit_arith(p, |
||
1033 | A0_SLT, |
||
1034 | get_result_vector(p, &inst->Dst[0]), |
||
1035 | A0_DEST_CHANNEL_ALL, 0, |
||
1036 | swizzle(src0, ZERO, ZERO, ZERO, ZERO), |
||
1037 | src0, 0); |
||
1038 | |||
1039 | i915_emit_arith(p, |
||
1040 | A0_ADD, |
||
1041 | get_result_vector(p, &inst->Dst[0]), |
||
1042 | A0_DEST_CHANNEL_ALL, 0, |
||
1043 | get_result_vector(p, &inst->Dst[0]), |
||
1044 | negate(tmp, 1, 1, 1, 1), 0); |
||
1045 | break; |
||
1046 | |||
1047 | case TGSI_OPCODE_SUB: |
||
1048 | src0 = src_vector(p, &inst->Src[0], fs); |
||
1049 | src1 = src_vector(p, &inst->Src[1], fs); |
||
1050 | |||
1051 | i915_emit_arith(p, |
||
1052 | A0_ADD, |
||
1053 | get_result_vector(p, &inst->Dst[0]), |
||
1054 | get_result_flags(inst), 0, |
||
1055 | src0, negate(src1, 1, 1, 1, 1), 0); |
||
1056 | break; |
||
1057 | |||
1058 | case TGSI_OPCODE_TEX: |
||
1059 | emit_tex(p, inst, T0_TEXLD, fs); |
||
1060 | break; |
||
1061 | |||
1062 | case TGSI_OPCODE_TRUNC: |
||
1063 | emit_simple_arith(p, inst, A0_TRC, 1, fs); |
||
1064 | break; |
||
1065 | |||
1066 | case TGSI_OPCODE_TXB: |
||
1067 | emit_tex(p, inst, T0_TEXLDB, fs); |
||
1068 | break; |
||
1069 | |||
1070 | case TGSI_OPCODE_TXP: |
||
1071 | emit_tex(p, inst, T0_TEXLDP, fs); |
||
1072 | break; |
||
1073 | |||
1074 | case TGSI_OPCODE_XPD: |
||
1075 | /* Cross product: |
||
1076 | * result.x = src0.y * src1.z - src0.z * src1.y; |
||
1077 | * result.y = src0.z * src1.x - src0.x * src1.z; |
||
1078 | * result.z = src0.x * src1.y - src0.y * src1.x; |
||
1079 | * result.w = undef; |
||
1080 | */ |
||
1081 | src0 = src_vector(p, &inst->Src[0], fs); |
||
1082 | src1 = src_vector(p, &inst->Src[1], fs); |
||
1083 | tmp = i915_get_utemp(p); |
||
1084 | |||
1085 | i915_emit_arith(p, |
||
1086 | A0_MUL, |
||
1087 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
1088 | swizzle(src0, Z, X, Y, ONE), |
||
1089 | swizzle(src1, Y, Z, X, ONE), 0); |
||
1090 | |||
1091 | i915_emit_arith(p, |
||
1092 | A0_MAD, |
||
1093 | get_result_vector(p, &inst->Dst[0]), |
||
1094 | get_result_flags(inst), 0, |
||
1095 | swizzle(src0, Y, Z, X, ONE), |
||
1096 | swizzle(src1, Z, X, Y, ONE), |
||
1097 | negate(tmp, 1, 1, 1, 0)); |
||
1098 | break; |
||
1099 | |||
1100 | default: |
||
1101 | i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); |
||
1102 | p->error = 1; |
||
1103 | return; |
||
1104 | } |
||
1105 | |||
1106 | i915_release_utemps(p); |
||
1107 | } |
||
1108 | |||
1109 | |||
1110 | static void i915_translate_token(struct i915_fp_compile *p, |
||
1111 | const union i915_full_token* token, |
||
1112 | struct i915_fragment_shader *fs) |
||
1113 | { |
||
1114 | struct i915_fragment_shader *ifs = p->shader; |
||
1115 | switch( token->Token.Type ) { |
||
1116 | case TGSI_TOKEN_TYPE_PROPERTY: |
||
1117 | /* |
||
1118 | * We only support one cbuf, but we still need to ignore the property |
||
1119 | * correctly so we don't hit the assert at the end of the switch case. |
||
1120 | */ |
||
1121 | assert(token->FullProperty.Property.PropertyName == |
||
1122 | TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); |
||
1123 | break; |
||
1124 | |||
1125 | case TGSI_TOKEN_TYPE_DECLARATION: |
||
1126 | if (token->FullDeclaration.Declaration.File |
||
1127 | == TGSI_FILE_CONSTANT) { |
||
1128 | uint i; |
||
1129 | for (i = token->FullDeclaration.Range.First; |
||
1130 | i <= token->FullDeclaration.Range.Last; |
||
1131 | i++) { |
||
1132 | assert(ifs->constant_flags[i] == 0x0); |
||
1133 | ifs->constant_flags[i] = I915_CONSTFLAG_USER; |
||
1134 | ifs->num_constants = MAX2(ifs->num_constants, i + 1); |
||
1135 | } |
||
1136 | } |
||
1137 | else if (token->FullDeclaration.Declaration.File |
||
1138 | == TGSI_FILE_TEMPORARY) { |
||
1139 | uint i; |
||
1140 | for (i = token->FullDeclaration.Range.First; |
||
1141 | i <= token->FullDeclaration.Range.Last; |
||
1142 | i++) { |
||
1143 | if (i >= I915_MAX_TEMPORARY) |
||
1144 | debug_printf("Too many temps (%d)\n",i); |
||
1145 | else |
||
1146 | /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ |
||
1147 | p->temp_flag |= (1 << i); /* mark temp as used */ |
||
1148 | } |
||
1149 | } |
||
1150 | break; |
||
1151 | |||
1152 | case TGSI_TOKEN_TYPE_IMMEDIATE: |
||
1153 | { |
||
1154 | const struct tgsi_full_immediate *imm |
||
1155 | = &token->FullImmediate; |
||
1156 | const uint pos = p->num_immediates++; |
||
1157 | uint j; |
||
1158 | assert( imm->Immediate.NrTokens <= 4 + 1 ); |
||
1159 | for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { |
||
1160 | p->immediates[pos][j] = imm->u[j].Float; |
||
1161 | } |
||
1162 | } |
||
1163 | break; |
||
1164 | |||
1165 | case TGSI_TOKEN_TYPE_INSTRUCTION: |
||
1166 | if (p->first_instruction) { |
||
1167 | /* resolve location of immediates */ |
||
1168 | uint i, j; |
||
1169 | for (i = 0; i < p->num_immediates; i++) { |
||
1170 | /* find constant slot for this immediate */ |
||
1171 | for (j = 0; j < I915_MAX_CONSTANT; j++) { |
||
1172 | if (ifs->constant_flags[j] == 0x0) { |
||
1173 | memcpy(ifs->constants[j], |
||
1174 | p->immediates[i], |
||
1175 | 4 * sizeof(float)); |
||
1176 | /*printf("immediate %d maps to const %d\n", i, j);*/ |
||
1177 | ifs->constant_flags[j] = 0xf; /* all four comps used */ |
||
1178 | p->immediates_map[i] = j; |
||
1179 | ifs->num_constants = MAX2(ifs->num_constants, j + 1); |
||
1180 | break; |
||
1181 | } |
||
1182 | } |
||
1183 | } |
||
1184 | |||
1185 | p->first_instruction = FALSE; |
||
1186 | } |
||
1187 | |||
1188 | i915_translate_instruction(p, &token->FullInstruction, fs); |
||
1189 | break; |
||
1190 | |||
1191 | default: |
||
1192 | assert( 0 ); |
||
1193 | } |
||
1194 | |||
1195 | } |
||
1196 | |||
1197 | /** |
||
1198 | * Translate TGSI fragment shader into i915 hardware instructions. |
||
1199 | * \param p the translation state |
||
1200 | * \param tokens the TGSI token array |
||
1201 | */ |
||
1202 | static void |
||
1203 | i915_translate_instructions(struct i915_fp_compile *p, |
||
1204 | const struct i915_token_list *tokens, |
||
1205 | struct i915_fragment_shader *fs) |
||
1206 | { |
||
1207 | int i; |
||
1208 | for(i = 0; i |
||
1209 | i915_translate_token(p, &tokens->Tokens[i], fs); |
||
1210 | } |
||
1211 | } |
||
1212 | |||
1213 | |||
1214 | static struct i915_fp_compile * |
||
1215 | i915_init_compile(struct i915_context *i915, |
||
1216 | struct i915_fragment_shader *ifs) |
||
1217 | { |
||
1218 | struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); |
||
1219 | int i; |
||
1220 | |||
1221 | p->shader = ifs; |
||
1222 | |||
1223 | /* Put new constants at end of const buffer, growing downward. |
||
1224 | * The problem is we don't know how many user-defined constants might |
||
1225 | * be specified with pipe->set_constant_buffer(). |
||
1226 | * Should pre-scan the user's program to determine the highest-numbered |
||
1227 | * constant referenced. |
||
1228 | */ |
||
1229 | ifs->num_constants = 0; |
||
1230 | memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); |
||
1231 | |||
1232 | memset(&p->register_phases, 0, sizeof(p->register_phases)); |
||
1233 | |||
1234 | for (i = 0; i < I915_TEX_UNITS; i++) |
||
1235 | ifs->generic_mapping[i] = -1; |
||
1236 | |||
1237 | p->first_instruction = TRUE; |
||
1238 | |||
1239 | p->nr_tex_indirect = 1; /* correct? */ |
||
1240 | p->nr_tex_insn = 0; |
||
1241 | p->nr_alu_insn = 0; |
||
1242 | p->nr_decl_insn = 0; |
||
1243 | |||
1244 | p->csr = p->program; |
||
1245 | p->decl = p->declarations; |
||
1246 | p->decl_s = 0; |
||
1247 | p->decl_t = 0; |
||
1248 | p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; |
||
1249 | p->utemp_flag = ~0x7; |
||
1250 | |||
1251 | /* initialize the first program word */ |
||
1252 | *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; |
||
1253 | |||
1254 | return p; |
||
1255 | } |
||
1256 | |||
1257 | |||
1258 | /* Copy compile results to the fragment program struct and destroy the |
||
1259 | * compilation context. |
||
1260 | */ |
||
1261 | static void |
||
1262 | i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) |
||
1263 | { |
||
1264 | struct i915_fragment_shader *ifs = p->shader; |
||
1265 | unsigned long program_size = (unsigned long) (p->csr - p->program); |
||
1266 | unsigned long decl_size = (unsigned long) (p->decl - p->declarations); |
||
1267 | |||
1268 | if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) |
||
1269 | debug_printf("Exceeded max nr indirect texture lookups\n"); |
||
1270 | |||
1271 | if (p->nr_tex_insn > I915_MAX_TEX_INSN) |
||
1272 | i915_program_error(p, "Exceeded max TEX instructions"); |
||
1273 | |||
1274 | if (p->nr_alu_insn > I915_MAX_ALU_INSN) |
||
1275 | i915_program_error(p, "Exceeded max ALU instructions"); |
||
1276 | |||
1277 | if (p->nr_decl_insn > I915_MAX_DECL_INSN) |
||
1278 | i915_program_error(p, "Exceeded max DECL instructions"); |
||
1279 | |||
1280 | if (p->error) { |
||
1281 | p->NumNativeInstructions = 0; |
||
1282 | p->NumNativeAluInstructions = 0; |
||
1283 | p->NumNativeTexInstructions = 0; |
||
1284 | p->NumNativeTexIndirections = 0; |
||
1285 | |||
1286 | i915_use_passthrough_shader(ifs); |
||
1287 | } |
||
1288 | else { |
||
1289 | p->NumNativeInstructions |
||
1290 | = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; |
||
1291 | p->NumNativeAluInstructions = p->nr_alu_insn; |
||
1292 | p->NumNativeTexInstructions = p->nr_tex_insn; |
||
1293 | p->NumNativeTexIndirections = p->nr_tex_indirect; |
||
1294 | |||
1295 | /* patch in the program length */ |
||
1296 | p->declarations[0] |= program_size + decl_size - 2; |
||
1297 | |||
1298 | /* Copy compilation results to fragment program struct: |
||
1299 | */ |
||
1300 | assert(!ifs->decl); |
||
1301 | assert(!ifs->program); |
||
1302 | |||
1303 | ifs->decl |
||
1304 | = (uint *) MALLOC(decl_size * sizeof(uint)); |
||
1305 | ifs->program |
||
1306 | = (uint *) MALLOC(program_size * sizeof(uint)); |
||
1307 | |||
1308 | if (ifs->decl) { |
||
1309 | ifs->decl_len = decl_size; |
||
1310 | |||
1311 | memcpy(ifs->decl, |
||
1312 | p->declarations, |
||
1313 | decl_size * sizeof(uint)); |
||
1314 | } |
||
1315 | |||
1316 | if (ifs->program) { |
||
1317 | ifs->program_len = program_size; |
||
1318 | |||
1319 | memcpy(ifs->program, |
||
1320 | p->program, |
||
1321 | program_size * sizeof(uint)); |
||
1322 | } |
||
1323 | } |
||
1324 | |||
1325 | /* Release the compilation struct: |
||
1326 | */ |
||
1327 | FREE(p); |
||
1328 | } |
||
1329 | |||
1330 | |||
1331 | |||
1332 | |||
1333 | |||
1334 | /** |
||
1335 | * Rather than trying to intercept and jiggle depth writes during |
||
1336 | * emit, just move the value into its correct position at the end of |
||
1337 | * the program: |
||
1338 | */ |
||
1339 | static void |
||
1340 | i915_fixup_depth_write(struct i915_fp_compile *p) |
||
1341 | { |
||
1342 | /* XXX assuming pos/depth is always in output[0] */ |
||
1343 | if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { |
||
1344 | const uint depth = UREG(REG_TYPE_OD, 0); |
||
1345 | |||
1346 | i915_emit_arith(p, |
||
1347 | A0_MOV, /* opcode */ |
||
1348 | depth, /* dest reg */ |
||
1349 | A0_DEST_CHANNEL_W, /* write mask */ |
||
1350 | 0, /* saturate? */ |
||
1351 | swizzle(depth, X, Y, Z, Z), /* src0 */ |
||
1352 | 0, 0 /* src1, src2 */); |
||
1353 | } |
||
1354 | } |
||
1355 | |||
1356 | |||
1357 | void |
||
1358 | i915_translate_fragment_program( struct i915_context *i915, |
||
1359 | struct i915_fragment_shader *fs) |
||
1360 | { |
||
1361 | struct i915_fp_compile *p; |
||
1362 | const struct tgsi_token *tokens = fs->state.tokens; |
||
1363 | struct i915_token_list* i_tokens; |
||
1364 | |||
1365 | #if 0 |
||
1366 | tgsi_dump(tokens, 0); |
||
1367 | #endif |
||
1368 | |||
1369 | /* hw doesn't seem to like empty frag programs, even when the depth write |
||
1370 | * fixup gets emitted below - may that one is fishy, too? */ |
||
1371 | if (fs->info.num_instructions == 1) { |
||
1372 | i915_use_passthrough_shader(fs); |
||
1373 | |||
1374 | return; |
||
1375 | } |
||
1376 | |||
1377 | p = i915_init_compile(i915, fs); |
||
1378 | |||
1379 | i_tokens = i915_optimize(tokens); |
||
1380 | i915_translate_instructions(p, i_tokens, fs); |
||
1381 | i915_fixup_depth_write(p); |
||
1382 | |||
1383 | i915_fini_compile(i915, p); |
||
1384 | i915_optimize_free(i_tokens); |
||
1385 | |||
1386 | #if 0 |
||
1387 | i915_disassemble_program(NULL, fs->program, fs->program_len); |
||
1388 | #endif |
||
1389 | }><>>>>>=>><>=>=>0)>>=>>>>>>=>>>><>><>><>><>><>><>><>><>><> |