Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2007 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | |||
29 | #include |
||
30 | |||
31 | #include "i915_reg.h" |
||
32 | #include "i915_context.h" |
||
33 | #include "i915_fpc.h" |
||
34 | #include "i915_debug_private.h" |
||
35 | |||
36 | #include "pipe/p_shader_tokens.h" |
||
37 | #include "util/u_math.h" |
||
38 | #include "util/u_memory.h" |
||
39 | #include "util/u_string.h" |
||
40 | #include "tgsi/tgsi_parse.h" |
||
41 | #include "tgsi/tgsi_dump.h" |
||
42 | |||
43 | #include "draw/draw_vertex.h" |
||
44 | |||
45 | #ifndef M_PI |
||
46 | #define M_PI 3.14159265358979323846 |
||
47 | #endif |
||
48 | |||
49 | /** |
||
50 | * Simple pass-through fragment shader to use when we don't have |
||
51 | * a real shader (or it fails to compile for some reason). |
||
52 | */ |
||
53 | static unsigned passthrough_decl[] = |
||
54 | { |
||
55 | _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), |
||
56 | |||
57 | /* declare input color: |
||
58 | */ |
||
59 | (D0_DCL | |
||
60 | (REG_TYPE_T << D0_TYPE_SHIFT) | |
||
61 | (T_DIFFUSE << D0_NR_SHIFT) | |
||
62 | D0_CHANNEL_ALL), |
||
63 | 0, |
||
64 | 0, |
||
65 | }; |
||
66 | |||
67 | static unsigned passthrough_program[] = |
||
68 | { |
||
69 | /* move to output color: |
||
70 | */ |
||
71 | (A0_MOV | |
||
72 | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | |
||
73 | A0_DEST_CHANNEL_ALL | |
||
74 | (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | |
||
75 | (T_DIFFUSE << A0_SRC0_NR_SHIFT)), |
||
76 | 0x01230000, /* .xyzw */ |
||
77 | |||
78 | }; |
||
79 | |||
80 | |||
/* Taylor-series coefficients for sine, lowest order first:
 *    1, -1/3!, 1/5!, -1/7!
 */
static const float scs_sin_constants[4] = {
   1.0,
   -1.0f / 6.0f,                /* -1/3! */
   1.0f / 120.0f,               /*  1/5! */
   -1.0f / 5040.0f              /* -1/7! */
};
||
87 | |||
/* Taylor-series coefficients for cosine, lowest order first:
 *    1, -1/2!, 1/4!, -1/6!
 */
static const float scs_cos_constants[4] = {
   1.0,
   -1.0f / 2.0f,                /* -1/2! */
   1.0f / 24.0f,                /*  1/4! */
   -1.0f / 720.0f               /* -1/6! */
};
||
94 | |||
/* Sine Taylor coefficients with the 2*pi scale folded in, lowest order
 * first:
 *    2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7!
 */
static const float sin_constants[4] = {
   2.0 * M_PI,
   -8.0f * M_PI * M_PI * M_PI / 6.0,
   32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / 120.0,
   -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / 5040.0
};
||
101 | |||
/* Cosine Taylor coefficients with the 2*pi scale folded in, lowest order
 * first:
 *    1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6!
 */
static const float cos_constants[4] = {
   1.0,
   -4.0f * M_PI * M_PI / 2.0,
   16.0f * M_PI * M_PI * M_PI * M_PI / 24.0,
   -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / 720.0
};
||
108 | |||
109 | |||
110 | |||
111 | /** |
||
112 | * component-wise negation of ureg |
||
113 | */ |
||
114 | static INLINE int |
||
115 | negate(int reg, int x, int y, int z, int w) |
||
116 | { |
||
117 | /* Another neat thing about the UREG representation */ |
||
118 | return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | |
||
119 | ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | |
||
120 | ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | |
||
121 | ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); |
||
122 | } |
||
123 | |||
124 | |||
125 | /** |
||
126 | * In the event of a translation failure, we'll generate a simple color |
||
127 | * pass-through program. |
||
128 | */ |
||
129 | static void |
||
130 | i915_use_passthrough_shader(struct i915_fragment_shader *fs) |
||
131 | { |
||
132 | fs->program = (uint *) MALLOC(sizeof(passthrough_program)); |
||
133 | fs->decl = (uint *) MALLOC(sizeof(passthrough_decl)); |
||
134 | if (fs->program) { |
||
135 | memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); |
||
136 | memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl)); |
||
137 | fs->program_len = Elements(passthrough_program); |
||
138 | fs->decl_len = Elements(passthrough_decl); |
||
139 | } |
||
140 | fs->num_constants = 0; |
||
141 | } |
||
142 | |||
143 | |||
144 | void |
||
145 | i915_program_error(struct i915_fp_compile *p, const char *msg, ...) |
||
146 | { |
||
147 | va_list args; |
||
148 | char buffer[1024]; |
||
149 | |||
150 | debug_printf("i915_program_error: "); |
||
151 | va_start( args, msg ); |
||
152 | util_vsnprintf( buffer, sizeof(buffer), msg, args ); |
||
153 | va_end( args ); |
||
154 | debug_printf("%s", buffer); |
||
155 | debug_printf("\n"); |
||
156 | |||
157 | p->error = 1; |
||
158 | } |
||
159 | |||
160 | static uint get_mapping(struct i915_fragment_shader* fs, int unit) |
||
161 | { |
||
162 | int i; |
||
163 | for (i = 0; i < I915_TEX_UNITS; i++) |
||
164 | { |
||
165 | if (fs->generic_mapping[i] == -1) { |
||
166 | fs->generic_mapping[i] = unit; |
||
167 | return i; |
||
168 | } |
||
169 | if (fs->generic_mapping[i] == unit) |
||
170 | return i; |
||
171 | } |
||
172 | debug_printf("Exceeded max generics\n"); |
||
173 | return 0; |
||
174 | } |
||
175 | |||
176 | /** |
||
177 | * Construct a ureg for the given source register. Will emit |
||
178 | * constants, apply swizzling and negation as needed. |
||
179 | */ |
||
180 | static uint |
||
181 | src_vector(struct i915_fp_compile *p, |
||
182 | const struct i915_full_src_register *source, |
||
183 | struct i915_fragment_shader *fs) |
||
184 | { |
||
185 | uint index = source->Register.Index; |
||
186 | uint src = 0, sem_name, sem_ind; |
||
187 | |||
188 | switch (source->Register.File) { |
||
189 | case TGSI_FILE_TEMPORARY: |
||
190 | if (source->Register.Index >= I915_MAX_TEMPORARY) { |
||
191 | i915_program_error(p, "Exceeded max temporary reg"); |
||
192 | return 0; |
||
193 | } |
||
194 | src = UREG(REG_TYPE_R, index); |
||
195 | break; |
||
196 | case TGSI_FILE_INPUT: |
||
197 | /* XXX: Packing COL1, FOGC into a single attribute works for |
||
198 | * texenv programs, but will fail for real fragment programs |
||
199 | * that use these attributes and expect them to be a full 4 |
||
200 | * components wide. Could use a texcoord to pass these |
||
201 | * attributes if necessary, but that won't work in the general |
||
202 | * case. |
||
203 | * |
||
204 | * We also use a texture coordinate to pass wpos when possible. |
||
205 | */ |
||
206 | |||
207 | sem_name = p->shader->info.input_semantic_name[index]; |
||
208 | sem_ind = p->shader->info.input_semantic_index[index]; |
||
209 | |||
210 | switch (sem_name) { |
||
211 | case TGSI_SEMANTIC_POSITION: |
||
212 | { |
||
213 | /* for fragcoord */ |
||
214 | int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); |
||
215 | src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); |
||
216 | break; |
||
217 | } |
||
218 | case TGSI_SEMANTIC_COLOR: |
||
219 | if (sem_ind == 0) { |
||
220 | src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); |
||
221 | } |
||
222 | else { |
||
223 | /* secondary color */ |
||
224 | assert(sem_ind == 1); |
||
225 | src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); |
||
226 | src = swizzle(src, X, Y, Z, ONE); |
||
227 | } |
||
228 | break; |
||
229 | case TGSI_SEMANTIC_FOG: |
||
230 | src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); |
||
231 | src = swizzle(src, W, W, W, W); |
||
232 | break; |
||
233 | case TGSI_SEMANTIC_GENERIC: |
||
234 | { |
||
235 | int real_tex_unit = get_mapping(fs, sem_ind); |
||
236 | src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); |
||
237 | break; |
||
238 | } |
||
239 | case TGSI_SEMANTIC_FACE: |
||
240 | { |
||
241 | /* for back/front faces */ |
||
242 | int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); |
||
243 | src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); |
||
244 | break; |
||
245 | } |
||
246 | default: |
||
247 | i915_program_error(p, "Bad source->Index"); |
||
248 | return 0; |
||
249 | } |
||
250 | break; |
||
251 | |||
252 | case TGSI_FILE_IMMEDIATE: |
||
253 | assert(index < p->num_immediates); |
||
254 | index = p->immediates_map[index]; |
||
255 | /* fall-through */ |
||
256 | case TGSI_FILE_CONSTANT: |
||
257 | src = UREG(REG_TYPE_CONST, index); |
||
258 | break; |
||
259 | |||
260 | default: |
||
261 | i915_program_error(p, "Bad source->File"); |
||
262 | return 0; |
||
263 | } |
||
264 | |||
265 | src = swizzle(src, |
||
266 | source->Register.SwizzleX, |
||
267 | source->Register.SwizzleY, |
||
268 | source->Register.SwizzleZ, |
||
269 | source->Register.SwizzleW); |
||
270 | |||
271 | /* There's both negate-all-components and per-component negation. |
||
272 | * Try to handle both here. |
||
273 | */ |
||
274 | { |
||
275 | int n = source->Register.Negate; |
||
276 | src = negate(src, n, n, n, n); |
||
277 | } |
||
278 | |||
279 | /* no abs() */ |
||
280 | #if 0 |
||
281 | /* XXX assertions disabled to allow arbfplight.c to run */ |
||
282 | /* XXX enable these assertions, or fix things */ |
||
283 | assert(!source->Register.Absolute); |
||
284 | #endif |
||
285 | if (source->Register.Absolute) |
||
286 | debug_printf("Unhandled absolute value\n"); |
||
287 | |||
288 | return src; |
||
289 | } |
||
290 | |||
291 | |||
292 | /** |
||
293 | * Construct a ureg for a destination register. |
||
294 | */ |
||
295 | static uint |
||
296 | get_result_vector(struct i915_fp_compile *p, |
||
297 | const struct i915_full_dst_register *dest) |
||
298 | { |
||
299 | switch (dest->Register.File) { |
||
300 | case TGSI_FILE_OUTPUT: |
||
301 | { |
||
302 | uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; |
||
303 | switch (sem_name) { |
||
304 | case TGSI_SEMANTIC_POSITION: |
||
305 | return UREG(REG_TYPE_OD, 0); |
||
306 | case TGSI_SEMANTIC_COLOR: |
||
307 | return UREG(REG_TYPE_OC, 0); |
||
308 | default: |
||
309 | i915_program_error(p, "Bad inst->DstReg.Index/semantics"); |
||
310 | return 0; |
||
311 | } |
||
312 | } |
||
313 | case TGSI_FILE_TEMPORARY: |
||
314 | return UREG(REG_TYPE_R, dest->Register.Index); |
||
315 | default: |
||
316 | i915_program_error(p, "Bad inst->DstReg.File"); |
||
317 | return 0; |
||
318 | } |
||
319 | } |
||
320 | |||
321 | |||
322 | /** |
||
323 | * Compute flags for saturation and writemask. |
||
324 | */ |
||
325 | static uint |
||
326 | get_result_flags(const struct i915_full_instruction *inst) |
||
327 | { |
||
328 | const uint writeMask |
||
329 | = inst->Dst[0].Register.WriteMask; |
||
330 | uint flags = 0x0; |
||
331 | |||
332 | if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) |
||
333 | flags |= A0_DEST_SATURATE; |
||
334 | |||
335 | if (writeMask & TGSI_WRITEMASK_X) |
||
336 | flags |= A0_DEST_CHANNEL_X; |
||
337 | if (writeMask & TGSI_WRITEMASK_Y) |
||
338 | flags |= A0_DEST_CHANNEL_Y; |
||
339 | if (writeMask & TGSI_WRITEMASK_Z) |
||
340 | flags |= A0_DEST_CHANNEL_Z; |
||
341 | if (writeMask & TGSI_WRITEMASK_W) |
||
342 | flags |= A0_DEST_CHANNEL_W; |
||
343 | |||
344 | return flags; |
||
345 | } |
||
346 | |||
347 | |||
348 | /** |
||
349 | * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token |
||
350 | */ |
||
351 | static uint |
||
352 | translate_tex_src_target(struct i915_fp_compile *p, uint tex) |
||
353 | { |
||
354 | switch (tex) { |
||
355 | case TGSI_TEXTURE_SHADOW1D: |
||
356 | /* fall-through */ |
||
357 | case TGSI_TEXTURE_1D: |
||
358 | return D0_SAMPLE_TYPE_2D; |
||
359 | |||
360 | case TGSI_TEXTURE_SHADOW2D: |
||
361 | /* fall-through */ |
||
362 | case TGSI_TEXTURE_2D: |
||
363 | return D0_SAMPLE_TYPE_2D; |
||
364 | |||
365 | case TGSI_TEXTURE_SHADOWRECT: |
||
366 | /* fall-through */ |
||
367 | case TGSI_TEXTURE_RECT: |
||
368 | return D0_SAMPLE_TYPE_2D; |
||
369 | |||
370 | case TGSI_TEXTURE_3D: |
||
371 | return D0_SAMPLE_TYPE_VOLUME; |
||
372 | |||
373 | case TGSI_TEXTURE_CUBE: |
||
374 | return D0_SAMPLE_TYPE_CUBE; |
||
375 | |||
376 | default: |
||
377 | i915_program_error(p, "TexSrc type"); |
||
378 | return 0; |
||
379 | } |
||
380 | } |
||
381 | |||
382 | /** |
||
383 | * Return the number of coords needed to access a given TGSI_TEXTURE_* |
||
384 | */ |
||
385 | uint |
||
386 | i915_num_coords(uint tex) |
||
387 | { |
||
388 | switch (tex) { |
||
389 | case TGSI_TEXTURE_SHADOW1D: |
||
390 | case TGSI_TEXTURE_1D: |
||
391 | return 1; |
||
392 | |||
393 | case TGSI_TEXTURE_SHADOW2D: |
||
394 | case TGSI_TEXTURE_2D: |
||
395 | case TGSI_TEXTURE_SHADOWRECT: |
||
396 | case TGSI_TEXTURE_RECT: |
||
397 | return 2; |
||
398 | |||
399 | case TGSI_TEXTURE_3D: |
||
400 | case TGSI_TEXTURE_CUBE: |
||
401 | return 3; |
||
402 | |||
403 | default: |
||
404 | debug_printf("Unknown texture target for num coords"); |
||
405 | return 2; |
||
406 | } |
||
407 | } |
||
408 | |||
409 | |||
410 | /** |
||
411 | * Generate texel lookup instruction. |
||
412 | */ |
||
413 | static void |
||
414 | emit_tex(struct i915_fp_compile *p, |
||
415 | const struct i915_full_instruction *inst, |
||
416 | uint opcode, |
||
417 | struct i915_fragment_shader* fs) |
||
418 | { |
||
419 | uint texture = inst->Texture.Texture; |
||
420 | uint unit = inst->Src[1].Register.Index; |
||
421 | uint tex = translate_tex_src_target( p, texture ); |
||
422 | uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); |
||
423 | uint coord = src_vector( p, &inst->Src[0], fs); |
||
424 | |||
425 | i915_emit_texld( p, |
||
426 | get_result_vector( p, &inst->Dst[0] ), |
||
427 | get_result_flags( inst ), |
||
428 | sampler, |
||
429 | coord, |
||
430 | opcode, |
||
431 | i915_num_coords(texture) ); |
||
432 | } |
||
433 | |||
434 | |||
435 | /** |
||
436 | * Generate a simple arithmetic instruction |
||
437 | * \param opcode the i915 opcode |
||
438 | * \param numArgs the number of input/src arguments |
||
439 | */ |
||
440 | static void |
||
441 | emit_simple_arith(struct i915_fp_compile *p, |
||
442 | const struct i915_full_instruction *inst, |
||
443 | uint opcode, uint numArgs, |
||
444 | struct i915_fragment_shader *fs) |
||
445 | { |
||
446 | uint arg1, arg2, arg3; |
||
447 | |||
448 | assert(numArgs <= 3); |
||
449 | |||
450 | arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); |
||
451 | arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); |
||
452 | arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs ); |
||
453 | |||
454 | i915_emit_arith( p, |
||
455 | opcode, |
||
456 | get_result_vector( p, &inst->Dst[0]), |
||
457 | get_result_flags( inst ), 0, |
||
458 | arg1, |
||
459 | arg2, |
||
460 | arg3 ); |
||
461 | } |
||
462 | |||
463 | |||
464 | /** As above, but swap the first two src regs */ |
||
465 | static void |
||
466 | emit_simple_arith_swap2(struct i915_fp_compile *p, |
||
467 | const struct i915_full_instruction *inst, |
||
468 | uint opcode, uint numArgs, |
||
469 | struct i915_fragment_shader *fs) |
||
470 | { |
||
471 | struct i915_full_instruction inst2; |
||
472 | |||
473 | assert(numArgs == 2); |
||
474 | |||
475 | /* transpose first two registers */ |
||
476 | inst2 = *inst; |
||
477 | inst2.Src[0] = inst->Src[1]; |
||
478 | inst2.Src[1] = inst->Src[0]; |
||
479 | |||
480 | emit_simple_arith(p, &inst2, opcode, numArgs, fs); |
||
481 | } |
||
482 | |||
483 | /* |
||
484 | * Translate TGSI instruction to i915 instruction. |
||
485 | * |
||
486 | * Possible concerns: |
||
487 | * |
||
488 | * DDX, DDY -- return 0 |
||
489 | * SIN, COS -- could use another taylor step? |
||
490 | * LIT -- results seem a little different to sw mesa |
||
491 | * LOG -- different to mesa on negative numbers, but this is conformant. |
||
492 | */ |
||
493 | static void |
||
494 | i915_translate_instruction(struct i915_fp_compile *p, |
||
495 | const struct i915_full_instruction *inst, |
||
496 | struct i915_fragment_shader *fs) |
||
497 | { |
||
498 | uint writemask; |
||
499 | uint src0, src1, src2, flags; |
||
500 | uint tmp = 0; |
||
501 | |||
502 | switch (inst->Instruction.Opcode) { |
||
503 | case TGSI_OPCODE_ABS: |
||
504 | src0 = src_vector(p, &inst->Src[0], fs); |
||
505 | i915_emit_arith(p, |
||
506 | A0_MAX, |
||
507 | get_result_vector(p, &inst->Dst[0]), |
||
508 | get_result_flags(inst), 0, |
||
509 | src0, negate(src0, 1, 1, 1, 1), 0); |
||
510 | break; |
||
511 | |||
512 | case TGSI_OPCODE_ADD: |
||
513 | emit_simple_arith(p, inst, A0_ADD, 2, fs); |
||
514 | break; |
||
515 | |||
516 | case TGSI_OPCODE_CEIL: |
||
517 | src0 = src_vector(p, &inst->Src[0], fs); |
||
518 | tmp = i915_get_utemp(p); |
||
519 | flags = get_result_flags(inst); |
||
520 | i915_emit_arith(p, |
||
521 | A0_FLR, |
||
522 | tmp, |
||
523 | flags & A0_DEST_CHANNEL_ALL, 0, |
||
524 | negate(src0, 1, 1, 1, 1), 0, 0); |
||
525 | i915_emit_arith(p, |
||
526 | A0_MOV, |
||
527 | get_result_vector(p, &inst->Dst[0]), |
||
528 | flags, 0, |
||
529 | negate(tmp, 1, 1, 1, 1), 0, 0); |
||
530 | break; |
||
531 | |||
532 | case TGSI_OPCODE_CMP: |
||
533 | src0 = src_vector(p, &inst->Src[0], fs); |
||
534 | src1 = src_vector(p, &inst->Src[1], fs); |
||
535 | src2 = src_vector(p, &inst->Src[2], fs); |
||
536 | i915_emit_arith(p, A0_CMP, |
||
537 | get_result_vector(p, &inst->Dst[0]), |
||
538 | get_result_flags(inst), |
||
539 | 0, src0, src2, src1); /* NOTE: order of src2, src1 */ |
||
540 | break; |
||
541 | |||
542 | case TGSI_OPCODE_COS: |
||
543 | src0 = src_vector(p, &inst->Src[0], fs); |
||
544 | tmp = i915_get_utemp(p); |
||
545 | |||
546 | i915_emit_arith(p, |
||
547 | A0_MUL, |
||
548 | tmp, A0_DEST_CHANNEL_X, 0, |
||
549 | src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); |
||
550 | |||
551 | i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); |
||
552 | |||
553 | /* |
||
554 | * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 |
||
555 | * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 |
||
556 | * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 |
||
557 | * result = DP4 t0, cos_constants |
||
558 | */ |
||
559 | i915_emit_arith(p, |
||
560 | A0_MUL, |
||
561 | tmp, A0_DEST_CHANNEL_XY, 0, |
||
562 | swizzle(tmp, X, X, ONE, ONE), |
||
563 | swizzle(tmp, X, ONE, ONE, ONE), 0); |
||
564 | |||
565 | i915_emit_arith(p, |
||
566 | A0_MUL, |
||
567 | tmp, A0_DEST_CHANNEL_XYZ, 0, |
||
568 | swizzle(tmp, X, Y, X, ONE), |
||
569 | swizzle(tmp, X, X, ONE, ONE), 0); |
||
570 | |||
571 | i915_emit_arith(p, |
||
572 | A0_MUL, |
||
573 | tmp, A0_DEST_CHANNEL_XYZ, 0, |
||
574 | swizzle(tmp, X, X, Z, ONE), |
||
575 | swizzle(tmp, Z, ONE, ONE, ONE), 0); |
||
576 | |||
577 | i915_emit_arith(p, |
||
578 | A0_DP4, |
||
579 | get_result_vector(p, &inst->Dst[0]), |
||
580 | get_result_flags(inst), 0, |
||
581 | swizzle(tmp, ONE, Z, Y, X), |
||
582 | i915_emit_const4fv(p, cos_constants), 0); |
||
583 | break; |
||
584 | |||
585 | case TGSI_OPCODE_DDX: |
||
586 | case TGSI_OPCODE_DDY: |
||
587 | /* XXX We just output 0 here */ |
||
588 | debug_printf("Punting DDX/DDX\n"); |
||
589 | src0 = get_result_vector(p, &inst->Dst[0]); |
||
590 | i915_emit_arith(p, |
||
591 | A0_MOV, |
||
592 | get_result_vector(p, &inst->Dst[0]), |
||
593 | get_result_flags(inst), 0, |
||
594 | swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); |
||
595 | break; |
||
596 | |||
597 | case TGSI_OPCODE_DP2: |
||
598 | src0 = src_vector(p, &inst->Src[0], fs); |
||
599 | src1 = src_vector(p, &inst->Src[1], fs); |
||
600 | |||
601 | i915_emit_arith(p, |
||
602 | A0_DP3, |
||
603 | get_result_vector(p, &inst->Dst[0]), |
||
604 | get_result_flags(inst), 0, |
||
605 | swizzle(src0, X, Y, ZERO, ZERO), src1, 0); |
||
606 | break; |
||
607 | |||
608 | case TGSI_OPCODE_DP3: |
||
609 | emit_simple_arith(p, inst, A0_DP3, 2, fs); |
||
610 | break; |
||
611 | |||
612 | case TGSI_OPCODE_DP4: |
||
613 | emit_simple_arith(p, inst, A0_DP4, 2, fs); |
||
614 | break; |
||
615 | |||
616 | case TGSI_OPCODE_DPH: |
||
617 | src0 = src_vector(p, &inst->Src[0], fs); |
||
618 | src1 = src_vector(p, &inst->Src[1], fs); |
||
619 | |||
620 | i915_emit_arith(p, |
||
621 | A0_DP4, |
||
622 | get_result_vector(p, &inst->Dst[0]), |
||
623 | get_result_flags(inst), 0, |
||
624 | swizzle(src0, X, Y, Z, ONE), src1, 0); |
||
625 | break; |
||
626 | |||
627 | case TGSI_OPCODE_DST: |
||
628 | src0 = src_vector(p, &inst->Src[0], fs); |
||
629 | src1 = src_vector(p, &inst->Src[1], fs); |
||
630 | |||
631 | /* result[0] = 1 * 1; |
||
632 | * result[1] = a[1] * b[1]; |
||
633 | * result[2] = a[2] * 1; |
||
634 | * result[3] = 1 * b[3]; |
||
635 | */ |
||
636 | i915_emit_arith(p, |
||
637 | A0_MUL, |
||
638 | get_result_vector(p, &inst->Dst[0]), |
||
639 | get_result_flags(inst), 0, |
||
640 | swizzle(src0, ONE, Y, Z, ONE), |
||
641 | swizzle(src1, ONE, Y, ONE, W), 0); |
||
642 | break; |
||
643 | |||
644 | case TGSI_OPCODE_END: |
||
645 | /* no-op */ |
||
646 | break; |
||
647 | |||
648 | case TGSI_OPCODE_EX2: |
||
649 | src0 = src_vector(p, &inst->Src[0], fs); |
||
650 | |||
651 | i915_emit_arith(p, |
||
652 | A0_EXP, |
||
653 | get_result_vector(p, &inst->Dst[0]), |
||
654 | get_result_flags(inst), 0, |
||
655 | swizzle(src0, X, X, X, X), 0, 0); |
||
656 | break; |
||
657 | |||
658 | case TGSI_OPCODE_FLR: |
||
659 | emit_simple_arith(p, inst, A0_FLR, 1, fs); |
||
660 | break; |
||
661 | |||
662 | case TGSI_OPCODE_FRC: |
||
663 | emit_simple_arith(p, inst, A0_FRC, 1, fs); |
||
664 | break; |
||
665 | |||
666 | case TGSI_OPCODE_KILL_IF: |
||
667 | /* kill if src[0].x < 0 || src[0].y < 0 ... */ |
||
668 | src0 = src_vector(p, &inst->Src[0], fs); |
||
669 | tmp = i915_get_utemp(p); |
||
670 | |||
671 | i915_emit_texld(p, |
||
672 | tmp, /* dest reg: a dummy reg */ |
||
673 | A0_DEST_CHANNEL_ALL, /* dest writemask */ |
||
674 | 0, /* sampler */ |
||
675 | src0, /* coord*/ |
||
676 | T0_TEXKILL, /* opcode */ |
||
677 | 1); /* num_coord */ |
||
678 | break; |
||
679 | |||
680 | case TGSI_OPCODE_KILL: |
||
681 | /* unconditional kill */ |
||
682 | tmp = i915_get_utemp(p); |
||
683 | |||
684 | i915_emit_texld(p, |
||
685 | tmp, /* dest reg: a dummy reg */ |
||
686 | A0_DEST_CHANNEL_ALL, /* dest writemask */ |
||
687 | 0, /* sampler */ |
||
688 | negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */ |
||
689 | T0_TEXKILL, /* opcode */ |
||
690 | 1); /* num_coord */ |
||
691 | break; |
||
692 | |||
693 | case TGSI_OPCODE_LG2: |
||
694 | src0 = src_vector(p, &inst->Src[0], fs); |
||
695 | |||
696 | i915_emit_arith(p, |
||
697 | A0_LOG, |
||
698 | get_result_vector(p, &inst->Dst[0]), |
||
699 | get_result_flags(inst), 0, |
||
700 | swizzle(src0, X, X, X, X), 0, 0); |
||
701 | break; |
||
702 | |||
703 | case TGSI_OPCODE_LIT: |
||
704 | src0 = src_vector(p, &inst->Src[0], fs); |
||
705 | tmp = i915_get_utemp(p); |
||
706 | |||
707 | /* tmp = max( a.xyzw, a.00zw ) |
||
708 | * XXX: Clamp tmp.w to -128..128 |
||
709 | * tmp.y = log(tmp.y) |
||
710 | * tmp.y = tmp.w * tmp.y |
||
711 | * tmp.y = exp(tmp.y) |
||
712 | * result = cmp (a.11-x1, a.1x01, a.1xy1 ) |
||
713 | */ |
||
714 | i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, |
||
715 | src0, swizzle(src0, ZERO, ZERO, Z, W), 0); |
||
716 | |||
717 | i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, |
||
718 | swizzle(tmp, Y, Y, Y, Y), 0, 0); |
||
719 | |||
720 | i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, |
||
721 | swizzle(tmp, ZERO, Y, ZERO, ZERO), |
||
722 | swizzle(tmp, ZERO, W, ZERO, ZERO), 0); |
||
723 | |||
724 | i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, |
||
725 | swizzle(tmp, Y, Y, Y, Y), 0, 0); |
||
726 | |||
727 | i915_emit_arith(p, A0_CMP, |
||
728 | get_result_vector(p, &inst->Dst[0]), |
||
729 | get_result_flags(inst), 0, |
||
730 | negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), |
||
731 | swizzle(tmp, ONE, X, ZERO, ONE), |
||
732 | swizzle(tmp, ONE, X, Y, ONE)); |
||
733 | |||
734 | break; |
||
735 | |||
736 | case TGSI_OPCODE_LRP: |
||
737 | src0 = src_vector(p, &inst->Src[0], fs); |
||
738 | src1 = src_vector(p, &inst->Src[1], fs); |
||
739 | src2 = src_vector(p, &inst->Src[2], fs); |
||
740 | flags = get_result_flags(inst); |
||
741 | tmp = i915_get_utemp(p); |
||
742 | |||
743 | /* b*a + c*(1-a) |
||
744 | * |
||
745 | * b*a + c - ca |
||
746 | * |
||
747 | * tmp = b*a + c, |
||
748 | * result = (-c)*a + tmp |
||
749 | */ |
||
750 | i915_emit_arith(p, A0_MAD, tmp, |
||
751 | flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); |
||
752 | |||
753 | i915_emit_arith(p, A0_MAD, |
||
754 | get_result_vector(p, &inst->Dst[0]), |
||
755 | flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); |
||
756 | break; |
||
757 | |||
758 | case TGSI_OPCODE_MAD: |
||
759 | emit_simple_arith(p, inst, A0_MAD, 3, fs); |
||
760 | break; |
||
761 | |||
762 | case TGSI_OPCODE_MAX: |
||
763 | emit_simple_arith(p, inst, A0_MAX, 2, fs); |
||
764 | break; |
||
765 | |||
766 | case TGSI_OPCODE_MIN: |
||
767 | emit_simple_arith(p, inst, A0_MIN, 2, fs); |
||
768 | break; |
||
769 | |||
770 | case TGSI_OPCODE_MOV: |
||
771 | emit_simple_arith(p, inst, A0_MOV, 1, fs); |
||
772 | break; |
||
773 | |||
774 | case TGSI_OPCODE_MUL: |
||
775 | emit_simple_arith(p, inst, A0_MUL, 2, fs); |
||
776 | break; |
||
777 | |||
778 | case TGSI_OPCODE_NOP: |
||
779 | break; |
||
780 | |||
781 | case TGSI_OPCODE_POW: |
||
782 | src0 = src_vector(p, &inst->Src[0], fs); |
||
783 | src1 = src_vector(p, &inst->Src[1], fs); |
||
784 | tmp = i915_get_utemp(p); |
||
785 | flags = get_result_flags(inst); |
||
786 | |||
787 | /* XXX: masking on intermediate values, here and elsewhere. |
||
788 | */ |
||
789 | i915_emit_arith(p, |
||
790 | A0_LOG, |
||
791 | tmp, A0_DEST_CHANNEL_X, 0, |
||
792 | swizzle(src0, X, X, X, X), 0, 0); |
||
793 | |||
794 | i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); |
||
795 | |||
796 | i915_emit_arith(p, |
||
797 | A0_EXP, |
||
798 | get_result_vector(p, &inst->Dst[0]), |
||
799 | flags, 0, swizzle(tmp, X, X, X, X), 0, 0); |
||
800 | break; |
||
801 | |||
802 | case TGSI_OPCODE_RET: |
||
803 | /* XXX: no-op? */ |
||
804 | break; |
||
805 | |||
806 | case TGSI_OPCODE_RCP: |
||
807 | src0 = src_vector(p, &inst->Src[0], fs); |
||
808 | |||
809 | i915_emit_arith(p, |
||
810 | A0_RCP, |
||
811 | get_result_vector(p, &inst->Dst[0]), |
||
812 | get_result_flags(inst), 0, |
||
813 | swizzle(src0, X, X, X, X), 0, 0); |
||
814 | break; |
||
815 | |||
816 | case TGSI_OPCODE_RSQ: |
||
817 | src0 = src_vector(p, &inst->Src[0], fs); |
||
818 | |||
819 | i915_emit_arith(p, |
||
820 | A0_RSQ, |
||
821 | get_result_vector(p, &inst->Dst[0]), |
||
822 | get_result_flags(inst), 0, |
||
823 | swizzle(src0, X, X, X, X), 0, 0); |
||
824 | break; |
||
825 | |||
826 | case TGSI_OPCODE_SCS: |
||
827 | src0 = src_vector(p, &inst->Src[0], fs); |
||
828 | tmp = i915_get_utemp(p); |
||
829 | |||
830 | /* |
||
831 | * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 |
||
832 | * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x |
||
833 | * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x |
||
834 | * scs.x = DP4 t1, scs_sin_constants |
||
835 | * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 |
||
836 | * scs.y = DP4 t1, scs_cos_constants |
||
837 | */ |
||
838 | i915_emit_arith(p, |
||
839 | A0_MUL, |
||
840 | tmp, A0_DEST_CHANNEL_XY, 0, |
||
841 | swizzle(src0, X, X, ONE, ONE), |
||
842 | swizzle(src0, X, ONE, ONE, ONE), 0); |
||
843 | |||
844 | i915_emit_arith(p, |
||
845 | A0_MUL, |
||
846 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
847 | swizzle(tmp, X, Y, X, Y), |
||
848 | swizzle(tmp, X, X, ONE, ONE), 0); |
||
849 | |||
850 | writemask = inst->Dst[0].Register.WriteMask; |
||
851 | |||
852 | if (writemask & TGSI_WRITEMASK_Y) { |
||
853 | uint tmp1; |
||
854 | |||
855 | if (writemask & TGSI_WRITEMASK_X) |
||
856 | tmp1 = i915_get_utemp(p); |
||
857 | else |
||
858 | tmp1 = tmp; |
||
859 | |||
860 | i915_emit_arith(p, |
||
861 | A0_MUL, |
||
862 | tmp1, A0_DEST_CHANNEL_ALL, 0, |
||
863 | swizzle(tmp, X, Y, Y, W), |
||
864 | swizzle(tmp, X, Z, ONE, ONE), 0); |
||
865 | |||
866 | i915_emit_arith(p, |
||
867 | A0_DP4, |
||
868 | get_result_vector(p, &inst->Dst[0]), |
||
869 | A0_DEST_CHANNEL_Y, 0, |
||
870 | swizzle(tmp1, W, Z, Y, X), |
||
871 | i915_emit_const4fv(p, scs_sin_constants), 0); |
||
872 | } |
||
873 | |||
874 | if (writemask & TGSI_WRITEMASK_X) { |
||
875 | i915_emit_arith(p, |
||
876 | A0_MUL, |
||
877 | tmp, A0_DEST_CHANNEL_XYZ, 0, |
||
878 | swizzle(tmp, X, X, Z, ONE), |
||
879 | swizzle(tmp, Z, ONE, ONE, ONE), 0); |
||
880 | |||
881 | i915_emit_arith(p, |
||
882 | A0_DP4, |
||
883 | get_result_vector(p, &inst->Dst[0]), |
||
884 | A0_DEST_CHANNEL_X, 0, |
||
885 | swizzle(tmp, ONE, Z, Y, X), |
||
886 | i915_emit_const4fv(p, scs_cos_constants), 0); |
||
887 | } |
||
888 | break; |
||
889 | |||
890 | case TGSI_OPCODE_SEQ: |
||
891 | /* if we're both >= and <= then we're == */ |
||
892 | src0 = src_vector(p, &inst->Src[0], fs); |
||
893 | src1 = src_vector(p, &inst->Src[1], fs); |
||
894 | tmp = i915_get_utemp(p); |
||
895 | |||
896 | i915_emit_arith(p, |
||
897 | A0_SGE, |
||
898 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
899 | src0, |
||
900 | src1, 0); |
||
901 | |||
902 | i915_emit_arith(p, |
||
903 | A0_SGE, |
||
904 | get_result_vector(p, &inst->Dst[0]), |
||
905 | A0_DEST_CHANNEL_ALL, 0, |
||
906 | src1, |
||
907 | src0, 0); |
||
908 | |||
909 | i915_emit_arith(p, |
||
910 | A0_MUL, |
||
911 | get_result_vector(p, &inst->Dst[0]), |
||
912 | A0_DEST_CHANNEL_ALL, 0, |
||
913 | get_result_vector(p, &inst->Dst[0]), |
||
914 | tmp, 0); |
||
915 | |||
916 | break; |
||
917 | |||
918 | case TGSI_OPCODE_SGE: |
||
919 | emit_simple_arith(p, inst, A0_SGE, 2, fs); |
||
920 | break; |
||
921 | |||
922 | case TGSI_OPCODE_SIN: |
||
923 | src0 = src_vector(p, &inst->Src[0], fs); |
||
924 | tmp = i915_get_utemp(p); |
||
925 | |||
926 | i915_emit_arith(p, |
||
927 | A0_MUL, |
||
928 | tmp, A0_DEST_CHANNEL_X, 0, |
||
929 | src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); |
||
930 | |||
931 | i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); |
||
932 | |||
933 | /* |
||
934 | * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 |
||
935 | * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x |
||
936 | * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x |
||
937 | * result = DP4 t1.wzyx, sin_constants |
||
938 | */ |
||
939 | i915_emit_arith(p, |
||
940 | A0_MUL, |
||
941 | tmp, A0_DEST_CHANNEL_XY, 0, |
||
942 | swizzle(tmp, X, X, ONE, ONE), |
||
943 | swizzle(tmp, X, ONE, ONE, ONE), 0); |
||
944 | |||
945 | i915_emit_arith(p, |
||
946 | A0_MUL, |
||
947 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
948 | swizzle(tmp, X, Y, X, Y), |
||
949 | swizzle(tmp, X, X, ONE, ONE), 0); |
||
950 | |||
951 | i915_emit_arith(p, |
||
952 | A0_MUL, |
||
953 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
954 | swizzle(tmp, X, Y, Y, W), |
||
955 | swizzle(tmp, X, Z, ONE, ONE), 0); |
||
956 | |||
957 | i915_emit_arith(p, |
||
958 | A0_DP4, |
||
959 | get_result_vector(p, &inst->Dst[0]), |
||
960 | get_result_flags(inst), 0, |
||
961 | swizzle(tmp, W, Z, Y, X), |
||
962 | i915_emit_const4fv(p, sin_constants), 0); |
||
963 | break; |
||
964 | |||
965 | case TGSI_OPCODE_SLE: |
||
966 | /* like SGE, but swap reg0, reg1 */ |
||
967 | emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); |
||
968 | break; |
||
969 | |||
970 | case TGSI_OPCODE_SLT: |
||
971 | emit_simple_arith(p, inst, A0_SLT, 2, fs); |
||
972 | break; |
||
973 | |||
974 | case TGSI_OPCODE_SGT: |
||
975 | /* like SLT, but swap reg0, reg1 */ |
||
976 | emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); |
||
977 | break; |
||
978 | |||
979 | case TGSI_OPCODE_SNE: |
||
980 | /* if we're < or > then we're != */ |
||
981 | src0 = src_vector(p, &inst->Src[0], fs); |
||
982 | src1 = src_vector(p, &inst->Src[1], fs); |
||
983 | tmp = i915_get_utemp(p); |
||
984 | |||
985 | i915_emit_arith(p, |
||
986 | A0_SLT, |
||
987 | tmp, |
||
988 | A0_DEST_CHANNEL_ALL, 0, |
||
989 | src0, |
||
990 | src1, 0); |
||
991 | |||
992 | i915_emit_arith(p, |
||
993 | A0_SLT, |
||
994 | get_result_vector(p, &inst->Dst[0]), |
||
995 | A0_DEST_CHANNEL_ALL, 0, |
||
996 | src1, |
||
997 | src0, 0); |
||
998 | |||
999 | i915_emit_arith(p, |
||
1000 | A0_ADD, |
||
1001 | get_result_vector(p, &inst->Dst[0]), |
||
1002 | A0_DEST_CHANNEL_ALL, 0, |
||
1003 | get_result_vector(p, &inst->Dst[0]), |
||
1004 | tmp, 0); |
||
1005 | break; |
||
1006 | |||
1007 | case TGSI_OPCODE_SSG: |
||
1008 | /* compute (src>0) - (src<0) */ |
||
1009 | src0 = src_vector(p, &inst->Src[0], fs); |
||
1010 | tmp = i915_get_utemp(p); |
||
1011 | |||
1012 | i915_emit_arith(p, |
||
1013 | A0_SLT, |
||
1014 | tmp, |
||
1015 | A0_DEST_CHANNEL_ALL, 0, |
||
1016 | src0, |
||
1017 | swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); |
||
1018 | |||
1019 | i915_emit_arith(p, |
||
1020 | A0_SLT, |
||
1021 | get_result_vector(p, &inst->Dst[0]), |
||
1022 | A0_DEST_CHANNEL_ALL, 0, |
||
1023 | swizzle(src0, ZERO, ZERO, ZERO, ZERO), |
||
1024 | src0, 0); |
||
1025 | |||
1026 | i915_emit_arith(p, |
||
1027 | A0_ADD, |
||
1028 | get_result_vector(p, &inst->Dst[0]), |
||
1029 | A0_DEST_CHANNEL_ALL, 0, |
||
1030 | get_result_vector(p, &inst->Dst[0]), |
||
1031 | negate(tmp, 1, 1, 1, 1), 0); |
||
1032 | break; |
||
1033 | |||
1034 | case TGSI_OPCODE_SUB: |
||
1035 | src0 = src_vector(p, &inst->Src[0], fs); |
||
1036 | src1 = src_vector(p, &inst->Src[1], fs); |
||
1037 | |||
1038 | i915_emit_arith(p, |
||
1039 | A0_ADD, |
||
1040 | get_result_vector(p, &inst->Dst[0]), |
||
1041 | get_result_flags(inst), 0, |
||
1042 | src0, negate(src1, 1, 1, 1, 1), 0); |
||
1043 | break; |
||
1044 | |||
1045 | case TGSI_OPCODE_TEX: |
||
1046 | emit_tex(p, inst, T0_TEXLD, fs); |
||
1047 | break; |
||
1048 | |||
1049 | case TGSI_OPCODE_TRUNC: |
||
1050 | emit_simple_arith(p, inst, A0_TRC, 1, fs); |
||
1051 | break; |
||
1052 | |||
1053 | case TGSI_OPCODE_TXB: |
||
1054 | emit_tex(p, inst, T0_TEXLDB, fs); |
||
1055 | break; |
||
1056 | |||
1057 | case TGSI_OPCODE_TXP: |
||
1058 | emit_tex(p, inst, T0_TEXLDP, fs); |
||
1059 | break; |
||
1060 | |||
1061 | case TGSI_OPCODE_XPD: |
||
1062 | /* Cross product: |
||
1063 | * result.x = src0.y * src1.z - src0.z * src1.y; |
||
1064 | * result.y = src0.z * src1.x - src0.x * src1.z; |
||
1065 | * result.z = src0.x * src1.y - src0.y * src1.x; |
||
1066 | * result.w = undef; |
||
1067 | */ |
||
1068 | src0 = src_vector(p, &inst->Src[0], fs); |
||
1069 | src1 = src_vector(p, &inst->Src[1], fs); |
||
1070 | tmp = i915_get_utemp(p); |
||
1071 | |||
1072 | i915_emit_arith(p, |
||
1073 | A0_MUL, |
||
1074 | tmp, A0_DEST_CHANNEL_ALL, 0, |
||
1075 | swizzle(src0, Z, X, Y, ONE), |
||
1076 | swizzle(src1, Y, Z, X, ONE), 0); |
||
1077 | |||
1078 | i915_emit_arith(p, |
||
1079 | A0_MAD, |
||
1080 | get_result_vector(p, &inst->Dst[0]), |
||
1081 | get_result_flags(inst), 0, |
||
1082 | swizzle(src0, Y, Z, X, ONE), |
||
1083 | swizzle(src1, Z, X, Y, ONE), |
||
1084 | negate(tmp, 1, 1, 1, 0)); |
||
1085 | break; |
||
1086 | |||
1087 | default: |
||
1088 | i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); |
||
1089 | p->error = 1; |
||
1090 | return; |
||
1091 | } |
||
1092 | |||
1093 | i915_release_utemps(p); |
||
1094 | } |
||
1095 | |||
1096 | |||
1097 | static void i915_translate_token(struct i915_fp_compile *p, |
||
1098 | const union i915_full_token *token, |
||
1099 | struct i915_fragment_shader *fs) |
||
1100 | { |
||
1101 | struct i915_fragment_shader *ifs = p->shader; |
||
1102 | switch( token->Token.Type ) { |
||
1103 | case TGSI_TOKEN_TYPE_PROPERTY: |
||
1104 | /* |
||
1105 | * We only support one cbuf, but we still need to ignore the property |
||
1106 | * correctly so we don't hit the assert at the end of the switch case. |
||
1107 | */ |
||
1108 | assert(token->FullProperty.Property.PropertyName == |
||
1109 | TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); |
||
1110 | break; |
||
1111 | |||
1112 | case TGSI_TOKEN_TYPE_DECLARATION: |
||
1113 | if (token->FullDeclaration.Declaration.File |
||
1114 | == TGSI_FILE_CONSTANT) { |
||
1115 | uint i; |
||
1116 | for (i = token->FullDeclaration.Range.First; |
||
1117 | i <= MIN2(token->FullDeclaration.Range.Last, I915_MAX_CONSTANT - 1); |
||
1118 | i++) { |
||
1119 | assert(ifs->constant_flags[i] == 0x0); |
||
1120 | ifs->constant_flags[i] = I915_CONSTFLAG_USER; |
||
1121 | ifs->num_constants = MAX2(ifs->num_constants, i + 1); |
||
1122 | } |
||
1123 | } |
||
1124 | else if (token->FullDeclaration.Declaration.File |
||
1125 | == TGSI_FILE_TEMPORARY) { |
||
1126 | uint i; |
||
1127 | for (i = token->FullDeclaration.Range.First; |
||
1128 | i <= token->FullDeclaration.Range.Last; |
||
1129 | i++) { |
||
1130 | if (i >= I915_MAX_TEMPORARY) |
||
1131 | debug_printf("Too many temps (%d)\n",i); |
||
1132 | else |
||
1133 | /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ |
||
1134 | p->temp_flag |= (1 << i); /* mark temp as used */ |
||
1135 | } |
||
1136 | } |
||
1137 | break; |
||
1138 | |||
1139 | case TGSI_TOKEN_TYPE_IMMEDIATE: |
||
1140 | { |
||
1141 | const struct tgsi_full_immediate *imm |
||
1142 | = &token->FullImmediate; |
||
1143 | const uint pos = p->num_immediates++; |
||
1144 | uint j; |
||
1145 | assert( imm->Immediate.NrTokens <= 4 + 1 ); |
||
1146 | for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { |
||
1147 | p->immediates[pos][j] = imm->u[j].Float; |
||
1148 | } |
||
1149 | } |
||
1150 | break; |
||
1151 | |||
1152 | case TGSI_TOKEN_TYPE_INSTRUCTION: |
||
1153 | if (p->first_instruction) { |
||
1154 | /* resolve location of immediates */ |
||
1155 | uint i, j; |
||
1156 | for (i = 0; i < p->num_immediates; i++) { |
||
1157 | /* find constant slot for this immediate */ |
||
1158 | for (j = 0; j < I915_MAX_CONSTANT; j++) { |
||
1159 | if (ifs->constant_flags[j] == 0x0) { |
||
1160 | memcpy(ifs->constants[j], |
||
1161 | p->immediates[i], |
||
1162 | 4 * sizeof(float)); |
||
1163 | /*printf("immediate %d maps to const %d\n", i, j);*/ |
||
1164 | ifs->constant_flags[j] = 0xf; /* all four comps used */ |
||
1165 | p->immediates_map[i] = j; |
||
1166 | ifs->num_constants = MAX2(ifs->num_constants, j + 1); |
||
1167 | break; |
||
1168 | } |
||
1169 | } |
||
1170 | } |
||
1171 | |||
1172 | p->first_instruction = FALSE; |
||
1173 | } |
||
1174 | |||
1175 | i915_translate_instruction(p, &token->FullInstruction, fs); |
||
1176 | break; |
||
1177 | |||
1178 | default: |
||
1179 | assert( 0 ); |
||
1180 | } |
||
1181 | |||
1182 | } |
||
1183 | |||
1184 | /** |
||
1185 | * Translate TGSI fragment shader into i915 hardware instructions. |
||
1186 | * \param p the translation state |
||
1187 | * \param tokens the TGSI token array |
||
1188 | */ |
||
1189 | static void |
||
1190 | i915_translate_instructions(struct i915_fp_compile *p, |
||
1191 | const struct i915_token_list *tokens, |
||
1192 | struct i915_fragment_shader *fs) |
||
1193 | { |
||
1194 | int i; |
||
1195 | for(i = 0; i |
||
1196 | i915_translate_token(p, &tokens->Tokens[i], fs); |
||
1197 | } |
||
1198 | } |
||
1199 | |||
1200 | |||
1201 | static struct i915_fp_compile * |
||
1202 | i915_init_compile(struct i915_context *i915, |
||
1203 | struct i915_fragment_shader *ifs) |
||
1204 | { |
||
1205 | struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); |
||
1206 | int i; |
||
1207 | |||
1208 | p->shader = ifs; |
||
1209 | |||
1210 | /* Put new constants at end of const buffer, growing downward. |
||
1211 | * The problem is we don't know how many user-defined constants might |
||
1212 | * be specified with pipe->set_constant_buffer(). |
||
1213 | * Should pre-scan the user's program to determine the highest-numbered |
||
1214 | * constant referenced. |
||
1215 | */ |
||
1216 | ifs->num_constants = 0; |
||
1217 | memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); |
||
1218 | |||
1219 | memset(&p->register_phases, 0, sizeof(p->register_phases)); |
||
1220 | |||
1221 | for (i = 0; i < I915_TEX_UNITS; i++) |
||
1222 | ifs->generic_mapping[i] = -1; |
||
1223 | |||
1224 | p->first_instruction = TRUE; |
||
1225 | |||
1226 | p->nr_tex_indirect = 1; /* correct? */ |
||
1227 | p->nr_tex_insn = 0; |
||
1228 | p->nr_alu_insn = 0; |
||
1229 | p->nr_decl_insn = 0; |
||
1230 | |||
1231 | p->csr = p->program; |
||
1232 | p->decl = p->declarations; |
||
1233 | p->decl_s = 0; |
||
1234 | p->decl_t = 0; |
||
1235 | p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; |
||
1236 | p->utemp_flag = ~0x7; |
||
1237 | |||
1238 | /* initialize the first program word */ |
||
1239 | *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; |
||
1240 | |||
1241 | return p; |
||
1242 | } |
||
1243 | |||
1244 | |||
1245 | /* Copy compile results to the fragment program struct and destroy the |
||
1246 | * compilation context. |
||
1247 | */ |
||
1248 | static void |
||
1249 | i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) |
||
1250 | { |
||
1251 | struct i915_fragment_shader *ifs = p->shader; |
||
1252 | unsigned long program_size = (unsigned long) (p->csr - p->program); |
||
1253 | unsigned long decl_size = (unsigned long) (p->decl - p->declarations); |
||
1254 | |||
1255 | if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) |
||
1256 | debug_printf("Exceeded max nr indirect texture lookups\n"); |
||
1257 | |||
1258 | if (p->nr_tex_insn > I915_MAX_TEX_INSN) |
||
1259 | i915_program_error(p, "Exceeded max TEX instructions"); |
||
1260 | |||
1261 | if (p->nr_alu_insn > I915_MAX_ALU_INSN) |
||
1262 | i915_program_error(p, "Exceeded max ALU instructions"); |
||
1263 | |||
1264 | if (p->nr_decl_insn > I915_MAX_DECL_INSN) |
||
1265 | i915_program_error(p, "Exceeded max DECL instructions"); |
||
1266 | |||
1267 | if (p->error) { |
||
1268 | p->NumNativeInstructions = 0; |
||
1269 | p->NumNativeAluInstructions = 0; |
||
1270 | p->NumNativeTexInstructions = 0; |
||
1271 | p->NumNativeTexIndirections = 0; |
||
1272 | |||
1273 | i915_use_passthrough_shader(ifs); |
||
1274 | } |
||
1275 | else { |
||
1276 | p->NumNativeInstructions |
||
1277 | = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; |
||
1278 | p->NumNativeAluInstructions = p->nr_alu_insn; |
||
1279 | p->NumNativeTexInstructions = p->nr_tex_insn; |
||
1280 | p->NumNativeTexIndirections = p->nr_tex_indirect; |
||
1281 | |||
1282 | /* patch in the program length */ |
||
1283 | p->declarations[0] |= program_size + decl_size - 2; |
||
1284 | |||
1285 | /* Copy compilation results to fragment program struct: |
||
1286 | */ |
||
1287 | assert(!ifs->decl); |
||
1288 | assert(!ifs->program); |
||
1289 | |||
1290 | ifs->decl |
||
1291 | = (uint *) MALLOC(decl_size * sizeof(uint)); |
||
1292 | ifs->program |
||
1293 | = (uint *) MALLOC(program_size * sizeof(uint)); |
||
1294 | |||
1295 | if (ifs->decl) { |
||
1296 | ifs->decl_len = decl_size; |
||
1297 | |||
1298 | memcpy(ifs->decl, |
||
1299 | p->declarations, |
||
1300 | decl_size * sizeof(uint)); |
||
1301 | } |
||
1302 | |||
1303 | if (ifs->program) { |
||
1304 | ifs->program_len = program_size; |
||
1305 | |||
1306 | memcpy(ifs->program, |
||
1307 | p->program, |
||
1308 | program_size * sizeof(uint)); |
||
1309 | } |
||
1310 | } |
||
1311 | |||
1312 | /* Release the compilation struct: |
||
1313 | */ |
||
1314 | FREE(p); |
||
1315 | } |
||
1316 | |||
1317 | |||
1318 | |||
1319 | |||
1320 | |||
1321 | /** |
||
1322 | * Rather than trying to intercept and jiggle depth writes during |
||
1323 | * emit, just move the value into its correct position at the end of |
||
1324 | * the program: |
||
1325 | */ |
||
1326 | static void |
||
1327 | i915_fixup_depth_write(struct i915_fp_compile *p) |
||
1328 | { |
||
1329 | /* XXX assuming pos/depth is always in output[0] */ |
||
1330 | if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { |
||
1331 | const uint depth = UREG(REG_TYPE_OD, 0); |
||
1332 | |||
1333 | i915_emit_arith(p, |
||
1334 | A0_MOV, /* opcode */ |
||
1335 | depth, /* dest reg */ |
||
1336 | A0_DEST_CHANNEL_W, /* write mask */ |
||
1337 | 0, /* saturate? */ |
||
1338 | swizzle(depth, X, Y, Z, Z), /* src0 */ |
||
1339 | 0, 0 /* src1, src2 */); |
||
1340 | } |
||
1341 | } |
||
1342 | |||
1343 | |||
1344 | void |
||
1345 | i915_translate_fragment_program( struct i915_context *i915, |
||
1346 | struct i915_fragment_shader *fs) |
||
1347 | { |
||
1348 | struct i915_fp_compile *p; |
||
1349 | const struct tgsi_token *tokens = fs->state.tokens; |
||
1350 | struct i915_token_list* i_tokens; |
||
1351 | |||
1352 | #if 0 |
||
1353 | tgsi_dump(tokens, 0); |
||
1354 | #endif |
||
1355 | |||
1356 | /* hw doesn't seem to like empty frag programs, even when the depth write |
||
1357 | * fixup gets emitted below - may that one is fishy, too? */ |
||
1358 | if (fs->info.num_instructions == 1) { |
||
1359 | i915_use_passthrough_shader(fs); |
||
1360 | |||
1361 | return; |
||
1362 | } |
||
1363 | |||
1364 | p = i915_init_compile(i915, fs); |
||
1365 | |||
1366 | i_tokens = i915_optimize(tokens); |
||
1367 | i915_translate_instructions(p, i_tokens, fs); |
||
1368 | i915_fixup_depth_write(p); |
||
1369 | |||
1370 | i915_fini_compile(i915, p); |
||
1371 | i915_optimize_free(i_tokens); |
||
1372 | |||
1373 | #if 0 |
||
1374 | i915_disassemble_program(NULL, fs->program, fs->program_len); |
||
1375 | #endif |
||
1376 | }><>>>>>=>><>=>=>0)>>=>>>>>>=>>>><>><>><>><>><>><>><>><>><> |