Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright (C) 2008 Nicolai Haehnle. |
||
3 | * |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining |
||
7 | * a copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial |
||
16 | * portions of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | */ |
||
27 | |||
28 | /** |
||
29 | * @file |
||
30 | * |
||
31 | * Shareable transformations that transform "special" ALU instructions |
||
32 | * into ALU instructions that are supported by hardware. |
||
33 | * |
||
34 | */ |
||
35 | |||
36 | #include "radeon_program_alu.h" |
||
37 | |||
38 | #include "radeon_compiler.h" |
||
39 | #include "radeon_compiler_util.h" |
||
40 | |||
41 | |||
42 | static struct rc_instruction *emit1( |
||
43 | struct radeon_compiler * c, struct rc_instruction * after, |
||
44 | rc_opcode Opcode, struct rc_sub_instruction * base, |
||
45 | struct rc_dst_register DstReg, struct rc_src_register SrcReg) |
||
46 | { |
||
47 | struct rc_instruction *fpi = rc_insert_new_instruction(c, after); |
||
48 | |||
49 | if (base) { |
||
50 | memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); |
||
51 | } |
||
52 | |||
53 | fpi->U.I.Opcode = Opcode; |
||
54 | fpi->U.I.DstReg = DstReg; |
||
55 | fpi->U.I.SrcReg[0] = SrcReg; |
||
56 | return fpi; |
||
57 | } |
||
58 | |||
59 | static struct rc_instruction *emit2( |
||
60 | struct radeon_compiler * c, struct rc_instruction * after, |
||
61 | rc_opcode Opcode, struct rc_sub_instruction * base, |
||
62 | struct rc_dst_register DstReg, |
||
63 | struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) |
||
64 | { |
||
65 | struct rc_instruction *fpi = rc_insert_new_instruction(c, after); |
||
66 | |||
67 | if (base) { |
||
68 | memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); |
||
69 | } |
||
70 | |||
71 | fpi->U.I.Opcode = Opcode; |
||
72 | fpi->U.I.DstReg = DstReg; |
||
73 | fpi->U.I.SrcReg[0] = SrcReg0; |
||
74 | fpi->U.I.SrcReg[1] = SrcReg1; |
||
75 | return fpi; |
||
76 | } |
||
77 | |||
78 | static struct rc_instruction *emit3( |
||
79 | struct radeon_compiler * c, struct rc_instruction * after, |
||
80 | rc_opcode Opcode, struct rc_sub_instruction * base, |
||
81 | struct rc_dst_register DstReg, |
||
82 | struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, |
||
83 | struct rc_src_register SrcReg2) |
||
84 | { |
||
85 | struct rc_instruction *fpi = rc_insert_new_instruction(c, after); |
||
86 | |||
87 | if (base) { |
||
88 | memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction)); |
||
89 | } |
||
90 | |||
91 | fpi->U.I.Opcode = Opcode; |
||
92 | fpi->U.I.DstReg = DstReg; |
||
93 | fpi->U.I.SrcReg[0] = SrcReg0; |
||
94 | fpi->U.I.SrcReg[1] = SrcReg1; |
||
95 | fpi->U.I.SrcReg[2] = SrcReg2; |
||
96 | return fpi; |
||
97 | } |
||
98 | |||
99 | static struct rc_dst_register dstregtmpmask(int index, int mask) |
||
100 | { |
||
101 | struct rc_dst_register dst = {0, 0, 0}; |
||
102 | dst.File = RC_FILE_TEMPORARY; |
||
103 | dst.Index = index; |
||
104 | dst.WriteMask = mask; |
||
105 | return dst; |
||
106 | } |
||
107 | |||
108 | static const struct rc_src_register builtin_zero = { |
||
109 | .File = RC_FILE_NONE, |
||
110 | .Index = 0, |
||
111 | .Swizzle = RC_SWIZZLE_0000 |
||
112 | }; |
||
113 | static const struct rc_src_register builtin_one = { |
||
114 | .File = RC_FILE_NONE, |
||
115 | .Index = 0, |
||
116 | .Swizzle = RC_SWIZZLE_1111 |
||
117 | }; |
||
118 | |||
119 | static const struct rc_src_register builtin_half = { |
||
120 | .File = RC_FILE_NONE, |
||
121 | .Index = 0, |
||
122 | .Swizzle = RC_SWIZZLE_HHHH |
||
123 | }; |
||
124 | |||
125 | static const struct rc_src_register srcreg_undefined = { |
||
126 | .File = RC_FILE_NONE, |
||
127 | .Index = 0, |
||
128 | .Swizzle = RC_SWIZZLE_XYZW |
||
129 | }; |
||
130 | |||
131 | static struct rc_src_register srcreg(int file, int index) |
||
132 | { |
||
133 | struct rc_src_register src = srcreg_undefined; |
||
134 | src.File = file; |
||
135 | src.Index = index; |
||
136 | return src; |
||
137 | } |
||
138 | |||
139 | static struct rc_src_register srcregswz(int file, int index, int swz) |
||
140 | { |
||
141 | struct rc_src_register src = srcreg_undefined; |
||
142 | src.File = file; |
||
143 | src.Index = index; |
||
144 | src.Swizzle = swz; |
||
145 | return src; |
||
146 | } |
||
147 | |||
148 | static struct rc_src_register absolute(struct rc_src_register reg) |
||
149 | { |
||
150 | struct rc_src_register newreg = reg; |
||
151 | newreg.Abs = 1; |
||
152 | newreg.Negate = RC_MASK_NONE; |
||
153 | return newreg; |
||
154 | } |
||
155 | |||
156 | static struct rc_src_register negate(struct rc_src_register reg) |
||
157 | { |
||
158 | struct rc_src_register newreg = reg; |
||
159 | newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; |
||
160 | return newreg; |
||
161 | } |
||
162 | |||
163 | static struct rc_src_register swizzle(struct rc_src_register reg, |
||
164 | rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) |
||
165 | { |
||
166 | struct rc_src_register swizzled = reg; |
||
167 | swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); |
||
168 | return swizzled; |
||
169 | } |
||
170 | |||
171 | static struct rc_src_register swizzle_smear(struct rc_src_register reg, |
||
172 | rc_swizzle x) |
||
173 | { |
||
174 | return swizzle(reg, x, x, x, x); |
||
175 | } |
||
176 | |||
177 | static struct rc_src_register swizzle_xxxx(struct rc_src_register reg) |
||
178 | { |
||
179 | return swizzle_smear(reg, RC_SWIZZLE_X); |
||
180 | } |
||
181 | |||
182 | static struct rc_src_register swizzle_yyyy(struct rc_src_register reg) |
||
183 | { |
||
184 | return swizzle_smear(reg, RC_SWIZZLE_Y); |
||
185 | } |
||
186 | |||
187 | static struct rc_src_register swizzle_zzzz(struct rc_src_register reg) |
||
188 | { |
||
189 | return swizzle_smear(reg, RC_SWIZZLE_Z); |
||
190 | } |
||
191 | |||
192 | static struct rc_src_register swizzle_wwww(struct rc_src_register reg) |
||
193 | { |
||
194 | return swizzle_smear(reg, RC_SWIZZLE_W); |
||
195 | } |
||
196 | |||
197 | static int is_dst_safe_to_reuse(struct rc_instruction *inst) |
||
198 | { |
||
199 | const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); |
||
200 | unsigned i; |
||
201 | |||
202 | assert(info->HasDstReg); |
||
203 | |||
204 | if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) |
||
205 | return 0; |
||
206 | |||
207 | for (i = 0; i < info->NumSrcRegs; i++) { |
||
208 | if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && |
||
209 | inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) |
||
210 | return 0; |
||
211 | } |
||
212 | |||
213 | return 1; |
||
214 | } |
||
215 | |||
216 | static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, |
||
217 | struct rc_instruction *inst) |
||
218 | { |
||
219 | unsigned tmp; |
||
220 | |||
221 | if (is_dst_safe_to_reuse(inst)) |
||
222 | tmp = inst->U.I.DstReg.Index; |
||
223 | else |
||
224 | tmp = rc_find_free_temporary(c); |
||
225 | |||
226 | return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); |
||
227 | } |
||
228 | |||
229 | static void transform_ABS(struct radeon_compiler* c, |
||
230 | struct rc_instruction* inst) |
||
231 | { |
||
232 | struct rc_src_register src = inst->U.I.SrcReg[0]; |
||
233 | src.Abs = 1; |
||
234 | src.Negate = RC_MASK_NONE; |
||
235 | emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, src); |
||
236 | rc_remove_instruction(inst); |
||
237 | } |
||
238 | |||
239 | static void transform_CEIL(struct radeon_compiler* c, |
||
240 | struct rc_instruction* inst) |
||
241 | { |
||
242 | /* Assuming: |
||
243 | * ceil(x) = -floor(-x) |
||
244 | * |
||
245 | * After inlining floor: |
||
246 | * ceil(x) = -(-x-frac(-x)) |
||
247 | * |
||
248 | * After simplification: |
||
249 | * ceil(x) = x+frac(-x) |
||
250 | */ |
||
251 | |||
252 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
253 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); |
||
254 | emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg, |
||
255 | inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); |
||
256 | rc_remove_instruction(inst); |
||
257 | } |
||
258 | |||
259 | static void transform_CLAMP(struct radeon_compiler *c, |
||
260 | struct rc_instruction *inst) |
||
261 | { |
||
262 | /* CLAMP dst, src, min, max |
||
263 | * into: |
||
264 | * MIN tmp, src, max |
||
265 | * MAX dst, tmp, min |
||
266 | */ |
||
267 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
268 | emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, |
||
269 | inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); |
||
270 | emit2(c, inst->Prev, RC_OPCODE_MAX, &inst->U.I, inst->U.I.DstReg, |
||
271 | srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); |
||
272 | rc_remove_instruction(inst); |
||
273 | } |
||
274 | |||
275 | static void transform_DP2(struct radeon_compiler* c, |
||
276 | struct rc_instruction* inst) |
||
277 | { |
||
278 | struct rc_src_register src0 = inst->U.I.SrcReg[0]; |
||
279 | struct rc_src_register src1 = inst->U.I.SrcReg[1]; |
||
280 | src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); |
||
281 | src0.Swizzle &= ~(63 << (3 * 2)); |
||
282 | src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); |
||
283 | src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); |
||
284 | src1.Swizzle &= ~(63 << (3 * 2)); |
||
285 | src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); |
||
286 | emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1); |
||
287 | rc_remove_instruction(inst); |
||
288 | } |
||
289 | |||
290 | static void transform_DPH(struct radeon_compiler* c, |
||
291 | struct rc_instruction* inst) |
||
292 | { |
||
293 | struct rc_src_register src0 = inst->U.I.SrcReg[0]; |
||
294 | src0.Negate &= ~RC_MASK_W; |
||
295 | src0.Swizzle &= ~(7 << (3 * 3)); |
||
296 | src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); |
||
297 | emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); |
||
298 | rc_remove_instruction(inst); |
||
299 | } |
||
300 | |||
301 | /** |
||
302 | * [1, src0.y*src1.y, src0.z, src1.w] |
||
303 | * So basically MUL with lotsa swizzling. |
||
304 | */ |
||
305 | static void transform_DST(struct radeon_compiler* c, |
||
306 | struct rc_instruction* inst) |
||
307 | { |
||
308 | emit2(c, inst->Prev, RC_OPCODE_MUL, &inst->U.I, inst->U.I.DstReg, |
||
309 | swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), |
||
310 | swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); |
||
311 | rc_remove_instruction(inst); |
||
312 | } |
||
313 | |||
314 | static void transform_FLR(struct radeon_compiler* c, |
||
315 | struct rc_instruction* inst) |
||
316 | { |
||
317 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
318 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); |
||
319 | emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg, |
||
320 | inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); |
||
321 | rc_remove_instruction(inst); |
||
322 | } |
||
323 | |||
324 | static void transform_TRUNC(struct radeon_compiler* c, |
||
325 | struct rc_instruction* inst) |
||
326 | { |
||
327 | /* Definition of trunc: |
||
328 | * trunc(x) = (abs(x) - fract(abs(x))) * sgn(x) |
||
329 | * |
||
330 | * The multiplication by sgn(x) can be simplified using CMP: |
||
331 | * y * sgn(x) = (x < 0 ? -y : y) |
||
332 | */ |
||
333 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
334 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, absolute(inst->U.I.SrcReg[0])); |
||
335 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, absolute(inst->U.I.SrcReg[0]), |
||
336 | negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); |
||
337 | emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0], |
||
338 | negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index)); |
||
339 | rc_remove_instruction(inst); |
||
340 | } |
||
341 | |||
342 | /** |
||
343 | * Definition of LIT (from ARB_fragment_program): |
||
344 | * |
||
345 | * tmp = VectorLoad(op0); |
||
346 | * if (tmp.x < 0) tmp.x = 0; |
||
347 | * if (tmp.y < 0) tmp.y = 0; |
||
348 | * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); |
||
349 | * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; |
||
350 | * result.x = 1.0; |
||
351 | * result.y = tmp.x; |
||
352 | * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; |
||
353 | * result.w = 1.0; |
||
354 | * |
||
355 | * The longest path of computation is the one leading to result.z, |
||
356 | * consisting of 5 operations. This implementation of LIT takes |
||
357 | * 5 slots, if the subsequent optimization passes are clever enough |
||
358 | * to pair instructions correctly. |
||
359 | */ |
||
360 | static void transform_LIT(struct radeon_compiler* c, |
||
361 | struct rc_instruction* inst) |
||
362 | { |
||
363 | unsigned int constant; |
||
364 | unsigned int constant_swizzle; |
||
365 | unsigned int temp; |
||
366 | struct rc_src_register srctemp; |
||
367 | |||
368 | constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); |
||
369 | |||
370 | if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { |
||
371 | struct rc_instruction * inst_mov; |
||
372 | |||
373 | inst_mov = emit1(c, inst, |
||
374 | RC_OPCODE_MOV, 0, inst->U.I.DstReg, |
||
375 | srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); |
||
376 | |||
377 | inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
||
378 | inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; |
||
379 | inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; |
||
380 | } |
||
381 | |||
382 | temp = inst->U.I.DstReg.Index; |
||
383 | srctemp = srcreg(RC_FILE_TEMPORARY, temp); |
||
384 | |||
385 | /* tmp.x = max(0.0, Src.x); */ |
||
386 | /* tmp.y = max(0.0, Src.y); */ |
||
387 | /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ |
||
388 | emit2(c, inst->Prev, RC_OPCODE_MAX, 0, |
||
389 | dstregtmpmask(temp, RC_MASK_XYW), |
||
390 | inst->U.I.SrcReg[0], |
||
391 | swizzle(srcreg(RC_FILE_CONSTANT, constant), |
||
392 | RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); |
||
393 | emit2(c, inst->Prev, RC_OPCODE_MIN, 0, |
||
394 | dstregtmpmask(temp, RC_MASK_Z), |
||
395 | swizzle_wwww(srctemp), |
||
396 | negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); |
||
397 | |||
398 | /* tmp.w = Pow(tmp.y, tmp.w) */ |
||
399 | emit1(c, inst->Prev, RC_OPCODE_LG2, 0, |
||
400 | dstregtmpmask(temp, RC_MASK_W), |
||
401 | swizzle_yyyy(srctemp)); |
||
402 | emit2(c, inst->Prev, RC_OPCODE_MUL, 0, |
||
403 | dstregtmpmask(temp, RC_MASK_W), |
||
404 | swizzle_wwww(srctemp), |
||
405 | swizzle_zzzz(srctemp)); |
||
406 | emit1(c, inst->Prev, RC_OPCODE_EX2, 0, |
||
407 | dstregtmpmask(temp, RC_MASK_W), |
||
408 | swizzle_wwww(srctemp)); |
||
409 | |||
410 | /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ |
||
411 | emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, |
||
412 | dstregtmpmask(temp, RC_MASK_Z), |
||
413 | negate(swizzle_xxxx(srctemp)), |
||
414 | swizzle_wwww(srctemp), |
||
415 | builtin_zero); |
||
416 | |||
417 | /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ |
||
418 | emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, |
||
419 | dstregtmpmask(temp, RC_MASK_XYW), |
||
420 | swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); |
||
421 | |||
422 | rc_remove_instruction(inst); |
||
423 | } |
||
424 | |||
425 | static void transform_LRP(struct radeon_compiler* c, |
||
426 | struct rc_instruction* inst) |
||
427 | { |
||
428 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
429 | |||
430 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, |
||
431 | dst, |
||
432 | inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); |
||
433 | emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, |
||
434 | inst->U.I.DstReg, |
||
435 | inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); |
||
436 | |||
437 | rc_remove_instruction(inst); |
||
438 | } |
||
439 | |||
440 | static void transform_POW(struct radeon_compiler* c, |
||
441 | struct rc_instruction* inst) |
||
442 | { |
||
443 | struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); |
||
444 | struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); |
||
445 | tempdst.WriteMask = RC_MASK_W; |
||
446 | tempsrc.Swizzle = RC_SWIZZLE_WWWW; |
||
447 | |||
448 | emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); |
||
449 | emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); |
||
450 | emit1(c, inst->Prev, RC_OPCODE_EX2, &inst->U.I, inst->U.I.DstReg, tempsrc); |
||
451 | |||
452 | rc_remove_instruction(inst); |
||
453 | } |
||
454 | |||
455 | /* dst = ROUND(src) : |
||
456 | * add = src + .5 |
||
457 | * frac = FRC(add) |
||
458 | * dst = add - frac |
||
459 | * |
||
460 | * According to the GLSL spec, the implementor can decide which way to round |
||
461 | * when the fraction is .5. We round down for .5. |
||
462 | * |
||
463 | */ |
||
464 | static void transform_ROUND(struct radeon_compiler* c, |
||
465 | struct rc_instruction* inst) |
||
466 | { |
||
467 | unsigned int mask = inst->U.I.DstReg.WriteMask; |
||
468 | unsigned int frac_index, add_index; |
||
469 | struct rc_dst_register frac_dst, add_dst; |
||
470 | struct rc_src_register frac_src, add_src; |
||
471 | |||
472 | /* add = src + .5 */ |
||
473 | add_index = rc_find_free_temporary(c); |
||
474 | add_dst = dstregtmpmask(add_index, mask); |
||
475 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0], |
||
476 | builtin_half); |
||
477 | add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index); |
||
478 | |||
479 | |||
480 | /* frac = FRC(add) */ |
||
481 | frac_index = rc_find_free_temporary(c); |
||
482 | frac_dst = dstregtmpmask(frac_index, mask); |
||
483 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src); |
||
484 | frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index); |
||
485 | |||
486 | /* dst = add - frac */ |
||
487 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, |
||
488 | add_src, negate(frac_src)); |
||
489 | rc_remove_instruction(inst); |
||
490 | } |
||
491 | |||
492 | static void transform_RSQ(struct radeon_compiler* c, |
||
493 | struct rc_instruction* inst) |
||
494 | { |
||
495 | inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); |
||
496 | } |
||
497 | |||
498 | static void transform_SEQ(struct radeon_compiler* c, |
||
499 | struct rc_instruction* inst) |
||
500 | { |
||
501 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
502 | |||
503 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); |
||
504 | emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
||
505 | negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); |
||
506 | |||
507 | rc_remove_instruction(inst); |
||
508 | } |
||
509 | |||
510 | static void transform_SFL(struct radeon_compiler* c, |
||
511 | struct rc_instruction* inst) |
||
512 | { |
||
513 | emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, builtin_zero); |
||
514 | rc_remove_instruction(inst); |
||
515 | } |
||
516 | |||
517 | static void transform_SGE(struct radeon_compiler* c, |
||
518 | struct rc_instruction* inst) |
||
519 | { |
||
520 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
521 | |||
522 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); |
||
523 | emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
||
524 | srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); |
||
525 | |||
526 | rc_remove_instruction(inst); |
||
527 | } |
||
528 | |||
529 | static void transform_SGT(struct radeon_compiler* c, |
||
530 | struct rc_instruction* inst) |
||
531 | { |
||
532 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
533 | |||
534 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); |
||
535 | emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
||
536 | srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); |
||
537 | |||
538 | rc_remove_instruction(inst); |
||
539 | } |
||
540 | |||
541 | static void transform_SLE(struct radeon_compiler* c, |
||
542 | struct rc_instruction* inst) |
||
543 | { |
||
544 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
545 | |||
546 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); |
||
547 | emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
||
548 | srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); |
||
549 | |||
550 | rc_remove_instruction(inst); |
||
551 | } |
||
552 | |||
553 | static void transform_SLT(struct radeon_compiler* c, |
||
554 | struct rc_instruction* inst) |
||
555 | { |
||
556 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
557 | |||
558 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); |
||
559 | emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
||
560 | srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); |
||
561 | |||
562 | rc_remove_instruction(inst); |
||
563 | } |
||
564 | |||
565 | static void transform_SNE(struct radeon_compiler* c, |
||
566 | struct rc_instruction* inst) |
||
567 | { |
||
568 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
569 | |||
570 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); |
||
571 | emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, |
||
572 | negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); |
||
573 | |||
574 | rc_remove_instruction(inst); |
||
575 | } |
||
576 | |||
577 | static void transform_SSG(struct radeon_compiler* c, |
||
578 | struct rc_instruction* inst) |
||
579 | { |
||
580 | /* result = sign(x) |
||
581 | * |
||
582 | * CMP tmp0, -x, 1, 0 |
||
583 | * CMP tmp1, x, 1, 0 |
||
584 | * ADD result, tmp0, -tmp1; |
||
585 | */ |
||
586 | struct rc_dst_register dst0; |
||
587 | unsigned tmp1; |
||
588 | |||
589 | /* 0 < x */ |
||
590 | dst0 = try_to_reuse_dst(c, inst); |
||
591 | emit3(c, inst->Prev, RC_OPCODE_CMP, 0, |
||
592 | dst0, |
||
593 | negate(inst->U.I.SrcReg[0]), |
||
594 | builtin_one, |
||
595 | builtin_zero); |
||
596 | |||
597 | /* x < 0 */ |
||
598 | tmp1 = rc_find_free_temporary(c); |
||
599 | emit3(c, inst->Prev, RC_OPCODE_CMP, 0, |
||
600 | dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), |
||
601 | inst->U.I.SrcReg[0], |
||
602 | builtin_one, |
||
603 | builtin_zero); |
||
604 | |||
605 | /* Either both are zero, or one of them is one and the other is zero. */ |
||
606 | /* result = tmp0 - tmp1 */ |
||
607 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, |
||
608 | inst->U.I.DstReg, |
||
609 | srcreg(RC_FILE_TEMPORARY, dst0.Index), |
||
610 | negate(srcreg(RC_FILE_TEMPORARY, tmp1))); |
||
611 | |||
612 | rc_remove_instruction(inst); |
||
613 | } |
||
614 | |||
615 | static void transform_SUB(struct radeon_compiler* c, |
||
616 | struct rc_instruction* inst) |
||
617 | { |
||
618 | inst->U.I.Opcode = RC_OPCODE_ADD; |
||
619 | inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); |
||
620 | } |
||
621 | |||
622 | static void transform_SWZ(struct radeon_compiler* c, |
||
623 | struct rc_instruction* inst) |
||
624 | { |
||
625 | inst->U.I.Opcode = RC_OPCODE_MOV; |
||
626 | } |
||
627 | |||
628 | static void transform_XPD(struct radeon_compiler* c, |
||
629 | struct rc_instruction* inst) |
||
630 | { |
||
631 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
632 | |||
633 | emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, |
||
634 | swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), |
||
635 | swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); |
||
636 | emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg, |
||
637 | swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), |
||
638 | swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), |
||
639 | negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); |
||
640 | |||
641 | rc_remove_instruction(inst); |
||
642 | } |
||
643 | |||
644 | |||
645 | /** |
||
646 | * Can be used as a transformation for @ref radeonClauseLocalTransform, |
||
647 | * no userData necessary. |
||
648 | * |
||
649 | * Eliminates the following ALU instructions: |
||
650 | * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD |
||
651 | * using: |
||
652 | * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP |
||
653 | * |
||
654 | * Transforms RSQ to Radeon's native RSQ by explicitly setting |
||
655 | * absolute value. |
||
656 | * |
||
657 | * @note should be applicable to R300 and R500 fragment programs. |
||
658 | */ |
||
659 | int radeonTransformALU( |
||
660 | struct radeon_compiler * c, |
||
661 | struct rc_instruction* inst, |
||
662 | void* unused) |
||
663 | { |
||
664 | switch(inst->U.I.Opcode) { |
||
665 | case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; |
||
666 | case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; |
||
667 | case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; |
||
668 | case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; |
||
669 | case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; |
||
670 | case RC_OPCODE_DST: transform_DST(c, inst); return 1; |
||
671 | case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; |
||
672 | case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; |
||
673 | case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; |
||
674 | case RC_OPCODE_POW: transform_POW(c, inst); return 1; |
||
675 | case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1; |
||
676 | case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; |
||
677 | case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; |
||
678 | case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; |
||
679 | case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; |
||
680 | case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; |
||
681 | case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; |
||
682 | case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; |
||
683 | case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; |
||
684 | case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; |
||
685 | case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; |
||
686 | case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; |
||
687 | case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1; |
||
688 | case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; |
||
689 | default: |
||
690 | return 0; |
||
691 | } |
||
692 | } |
||
693 | |||
694 | |||
695 | static void transform_r300_vertex_ABS(struct radeon_compiler* c, |
||
696 | struct rc_instruction* inst) |
||
697 | { |
||
698 | /* Note: r500 can take absolute values, but r300 cannot. */ |
||
699 | inst->U.I.Opcode = RC_OPCODE_MAX; |
||
700 | inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; |
||
701 | inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; |
||
702 | } |
||
703 | |||
704 | static void transform_r300_vertex_CMP(struct radeon_compiler* c, |
||
705 | struct rc_instruction* inst) |
||
706 | { |
||
707 | /* There is no decent CMP available, so let's rig one up. |
||
708 | * CMP is defined as dst = src0 < 0.0 ? src1 : src2 |
||
709 | * The following sequence consumes zero to two temps and two extra slots |
||
710 | * (the second temp and the second slot is consumed by transform_LRP), |
||
711 | * but should be equivalent: |
||
712 | * |
||
713 | * SLT tmp0, src0, 0.0 |
||
714 | * LRP dst, tmp0, src1, src2 |
||
715 | * |
||
716 | * Yes, I know, I'm a mad scientist. ~ C. & M. */ |
||
717 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
718 | |||
719 | /* SLT tmp0, src0, 0.0 */ |
||
720 | emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
||
721 | dst, |
||
722 | inst->U.I.SrcReg[0], builtin_zero); |
||
723 | |||
724 | /* LRP dst, tmp0, src1, src2 */ |
||
725 | transform_LRP(c, |
||
726 | emit3(c, inst->Prev, RC_OPCODE_LRP, 0, |
||
727 | inst->U.I.DstReg, |
||
728 | srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); |
||
729 | |||
730 | rc_remove_instruction(inst); |
||
731 | } |
||
732 | |||
733 | static void transform_r300_vertex_DP2(struct radeon_compiler* c, |
||
734 | struct rc_instruction* inst) |
||
735 | { |
||
736 | struct rc_instruction *next_inst = inst->Next; |
||
737 | transform_DP2(c, inst); |
||
738 | next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; |
||
739 | } |
||
740 | |||
741 | static void transform_r300_vertex_DP3(struct radeon_compiler* c, |
||
742 | struct rc_instruction* inst) |
||
743 | { |
||
744 | struct rc_src_register src0 = inst->U.I.SrcReg[0]; |
||
745 | struct rc_src_register src1 = inst->U.I.SrcReg[1]; |
||
746 | src0.Negate &= ~RC_MASK_W; |
||
747 | src0.Swizzle &= ~(7 << (3 * 3)); |
||
748 | src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); |
||
749 | src1.Negate &= ~RC_MASK_W; |
||
750 | src1.Swizzle &= ~(7 << (3 * 3)); |
||
751 | src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); |
||
752 | emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1); |
||
753 | rc_remove_instruction(inst); |
||
754 | } |
||
755 | |||
756 | static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, |
||
757 | struct rc_instruction* inst) |
||
758 | { |
||
759 | struct rc_dst_register dst = try_to_reuse_dst(c, inst); |
||
760 | unsigned constant_swizzle; |
||
761 | int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, |
||
762 | 0.0000000000000000001, |
||
763 | &constant_swizzle); |
||
764 | |||
765 | /* MOV dst, src */ |
||
766 | dst.WriteMask = RC_MASK_XYZW; |
||
767 | emit1(c, inst->Prev, RC_OPCODE_MOV, 0, |
||
768 | dst, |
||
769 | inst->U.I.SrcReg[0]); |
||
770 | |||
771 | /* MAX dst.y, src, 0.00...001 */ |
||
772 | emit2(c, inst->Prev, RC_OPCODE_MAX, 0, |
||
773 | dstregtmpmask(dst.Index, RC_MASK_Y), |
||
774 | srcreg(RC_FILE_TEMPORARY, dst.Index), |
||
775 | srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); |
||
776 | |||
777 | inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); |
||
778 | } |
||
779 | |||
780 | static void transform_r300_vertex_SEQ(struct radeon_compiler *c, |
||
781 | struct rc_instruction *inst) |
||
782 | { |
||
783 | /* x = y <==> x >= y && y >= x */ |
||
784 | int tmp = rc_find_free_temporary(c); |
||
785 | |||
786 | /* x <= y */ |
||
787 | emit2(c, inst->Prev, RC_OPCODE_SGE, 0, |
||
788 | dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), |
||
789 | inst->U.I.SrcReg[0], |
||
790 | inst->U.I.SrcReg[1]); |
||
791 | |||
792 | /* y <= x */ |
||
793 | emit2(c, inst->Prev, RC_OPCODE_SGE, 0, |
||
794 | inst->U.I.DstReg, |
||
795 | inst->U.I.SrcReg[1], |
||
796 | inst->U.I.SrcReg[0]); |
||
797 | |||
798 | /* x && y = x * y */ |
||
799 | emit2(c, inst->Prev, RC_OPCODE_MUL, 0, |
||
800 | inst->U.I.DstReg, |
||
801 | srcreg(RC_FILE_TEMPORARY, tmp), |
||
802 | srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); |
||
803 | |||
804 | rc_remove_instruction(inst); |
||
805 | } |
||
806 | |||
807 | static void transform_r300_vertex_SNE(struct radeon_compiler *c, |
||
808 | struct rc_instruction *inst) |
||
809 | { |
||
810 | /* x != y <==> x < y || y < x */ |
||
811 | int tmp = rc_find_free_temporary(c); |
||
812 | |||
813 | /* x < y */ |
||
814 | emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
||
815 | dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), |
||
816 | inst->U.I.SrcReg[0], |
||
817 | inst->U.I.SrcReg[1]); |
||
818 | |||
819 | /* y < x */ |
||
820 | emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
||
821 | inst->U.I.DstReg, |
||
822 | inst->U.I.SrcReg[1], |
||
823 | inst->U.I.SrcReg[0]); |
||
824 | |||
825 | /* x || y = max(x, y) */ |
||
826 | emit2(c, inst->Prev, RC_OPCODE_MAX, 0, |
||
827 | inst->U.I.DstReg, |
||
828 | srcreg(RC_FILE_TEMPORARY, tmp), |
||
829 | srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); |
||
830 | |||
831 | rc_remove_instruction(inst); |
||
832 | } |
||
833 | |||
834 | static void transform_r300_vertex_SGT(struct radeon_compiler* c, |
||
835 | struct rc_instruction* inst) |
||
836 | { |
||
837 | /* x > y <==> -x < -y */ |
||
838 | inst->U.I.Opcode = RC_OPCODE_SLT; |
||
839 | inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; |
||
840 | inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; |
||
841 | } |
||
842 | |||
843 | static void transform_r300_vertex_SLE(struct radeon_compiler* c, |
||
844 | struct rc_instruction* inst) |
||
845 | { |
||
846 | /* x <= y <==> -x >= -y */ |
||
847 | inst->U.I.Opcode = RC_OPCODE_SGE; |
||
848 | inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; |
||
849 | inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; |
||
850 | } |
||
851 | |||
852 | static void transform_r300_vertex_SSG(struct radeon_compiler* c, |
||
853 | struct rc_instruction* inst) |
||
854 | { |
||
855 | /* result = sign(x) |
||
856 | * |
||
857 | * SLT tmp0, 0, x; |
||
858 | * SLT tmp1, x, 0; |
||
859 | * ADD result, tmp0, -tmp1; |
||
860 | */ |
||
861 | struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); |
||
862 | unsigned tmp1; |
||
863 | |||
864 | /* 0 < x */ |
||
865 | dst0 = try_to_reuse_dst(c, inst); |
||
866 | emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
||
867 | dst0, |
||
868 | builtin_zero, |
||
869 | inst->U.I.SrcReg[0]); |
||
870 | |||
871 | /* x < 0 */ |
||
872 | tmp1 = rc_find_free_temporary(c); |
||
873 | emit2(c, inst->Prev, RC_OPCODE_SLT, 0, |
||
874 | dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), |
||
875 | inst->U.I.SrcReg[0], |
||
876 | builtin_zero); |
||
877 | |||
878 | /* Either both are zero, or one of them is one and the other is zero. */ |
||
879 | /* result = tmp0 - tmp1 */ |
||
880 | emit2(c, inst->Prev, RC_OPCODE_ADD, 0, |
||
881 | inst->U.I.DstReg, |
||
882 | srcreg(RC_FILE_TEMPORARY, dst0.Index), |
||
883 | negate(srcreg(RC_FILE_TEMPORARY, tmp1))); |
||
884 | |||
885 | rc_remove_instruction(inst); |
||
886 | } |
||
887 | |||
888 | static void transform_vertex_TRUNC(struct radeon_compiler* c, |
||
889 | struct rc_instruction* inst) |
||
890 | { |
||
891 | struct rc_instruction *next = inst->Next; |
||
892 | |||
893 | /* next->Prev is removed after each transformation and replaced |
||
894 | * by a new instruction. */ |
||
895 | transform_TRUNC(c, next->Prev); |
||
896 | transform_r300_vertex_CMP(c, next->Prev); |
||
897 | } |
||
898 | |||
899 | /** |
||
900 | * For use with rc_local_transform, this transforms non-native ALU |
||
901 | * instructions of the r300 up to r500 vertex engine. |
||
902 | */ |
||
903 | int r300_transform_vertex_alu( |
||
904 | struct radeon_compiler * c, |
||
905 | struct rc_instruction* inst, |
||
906 | void* unused) |
||
907 | { |
||
908 | switch(inst->U.I.Opcode) { |
||
909 | case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; |
||
910 | case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; |
||
911 | case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; |
||
912 | case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; |
||
913 | case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; |
||
914 | case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; |
||
915 | case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; |
||
916 | case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; |
||
917 | case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; |
||
918 | case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; |
||
919 | case RC_OPCODE_SEQ: |
||
920 | if (!c->is_r500) { |
||
921 | transform_r300_vertex_SEQ(c, inst); |
||
922 | return 1; |
||
923 | } |
||
924 | return 0; |
||
925 | case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; |
||
926 | case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; |
||
927 | case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; |
||
928 | case RC_OPCODE_SNE: |
||
929 | if (!c->is_r500) { |
||
930 | transform_r300_vertex_SNE(c, inst); |
||
931 | return 1; |
||
932 | } |
||
933 | return 0; |
||
934 | case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; |
||
935 | case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; |
||
936 | case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; |
||
937 | case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1; |
||
938 | case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; |
||
939 | default: |
||
940 | return 0; |
||
941 | } |
||
942 | } |
||
943 | |||
944 | static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) |
||
945 | { |
||
946 | static const float SinCosConsts[2][4] = { |
||
947 | { |
||
948 | 1.273239545, /* 4/PI */ |
||
949 | -0.405284735, /* -4/(PI*PI) */ |
||
950 | 3.141592654, /* PI */ |
||
951 | 0.2225 /* weight */ |
||
952 | }, |
||
953 | { |
||
954 | 0.75, |
||
955 | 0.5, |
||
956 | 0.159154943, /* 1/(2*PI) */ |
||
957 | 6.283185307 /* 2*PI */ |
||
958 | } |
||
959 | }; |
||
960 | int i; |
||
961 | |||
962 | for(i = 0; i < 2; ++i) |
||
963 | constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); |
||
964 | } |
||
965 | |||
966 | /** |
||
967 | * Approximate sin(x), where x is clamped to (-pi/2, pi/2). |
||
968 | * |
||
969 | * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } |
||
970 | * MAD tmp.x, tmp.y, |src|, tmp.x |
||
971 | * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x |
||
972 | * MAD dest, tmp.y, weight, tmp.x |
||
973 | */ |
||
974 | static void sin_approx( |
||
975 | struct radeon_compiler* c, struct rc_instruction * inst, |
||
976 | struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) |
||
977 | { |
||
978 | unsigned int tempreg = rc_find_free_temporary(c); |
||
979 | |||
980 | emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), |
||
981 | swizzle_xxxx(src), |
||
982 | srcreg(RC_FILE_CONSTANT, constants[0])); |
||
983 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), |
||
984 | swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
985 | absolute(swizzle_xxxx(src)), |
||
986 | swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); |
||
987 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), |
||
988 | swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
989 | absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), |
||
990 | negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); |
||
991 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, |
||
992 | swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
993 | swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), |
||
994 | swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); |
||
995 | } |
||
996 | |||
997 | /** |
||
998 | * Translate the trigonometric functions COS, SIN, and SCS |
||
999 | * using only the basic instructions |
||
1000 | * MOV, ADD, MUL, MAD, FRC |
||
1001 | */ |
||
1002 | int r300_transform_trig_simple(struct radeon_compiler* c, |
||
1003 | struct rc_instruction* inst, |
||
1004 | void* unused) |
||
1005 | { |
||
1006 | unsigned int constants[2]; |
||
1007 | unsigned int tempreg; |
||
1008 | |||
1009 | if (inst->U.I.Opcode != RC_OPCODE_COS && |
||
1010 | inst->U.I.Opcode != RC_OPCODE_SIN && |
||
1011 | inst->U.I.Opcode != RC_OPCODE_SCS) |
||
1012 | return 0; |
||
1013 | |||
1014 | tempreg = rc_find_free_temporary(c); |
||
1015 | |||
1016 | sincos_constants(c, constants); |
||
1017 | |||
1018 | if (inst->U.I.Opcode == RC_OPCODE_COS) { |
||
1019 | /* MAD tmp.x, src, 1/(2*PI), 0.75 */ |
||
1020 | /* FRC tmp.x, tmp.x */ |
||
1021 | /* MAD tmp.z, tmp.x, 2*PI, -PI */ |
||
1022 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), |
||
1023 | swizzle_xxxx(inst->U.I.SrcReg[0]), |
||
1024 | swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), |
||
1025 | swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); |
||
1026 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), |
||
1027 | swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); |
||
1028 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), |
||
1029 | swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
1030 | swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), |
||
1031 | negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); |
||
1032 | |||
1033 | sin_approx(c, inst, inst->U.I.DstReg, |
||
1034 | swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
1035 | constants); |
||
1036 | } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { |
||
1037 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), |
||
1038 | swizzle_xxxx(inst->U.I.SrcReg[0]), |
||
1039 | swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), |
||
1040 | swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); |
||
1041 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), |
||
1042 | swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); |
||
1043 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), |
||
1044 | swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
1045 | swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), |
||
1046 | negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); |
||
1047 | |||
1048 | sin_approx(c, inst, inst->U.I.DstReg, |
||
1049 | swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
1050 | constants); |
||
1051 | } else { |
||
1052 | struct rc_dst_register dst; |
||
1053 | |||
1054 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), |
||
1055 | swizzle_xxxx(inst->U.I.SrcReg[0]), |
||
1056 | swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), |
||
1057 | swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); |
||
1058 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), |
||
1059 | srcreg(RC_FILE_TEMPORARY, tempreg)); |
||
1060 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), |
||
1061 | srcreg(RC_FILE_TEMPORARY, tempreg), |
||
1062 | swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), |
||
1063 | negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); |
||
1064 | |||
1065 | dst = inst->U.I.DstReg; |
||
1066 | |||
1067 | dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; |
||
1068 | sin_approx(c, inst, dst, |
||
1069 | swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
1070 | constants); |
||
1071 | |||
1072 | dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; |
||
1073 | sin_approx(c, inst, dst, |
||
1074 | swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), |
||
1075 | constants); |
||
1076 | } |
||
1077 | |||
1078 | rc_remove_instruction(inst); |
||
1079 | |||
1080 | return 1; |
||
1081 | } |
||
1082 | |||
1083 | static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, |
||
1084 | struct rc_instruction *inst, |
||
1085 | unsigned srctmp) |
||
1086 | { |
||
1087 | if (inst->U.I.Opcode == RC_OPCODE_COS) { |
||
1088 | emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, inst->U.I.DstReg, |
||
1089 | srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); |
||
1090 | } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { |
||
1091 | emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, |
||
1092 | inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); |
||
1093 | } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { |
||
1094 | struct rc_dst_register moddst = inst->U.I.DstReg; |
||
1095 | |||
1096 | if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { |
||
1097 | moddst.WriteMask = RC_MASK_X; |
||
1098 | emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, moddst, |
||
1099 | srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); |
||
1100 | } |
||
1101 | if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { |
||
1102 | moddst.WriteMask = RC_MASK_Y; |
||
1103 | emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, moddst, |
||
1104 | srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); |
||
1105 | } |
||
1106 | } |
||
1107 | |||
1108 | rc_remove_instruction(inst); |
||
1109 | } |
||
1110 | |||
1111 | |||
1112 | /** |
||
1113 | * Transform the trigonometric functions COS, SIN, and SCS |
||
1114 | * to include pre-scaling by 1/(2*PI) and taking the fractional |
||
1115 | * part, so that the input to COS and SIN is always in the range [0,1). |
||
1116 | * SCS is replaced by one COS and one SIN instruction. |
||
1117 | * |
||
1118 | * @warning This transformation implicitly changes the semantics of SIN and COS! |
||
1119 | */ |
||
1120 | int radeonTransformTrigScale(struct radeon_compiler* c, |
||
1121 | struct rc_instruction* inst, |
||
1122 | void* unused) |
||
1123 | { |
||
1124 | static const float RCP_2PI = 0.15915494309189535; |
||
1125 | unsigned int temp; |
||
1126 | unsigned int constant; |
||
1127 | unsigned int constant_swizzle; |
||
1128 | |||
1129 | if (inst->U.I.Opcode != RC_OPCODE_COS && |
||
1130 | inst->U.I.Opcode != RC_OPCODE_SIN && |
||
1131 | inst->U.I.Opcode != RC_OPCODE_SCS) |
||
1132 | return 0; |
||
1133 | |||
1134 | temp = rc_find_free_temporary(c); |
||
1135 | constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); |
||
1136 | |||
1137 | emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), |
||
1138 | swizzle_xxxx(inst->U.I.SrcReg[0]), |
||
1139 | srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); |
||
1140 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), |
||
1141 | srcreg(RC_FILE_TEMPORARY, temp)); |
||
1142 | |||
1143 | r300_transform_SIN_COS_SCS(c, inst, temp); |
||
1144 | return 1; |
||
1145 | } |
||
1146 | |||
1147 | /** |
||
1148 | * Transform the trigonometric functions COS, SIN, and SCS |
||
1149 | * so that the input to COS and SIN is always in the range [-PI, PI]. |
||
1150 | * SCS is replaced by one COS and one SIN instruction. |
||
1151 | */ |
||
1152 | int r300_transform_trig_scale_vertex(struct radeon_compiler *c, |
||
1153 | struct rc_instruction *inst, |
||
1154 | void *unused) |
||
1155 | { |
||
1156 | static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; |
||
1157 | unsigned int temp; |
||
1158 | unsigned int constant; |
||
1159 | |||
1160 | if (inst->U.I.Opcode != RC_OPCODE_COS && |
||
1161 | inst->U.I.Opcode != RC_OPCODE_SIN && |
||
1162 | inst->U.I.Opcode != RC_OPCODE_SCS) |
||
1163 | return 0; |
||
1164 | |||
1165 | /* Repeat x in the range [-PI, PI]: |
||
1166 | * |
||
1167 | * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI |
||
1168 | */ |
||
1169 | |||
1170 | temp = rc_find_free_temporary(c); |
||
1171 | constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); |
||
1172 | |||
1173 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), |
||
1174 | swizzle_xxxx(inst->U.I.SrcReg[0]), |
||
1175 | srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), |
||
1176 | srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); |
||
1177 | emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), |
||
1178 | srcreg(RC_FILE_TEMPORARY, temp)); |
||
1179 | emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), |
||
1180 | srcreg(RC_FILE_TEMPORARY, temp), |
||
1181 | srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), |
||
1182 | srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); |
||
1183 | |||
1184 | r300_transform_SIN_COS_SCS(c, inst, temp); |
||
1185 | return 1; |
||
1186 | } |
||
1187 | |||
1188 | /** |
||
1189 | * Rewrite DDX/DDY instructions to properly work with r5xx shaders. |
||
1190 | * The r5xx MDH/MDV instruction provides per-quad partial derivatives. |
||
1191 | * It takes the form A*B+C. A and C are set by setting src0. B should be -1. |
||
1192 | * |
||
1193 | * @warning This explicitly changes the form of DDX and DDY! |
||
1194 | */ |
||
1195 | |||
1196 | int radeonTransformDeriv(struct radeon_compiler* c, |
||
1197 | struct rc_instruction* inst, |
||
1198 | void* unused) |
||
1199 | { |
||
1200 | if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) |
||
1201 | return 0; |
||
1202 | |||
1203 | inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; |
||
1204 | inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; |
||
1205 | |||
1206 | return 1; |
||
1207 | } |
||
1208 | |||
1209 | /** |
||
1210 | * IF Temp[0].x -> IF Temp[0].x |
||
1211 | * ... -> ... |
||
1212 | * KILL -> KIL -abs(Temp[0].x) |
||
1213 | * ... -> ... |
||
1214 | * ENDIF -> ENDIF |
||
1215 | * |
||
1216 | * === OR === |
||
1217 | * |
||
1218 | * IF Temp[0].x -\ |
||
1219 | * KILL - > KIL -abs(Temp[0].x) |
||
1220 | * ENDIF -/ |
||
1221 | * |
||
1222 | * === OR === |
||
1223 | * |
||
1224 | * IF Temp[0].x -> IF Temp[0].x |
||
1225 | * ... -> ... |
||
1226 | * ELSE -> ELSE |
||
1227 | * ... -> ... |
||
1228 | * KILL -> KIL -abs(Temp[0].x) |
||
1229 | * ... -> ... |
||
1230 | * ENDIF -> ENDIF |
||
1231 | * |
||
1232 | * === OR === |
||
1233 | * |
||
1234 | * KILL -> KIL -none.1111 |
||
1235 | * |
||
1236 | * This needs to be done in its own pass, because it might modify the |
||
1237 | * instructions before and after KILL. |
||
1238 | */ |
||
1239 | void rc_transform_KILL(struct radeon_compiler * c, void *user) |
||
1240 | { |
||
1241 | struct rc_instruction * inst; |
||
1242 | for (inst = c->Program.Instructions.Next; |
||
1243 | inst != &c->Program.Instructions; inst = inst->Next) { |
||
1244 | struct rc_instruction * if_inst; |
||
1245 | unsigned in_if = 0; |
||
1246 | |||
1247 | if (inst->U.I.Opcode != RC_OPCODE_KILP) |
||
1248 | continue; |
||
1249 | |||
1250 | for (if_inst = inst->Prev; if_inst != &c->Program.Instructions; |
||
1251 | if_inst = if_inst->Prev) { |
||
1252 | |||
1253 | if (if_inst->U.I.Opcode == RC_OPCODE_IF) { |
||
1254 | in_if = 1; |
||
1255 | break; |
||
1256 | } |
||
1257 | } |
||
1258 | |||
1259 | inst->U.I.Opcode = RC_OPCODE_KIL; |
||
1260 | |||
1261 | if (!in_if) { |
||
1262 | inst->U.I.SrcReg[0] = negate(builtin_one); |
||
1263 | } else { |
||
1264 | /* This should work even if the KILP is inside the ELSE |
||
1265 | * block, because -0.0 is considered negative. */ |
||
1266 | inst->U.I.SrcReg[0] = |
||
1267 | negate(absolute(if_inst->U.I.SrcReg[0])); |
||
1268 | |||
1269 | if (inst->Prev->U.I.Opcode != RC_OPCODE_IF |
||
1270 | && inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { |
||
1271 | |||
1272 | /* Optimize the special case: |
||
1273 | * IF Temp[0].x |
||
1274 | * KILP |
||
1275 | * ENDIF |
||
1276 | */ |
||
1277 | |||
1278 | /* Remove IF */ |
||
1279 | rc_remove_instruction(inst->Prev); |
||
1280 | /* Remove ENDIF */ |
||
1281 | rc_remove_instruction(inst->Next); |
||
1282 | } |
||
1283 | } |
||
1284 | } |
||
1285 | } |
||
1286 | |||
1287 | int rc_force_output_alpha_to_one(struct radeon_compiler *c, |
||
1288 | struct rc_instruction *inst, void *data) |
||
1289 | { |
||
1290 | struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler*)c; |
||
1291 | const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); |
||
1292 | unsigned tmp; |
||
1293 | |||
1294 | if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT || |
||
1295 | inst->U.I.DstReg.Index == fragc->OutputDepth) |
||
1296 | return 1; |
||
1297 | |||
1298 | tmp = rc_find_free_temporary(c); |
||
1299 | |||
1300 | /* Insert MOV after inst, set alpha to 1. */ |
||
1301 | emit1(c, inst, RC_OPCODE_MOV, 0, inst->U.I.DstReg, |
||
1302 | srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1)); |
||
1303 | |||
1304 | /* Re-route the destination of inst to the source of mov. */ |
||
1305 | inst->U.I.DstReg.File = RC_FILE_TEMPORARY; |
||
1306 | inst->U.I.DstReg.Index = tmp; |
||
1307 | |||
1308 | /* Move the saturate output modifier to the MOV instruction |
||
1309 | * (for better copy propagation). */ |
||
1310 | inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode; |
||
1311 | inst->U.I.SaturateMode = RC_SATURATE_NONE; |
||
1312 | return 1; |
||
1313 | }>>>==>=>>==>>>>>==>=>=>==>><>><>><>><>>>>>>>>><>><>><>><>><>><>><>><>> |