Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2003 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | #include "i915_reg.h" |
||
29 | #include "i915_context.h" |
||
30 | #include "i915_fpc.h" |
||
31 | #include "util/u_math.h" |
||
32 | |||
33 | uint |
||
34 | i915_get_temp(struct i915_fp_compile *p) |
||
35 | { |
||
36 | int bit = ffs(~p->temp_flag); |
||
37 | if (!bit) { |
||
38 | i915_program_error(p, "i915_get_temp: out of temporaries"); |
||
39 | return 0; |
||
40 | } |
||
41 | |||
42 | p->temp_flag |= 1 << (bit - 1); |
||
43 | return bit - 1; |
||
44 | } |
||
45 | |||
46 | |||
47 | static void |
||
48 | i915_release_temp(struct i915_fp_compile *p, int reg) |
||
49 | { |
||
50 | p->temp_flag &= ~(1 << reg); |
||
51 | } |
||
52 | |||
53 | |||
54 | /** |
||
55 | * Get unpreserved temporary, a temp whose value is not preserved between |
||
56 | * PS program phases. |
||
57 | */ |
||
58 | uint |
||
59 | i915_get_utemp(struct i915_fp_compile * p) |
||
60 | { |
||
61 | int bit = ffs(~p->utemp_flag); |
||
62 | if (!bit) { |
||
63 | i915_program_error(p, "i915_get_utemp: out of temporaries"); |
||
64 | return 0; |
||
65 | } |
||
66 | |||
67 | p->utemp_flag |= 1 << (bit - 1); |
||
68 | return UREG(REG_TYPE_U, (bit - 1)); |
||
69 | } |
||
70 | |||
71 | void |
||
72 | i915_release_utemps(struct i915_fp_compile *p) |
||
73 | { |
||
74 | p->utemp_flag = ~0x7; |
||
75 | } |
||
76 | |||
77 | |||
78 | uint |
||
79 | i915_emit_decl(struct i915_fp_compile *p, |
||
80 | uint type, uint nr, uint d0_flags) |
||
81 | { |
||
82 | uint reg = UREG(type, nr); |
||
83 | |||
84 | if (type == REG_TYPE_T) { |
||
85 | if (p->decl_t & (1 << nr)) |
||
86 | return reg; |
||
87 | |||
88 | p->decl_t |= (1 << nr); |
||
89 | } |
||
90 | else if (type == REG_TYPE_S) { |
||
91 | if (p->decl_s & (1 << nr)) |
||
92 | return reg; |
||
93 | |||
94 | p->decl_s |= (1 << nr); |
||
95 | } |
||
96 | else |
||
97 | return reg; |
||
98 | |||
99 | if (p->decl< p->declarations + I915_PROGRAM_SIZE) { |
||
100 | *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); |
||
101 | *(p->decl++) = D1_MBZ; |
||
102 | *(p->decl++) = D2_MBZ; |
||
103 | } |
||
104 | else |
||
105 | i915_program_error(p, "Out of declarations"); |
||
106 | |||
107 | p->nr_decl_insn++; |
||
108 | return reg; |
||
109 | } |
||
110 | |||
111 | uint |
||
112 | i915_emit_arith(struct i915_fp_compile * p, |
||
113 | uint op, |
||
114 | uint dest, |
||
115 | uint mask, |
||
116 | uint saturate, uint src0, uint src1, uint src2) |
||
117 | { |
||
118 | uint c[3]; |
||
119 | uint nr_const = 0; |
||
120 | |||
121 | assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); |
||
122 | dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); |
||
123 | assert(dest); |
||
124 | |||
125 | if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) |
||
126 | c[nr_const++] = 0; |
||
127 | if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) |
||
128 | c[nr_const++] = 1; |
||
129 | if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) |
||
130 | c[nr_const++] = 2; |
||
131 | |||
132 | /* Recursively call this function to MOV additional const values |
||
133 | * into temporary registers. Use utemp registers for this - |
||
134 | * currently shouldn't be possible to run out, but keep an eye on |
||
135 | * this. |
||
136 | */ |
||
137 | if (nr_const > 1) { |
||
138 | uint s[3], first, i, old_utemp_flag; |
||
139 | |||
140 | s[0] = src0; |
||
141 | s[1] = src1; |
||
142 | s[2] = src2; |
||
143 | old_utemp_flag = p->utemp_flag; |
||
144 | |||
145 | first = GET_UREG_NR(s[c[0]]); |
||
146 | for (i = 1; i < nr_const; i++) { |
||
147 | if (GET_UREG_NR(s[c[i]]) != first) { |
||
148 | uint tmp = i915_get_utemp(p); |
||
149 | |||
150 | i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, |
||
151 | s[c[i]], 0, 0); |
||
152 | s[c[i]] = tmp; |
||
153 | } |
||
154 | } |
||
155 | |||
156 | src0 = s[0]; |
||
157 | src1 = s[1]; |
||
158 | src2 = s[2]; |
||
159 | p->utemp_flag = old_utemp_flag; /* restore */ |
||
160 | } |
||
161 | |||
162 | if (p->csr< p->program + I915_PROGRAM_SIZE) { |
||
163 | *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); |
||
164 | *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); |
||
165 | *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); |
||
166 | } |
||
167 | else |
||
168 | i915_program_error(p, "Out of instructions"); |
||
169 | |||
170 | if (GET_UREG_TYPE(dest) == REG_TYPE_R) |
||
171 | p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; |
||
172 | |||
173 | p->nr_alu_insn++; |
||
174 | return dest; |
||
175 | } |
||
176 | |||
177 | |||
178 | /** |
||
179 | * Emit a texture load or texkill instruction. |
||
180 | * \param dest the dest i915 register |
||
181 | * \param destmask the dest register writemask |
||
182 | * \param sampler the i915 sampler register |
||
183 | * \param coord the i915 source texcoord operand |
||
184 | * \param opcode the instruction opcode |
||
185 | */ |
||
186 | uint i915_emit_texld( struct i915_fp_compile *p, |
||
187 | uint dest, |
||
188 | uint destmask, |
||
189 | uint sampler, |
||
190 | uint coord, |
||
191 | uint opcode, |
||
192 | uint num_coord ) |
||
193 | { |
||
194 | const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); |
||
195 | |||
196 | int temp = -1; |
||
197 | uint ignore = 0; |
||
198 | |||
199 | /* Eliminate the useless texture coordinates. Otherwise we end up generating |
||
200 | * a swizzle for no reason below. */ |
||
201 | switch(num_coord) { |
||
202 | case 0: |
||
203 | ignore |= (0xf << UREG_CHANNEL_X_SHIFT); |
||
204 | /* fall-through */ |
||
205 | case 1: |
||
206 | ignore |= (0xf << UREG_CHANNEL_Y_SHIFT); |
||
207 | /* fall-through */ |
||
208 | case 2: |
||
209 | ignore |= (0xf << UREG_CHANNEL_Z_SHIFT); |
||
210 | /* fall-through */ |
||
211 | case 3: |
||
212 | ignore |= (0xf << UREG_CHANNEL_W_SHIFT); |
||
213 | } |
||
214 | |||
215 | if ( (coord & ~ignore ) != (k & ~ignore) ) { |
||
216 | /* texcoord is swizzled or negated. Need to allocate a new temporary |
||
217 | * register (a utemp / unpreserved temp) won't do. |
||
218 | */ |
||
219 | uint tempReg; |
||
220 | |||
221 | temp = i915_get_temp(p); /* get temp reg index */ |
||
222 | tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ |
||
223 | |||
224 | i915_emit_arith( p, A0_MOV, |
||
225 | tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ |
||
226 | 0, /* saturate */ |
||
227 | coord, 0, 0 ); /* src0, src1, src2 */ |
||
228 | |||
229 | /* new src texcoord is tempReg */ |
||
230 | coord = tempReg; |
||
231 | } |
||
232 | |||
233 | /* Don't worry about saturate as we only support |
||
234 | */ |
||
235 | if (destmask != A0_DEST_CHANNEL_ALL) { |
||
236 | /* if not writing to XYZW... */ |
||
237 | uint tmp = i915_get_utemp(p); |
||
238 | i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode, num_coord ); |
||
239 | i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); |
||
240 | /* XXX release utemp here? */ |
||
241 | } |
||
242 | else { |
||
243 | assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); |
||
244 | assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); |
||
245 | |||
246 | /* Output register being oC or oD defines a phase boundary */ |
||
247 | if (GET_UREG_TYPE(dest) == REG_TYPE_OC || |
||
248 | GET_UREG_TYPE(dest) == REG_TYPE_OD) |
||
249 | p->nr_tex_indirect++; |
||
250 | |||
251 | /* Reading from an r# register whose contents depend on output of the |
||
252 | * current phase defines a phase boundary. |
||
253 | */ |
||
254 | if (GET_UREG_TYPE(coord) == REG_TYPE_R && |
||
255 | p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) |
||
256 | p->nr_tex_indirect++; |
||
257 | |||
258 | if (p->csr< p->program + I915_PROGRAM_SIZE) { |
||
259 | *(p->csr++) = (opcode | |
||
260 | T0_DEST( dest ) | |
||
261 | T0_SAMPLER( sampler )); |
||
262 | |||
263 | *(p->csr++) = T1_ADDRESS_REG( coord ); |
||
264 | *(p->csr++) = T2_MBZ; |
||
265 | } |
||
266 | else |
||
267 | i915_program_error(p, "Out of instructions"); |
||
268 | |||
269 | if (GET_UREG_TYPE(dest) == REG_TYPE_R) |
||
270 | p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect; |
||
271 | |||
272 | p->nr_tex_insn++; |
||
273 | } |
||
274 | |||
275 | if (temp >= 0) |
||
276 | i915_release_temp(p, temp); |
||
277 | |||
278 | return dest; |
||
279 | } |
||
280 | |||
281 | |||
282 | uint |
||
283 | i915_emit_const1f(struct i915_fp_compile * p, float c0) |
||
284 | { |
||
285 | struct i915_fragment_shader *ifs = p->shader; |
||
286 | unsigned reg, idx; |
||
287 | |||
288 | if (c0 == 0.0) |
||
289 | return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); |
||
290 | if (c0 == 1.0) |
||
291 | return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); |
||
292 | |||
293 | for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { |
||
294 | if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) |
||
295 | continue; |
||
296 | for (idx = 0; idx < 4; idx++) { |
||
297 | if (!(ifs->constant_flags[reg] & (1 << idx)) || |
||
298 | ifs->constants[reg][idx] == c0) { |
||
299 | ifs->constants[reg][idx] = c0; |
||
300 | ifs->constant_flags[reg] |= 1 << idx; |
||
301 | if (reg + 1 > ifs->num_constants) |
||
302 | ifs->num_constants = reg + 1; |
||
303 | return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); |
||
304 | } |
||
305 | } |
||
306 | } |
||
307 | |||
308 | i915_program_error(p, "i915_emit_const1f: out of constants"); |
||
309 | return 0; |
||
310 | } |
||
311 | |||
312 | uint |
||
313 | i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) |
||
314 | { |
||
315 | struct i915_fragment_shader *ifs = p->shader; |
||
316 | unsigned reg, idx; |
||
317 | |||
318 | if (c0 == 0.0) |
||
319 | return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); |
||
320 | if (c0 == 1.0) |
||
321 | return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); |
||
322 | |||
323 | if (c1 == 0.0) |
||
324 | return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); |
||
325 | if (c1 == 1.0) |
||
326 | return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); |
||
327 | |||
328 | // XXX emit swizzle here for 0, 1, -1 and any combination thereof |
||
329 | // we can use swizzle + neg for that |
||
330 | for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { |
||
331 | if (ifs->constant_flags[reg] == 0xf || |
||
332 | ifs->constant_flags[reg] == I915_CONSTFLAG_USER) |
||
333 | continue; |
||
334 | for (idx = 0; idx < 3; idx++) { |
||
335 | if (!(ifs->constant_flags[reg] & (3 << idx))) { |
||
336 | ifs->constants[reg][idx + 0] = c0; |
||
337 | ifs->constants[reg][idx + 1] = c1; |
||
338 | ifs->constant_flags[reg] |= 3 << idx; |
||
339 | if (reg + 1 > ifs->num_constants) |
||
340 | ifs->num_constants = reg + 1; |
||
341 | return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); |
||
342 | } |
||
343 | } |
||
344 | } |
||
345 | |||
346 | i915_program_error(p, "i915_emit_const2f: out of constants"); |
||
347 | return 0; |
||
348 | } |
||
349 | |||
350 | uint |
||
351 | i915_emit_const4f(struct i915_fp_compile * p, |
||
352 | float c0, float c1, float c2, float c3) |
||
353 | { |
||
354 | struct i915_fragment_shader *ifs = p->shader; |
||
355 | unsigned reg; |
||
356 | |||
357 | // XXX emit swizzle here for 0, 1, -1 and any combination thereof |
||
358 | // we can use swizzle + neg for that |
||
359 | for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { |
||
360 | if (ifs->constant_flags[reg] == 0xf && |
||
361 | ifs->constants[reg][0] == c0 && |
||
362 | ifs->constants[reg][1] == c1 && |
||
363 | ifs->constants[reg][2] == c2 && |
||
364 | ifs->constants[reg][3] == c3) { |
||
365 | return UREG(REG_TYPE_CONST, reg); |
||
366 | } |
||
367 | else if (ifs->constant_flags[reg] == 0) { |
||
368 | |||
369 | ifs->constants[reg][0] = c0; |
||
370 | ifs->constants[reg][1] = c1; |
||
371 | ifs->constants[reg][2] = c2; |
||
372 | ifs->constants[reg][3] = c3; |
||
373 | ifs->constant_flags[reg] = 0xf; |
||
374 | if (reg + 1 > ifs->num_constants) |
||
375 | ifs->num_constants = reg + 1; |
||
376 | return UREG(REG_TYPE_CONST, reg); |
||
377 | } |
||
378 | } |
||
379 | |||
380 | i915_program_error(p, "i915_emit_const4f: out of constants"); |
||
381 | return 0; |
||
382 | } |
||
383 | |||
384 | |||
385 | uint |
||
386 | i915_emit_const4fv(struct i915_fp_compile * p, const float * c) |
||
387 | { |
||
388 | return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); |
||
389 | }>><>><>>>><>><>>>>><>><>><>><>>>>><>><>><>><>><>><>><> |