Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. |
||
3 | * Copyright (C) 2008 VMware, Inc. All Rights Reserved. |
||
4 | * Copyright © 2010 Intel Corporation |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice (including the next |
||
14 | * paragraph) shall be included in all copies or substantial portions of the |
||
15 | * Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
22 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
23 | * DEALINGS IN THE SOFTWARE. |
||
24 | */ |
||
25 | |||
26 | /** |
||
27 | * \file ir_to_mesa.cpp |
||
28 | * |
||
29 | * Translate GLSL IR to Mesa's gl_program representation. |
||
30 | */ |
||
31 | |||
32 | #include |
||
33 | #include "main/compiler.h" |
||
34 | #include "ir.h" |
||
35 | #include "ir_visitor.h" |
||
36 | #include "ir_expression_flattening.h" |
||
37 | #include "ir_uniform.h" |
||
38 | #include "glsl_types.h" |
||
39 | #include "glsl_parser_extras.h" |
||
40 | #include "../glsl/program.h" |
||
41 | #include "ir_optimization.h" |
||
42 | #include "ast.h" |
||
43 | #include "linker.h" |
||
44 | |||
45 | #include "main/mtypes.h" |
||
46 | #include "main/shaderobj.h" |
||
47 | #include "program/hash_table.h" |
||
48 | |||
49 | extern "C" { |
||
50 | #include "main/shaderapi.h" |
||
51 | #include "main/uniforms.h" |
||
52 | #include "program/prog_instruction.h" |
||
53 | #include "program/prog_optimize.h" |
||
54 | #include "program/prog_print.h" |
||
55 | #include "program/program.h" |
||
56 | #include "program/prog_parameter.h" |
||
57 | #include "program/sampler.h" |
||
58 | } |
||
59 | |||
60 | class src_reg; |
||
61 | class dst_reg; |
||
62 | |||
63 | static int swizzle_for_size(int size); |
||
64 | |||
65 | /** |
||
66 | * This struct is a corresponding struct to Mesa prog_src_register, with |
||
67 | * wider fields. |
||
68 | */ |
||
69 | class src_reg { |
||
70 | public: |
||
71 | src_reg(gl_register_file file, int index, const glsl_type *type) |
||
72 | { |
||
73 | this->file = file; |
||
74 | this->index = index; |
||
75 | if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) |
||
76 | this->swizzle = swizzle_for_size(type->vector_elements); |
||
77 | else |
||
78 | this->swizzle = SWIZZLE_XYZW; |
||
79 | this->negate = 0; |
||
80 | this->reladdr = NULL; |
||
81 | } |
||
82 | |||
83 | src_reg() |
||
84 | { |
||
85 | this->file = PROGRAM_UNDEFINED; |
||
86 | this->index = 0; |
||
87 | this->swizzle = 0; |
||
88 | this->negate = 0; |
||
89 | this->reladdr = NULL; |
||
90 | } |
||
91 | |||
92 | explicit src_reg(dst_reg reg); |
||
93 | |||
94 | gl_register_file file; /**< PROGRAM_* from Mesa */ |
||
95 | int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ |
||
96 | GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ |
||
97 | int negate; /**< NEGATE_XYZW mask from mesa */ |
||
98 | /** Register index should be offset by the integer in this reg. */ |
||
99 | src_reg *reladdr; |
||
100 | }; |
||
101 | |||
102 | class dst_reg { |
||
103 | public: |
||
104 | dst_reg(gl_register_file file, int writemask) |
||
105 | { |
||
106 | this->file = file; |
||
107 | this->index = 0; |
||
108 | this->writemask = writemask; |
||
109 | this->cond_mask = COND_TR; |
||
110 | this->reladdr = NULL; |
||
111 | } |
||
112 | |||
113 | dst_reg() |
||
114 | { |
||
115 | this->file = PROGRAM_UNDEFINED; |
||
116 | this->index = 0; |
||
117 | this->writemask = 0; |
||
118 | this->cond_mask = COND_TR; |
||
119 | this->reladdr = NULL; |
||
120 | } |
||
121 | |||
122 | explicit dst_reg(src_reg reg); |
||
123 | |||
124 | gl_register_file file; /**< PROGRAM_* from Mesa */ |
||
125 | int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ |
||
126 | int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ |
||
127 | GLuint cond_mask:4; |
||
128 | /** Register index should be offset by the integer in this reg. */ |
||
129 | src_reg *reladdr; |
||
130 | }; |
||
131 | |||
132 | src_reg::src_reg(dst_reg reg) |
||
133 | { |
||
134 | this->file = reg.file; |
||
135 | this->index = reg.index; |
||
136 | this->swizzle = SWIZZLE_XYZW; |
||
137 | this->negate = 0; |
||
138 | this->reladdr = reg.reladdr; |
||
139 | } |
||
140 | |||
141 | dst_reg::dst_reg(src_reg reg) |
||
142 | { |
||
143 | this->file = reg.file; |
||
144 | this->index = reg.index; |
||
145 | this->writemask = WRITEMASK_XYZW; |
||
146 | this->cond_mask = COND_TR; |
||
147 | this->reladdr = reg.reladdr; |
||
148 | } |
||
149 | |||
150 | class ir_to_mesa_instruction : public exec_node { |
||
151 | public: |
||
152 | /* Callers of this ralloc-based new need not call delete. It's |
||
153 | * easier to just ralloc_free 'ctx' (or any of its ancestors). */ |
||
154 | static void* operator new(size_t size, void *ctx) |
||
155 | { |
||
156 | void *node; |
||
157 | |||
158 | node = rzalloc_size(ctx, size); |
||
159 | assert(node != NULL); |
||
160 | |||
161 | return node; |
||
162 | } |
||
163 | |||
164 | enum prog_opcode op; |
||
165 | dst_reg dst; |
||
166 | src_reg src[3]; |
||
167 | /** Pointer to the ir source this tree came from for debugging */ |
||
168 | ir_instruction *ir; |
||
169 | GLboolean cond_update; |
||
170 | bool saturate; |
||
171 | int sampler; /**< sampler index */ |
||
172 | int tex_target; /**< One of TEXTURE_*_INDEX */ |
||
173 | GLboolean tex_shadow; |
||
174 | }; |
||
175 | |||
/**
 * Records where an ir_variable lives in the generated program:
 * which register file and the base index within that file.
 */
class variable_storage : public exec_node {
public:
   variable_storage(ir_variable *var, gl_register_file file, int index)
      : file(file), index(index), var(var)
   {
      /* empty */
   }

   gl_register_file file;  /**< PROGRAM_* register file holding the value */
   int index;              /**< base register index within 'file' */
   ir_variable *var; /* variable that maps to this, if any */
};
||
188 | |||
/**
 * Per-function bookkeeping used to resolve subroutine calls, since
 * call sites may be emitted before the callee's body.
 */
class function_entry : public exec_node {
public:
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that Mesa instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   ir_to_mesa_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual
    * Mesa IR.
    *
    * Set after conversion from ir_to_mesa_instruction to prog_instruction.
    */
   int inst;

   /** Storage for the return value. */
   src_reg return_reg;
};
||
221 | |||
/**
 * Visitor that walks GLSL IR for a single shader and appends the
 * equivalent Mesa IR instructions to 'instructions'.
 */
class ir_to_mesa_visitor : public ir_visitor {
public:
   ir_to_mesa_visitor();
   ~ir_to_mesa_visitor();

   /** Function whose body is currently being emitted (NULL for main). */
   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;

   /** Next unallocated PROGRAM_TEMPORARY index; see get_temp(). */
   int next_temp;

   variable_storage *find_variable_storage(ir_variable *var);

   src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);

   src_reg src_reg_for_float(float val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   /*@}*/

   /** Register holding the value produced by the most recent rvalue visit. */
   src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of ir_to_mesa_instruction */
   exec_list instructions;

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst, src_reg src0);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst, src_reg src0, src_reg src1);

   ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst,
			        src_reg src0, src_reg src1, src_reg src2);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
				    dst_reg dst,
				    src_reg src0,
				    src_reg src1,
				    unsigned elements);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		    dst_reg dst, src_reg src0);

   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
		    dst_reg dst, src_reg src0, src_reg src1);

   void emit_scs(ir_instruction *ir, enum prog_opcode op,
		 dst_reg dst, const src_reg &src);

   bool try_emit_mad(ir_expression *ir,
		     int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
				 int mul_operand);
   bool try_emit_sat(ir_expression *ir);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void copy_propagate(void);

   /** ralloc context owning all instructions and temporaries built here. */
   void *mem_ctx;
};
||
326 | |||
/* Shared placeholder source operand for unused instruction source slots. */
static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);

/* Shared placeholder destination for instructions with no real dst. */
static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);

/* The address register written by OPCODE_ARL for relative addressing. */
static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
||
332 | |||
333 | static int |
||
334 | swizzle_for_size(int size) |
||
335 | { |
||
336 | static const int size_swizzles[4] = { |
||
337 | MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), |
||
338 | MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), |
||
339 | MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), |
||
340 | MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), |
||
341 | }; |
||
342 | |||
343 | assert((size >= 1) && (size <= 4)); |
||
344 | return size_swizzles[size - 1]; |
||
345 | } |
||
346 | |||
/**
 * Core emit: append a three-source Mesa IR instruction to the list.
 *
 * Any reladdr on the sources is first resolved into temporaries (all
 * but one), and a reladdr on the destination loads the address
 * register directly via ARL.  Returns the new instruction so callers
 * can tweak fields (writemask, saturate, etc.) after emission.
 */
ir_to_mesa_instruction *
ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
			 dst_reg dst,
			 src_reg src0, src_reg src1, src_reg src2)
{
   ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
   int num_reladdr = 0;

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL;
   num_reladdr += src0.reladdr != NULL;
   num_reladdr += src1.reladdr != NULL;
   num_reladdr += src2.reladdr != NULL;

   /* reladdr_to_temp leaves the final remaining reladdr in place (it
    * will use the address register directly); process in reverse so
    * src0 is the one that keeps it.
    */
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr) {
      /* Recursive emit: loads the address register for the dst. */
      emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
      num_reladdr--;
   }
   assert(num_reladdr == 0);

   inst->op = op;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = ir;   /* kept only for debugging */

   this->instructions.push_tail(inst);

   return inst;
}
||
385 | |||
386 | |||
/** Two-source convenience wrapper; the third source slot is undefined. */
ir_to_mesa_instruction *
ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
			 dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(ir, op, dst, src0, src1, undef_src);
}
||
393 | |||
/** One-source convenience wrapper; remaining source slots are undefined. */
ir_to_mesa_instruction *
ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
			 dst_reg dst, src_reg src0)
{
   /* A real instruction with a dst must write at least one component. */
   assert(dst.writemask != 0);
   return emit(ir, op, dst, src0, undef_src, undef_src);
}
||
401 | |||
/** Operand-less wrapper, e.g. for BGNLOOP/ENDLOOP/BRK/CONT markers. */
ir_to_mesa_instruction *
ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
{
   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
}
||
407 | |||
408 | ir_to_mesa_instruction * |
||
409 | ir_to_mesa_visitor::emit_dp(ir_instruction *ir, |
||
410 | dst_reg dst, src_reg src0, src_reg src1, |
||
411 | unsigned elements) |
||
412 | { |
||
413 | static const gl_inst_opcode dot_opcodes[] = { |
||
414 | OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 |
||
415 | }; |
||
416 | |||
417 | return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); |
||
418 | } |
||
419 | |||
/**
 * Emits Mesa scalar opcodes to produce unique answers across channels.
 *
 * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
 * channel determines the result across all channels.  So to do a vec4
 * of this operation, we want to emit a scalar per source channel used
 * to produce dest channels.
 */
void
ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
			        dst_reg dst,
			        src_reg orig_src0, src_reg orig_src1)
{
   int i, j;
   /* Bits already written (start with everything outside the writemask
    * marked done so it is never touched).
    */
   int done_mask = ~dst.writemask;

   /* Mesa RCP is a scalar operation splatting results to all channels,
    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
    * dst channels.
    */
   for (i = 0; i < 4; i++) {
      GLuint this_mask = (1 << i);
      ir_to_mesa_instruction *inst;
      src_reg src0 = orig_src0;
      src_reg src1 = orig_src1;

      if (done_mask & this_mask)
	 continue;

      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
      for (j = i + 1; j < 4; j++) {
	 /* If there is another enabled component in the destination that is
	  * derived from the same inputs, generate its value on this pass as
	  * well.
	  */
	 if (!(done_mask & (1 << j)) &&
	     GET_SWZ(src0.swizzle, j) == src0_swiz &&
	     GET_SWZ(src1.swizzle, j) == src1_swiz) {
	    this_mask |= (1 << j);
	 }
      }
      /* Splat the selected component across all four source channels so
       * the scalar opcode reads it regardless of which dst channel it
       * feeds.
       */
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
				   src0_swiz, src0_swiz);
      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
				   src1_swiz, src1_swiz);

      inst = emit(ir, op, dst, src0, src1);
      inst->dst.writemask = this_mask;
      done_mask |= this_mask;
   }
}
||
472 | |||
473 | void |
||
474 | ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, |
||
475 | dst_reg dst, src_reg src0) |
||
476 | { |
||
477 | src_reg undef = undef_src; |
||
478 | |||
479 | undef.swizzle = SWIZZLE_XXXX; |
||
480 | |||
481 | emit_scalar(ir, op, dst, src0, undef); |
||
482 | } |
||
483 | |||
484 | /** |
||
485 | * Emit an OPCODE_SCS instruction |
||
486 | * |
||
487 | * The \c SCS opcode functions a bit differently than the other Mesa (or |
||
488 | * ARB_fragment_program) opcodes. Instead of splatting its result across all |
||
489 | * four components of the destination, it writes one value to the \c x |
||
490 | * component and another value to the \c y component. |
||
491 | * |
||
492 | * \param ir IR instruction being processed |
||
493 | * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which |
||
494 | * value is desired. |
||
495 | * \param dst Destination register |
||
496 | * \param src Source register |
||
497 | */ |
||
498 | void |
||
499 | ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, |
||
500 | dst_reg dst, |
||
501 | const src_reg &src) |
||
502 | { |
||
503 | /* Vertex programs cannot use the SCS opcode. |
||
504 | */ |
||
505 | if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { |
||
506 | emit_scalar(ir, op, dst, src); |
||
507 | return; |
||
508 | } |
||
509 | |||
510 | const unsigned component = (op == OPCODE_SIN) ? 0 : 1; |
||
511 | const unsigned scs_mask = (1U << component); |
||
512 | int done_mask = ~dst.writemask; |
||
513 | src_reg tmp; |
||
514 | |||
515 | assert(op == OPCODE_SIN || op == OPCODE_COS); |
||
516 | |||
517 | /* If there are compnents in the destination that differ from the component |
||
518 | * that will be written by the SCS instrution, we'll need a temporary. |
||
519 | */ |
||
520 | if (scs_mask != unsigned(dst.writemask)) { |
||
521 | tmp = get_temp(glsl_type::vec4_type); |
||
522 | } |
||
523 | |||
524 | for (unsigned i = 0; i < 4; i++) { |
||
525 | unsigned this_mask = (1U << i); |
||
526 | src_reg src0 = src; |
||
527 | |||
528 | if ((done_mask & this_mask) != 0) |
||
529 | continue; |
||
530 | |||
531 | /* The source swizzle specified which component of the source generates |
||
532 | * sine / cosine for the current component in the destination. The SCS |
||
533 | * instruction requires that this value be swizzle to the X component. |
||
534 | * Replace the current swizzle with a swizzle that puts the source in |
||
535 | * the X component. |
||
536 | */ |
||
537 | unsigned src0_swiz = GET_SWZ(src.swizzle, i); |
||
538 | |||
539 | src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, |
||
540 | src0_swiz, src0_swiz); |
||
541 | for (unsigned j = i + 1; j < 4; j++) { |
||
542 | /* If there is another enabled component in the destination that is |
||
543 | * derived from the same inputs, generate its value on this pass as |
||
544 | * well. |
||
545 | */ |
||
546 | if (!(done_mask & (1 << j)) && |
||
547 | GET_SWZ(src0.swizzle, j) == src0_swiz) { |
||
548 | this_mask |= (1 << j); |
||
549 | } |
||
550 | } |
||
551 | |||
552 | if (this_mask != scs_mask) { |
||
553 | ir_to_mesa_instruction *inst; |
||
554 | dst_reg tmp_dst = dst_reg(tmp); |
||
555 | |||
556 | /* Emit the SCS instruction. |
||
557 | */ |
||
558 | inst = emit(ir, OPCODE_SCS, tmp_dst, src0); |
||
559 | inst->dst.writemask = scs_mask; |
||
560 | |||
561 | /* Move the result of the SCS instruction to the desired location in |
||
562 | * the destination. |
||
563 | */ |
||
564 | tmp.swizzle = MAKE_SWIZZLE4(component, component, |
||
565 | component, component); |
||
566 | inst = emit(ir, OPCODE_SCS, dst, tmp); |
||
567 | inst->dst.writemask = this_mask; |
||
568 | } else { |
||
569 | /* Emit the SCS instruction to write directly to the destination. |
||
570 | */ |
||
571 | ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); |
||
572 | inst->dst.writemask = scs_mask; |
||
573 | } |
||
574 | |||
575 | done_mask |= this_mask; |
||
576 | } |
||
577 | } |
||
578 | |||
579 | src_reg |
||
580 | ir_to_mesa_visitor::src_reg_for_float(float val) |
||
581 | { |
||
582 | src_reg src(PROGRAM_CONSTANT, -1, NULL); |
||
583 | |||
584 | src.index = _mesa_add_unnamed_constant(this->prog->Parameters, |
||
585 | (const gl_constant_value *)&val, 1, &src.swizzle); |
||
586 | |||
587 | return src; |
||
588 | } |
||
589 | |||
590 | static int |
||
591 | type_size(const struct glsl_type *type) |
||
592 | { |
||
593 | unsigned int i; |
||
594 | int size; |
||
595 | |||
596 | switch (type->base_type) { |
||
597 | case GLSL_TYPE_UINT: |
||
598 | case GLSL_TYPE_INT: |
||
599 | case GLSL_TYPE_FLOAT: |
||
600 | case GLSL_TYPE_BOOL: |
||
601 | if (type->is_matrix()) { |
||
602 | return type->matrix_columns; |
||
603 | } else { |
||
604 | /* Regardless of size of vector, it gets a vec4. This is bad |
||
605 | * packing for things like floats, but otherwise arrays become a |
||
606 | * mess. Hopefully a later pass over the code can pack scalars |
||
607 | * down if appropriate. |
||
608 | */ |
||
609 | return 1; |
||
610 | } |
||
611 | case GLSL_TYPE_ARRAY: |
||
612 | assert(type->length > 0); |
||
613 | return type_size(type->fields.array) * type->length; |
||
614 | case GLSL_TYPE_STRUCT: |
||
615 | size = 0; |
||
616 | for (i = 0; i < type->length; i++) { |
||
617 | size += type_size(type->fields.structure[i].type); |
||
618 | } |
||
619 | return size; |
||
620 | case GLSL_TYPE_SAMPLER: |
||
621 | /* Samplers take up one slot in UNIFORMS[], but they're baked in |
||
622 | * at link time. |
||
623 | */ |
||
624 | return 1; |
||
625 | case GLSL_TYPE_VOID: |
||
626 | case GLSL_TYPE_ERROR: |
||
627 | case GLSL_TYPE_INTERFACE: |
||
628 | assert(!"Invalid type in type_size"); |
||
629 | break; |
||
630 | } |
||
631 | |||
632 | return 0; |
||
633 | } |
||
634 | |||
635 | /** |
||
636 | * In the initial pass of codegen, we assign temporary numbers to |
||
637 | * intermediate results. (not SSA -- variable assignments will reuse |
||
638 | * storage). Actual register allocation for the Mesa VM occurs in a |
||
639 | * pass over the Mesa IR later. |
||
640 | */ |
||
641 | src_reg |
||
642 | ir_to_mesa_visitor::get_temp(const glsl_type *type) |
||
643 | { |
||
644 | src_reg src; |
||
645 | |||
646 | src.file = PROGRAM_TEMPORARY; |
||
647 | src.index = next_temp; |
||
648 | src.reladdr = NULL; |
||
649 | next_temp += type_size(type); |
||
650 | |||
651 | if (type->is_array() || type->is_record()) { |
||
652 | src.swizzle = SWIZZLE_NOOP; |
||
653 | } else { |
||
654 | src.swizzle = swizzle_for_size(type->vector_elements); |
||
655 | } |
||
656 | src.negate = 0; |
||
657 | |||
658 | return src; |
||
659 | } |
||
660 | |||
661 | variable_storage * |
||
662 | ir_to_mesa_visitor::find_variable_storage(ir_variable *var) |
||
663 | { |
||
664 | |||
665 | variable_storage *entry; |
||
666 | |||
667 | foreach_iter(exec_list_iterator, iter, this->variables) { |
||
668 | entry = (variable_storage *)iter.get(); |
||
669 | |||
670 | if (entry->var == var) |
||
671 | return entry; |
||
672 | } |
||
673 | |||
674 | return NULL; |
||
675 | } |
||
676 | |||
/**
 * Handle variable declarations.
 *
 * Most variables need no work here, but gl_FragCoord's layout
 * qualifiers are recorded on the fragment program, and built-in
 * uniforms (state variables) are bound to PROGRAM_STATE_VAR entries —
 * copied through a temporary when the state layout doesn't match how
 * the shader will index them.
 */
void
ir_to_mesa_visitor::visit(ir_variable *ir)
{
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;

      /* Propagate the layout qualifiers to the program object. */
      fp->OriginUpperLeft = ir->origin_upper_left;
      fp->PixelCenterInteger = ir->pixel_center_integer;
   }

   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->state_slots;
      assert(ir->state_slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever.  If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->num_state_slots; i++) {
	 if (slots[i].swizzle != SWIZZLE_XYZW) {
	    break;
	 }
      }

      variable_storage *storage;
      dst_reg dst;
      if (i == ir->num_state_slots) {
	 /* All slots are full XYZW: reference STATE_VAR directly.
	  * We'll set the index later.
	  */
	 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
	 this->variables.push_tail(storage);

	 dst = undef_dst;
      } else {
	 /* The variable_storage constructor allocates slots based on the size
	  * of the type.  However, this had better match the number of state
	  * elements that we're going to copy into the new temporary.
	  */
	 assert((int) ir->num_state_slots == type_size(ir->type));

	 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
						 this->next_temp);
	 this->variables.push_tail(storage);
	 this->next_temp += type_size(ir->type);

	 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
      }


      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
	 int index = _mesa_add_state_reference(this->prog->Parameters,
					       (gl_state_index *)slots[i].tokens);

	 if (storage->file == PROGRAM_STATE_VAR) {
	    if (storage->index == -1) {
	       /* First slot: record where the state run begins. */
	       storage->index = index;
	    } else {
	       /* Subsequent slots must be contiguous in the STATE file. */
	       assert(index == storage->index + (int)i);
	    }
	 } else {
	    /* Copy each state slot into the temporary, honoring its swizzle. */
	    src_reg src(PROGRAM_STATE_VAR, index, NULL);
	    src.swizzle = slots[i].swizzle;
	    emit(ir, OPCODE_MOV, dst, src);
	    /* even a float takes up a whole vec4 reg in a struct/array. */
	    dst.index++;
	 }
      }

      if (storage->file == PROGRAM_TEMPORARY &&
	  dst.index != storage->index + (int) ir->num_state_slots) {
	 linker_error(this->shader_program,
		      "failed to load builtin uniform `%s' "
		      "(%d/%d regs loaded)\n",
		      ir->name, dst.index - storage->index,
		      type_size(ir->type));
      }
   }
}
||
757 | |||
/**
 * Emit a loop: BGNLOOP, optional counter initialization / bound test /
 * increment built as synthetic IR and re-visited, then ENDLOOP.
 */
void
ir_to_mesa_visitor::visit(ir_loop *ir)
{
   ir_dereference_variable *counter = NULL;

   if (ir->counter != NULL)
      counter = new(mem_ctx) ir_dereference_variable(ir->counter);

   if (ir->from != NULL) {
      /* counter = from; emitted before the loop body. */
      assert(ir->counter != NULL);

      ir_assignment *a =
	 new(mem_ctx) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
   }

   emit(NULL, OPCODE_BGNLOOP);

   if (ir->to) {
      /* if (counter cmp to) break; at the top of the loop body. */
      ir_expression *e =
	 new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type,
				    counter, ir->to);
      ir_if *if_stmt =  new(mem_ctx) ir_if(e);

      ir_loop_jump *brk =
	 new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);
   }

   visit_exec_list(&ir->body_instructions, this);

   if (ir->increment) {
      /* counter = counter + increment; at the bottom of the loop body. */
      ir_expression *e =
	 new(mem_ctx) ir_expression(ir_binop_add, counter->type,
				    counter, ir->increment);

      ir_assignment *a =
	 new(mem_ctx) ir_assignment(counter, e, NULL);

      a->accept(this);
   }

   emit(NULL, OPCODE_ENDLOOP);
}
||
806 | |||
807 | void |
||
808 | ir_to_mesa_visitor::visit(ir_loop_jump *ir) |
||
809 | { |
||
810 | switch (ir->mode) { |
||
811 | case ir_loop_jump::jump_break: |
||
812 | emit(NULL, OPCODE_BRK); |
||
813 | break; |
||
814 | case ir_loop_jump::jump_continue: |
||
815 | emit(NULL, OPCODE_CONT); |
||
816 | break; |
||
817 | } |
||
818 | } |
||
819 | |||
820 | |||
821 | void |
||
822 | ir_to_mesa_visitor::visit(ir_function_signature *ir) |
||
823 | { |
||
824 | assert(0); |
||
825 | (void)ir; |
||
826 | } |
||
827 | |||
828 | void |
||
829 | ir_to_mesa_visitor::visit(ir_function *ir) |
||
830 | { |
||
831 | /* Ignore function bodies other than main() -- we shouldn't see calls to |
||
832 | * them since they should all be inlined before we get to ir_to_mesa. |
||
833 | */ |
||
834 | if (strcmp(ir->name, "main") == 0) { |
||
835 | const ir_function_signature *sig; |
||
836 | exec_list empty; |
||
837 | |||
838 | sig = ir->matching_signature(&empty); |
||
839 | |||
840 | assert(sig); |
||
841 | |||
842 | foreach_iter(exec_list_iterator, iter, sig->body) { |
||
843 | ir_instruction *ir = (ir_instruction *)iter.get(); |
||
844 | |||
845 | ir->accept(this); |
||
846 | } |
||
847 | } |
||
848 | } |
||
849 | |||
850 | bool |
||
851 | ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) |
||
852 | { |
||
853 | int nonmul_operand = 1 - mul_operand; |
||
854 | src_reg a, b, c; |
||
855 | |||
856 | ir_expression *expr = ir->operands[mul_operand]->as_expression(); |
||
857 | if (!expr || expr->operation != ir_binop_mul) |
||
858 | return false; |
||
859 | |||
860 | expr->operands[0]->accept(this); |
||
861 | a = this->result; |
||
862 | expr->operands[1]->accept(this); |
||
863 | b = this->result; |
||
864 | ir->operands[nonmul_operand]->accept(this); |
||
865 | c = this->result; |
||
866 | |||
867 | this->result = get_temp(ir->type); |
||
868 | emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c); |
||
869 | |||
870 | return true; |
||
871 | } |
||
872 | |||
873 | /** |
||
874 | * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) |
||
875 | * |
||
876 | * The logic values are 1.0 for true and 0.0 for false. Logical-and is |
||
877 | * implemented using multiplication, and logical-or is implemented using |
||
878 | * addition. Logical-not can be implemented as (true - x), or (1.0 - x). |
||
879 | * As result, the logical expression (a & !b) can be rewritten as: |
||
880 | * |
||
881 | * - a * !b |
||
882 | * - a * (1 - b) |
||
883 | * - (a * 1) - (a * b) |
||
884 | * - a + -(a * b) |
||
885 | * - a + (a * -b) |
||
886 | * |
||
887 | * This final expression can be implemented as a single MAD(a, -b, a) |
||
888 | * instruction. |
||
889 | */ |
||
890 | bool |
||
891 | ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) |
||
892 | { |
||
893 | const int other_operand = 1 - try_operand; |
||
894 | src_reg a, b; |
||
895 | |||
896 | ir_expression *expr = ir->operands[try_operand]->as_expression(); |
||
897 | if (!expr || expr->operation != ir_unop_logic_not) |
||
898 | return false; |
||
899 | |||
900 | ir->operands[other_operand]->accept(this); |
||
901 | a = this->result; |
||
902 | expr->operands[0]->accept(this); |
||
903 | b = this->result; |
||
904 | |||
905 | b.negate = ~b.negate; |
||
906 | |||
907 | this->result = get_temp(ir->type); |
||
908 | emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); |
||
909 | |||
910 | return true; |
||
911 | } |
||
912 | |||
/**
 * Try to emit \c ir as a saturated value, folding the saturate into the
 * instruction that produced its operand when possible.
 *
 * \return false (nothing emitted) if saturates are unavailable for this
 *         program target or \c ir is not a saturate-able expression;
 *         true otherwise, with the saturated value in this->result.
 */
bool
ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
{
   /* Saturates were only introduced to vertex programs in
    * NV_vertex_program3, so don't give them to drivers in the VP.
    */
   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
      return false;

   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   /* If we generated an expression instruction into a temporary in
    * processing the saturate's operand, apply the saturate to that
    * instruction.  Otherwise, generate a MOV to do the saturate.
    *
    * Note that we have to be careful to only do this optimization if
    * the instruction in question was what generated src->result.  For
    * example, ir_dereference_array might generate a MUL instruction
    * to create the reladdr, and return us a src reg using that
    * reladdr.  That MUL result is not the value we're trying to
    * saturate.
    */
   ir_expression *sat_src_expr = sat_src->as_expression();
   ir_to_mesa_instruction *new_inst;
   /* Tail of the instruction stream is assumed to be the producer of src
    * for the MUL/ADD/DOT cases below — those visit paths emit their result
    * as the final instruction.
    */
   new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
			sat_src_expr->operation == ir_binop_add ||
			sat_src_expr->operation == ir_binop_dot)) {
      /* Free saturate: tag the already-emitted instruction. */
      new_inst->saturate = true;
   } else {
      /* Fallback: saturate through an extra MOV into a fresh temporary. */
      this->result = get_temp(ir->type);
      ir_to_mesa_instruction *inst;
      inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
      inst->saturate = true;
   }

   return true;
}
||
956 | |||
957 | void |
||
958 | ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, |
||
959 | src_reg *reg, int *num_reladdr) |
||
960 | { |
||
961 | if (!reg->reladdr) |
||
962 | return; |
||
963 | |||
964 | emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); |
||
965 | |||
966 | if (*num_reladdr != 1) { |
||
967 | src_reg temp = get_temp(glsl_type::vec4_type); |
||
968 | |||
969 | emit(ir, OPCODE_MOV, dst_reg(temp), *reg); |
||
970 | *reg = temp; |
||
971 | } |
||
972 | |||
973 | (*num_reladdr)--; |
||
974 | } |
||
975 | |||
/**
 * Emit an OPCODE_SWZ for an ir_quadop_vector expression.
 *
 * Each channel of the result is described by one scalar operand of \c ir;
 * the operands are flattened into a single swizzle/negate pair over one
 * source variable.
 */
void
ir_to_mesa_visitor::emit_swz(ir_expression *ir)
{
   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
    * This means that each of the operands is either an immediate value of -1,
    * 0, or 1, or is a component from one source register (possibly with
    * negation).
    */
   uint8_t components[4] = { 0 };
   bool negate[4] = { false };
   ir_variable *var = NULL;

   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
      ir_rvalue *op = ir->operands[i];

      assert(op->type->is_scalar());

      /* Peel each operand down to its underlying variable, recording the
       * component and negation it contributes to channel i.
       */
      while (op != NULL) {
	 switch (op->ir_type) {
	 case ir_type_constant: {

	    assert(op->type->is_scalar());

	    const ir_constant *const c = op->as_constant();
	    if (c->is_one()) {
	       components[i] = SWIZZLE_ONE;
	    } else if (c->is_zero()) {
	       components[i] = SWIZZLE_ZERO;
	    } else if (c->is_negative_one()) {
	       /* -1 is encoded as SWIZZLE_ONE with the negate bit set. */
	       components[i] = SWIZZLE_ONE;
	       negate[i] = true;
	    } else {
	       assert(!"SWZ constant must be 0.0 or 1.0.");
	    }

	    op = NULL;
	    break;
	 }

	 case ir_type_dereference_variable: {
	    ir_dereference_variable *const deref =
	       (ir_dereference_variable *) op;

	    /* Every non-constant channel must come from the same variable. */
	    assert((var == NULL) || (deref->var == var));
	    /* NOTE(review): this unconditionally overwrites components[i],
	     * including a component recorded by the ir_type_swizzle case in
	     * an earlier iteration of this while loop — confirm operands are
	     * never swizzled dereferences, or whether this clobber is a bug.
	     */
	    components[i] = SWIZZLE_X;
	    var = deref->var;
	    op = NULL;
	    break;
	 }

	 case ir_type_expression: {
	    ir_expression *const expr = (ir_expression *) op;

	    /* Only negation survives to this point; fold it into the
	     * channel's negate flag and keep peeling.
	     */
	    assert(expr->operation == ir_unop_neg);
	    negate[i] = true;

	    op = expr->operands[0];
	    break;
	 }

	 case ir_type_swizzle: {
	    ir_swizzle *const swiz = (ir_swizzle *) op;

	    /* A scalar swizzle selects exactly one component (mask.x). */
	    components[i] = swiz->mask.x;
	    op = swiz->val;
	    break;
	 }

	 default:
	    assert(!"Should not get here.");
	    return;
	 }
      }
   }

   assert(var != NULL);

   /* Synthesize a dereference of the one source variable and visit it to
    * obtain the register that holds it.
    */
   ir_dereference_variable *const deref =
      new(mem_ctx) ir_dereference_variable(var);

   this->result.file = PROGRAM_UNDEFINED;
   deref->accept(this);
   if (this->result.file == PROGRAM_UNDEFINED) {
      printf("Failed to get tree for expression operand:\n");
      deref->print();
      printf("\n");
      exit(1);
   }

   src_reg src;

   src = this->result;
   src.swizzle = MAKE_SWIZZLE4(components[0],
			       components[1],
			       components[2],
			       components[3]);
   /* Pack the per-channel negate flags into the 4-bit negate mask. */
   src.negate = ((unsigned(negate[0]) << 0)
		 | (unsigned(negate[1]) << 1)
		 | (unsigned(negate[2]) << 2)
		 | (unsigned(negate[3]) << 3));

   /* Storage for our result.  Ideally for an assignment we'd be using the
    * actual storage for the result here, instead.
    */
   const src_reg result_src = get_temp(ir->type);
   dst_reg result_dst = dst_reg(result_src);

   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   emit(ir, OPCODE_SWZ, result_dst, src);
   this->result = result_src;
}
||
1092 | |||
/**
 * Translate a GLSL IR expression to one or more Mesa IR instructions.
 *
 * Tries the MAD/saturate/SWZ peepholes first; otherwise evaluates every
 * operand into a src_reg and emits the matching opcode into a fresh
 * temporary, leaving the result in this->result.
 */
void
ir_to_mesa_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;

   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 1))
	 return;
      if (try_emit_mad(ir, 0))
	 return;
   }

   /* Quick peephole: Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
    */
   if (ir->operation == ir_binop_logic_and) {
      if (try_emit_mad_for_and_not(ir, 1))
	 return;
      if (try_emit_mad_for_and_not(ir, 0))
	 return;
   }

   if (try_emit_sat(ir))
      return;

   if (ir->operation == ir_quadop_vector) {
      this->emit_swz(ir);
      return;
   }

   /* Evaluate each operand; every accept() leaves its storage in
    * this->result.
    */
   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
	 printf("Failed to get tree for expression operand:\n");
	 ir->operands[operand]->print();
	 printf("\n");
	 exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
			     ir->operands[1]->type->vector_elements);
   }

   this->result.file = PROGRAM_UNDEFINED;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
       * older GPUs implement SEQ using multiple instructions (i915 uses two
       * SGE instructions and a MUL instruction).  Since our logic values are
       * 0.0 and 1.0, 1-x also implements !x.
       */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
      break;
   case ir_unop_neg:
      /* Free negation: flip the source's negate mask, no instruction. */
      op[0].negate = ~op[0].negate;
      result_src = op[0];
      break;
   case ir_unop_abs:
      emit(ir, OPCODE_ABS, result_dst, op[0]);
      break;
   case ir_unop_sign:
      emit(ir, OPCODE_SSG, result_dst, op[0]);
      break;
   case ir_unop_rcp:
      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_log2:
      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
      break;
   case ir_unop_sin:
      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
      break;
   case ir_unop_sin_reduced:
      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos_reduced:
      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
      emit(ir, OPCODE_DDX, result_dst, op[0]);
      break;
   case ir_unop_dFdy:
      emit(ir, OPCODE_DDY, result_dst, op[0]);
      break;

   case ir_unop_noise: {
      /* Select NOISE1..NOISE4 by the operand's vector width. */
      const enum prog_opcode opcode =
	 prog_opcode(OPCODE_NOISE1
		     + (ir->operands[0]->type->vector_elements) - 1);
      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));

      emit(ir, opcode, result_dst, op[0]);
      break;
   }

   case ir_binop_add:
      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
      break;

   case ir_binop_mul:
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
      assert(ir->type->is_integer());
      /* NOTE(review): Mesa IR has no integer opcodes (see the GLSL 1.30
       * note before ir_binop_lshift below), so this MUL is a placeholder
       * and does not compute a real modulus — confirm intended behavior.
       */
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_less:
      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
      break;
   case ir_binop_greater:
      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
      break;
   case ir_binop_lequal:
      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
      break;
   case ir_binop_gequal:
      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
      break;
   case ir_binop_equal:
      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      break;
   case ir_binop_nequal:
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 src_reg temp = get_temp(glsl_type::vec4_type);
	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);

	 /* After the dot-product, the value will be an integer on the
	  * range [0,4].  Zero becomes 1.0, and positive values become zero.
	  */
	 emit_dp(ir, result_dst, temp, temp, vector_elements);

	 /* Negating the result of the dot-product gives values on the range
	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
	  * is achieved using SGE.
	  */
	 src_reg sge_src = result_src;
	 sge_src.negate = ~sge_src.negate;
	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
      } else {
	 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 src_reg temp = get_temp(glsl_type::vec4_type);
	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);

	 /* After the dot-product, the value will be an integer on the
	  * range [0,4].  Zero stays zero, and positive values become 1.0.
	  */
	 ir_to_mesa_instruction *const dp =
	    emit_dp(ir, result_dst, temp, temp, vector_elements);
	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
	    /* The clamping to [0,1] can be done for free in the fragment
	     * shader with a saturate.
	     */
	    dp->saturate = true;
	 } else {
	    /* Negating the result of the dot-product gives values on the range
	     * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
	     * is achieved using SLT.
	     */
	    src_reg slt_src = result_src;
	    slt_src.negate = ~slt_src.negate;
	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
	 }
      } else {
	 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      }
      break;

   case ir_unop_any: {
      assert(ir->operands[0]->type->is_vector());

      /* After the dot-product, the value will be an integer on the
       * range [0,4].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *const dp =
	 emit_dp(ir, result_dst, op[0], op[0],
		 ir->operands[0]->type->vector_elements);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
	 /* The clamping to [0,1] can be done for free in the fragment
	  * shader with a saturate.
	  */
	 dp->saturate = true;
      } else {
	 /* Negating the result of the dot-product gives values on the range
	  * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
	  * is achieved using SLT.
	  */
	 src_reg slt_src = result_src;
	 slt_src.negate = ~slt_src.negate;
	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_xor:
      /* Boolean 0.0/1.0 inputs: inequality is exactly xor. */
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or: {
      /* After the addition, the value will be an integer on the
       * range [0,2].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *add =
	 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
	 /* The clamping to [0,1] can be done for free in the fragment
	  * shader with a saturate.
	  */
	 add->saturate = true;
      } else {
	 /* Negating the result of the addition gives values on the range
	  * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
	  * is achieved using SLT.
	  */
	 src_reg slt_src = result_src;
	 slt_src.negate = ~slt_src.negate;
	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_and:
      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(ir, result_dst, op[0], op[1],
	      ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      /* sqrt(x) = x * rsq(x). */
      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
      /* For incoming channels <= 0, set the result to 0. */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_CMP, result_dst,
	   op[0], result_src, src_reg_for_float(0.0));
      break;
   case ir_unop_rsq:
      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_i2u:
   case ir_unop_u2i:
      /* Mesa IR lacks types, ints are stored as truncated floats. */
      result_src = op[0];
      break;
   case ir_unop_f2i:
   case ir_unop_f2u:
      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      /* Non-zero becomes true (1.0). */
      emit(ir, OPCODE_SNE, result_dst,
	   op[0], src_reg_for_float(0.0));
      break;
   case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_u2f:
      break;
   case ir_unop_trunc:
      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      /* ceil(x) = -floor(-x). */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_FLR, result_dst, op[0]);
      result_src.negate = ~result_src.negate;
      break;
   case ir_unop_floor:
      emit(ir, OPCODE_FLR, result_dst, op[0]);
      break;
   case ir_unop_fract:
      emit(ir, OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_unop_unpack_half_2x16_split_x:
   case ir_unop_unpack_half_2x16_split_y:
   case ir_binop_pack_half_2x16_split:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
      assert(!"not supported");
      break;
   case ir_binop_min:
      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
      break;
   case ir_binop_max:
      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
      break;
   case ir_binop_pow:
      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
      break;

   /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
    * hardware backends have no way to avoid Mesa IR generation
    * even if they don't use it, we need to emit "something" and
    * continue.
    */
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
   case ir_unop_round_even:
      /* Placeholder pass-through, same rationale as the integer ops above. */
      emit(ir, OPCODE_MOV, result_dst, op[0]);
      break;

   case ir_binop_ubo_load:
      assert(!"not supported");
      break;

   case ir_triop_lrp:
      /* ir_triop_lrp operands are (x, y, a) while
       * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program.
       */
      emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
      break;

   case ir_binop_vector_extract:
   case ir_binop_bfm:
   case ir_triop_bfi:
   case ir_triop_bitfield_extract:
   case ir_triop_vector_insert:
   case ir_quadop_bitfield_insert:
      assert(!"not supported");
      break;

   case ir_quadop_vector:
      /* This operation should have already been handled.
       */
      assert(!"Should not get here.");
      break;
   }

   this->result = result_src;
}
||
1509 | |||
1510 | |||
1511 | void |
||
1512 | ir_to_mesa_visitor::visit(ir_swizzle *ir) |
||
1513 | { |
||
1514 | src_reg src; |
||
1515 | int i; |
||
1516 | int swizzle[4]; |
||
1517 | |||
1518 | /* Note that this is only swizzles in expressions, not those on the left |
||
1519 | * hand side of an assignment, which do write masking. See ir_assignment |
||
1520 | * for that. |
||
1521 | */ |
||
1522 | |||
1523 | ir->val->accept(this); |
||
1524 | src = this->result; |
||
1525 | assert(src.file != PROGRAM_UNDEFINED); |
||
1526 | |||
1527 | for (i = 0; i < 4; i++) { |
||
1528 | if (i < ir->type->vector_elements) { |
||
1529 | switch (i) { |
||
1530 | case 0: |
||
1531 | swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); |
||
1532 | break; |
||
1533 | case 1: |
||
1534 | swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); |
||
1535 | break; |
||
1536 | case 2: |
||
1537 | swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); |
||
1538 | break; |
||
1539 | case 3: |
||
1540 | swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); |
||
1541 | break; |
||
1542 | } |
||
1543 | } else { |
||
1544 | /* If the type is smaller than a vec4, replicate the last |
||
1545 | * channel out. |
||
1546 | */ |
||
1547 | swizzle[i] = swizzle[ir->type->vector_elements - 1]; |
||
1548 | } |
||
1549 | } |
||
1550 | |||
1551 | src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); |
||
1552 | |||
1553 | this->result = src; |
||
1554 | } |
||
1555 | |||
/**
 * Resolve a variable dereference to its register-file storage, allocating
 * storage on first use according to the variable's mode.
 */
void
ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
{
   variable_storage *entry = find_variable_storage(ir->var);
   ir_variable *var = ir->var;

   if (!entry) {
      switch (var->mode) {
      case ir_var_uniform:
	 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
					       var->location);
	 this->variables.push_tail(entry);
	 break;
      case ir_var_shader_in:
	 /* The linker assigns locations for varyings and attributes,
	  * including deprecated builtins (like gl_Color),
	  * user-assigned generic attributes (glBindVertexLocation),
	  * and user-defined varyings.
	  */
	 assert(var->location != -1);
	 /* NOTE(review): unlike uniforms and temporaries, in/out and
	  * system-value storages are not pushed onto this->variables, so
	  * each dereference rebuilds one — confirm this is intentional.
	  */
	 entry = new(mem_ctx) variable_storage(var,
					       PROGRAM_INPUT,
					       var->location);
	 break;
      case ir_var_shader_out:
	 assert(var->location != -1);
	 entry = new(mem_ctx) variable_storage(var,
					       PROGRAM_OUTPUT,
					       var->location);
	 break;
      case ir_var_system_value:
	 entry = new(mem_ctx) variable_storage(var,
					       PROGRAM_SYSTEM_VALUE,
					       var->location);
	 break;
      case ir_var_auto:
      case ir_var_temporary:
	 /* Locals get consecutive temporary registers, sized by the
	  * register footprint of their type.
	  */
	 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
					       this->next_temp);
	 this->variables.push_tail(entry);

	 next_temp += type_size(var->type);
	 break;
      }

      if (!entry) {
	 printf("Failed to make storage for %s\n", var->name);
	 exit(1);
      }
   }

   this->result = src_reg(entry->file, entry->index, var->type);
}
||
1609 | |||
/**
 * Resolve an array dereference to a src_reg, statically when the index
 * constant-folds and via a relative-address (reladdr) register otherwise.
 */
void
ir_to_mesa_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   src_reg src;
   int element_size = type_size(ir->type);

   /* A constant-foldable index lets us address the element statically. */
   index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (index) {
      src.index += index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
	 index_reg = this->result;
      } else {
	 /* Scale the index by the register footprint of one element. */
	 index_reg = get_temp(glsl_type::float_type);

	 emit(ir, OPCODE_MUL, dst_reg(index_reg),
	      this->result, src_reg_for_float(element_size));
      }

      /* If there was already a relative address register involved, add the
       * new and the old together to get the new offset.
       */
      if (src.reladdr != NULL) {
	 src_reg accum_reg = get_temp(glsl_type::float_type);

	 emit(ir, OPCODE_ADD, dst_reg(accum_reg),
	      index_reg, *src.reladdr);

	 index_reg = accum_reg;
      }

      /* reladdr lives in mem_ctx so the src_reg can carry it by pointer. */
      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = SWIZZLE_NOOP;

   this->result = src;
}
||
1666 | |||
1667 | void |
||
1668 | ir_to_mesa_visitor::visit(ir_dereference_record *ir) |
||
1669 | { |
||
1670 | unsigned int i; |
||
1671 | const glsl_type *struct_type = ir->record->type; |
||
1672 | int offset = 0; |
||
1673 | |||
1674 | ir->record->accept(this); |
||
1675 | |||
1676 | for (i = 0; i < struct_type->length; i++) { |
||
1677 | if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) |
||
1678 | break; |
||
1679 | offset += type_size(struct_type->fields.structure[i].type); |
||
1680 | } |
||
1681 | |||
1682 | /* If the type is smaller than a vec4, replicate the last channel out. */ |
||
1683 | if (ir->type->is_scalar() || ir->type->is_vector()) |
||
1684 | this->result.swizzle = swizzle_for_size(ir->type->vector_elements); |
||
1685 | else |
||
1686 | this->result.swizzle = SWIZZLE_NOOP; |
||
1687 | |||
1688 | this->result.index += offset; |
||
1689 | } |
||
1690 | |||
1691 | /** |
||
1692 | * We want to be careful in assignment setup to hit the actual storage |
||
1693 | * instead of potentially using a temporary like we might with the |
||
1694 | * ir_dereference handler. |
||
1695 | */ |
||
1696 | static dst_reg |
||
1697 | get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v) |
||
1698 | { |
||
1699 | /* The LHS must be a dereference. If the LHS is a variable indexed array |
||
1700 | * access of a vector, it must be separated into a series conditional moves |
||
1701 | * before reaching this point (see ir_vec_index_to_cond_assign). |
||
1702 | */ |
||
1703 | assert(ir->as_dereference()); |
||
1704 | ir_dereference_array *deref_array = ir->as_dereference_array(); |
||
1705 | if (deref_array) { |
||
1706 | assert(!deref_array->array->type->is_vector()); |
||
1707 | } |
||
1708 | |||
1709 | /* Use the rvalue deref handler for the most part. We'll ignore |
||
1710 | * swizzles in it and write swizzles using writemask, though. |
||
1711 | */ |
||
1712 | ir->accept(v); |
||
1713 | return dst_reg(v->result); |
||
1714 | } |
||
1715 | |||
/**
 * Process the condition of a conditional assignment
 *
 * Examines the condition of a conditional assignment to generate the optimal
 * first operand of a \c CMP instruction.  If the condition is a relational
 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
 * used as the source for the \c CMP instruction.  Otherwise the comparison
 * is processed to a boolean result, and the boolean result is used as the
 * operand to the CMP instruction.
 *
 * \return true when the caller must swap the other two CMP operands to
 *         preserve the comparison's meaning (see table below); the condition
 *         source itself is left in this->result.
 */
bool
ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
{
   ir_rvalue *src_ir = ir;
   bool negate = true;
   bool switch_order = false;

   ir_expression *const expr = ir->as_expression();
   /* Only binary comparisons against a literal zero can skip materializing
    * a boolean; anything else falls through with src_ir == ir.
    */
   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
      bool zero_on_left = false;

      if (expr->operands[0]->is_zero()) {
	 src_ir = expr->operands[1];
	 zero_on_left = true;
      } else if (expr->operands[1]->is_zero()) {
	 src_ir = expr->operands[0];
	 zero_on_left = false;
      }

      /* a is -  0 +            -  0 +
       * (a <  0)  T  F  F  ( a < 0)  T  F  F
       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
       * (a >  0)  F  F  T  (-a < 0)  F  F  T
       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
       *
       * Note that exchanging the order of 0 and 'a' in the comparison simply
       * means that the value of 'a' should be negated.
       */
      if (src_ir != ir) {
	 switch (expr->operation) {
	 case ir_binop_less:
	    switch_order = false;
	    negate = zero_on_left;
	    break;

	 case ir_binop_greater:
	    switch_order = false;
	    negate = !zero_on_left;
	    break;

	 case ir_binop_lequal:
	    switch_order = true;
	    negate = !zero_on_left;
	    break;

	 case ir_binop_gequal:
	    switch_order = true;
	    negate = zero_on_left;
	    break;

	 default:
	    /* This isn't the right kind of comparison afterall, so make sure
	     * the whole condition is visited.
	     */
	    src_ir = ir;
	    break;
	 }
      }
   }

   src_ir->accept(this);

   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
    * choose which value OPCODE_CMP produces without an extra instruction
    * computing the condition.
    */
   if (negate)
      this->result.negate = ~this->result.negate;

   return switch_order;
}
||
1802 | |||
1803 | void |
||
1804 | ir_to_mesa_visitor::visit(ir_assignment *ir) |
||
1805 | { |
||
1806 | dst_reg l; |
||
1807 | src_reg r; |
||
1808 | int i; |
||
1809 | |||
1810 | ir->rhs->accept(this); |
||
1811 | r = this->result; |
||
1812 | |||
1813 | l = get_assignment_lhs(ir->lhs, this); |
||
1814 | |||
1815 | /* FINISHME: This should really set to the correct maximal writemask for each |
||
1816 | * FINISHME: component written (in the loops below). This case can only |
||
1817 | * FINISHME: occur for matrices, arrays, and structures. |
||
1818 | */ |
||
1819 | if (ir->write_mask == 0) { |
||
1820 | assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); |
||
1821 | l.writemask = WRITEMASK_XYZW; |
||
1822 | } else if (ir->lhs->type->is_scalar()) { |
||
1823 | /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the |
||
1824 | * FINISHME: W component of fragment shader output zero, work correctly. |
||
1825 | */ |
||
1826 | l.writemask = WRITEMASK_XYZW; |
||
1827 | } else { |
||
1828 | int swizzles[4]; |
||
1829 | int first_enabled_chan = 0; |
||
1830 | int rhs_chan = 0; |
||
1831 | |||
1832 | assert(ir->lhs->type->is_vector()); |
||
1833 | l.writemask = ir->write_mask; |
||
1834 | |||
1835 | for (int i = 0; i < 4; i++) { |
||
1836 | if (l.writemask & (1 << i)) { |
||
1837 | first_enabled_chan = GET_SWZ(r.swizzle, i); |
||
1838 | break; |
||
1839 | } |
||
1840 | } |
||
1841 | |||
1842 | /* Swizzle a small RHS vector into the channels being written. |
||
1843 | * |
||
1844 | * glsl ir treats write_mask as dictating how many channels are |
||
1845 | * present on the RHS while Mesa IR treats write_mask as just |
||
1846 | * showing which channels of the vec4 RHS get written. |
||
1847 | */ |
||
1848 | for (int i = 0; i < 4; i++) { |
||
1849 | if (l.writemask & (1 << i)) |
||
1850 | swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); |
||
1851 | else |
||
1852 | swizzles[i] = first_enabled_chan; |
||
1853 | } |
||
1854 | r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], |
||
1855 | swizzles[2], swizzles[3]); |
||
1856 | } |
||
1857 | |||
1858 | assert(l.file != PROGRAM_UNDEFINED); |
||
1859 | assert(r.file != PROGRAM_UNDEFINED); |
||
1860 | |||
1861 | if (ir->condition) { |
||
1862 | const bool switch_order = this->process_move_condition(ir->condition); |
||
1863 | src_reg condition = this->result; |
||
1864 | |||
1865 | for (i = 0; i < type_size(ir->lhs->type); i++) { |
||
1866 | if (switch_order) { |
||
1867 | emit(ir, OPCODE_CMP, l, condition, src_reg(l), r); |
||
1868 | } else { |
||
1869 | emit(ir, OPCODE_CMP, l, condition, r, src_reg(l)); |
||
1870 | } |
||
1871 | |||
1872 | l.index++; |
||
1873 | r.index++; |
||
1874 | } |
||
1875 | } else { |
||
1876 | for (i = 0; i < type_size(ir->lhs->type); i++) { |
||
1877 | emit(ir, OPCODE_MOV, l, r); |
||
1878 | l.index++; |
||
1879 | r.index++; |
||
1880 | } |
||
1881 | } |
||
1882 | } |
||
1883 | |||
1884 | |||
/**
 * Emit Mesa IR for a GLSL IR constant.
 *
 * Scalars/vectors become a single unnamed constant in the program's
 * parameter list.  Structures, arrays, and matrices are assembled into a
 * temporary register range via per-element MOVs, because
 * _mesa_add_unnamed_constant() can only take 4 floats at a time.
 */
void
ir_to_mesa_visitor::visit(ir_constant *ir)
{
   src_reg src;
   GLfloat stack_vals[4] = { 0 };
   GLfloat *values = stack_vals;
   unsigned int i;

   /* Unfortunately, 4 floats is all we can get into
    * _mesa_add_unnamed_constant.  So, make a temp to store an
    * aggregate constant and move each constant value into it.  If we
    * get lucky, copy propagation will eliminate the extra moves.
    */

   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      src_reg temp_base = get_temp(ir->type);
      dst_reg temp = dst_reg(temp_base);

      /* Recursively visit each field constant and pack it into the temp. */
      foreach_iter(exec_list_iterator, iter, ir->components) {
         ir_constant *field_value = (ir_constant *)iter.get();
         int size = type_size(field_value->type);

         assert(size > 0);

         field_value->accept(this);
         src = this->result;

         for (i = 0; i < (unsigned int)size; i++) {
            emit(ir, OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_array()) {
      src_reg temp_base = get_temp(ir->type);
      dst_reg temp = dst_reg(temp_base);
      int size = type_size(ir->type->fields.array);

      assert(size > 0);

      /* Visit each array element and copy its registers into the temp. */
      for (i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         src = this->result;
         for (int j = 0; j < size; j++) {
            emit(ir, OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_matrix()) {
      src_reg mat = get_temp(ir->type);
      dst_reg mat_column = dst_reg(mat);

      /* Upload one column at a time; matrices are always float in GLSL IR
       * here, so ir->value.f can be indexed column-major.
       */
      for (i = 0; i < ir->type->matrix_columns; i++) {
         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
         values = &ir->value.f[i * ir->type->vector_elements];

         src = src_reg(PROGRAM_CONSTANT, -1, NULL);
         src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
						(gl_constant_value *) values,
						ir->type->vector_elements,
						&src.swizzle);
         emit(ir, OPCODE_MOV, mat_column, src);

         mat_column.index++;
      }

      this->result = mat;
      return;
   }

   /* Scalar/vector case: convert the payload to floats (Mesa IR parameter
    * storage is float-typed here) and add a single unnamed constant.
    */
   src.file = PROGRAM_CONSTANT;
   switch (ir->type->base_type) {
   case GLSL_TYPE_FLOAT:
      values = &ir->value.f[0];
      break;
   case GLSL_TYPE_UINT:
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i] = ir->value.u[i];
      }
      break;
   case GLSL_TYPE_INT:
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i] = ir->value.i[i];
      }
      break;
   case GLSL_TYPE_BOOL:
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i] = ir->value.b[i];
      }
      break;
   default:
      assert(!"Non-float/uint/int/bool constant");
   }

   this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
   this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
						   (gl_constant_value *) values,
						   ir->type->vector_elements,
						   &this->result.swizzle);
}
||
1996 | |||
1997 | void |
||
1998 | ir_to_mesa_visitor::visit(ir_call *ir) |
||
1999 | { |
||
2000 | assert(!"ir_to_mesa: All function calls should have been inlined by now."); |
||
2001 | } |
||
2002 | |||
/**
 * Emit Mesa IR for a texture operation.
 *
 * Copies the coordinate into a temp (it may be rewritten for projection,
 * shadow comparison, or LOD), selects the TEX/TXB/TXL/TXD/TXP opcode, and
 * fills in the sampler unit and texture target on the emitted instruction.
 */
void
ir_to_mesa_visitor::visit(ir_texture *ir)
{
   src_reg result_src, coord, lod_info, projector, dx, dy;
   dst_reg result_dst, coord_dst;
   ir_to_mesa_instruction *inst = NULL;
   prog_opcode opcode = OPCODE_NOP;

   /* txs has no coordinate; a dummy 0.0 stands in so the MOV below is valid. */
   if (ir->op == ir_txs)
      this->result = src_reg_for_float(0.0);
   else
      ir->coordinate->accept(this);

   /* Put our coords in a temp.  We'll need to modify them for shadow,
    * projection, or LOD, so the only case we'd use it as is is if
    * we're doing plain old texturing.  Mesa IR optimization should
    * handle cleaning up our mess in that case.
    */
   coord = get_temp(glsl_type::vec4_type);
   coord_dst = dst_reg(coord);
   emit(ir, OPCODE_MOV, coord_dst, this->result);

   if (ir->projector) {
      ir->projector->accept(this);
      projector = this->result;
   }

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(glsl_type::vec4_type);
   result_dst = dst_reg(result_src);

   /* Pick the opcode and evaluate any extra LOD/gradient arguments. */
   switch (ir->op) {
   case ir_tex:
   case ir_txs:
      opcode = OPCODE_TEX;
      break;
   case ir_txb:
      opcode = OPCODE_TXB;
      ir->lod_info.bias->accept(this);
      lod_info = this->result;
      break;
   case ir_txf:
      /* Pretend to be TXL so the sampler, coordinate, lod are available */
   case ir_txl:
      opcode = OPCODE_TXL;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      break;
   case ir_txd:
      opcode = OPCODE_TXD;
      ir->lod_info.grad.dPdx->accept(this);
      dx = this->result;
      ir->lod_info.grad.dPdy->accept(this);
      dy = this->result;
      break;
   case ir_txf_ms:
      assert(!"Unexpected ir_txf_ms opcode");
      break;
   case ir_lod:
      assert(!"Unexpected ir_lod opcode");
      break;
   }

   const glsl_type *sampler_type = ir->sampler->type;

   if (ir->projector) {
      if (opcode == OPCODE_TEX) {
         /* Slot the projector in as the last component of the coord. */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, OPCODE_MOV, coord_dst, projector);
         coord_dst.writemask = WRITEMASK_XYZW;
         opcode = OPCODE_TXP;
      } else {
         src_reg coord_w = coord;
         coord_w.swizzle = SWIZZLE_WWWW;

         /* For the other TEX opcodes there's no projective version
          * since the last slot is taken up by lod info.  Do the
          * projective divide now.
          */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, OPCODE_RCP, coord_dst, projector);

         /* In the case where we have to project the coordinates "by hand,"
          * the shadow comparitor value must also be projected.
          */
         src_reg tmp_src = coord;
         if (ir->shadow_comparitor) {
            /* Slot the shadow value in as the second to last component of the
             * coord.
             */
            ir->shadow_comparitor->accept(this);

            tmp_src = get_temp(glsl_type::vec4_type);
            dst_reg tmp_dst = dst_reg(tmp_src);

	    /* Projective division not allowed for array samplers. */
	    assert(!sampler_type->sampler_array);

            tmp_dst.writemask = WRITEMASK_Z;
            emit(ir, OPCODE_MOV, tmp_dst, this->result);

            tmp_dst.writemask = WRITEMASK_XY;
            emit(ir, OPCODE_MOV, tmp_dst, coord);
         }

         /* Multiply xyz by 1/w; w keeps the RCP result (harmless). */
         coord_dst.writemask = WRITEMASK_XYZ;
         emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);

         coord_dst.writemask = WRITEMASK_XYZW;
         coord.swizzle = SWIZZLE_XYZW;
      }
   }

   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
    * comparitor was put in the correct place (and projected) by the code,
    * above, that handles by-hand projection.
    */
   if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
      /* Slot the shadow value in as the second to last component of the
       * coord.
       */
      ir->shadow_comparitor->accept(this);

      /* XXX This will need to be updated for cubemap array samplers. */
      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
          sampler_type->sampler_array) {
         coord_dst.writemask = WRITEMASK_W;
      } else {
         coord_dst.writemask = WRITEMASK_Z;
      }

      emit(ir, OPCODE_MOV, coord_dst, this->result);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
      coord_dst.writemask = WRITEMASK_W;
      emit(ir, OPCODE_MOV, coord_dst, lod_info);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   if (opcode == OPCODE_TXD)
      inst = emit(ir, opcode, result_dst, coord, dx, dy);
   else
      inst = emit(ir, opcode, result_dst, coord);

   if (ir->shadow_comparitor)
      inst->tex_shadow = GL_TRUE;

   /* Resolve the sampler uniform to a texture unit index. */
   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
						   this->shader_program,
						   this->prog);

   switch (sampler_type->sampler_dimensionality) {
   case GLSL_SAMPLER_DIM_1D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_2D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_3D:
      inst->tex_target = TEXTURE_3D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      inst->tex_target = TEXTURE_CUBE_INDEX;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      inst->tex_target = TEXTURE_RECT_INDEX;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      assert(!"FINISHME: Implement ARB_texture_buffer_object");
      break;
   case GLSL_SAMPLER_DIM_EXTERNAL:
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
      break;
   default:
      assert(!"Should not get here.");
   }

   this->result = result_src;
}
||
2190 | |||
/**
 * Emit a RET for a return statement.
 *
 * Only value-less returns can appear here: non-void functions were inlined
 * before this visitor runs.
 */
void
ir_to_mesa_visitor::visit(ir_return *ir)
{
   /* Non-void functions should have been inlined.  We may still emit RETs
    * from main() unless the EmitNoMainReturn option is set.
    */
   assert(!ir->get_value());
   emit(ir, OPCODE_RET);
}
||
2200 | |||
/**
 * Emit a fragment kill for a discard statement.
 *
 * Conditional discards use OPCODE_KIL, which kills on negative channel
 * values, so the 0.0/1.0 condition is sign-flipped first.  Unconditional
 * discards use OPCODE_KIL_NV.
 */
void
ir_to_mesa_visitor::visit(ir_discard *ir)
{
   if (ir->condition) {
      ir->condition->accept(this);
      /* Negate so "true" (1.0) becomes negative and triggers the kill. */
      this->result.negate = ~this->result.negate;
      emit(ir, OPCODE_KIL, undef_dst, this->result);
   } else {
      emit(ir, OPCODE_KIL_NV);
   }
}
||
2212 | |||
2213 | void |
||
2214 | ir_to_mesa_visitor::visit(ir_if *ir) |
||
2215 | { |
||
2216 | ir_to_mesa_instruction *cond_inst, *if_inst; |
||
2217 | ir_to_mesa_instruction *prev_inst; |
||
2218 | |||
2219 | prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); |
||
2220 | |||
2221 | ir->condition->accept(this); |
||
2222 | assert(this->result.file != PROGRAM_UNDEFINED); |
||
2223 | |||
2224 | if (this->options->EmitCondCodes) { |
||
2225 | cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); |
||
2226 | |||
2227 | /* See if we actually generated any instruction for generating |
||
2228 | * the condition. If not, then cook up a move to a temp so we |
||
2229 | * have something to set cond_update on. |
||
2230 | */ |
||
2231 | if (cond_inst == prev_inst) { |
||
2232 | src_reg temp = get_temp(glsl_type::bool_type); |
||
2233 | cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result); |
||
2234 | } |
||
2235 | cond_inst->cond_update = GL_TRUE; |
||
2236 | |||
2237 | if_inst = emit(ir->condition, OPCODE_IF); |
||
2238 | if_inst->dst.cond_mask = COND_NE; |
||
2239 | } else { |
||
2240 | if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); |
||
2241 | } |
||
2242 | |||
2243 | this->instructions.push_tail(if_inst); |
||
2244 | |||
2245 | visit_exec_list(&ir->then_instructions, this); |
||
2246 | |||
2247 | if (!ir->else_instructions.is_empty()) { |
||
2248 | emit(ir->condition, OPCODE_ELSE); |
||
2249 | visit_exec_list(&ir->else_instructions, this); |
||
2250 | } |
||
2251 | |||
2252 | if_inst = emit(ir->condition, OPCODE_ENDIF); |
||
2253 | } |
||
2254 | |||
/**
 * Constructor: start with no result, fresh temp/signature counters, and a
 * private ralloc context that owns all visitor allocations.
 */
ir_to_mesa_visitor::ir_to_mesa_visitor()
{
   result.file = PROGRAM_UNDEFINED;
   /* Temp 0 is reserved; allocation starts at 1. */
   next_temp = 1;
   next_signature_id = 1;
   current_function = NULL;
   mem_ctx = ralloc_context(NULL);
}
||
2263 | |||
/** Destructor: releases every allocation made under the visitor's context. */
ir_to_mesa_visitor::~ir_to_mesa_visitor()
{
   ralloc_free(mem_ctx);
}
||
2268 | |||
/**
 * Convert the visitor's src_reg representation into Mesa's
 * prog_src_register, filling the fields this backend never uses (Abs,
 * two-level indirection) with their inactive values.
 */
static struct prog_src_register
mesa_src_reg_from_ir_src_reg(src_reg reg)
{
   struct prog_src_register mesa_reg;

   mesa_reg.File = reg.file;
   /* The hardware instruction encoding limits register indices. */
   assert(reg.index < (1 << INST_INDEX_BITS));
   mesa_reg.Index = reg.index;
   mesa_reg.Swizzle = reg.swizzle;
   mesa_reg.RelAddr = reg.reladdr != NULL;
   mesa_reg.Negate = reg.negate;
   mesa_reg.Abs = 0;
   mesa_reg.HasIndex2 = GL_FALSE;
   mesa_reg.RelAddr2 = 0;
   mesa_reg.Index2 = 0;

   return mesa_reg;
}
||
2287 | |||
/**
 * Resolve BranchTarget fields on the flat Mesa instruction array.
 *
 * First pass counts IF/loop nesting (and marks BRK/CONT unresolved with -1);
 * second pass uses explicit stacks to link IF->ELSE->ENDIF, loop begin/end
 * pairs, pending BRK/CONT instructions, and CAL targets from the recorded
 * function signatures.
 */
static void
set_branchtargets(ir_to_mesa_visitor *v,
		  struct prog_instruction *mesa_instructions,
		  int num_instructions)
{
   int if_count = 0, loop_count = 0;
   int *if_stack, *loop_stack;
   int if_stack_pos = 0, loop_stack_pos = 0;
   int i, j;

   /* Pass 1: size the stacks and flag breaks/continues as unresolved. */
   for (i = 0; i < num_instructions; i++) {
      switch (mesa_instructions[i].Opcode) {
      case OPCODE_IF:
	 if_count++;
	 break;
      case OPCODE_BGNLOOP:
	 loop_count++;
	 break;
      case OPCODE_BRK:
      case OPCODE_CONT:
	 mesa_instructions[i].BranchTarget = -1;
	 break;
      default:
	 break;
      }
   }

   if_stack = rzalloc_array(v->mem_ctx, int, if_count);
   loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);

   /* Pass 2: match structured-control-flow pairs via the stacks. */
   for (i = 0; i < num_instructions; i++) {
      switch (mesa_instructions[i].Opcode) {
      case OPCODE_IF:
	 if_stack[if_stack_pos] = i;
	 if_stack_pos++;
	 break;
      case OPCODE_ELSE:
	 /* IF jumps to the ELSE; the ELSE then waits for its ENDIF. */
	 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
	 if_stack[if_stack_pos - 1] = i;
	 break;
      case OPCODE_ENDIF:
	 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
	 if_stack_pos--;
	 break;
      case OPCODE_BGNLOOP:
	 loop_stack[loop_stack_pos] = i;
	 loop_stack_pos++;
	 break;
      case OPCODE_ENDLOOP:
	 loop_stack_pos--;
	 /* Rewrite any breaks/conts at this nesting level (haven't
	  * already had a BranchTarget assigned) to point to the end
	  * of the loop.
	  */
	 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
	    if (mesa_instructions[j].Opcode == OPCODE_BRK ||
		mesa_instructions[j].Opcode == OPCODE_CONT) {
	       if (mesa_instructions[j].BranchTarget == -1) {
		  mesa_instructions[j].BranchTarget = i;
	       }
	    }
	 }
	 /* The loop ends point at each other. */
	 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
	 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
	 break;
      case OPCODE_CAL:
	 /* CAL was emitted with the callee's signature id; resolve it to the
	  * callee's first instruction.
	  */
	 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
	    function_entry *entry = (function_entry *)iter.get();

	    if (entry->sig_id == mesa_instructions[i].BranchTarget) {
	       mesa_instructions[i].BranchTarget = entry->inst;
	       break;
	    }
	 }
	 break;
      default:
	 break;
      }
   }
}
||
2369 | |||
2370 | static void |
||
2371 | print_program(struct prog_instruction *mesa_instructions, |
||
2372 | ir_instruction **mesa_instruction_annotation, |
||
2373 | int num_instructions) |
||
2374 | { |
||
2375 | ir_instruction *last_ir = NULL; |
||
2376 | int i; |
||
2377 | int indent = 0; |
||
2378 | |||
2379 | for (i = 0; i < num_instructions; i++) { |
||
2380 | struct prog_instruction *mesa_inst = mesa_instructions + i; |
||
2381 | ir_instruction *ir = mesa_instruction_annotation[i]; |
||
2382 | |||
2383 | fprintf(stdout, "%3d: ", i); |
||
2384 | |||
2385 | if (last_ir != ir && ir) { |
||
2386 | int j; |
||
2387 | |||
2388 | for (j = 0; j < indent; j++) { |
||
2389 | fprintf(stdout, " "); |
||
2390 | } |
||
2391 | ir->print(); |
||
2392 | printf("\n"); |
||
2393 | last_ir = ir; |
||
2394 | |||
2395 | fprintf(stdout, " "); /* line number spacing. */ |
||
2396 | } |
||
2397 | |||
2398 | indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, |
||
2399 | PROG_PRINT_DEBUG, NULL); |
||
2400 | } |
||
2401 | } |
||
2402 | |||
/**
 * Resource visitor that adds each leaf field of a uniform to a program
 * parameter list, recording the first parameter index so process() can
 * store it as the variable's base location.
 */
class add_uniform_to_shader : public program_resource_visitor {
public:
   add_uniform_to_shader(struct gl_shader_program *shader_program,
			 struct gl_program_parameter_list *params,
                         gl_shader_type shader_type)
      : shader_program(shader_program), params(params), idx(-1),
        shader_type(shader_type)
   {
      /* empty */
   }

   /* Walk one uniform variable's type tree; afterwards var->location holds
    * the parameter index of its first field (or -1 if none were added).
    */
   void process(ir_variable *var)
   {
      this->idx = -1;
      this->program_resource_visitor::process(var);

      var->location = this->idx;
   }

private:
   virtual void visit_field(const glsl_type *type, const char *name,
                            bool row_major);

   struct gl_shader_program *shader_program;  /* for UniformStorage lookups */
   struct gl_program_parameter_list *params;  /* destination parameter list */
   int idx;             /* parameter index of the first field visited */
   gl_shader_type shader_type;  /* selects the per-stage sampler table */
};
||
2431 | |||
/**
 * Add one leaf field of a uniform to the parameter list.
 *
 * Vectors/scalars take their component count as the size; everything else
 * is sized in float components (type_size() counts vec4 registers, hence
 * the * 4).  Sampler parameters additionally get their texture-unit index
 * written into ParameterValues so the driver can read it directly.
 */
void
add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
                                   bool row_major)
{
   unsigned int size;

   (void) row_major;

   if (type->is_vector() || type->is_scalar()) {
      size = type->vector_elements;
   } else {
      size = type_size(type) * 4;
   }

   gl_register_file file;
   if (type->is_sampler() ||
       (type->is_array() && type->fields.array->is_sampler())) {
      file = PROGRAM_SAMPLER;
   } else {
      file = PROGRAM_UNIFORM;
   }

   /* A field may have been added already by another stage; reuse its slot. */
   int index = _mesa_lookup_parameter_index(params, -1, name);
   if (index < 0) {
      index = _mesa_add_parameter(params, file, name, size, type->gl_type,
				  NULL, NULL);

      /* Sampler uniform values are stored in prog->SamplerUnits,
       * and the entry in that array is selected by this index we
       * store in ParameterValues[].
       */
      if (file == PROGRAM_SAMPLER) {
	 unsigned location;
	 const bool found =
	    this->shader_program->UniformHash->get(location,
						   params->Parameters[index].Name);
	 assert(found);

	 /* Defensive bail-out for release builds where assert is a no-op. */
	 if (!found)
	    return;

	 struct gl_uniform_storage *storage =
	    &this->shader_program->UniformStorage[location];

	 assert(storage->sampler[shader_type].active);

	 /* size / 4 is the element count for sampler arrays (one register
	  * per element); each element gets consecutive sampler indices.
	  */
	 for (unsigned int j = 0; j < size / 4; j++)
	    params->ParameterValues[index + j][0].f =
	       storage->sampler[shader_type].index + j;
      }
   }

   /* The first part of the uniform that's processed determines the base
    * location of the whole uniform (for structures).
    */
   if (this->idx < 0)
      this->idx = index;
}
||
2490 | |||
/**
 * Generate the program parameters list for the user uniforms in a shader
 *
 * \param shader_program Linked shader program.  This is only used to
 *                       emit possible link errors to the info log.
 * \param sh             Shader whose uniforms are to be processed.
 * \param params         Parameter list to be filled in.
 *
 * Built-in ("gl_"-prefixed) uniforms and uniforms inside uniform blocks are
 * skipped; only plain user uniforms are added.
 */
void
_mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
					    *shader_program,
					    struct gl_shader *sh,
					    struct gl_program_parameter_list
					    *params)
{
   add_uniform_to_shader add(shader_program, params,
                             _mesa_shader_type_to_index(sh->Type));

   foreach_list(node, sh->ir) {
      ir_variable *var = ((ir_instruction *) node)->as_variable();

      if ((var == NULL) || (var->mode != ir_var_uniform)
	  || var->is_in_uniform_block() || (strncmp(var->name, "gl_", 3) == 0))
	 continue;

      add.process(var);
   }
}
||
2519 | |||
/**
 * Connect each PROGRAM_UNIFORM parameter's storage with the linker's
 * gl_uniform_storage tracking structure, choosing the driver-side data
 * format from the uniform's base type, then propagate any initializer
 * values into the driver storage.
 */
void
_mesa_associate_uniform_storage(struct gl_context *ctx,
				struct gl_shader_program *shader_program,
				struct gl_program_parameter_list *params)
{
   /* After adding each uniform to the parameter list, connect the storage for
    * the parameter with the tracking structure used by the API for the
    * uniform.
    */
   unsigned last_location = unsigned(~0);
   for (unsigned i = 0; i < params->NumParameters; i++) {
      if (params->Parameters[i].Type != PROGRAM_UNIFORM)
	 continue;

      unsigned location;
      const bool found =
	 shader_program->UniformHash->get(location, params->Parameters[i].Name);
      assert(found);

      /* Defensive skip for release builds where assert compiles out. */
      if (!found)
	 continue;

      /* Consecutive parameters of a multi-register uniform share one
       * storage; only attach once per location.
       */
      if (location != last_location) {
	 struct gl_uniform_storage *storage =
	    &shader_program->UniformStorage[location];
	 enum gl_uniform_driver_format format = uniform_native;

	 unsigned columns = 0;
	 switch (storage->type->base_type) {
	 case GLSL_TYPE_UINT:
	    assert(ctx->Const.NativeIntegers);
	    format = uniform_native;
	    columns = 1;
	    break;
	 case GLSL_TYPE_INT:
	    /* Without native integer support, ints are stored as floats. */
	    format =
	       (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
	    columns = 1;
	    break;
	 case GLSL_TYPE_FLOAT:
	    format = uniform_native;
	    columns = storage->type->matrix_columns;
	    break;
	 case GLSL_TYPE_BOOL:
	    if (ctx->Const.NativeIntegers) {
	       format = (ctx->Const.UniformBooleanTrue == 1)
		  ? uniform_bool_int_0_1 : uniform_bool_int_0_not0;
	    } else {
	       format = uniform_bool_float;
	    }
	    columns = 1;
	    break;
	 case GLSL_TYPE_SAMPLER:
	    format = uniform_native;
	    columns = 1;
	    break;
	 case GLSL_TYPE_ARRAY:
	 case GLSL_TYPE_VOID:
	 case GLSL_TYPE_STRUCT:
	 case GLSL_TYPE_ERROR:
	 case GLSL_TYPE_INTERFACE:
	    assert(!"Should not get here.");
	    break;
	 }

	 _mesa_uniform_attach_driver_storage(storage,
					     4 * sizeof(float) * columns,
					     4 * sizeof(float),
					     format,
					     &params->ParameterValues[i]);

	 /* After attaching the driver's storage to the uniform, propagate any
	  * data from the linker's backing store.  This will cause values from
	  * initializers in the source code to be copied over.
	  */
	 _mesa_propagate_uniforms_to_driver_storage(storage,
						    0,
						    MAX2(1, storage->array_elements));

	 last_location = location;
      }
   }
}
||
2603 | |||
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The ir_to_mesa_visitor lazily produces code assuming that this pass
 * will occur. As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 */
void
ir_to_mesa_visitor::copy_propagate(void)
{
   /* ACP ("available copy propagation") table: one slot per channel of
    * every temporary register (4 * next_temp entries).  A non-NULL slot
    * points at the MOV instruction whose written value is still live in
    * that channel.  rzalloc_array zero-fills, so all slots start empty.
    */
   ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
                                                ir_to_mesa_instruction *,
                                                this->next_temp * 4);
   /* IF-nesting depth at which each ACP entry was installed; used below to
    * invalidate entries created inside a conditionally-executed block.
    */
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();

      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
         ir_to_mesa_instruction *first = NULL;
         bool good = true;
         int acp_base = inst->src[r].index * 4;

         /* Only direct temporary reads can be propagated; a relative
          * address could resolve to any register.
          */
         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               /* All referenced channels must come from one register so
                * the whole source operand can be rewritten at once.
                */
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;

            /* Compose swizzles: channel i now reads whichever channel of
             * the copy's source originally fed the channel this operand
             * was selecting.
             */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
                           (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      /* Now update the ACP to reflect this instruction's effect. */
      switch (inst->op) {
      case OPCODE_BGNLOOP:
      case OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case OPCODE_IF:
         ++level;
         break;

      case OPCODE_ENDIF:
      case OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         /* ELSE keeps the same depth: entries added in the then-branch were
          * just flushed, and the else-branch is equally conditional.
          */
         if (inst->op == OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
            /* Any temporary might be written, so no copy propagation
             * across this instruction.
             */
            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         } else if (inst->dst.file == PROGRAM_OUTPUT &&
                    inst->dst.reladdr) {
            /* Any output might be written, so no copy propagation
             * from outputs across this instruction.
             */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                     acp[4 * r + c] = NULL;
               }
            }
         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
                    inst->dst.file == PROGRAM_OUTPUT) {
            /* Clear where it's used as dst. */
            if (inst->dst.file == PROGRAM_TEMPORARY) {
               for (int c = 0; c < 4; c++) {
                  if (inst->dst.writemask & (1 << c)) {
                     acp[4 * inst->dst.index + c] = NULL;
                  }
               }
            }

            /* Clear where it's used as src: an ACP entry whose MOV read a
             * channel this instruction overwrites no longer describes the
             * current value.
             */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
                      acp[4 * r + c]->src[0].index == inst->dst.index &&
                      inst->dst.writemask & (1 << src_chan))
                  {
                     acp[4 * r + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP.  Self-copies, relative
       * addressing, saturation, and negation all change or obscure the
       * value, so such MOVs are not propagatable.
       */
      if (inst->op == OPCODE_MOV &&
          inst->dst.file == PROGRAM_TEMPORARY &&
          !(inst->dst.file == inst->src[0].file &&
            inst->dst.index == inst->src[0].index) &&
          !inst->dst.reladdr &&
          !inst->saturate &&
          !inst->src[0].reladdr &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
               acp[4 * inst->dst.index + i] = inst;
               acp_level[4 * inst->dst.index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}
||
2797 | |||
2798 | |||
/**
 * Convert a shader's GLSL IR into a Mesa gl_program.
 *
 * Runs the ir_to_mesa_visitor over the shader's IR, copy-propagates the
 * resulting instruction list, and converts it into the driver-consumable
 * prog_instruction array.  On failure (link error reported during
 * translation, or by _mesa_associate_uniform_storage) the partially built
 * program is released and NULL is returned.
 */
static struct gl_program *
get_mesa_program(struct gl_context *ctx,
                 struct gl_shader_program *shader_program,
                 struct gl_shader *shader)
{
   ir_to_mesa_visitor v;
   struct prog_instruction *mesa_instructions, *mesa_inst;
   ir_instruction **mesa_instruction_annotation;
   int i;
   struct gl_program *prog;
   GLenum target;
   const char *target_string = _mesa_glsl_shader_target_name(shader->Type);
   struct gl_shader_compiler_options *options =
         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];

   /* Map the GLSL shader stage onto the ARB/NV program target used by
    * the gl_program machinery.
    */
   switch (shader->Type) {
   case GL_VERTEX_SHADER:
      target = GL_VERTEX_PROGRAM_ARB;
      break;
   case GL_FRAGMENT_SHADER:
      target = GL_FRAGMENT_PROGRAM_ARB;
      break;
   case GL_GEOMETRY_SHADER:
      target = GL_GEOMETRY_PROGRAM_NV;
      break;
   default:
      assert(!"should not be reached");
      return NULL;
   }

   validate_ir_tree(shader->ir);

   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
   if (!prog)
      return NULL;
   prog->Parameters = _mesa_new_parameter_list();
   v.ctx = ctx;
   v.prog = prog;
   v.shader_program = shader_program;
   v.options = options;

   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
					       prog->Parameters);

   /* Emit Mesa IR for main(). */
   visit_exec_list(shader->ir, &v);
   v.emit(NULL, OPCODE_END);

   prog->NumTemporaries = v.next_temp;

   /* Count the emitted instructions to size the prog_instruction array. */
   int num_instructions = 0;
   foreach_iter(exec_list_iterator, iter, v.instructions) {
      num_instructions++;
   }

   /* The instruction array is malloc'd (calloc) because ownership passes to
    * prog->Instructions, which is freed with the program, not with v.mem_ctx.
    */
   mesa_instructions =
      (struct prog_instruction *)calloc(num_instructions,
					sizeof(*mesa_instructions));
   mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
					      num_instructions);

   v.copy_propagate();

   /* Convert ir_mesa_instructions into prog_instructions.
    */
   mesa_inst = mesa_instructions;
   i = 0;
   foreach_iter(exec_list_iterator, iter, v.instructions) {
      const ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();

      mesa_inst->Opcode = inst->op;
      mesa_inst->CondUpdate = inst->cond_update;
      if (inst->saturate)
	 mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
      mesa_inst->DstReg.File = inst->dst.file;
      mesa_inst->DstReg.Index = inst->dst.index;
      mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
      mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
      mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
      mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
      mesa_inst->TexSrcUnit = inst->sampler;
      mesa_inst->TexSrcTarget = inst->tex_target;
      mesa_inst->TexShadow = inst->tex_shadow;
      /* Keep the originating IR node around for annotated dumps below. */
      mesa_instruction_annotation[i] = inst->ir;

      /* Set IndirectRegisterFiles. */
      if (mesa_inst->DstReg.RelAddr)
         prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;

      /* Update program's bitmask of indirectly accessed register files */
      for (unsigned src = 0; src < 3; src++)
         if (mesa_inst->SrcReg[src].RelAddr)
            prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;

      /* Warn about constructs the lowering passes were asked to remove but
       * could not; these typically force software rasterization.
       */
      switch (mesa_inst->Opcode) {
      case OPCODE_IF:
	 if (options->MaxIfDepth == 0) {
	    linker_warning(shader_program,
			   "Couldn't flatten if-statement.  "
			   "This will likely result in software "
			   "rasterization.\n");
	 }
	 break;
      case OPCODE_BGNLOOP:
	 if (options->EmitNoLoops) {
	    linker_warning(shader_program,
			   "Couldn't unroll loop.  "
			   "This will likely result in software "
			   "rasterization.\n");
	 }
	 break;
      case OPCODE_CONT:
	 if (options->EmitNoCont) {
	    linker_warning(shader_program,
			   "Couldn't lower continue-statement.  "
			   "This will likely result in software "
			   "rasterization.\n");
	 }
	 break;
      case OPCODE_ARL:
	 prog->NumAddressRegs = 1;
	 break;
      default:
	 break;
      }

      mesa_inst++;
      i++;

      /* A linker error may have been flagged during conversion; stop early. */
      if (!shader_program->LinkStatus)
         break;
   }

   if (!shader_program->LinkStatus) {
      goto fail_exit;
   }

   set_branchtargets(&v, mesa_instructions, num_instructions);

   if (ctx->Shader.Flags & GLSL_DUMP) {
      printf("\n");
      printf("GLSL IR for linked %s program %d:\n", target_string,
	     shader_program->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
      printf("\n");
      printf("Mesa IR for linked %s program %d:\n", target_string,
	     shader_program->Name);
      print_program(mesa_instructions, mesa_instruction_annotation,
		    num_instructions);
   }

   prog->Instructions = mesa_instructions;
   prog->NumInstructions = num_instructions;

   /* Setting this to NULL prevents a possible double free in the fail_exit
    * path (far below).
    */
   mesa_instructions = NULL;

   do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);

   prog->SamplersUsed = shader->active_samplers;
   prog->ShadowSamplers = shader->shadow_samplers;
   _mesa_update_shader_textures_used(shader_program, prog);

   /* Set the gl_FragDepth layout. */
   if (target == GL_FRAGMENT_PROGRAM_ARB) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
      fp->FragDepthLayout = shader_program->FragDepthLayout;
   }

   _mesa_reference_program(ctx, &shader->Program, prog);

   if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
      _mesa_optimize_program(ctx, prog);
   }

   /* This has to be done last.  Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
   if (!shader_program->LinkStatus) {
      goto fail_exit;
   }

   return prog;

fail_exit:
   /* free(NULL) is a no-op when ownership already moved to prog above. */
   free(mesa_instructions);
   _mesa_reference_program(ctx, &shader->Program, NULL);
   return NULL;
}
||
2998 | |||
2999 | extern "C" { |
||
3000 | |||
/**
 * Link a shader.
 * Called via ctx->Driver.LinkShader()
 * This actually involves converting GLSL IR into Mesa gl_programs with
 * code lowering and other optimizations.
 *
 * \return GL_FALSE if the driver rejects any translated program via
 *         ProgramStringNotify, otherwise the program's LinkStatus.
 */
GLboolean
_mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   assert(prog->LinkStatus);

   /* First pass: lower and optimize each linked stage's IR to a fixpoint,
    * so that get_mesa_program() below only sees constructs Mesa IR (and the
    * per-stage compiler options) can express.
    */
   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
	 continue;

      bool progress;
      exec_list *ir = prog->_LinkedShaders[i]->ir;
      const struct gl_shader_compiler_options *options =
            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];

      /* Iterate because one pass's rewrite can expose work for another
       * (e.g. loop unrolling exposing constant folding).
       */
      do {
	 progress = false;

	 /* Lowering */
	 do_mat_op_to_vec(ir);
	 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));

	 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;

	 progress = do_common_optimization(ir, true, true,
					   options->MaxUnrollIterations,
                                           options)
	   || progress;

	 progress = lower_quadop_vector(ir, true) || progress;

	 if (options->MaxIfDepth == 0)
	    progress = lower_discard(ir) || progress;

	 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;

	 if (options->EmitNoNoise)
	    progress = lower_noise(ir) || progress;

	 /* If there are forms of indirect addressing that the driver
	  * cannot handle, perform the lowering pass.
	  */
	 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
	     || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
	   progress =
	     lower_variable_index_to_cond_assign(ir,
						 options->EmitNoIndirectInput,
						 options->EmitNoIndirectOutput,
						 options->EmitNoIndirectTemp,
						 options->EmitNoIndirectUniform)
	     || progress;

	 progress = do_vec_index_to_cond_assign(ir) || progress;
         progress = lower_vector_insert(ir, true) || progress;
      } while (progress);

      validate_ir_tree(ir);
   }

   /* Second pass: translate each stage to a gl_program and hand it to the
    * driver.
    */
   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      struct gl_program *linked_prog;

      if (prog->_LinkedShaders[i] == NULL)
	 continue;

      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);

      if (linked_prog) {
         if (i == MESA_SHADER_VERTEX) {
            ((struct gl_vertex_program *)linked_prog)->UsesClipDistance
               = prog->Vert.UsesClipDistance;
         }

	 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
				 linked_prog);
         if (!ctx->Driver.ProgramStringNotify(ctx,
                                              _mesa_program_index_to_target(i),
                                              linked_prog)) {
            return GL_FALSE;
         }
      }

      /* Drop the local reference; the shader holds its own reference now
       * (or linked_prog is NULL and this is a no-op).
       */
      _mesa_reference_program(ctx, &linked_prog, NULL);
   }

   return prog->LinkStatus;
}
||
3095 | |||
3096 | /** |
||
3097 | * Link a GLSL shader program. Called via glLinkProgram(). |
||
3098 | */ |
||
3099 | void |
||
3100 | _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) |
||
3101 | { |
||
3102 | unsigned int i; |
||
3103 | |||
3104 | _mesa_clear_shader_program_data(ctx, prog); |
||
3105 | |||
3106 | prog->LinkStatus = GL_TRUE; |
||
3107 | |||
3108 | for (i = 0; i < prog->NumShaders; i++) { |
||
3109 | if (!prog->Shaders[i]->CompileStatus) { |
||
3110 | linker_error(prog, "linking with uncompiled shader"); |
||
3111 | prog->LinkStatus = GL_FALSE; |
||
3112 | } |
||
3113 | } |
||
3114 | |||
3115 | if (prog->LinkStatus) { |
||
3116 | link_shaders(ctx, prog); |
||
3117 | } |
||
3118 | |||
3119 | if (prog->LinkStatus) { |
||
3120 | if (!ctx->Driver.LinkShader(ctx, prog)) { |
||
3121 | prog->LinkStatus = GL_FALSE; |
||
3122 | } |
||
3123 | } |
||
3124 | |||
3125 | if (ctx->Shader.Flags & GLSL_DUMP) { |
||
3126 | if (!prog->LinkStatus) { |
||
3127 | printf("GLSL shader program %d failed to link\n", prog->Name); |
||
3128 | } |
||
3129 | |||
3130 | if (prog->InfoLog && prog->InfoLog[0] != 0) { |
||
3131 | printf("GLSL shader program %d info log:\n", prog->Name); |
||
3132 | printf("%s\n", prog->InfoLog); |
||
3133 | } |
||
3134 | } |
||
3135 | } |
||
3136 | |||
3137 | } /* extern "C" */>>>><>>><>><>>><>>>><>>>>>> |