Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright © 2010 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | /** @file brw_fs_visitor.cpp |
||
25 | * |
||
26 | * This file supports generating the FS LIR from the GLSL IR. The LIR |
||
27 | * makes it easier to do backend-specific optimizations than doing so |
||
28 | * in the GLSL IR or in the native code. |
||
29 | */ |
||
30 | extern "C" { |
||
31 | |||
32 | #include |
||
33 | |||
34 | #include "main/macros.h" |
||
35 | #include "main/shaderobj.h" |
||
36 | #include "main/uniforms.h" |
||
37 | #include "program/prog_parameter.h" |
||
38 | #include "program/prog_print.h" |
||
39 | #include "program/prog_optimize.h" |
||
40 | #include "program/register_allocate.h" |
||
41 | #include "program/sampler.h" |
||
42 | #include "program/hash_table.h" |
||
43 | #include "brw_context.h" |
||
44 | #include "brw_eu.h" |
||
45 | #include "brw_wm.h" |
||
46 | } |
||
47 | #include "brw_fs.h" |
||
48 | #include "glsl/glsl_types.h" |
||
49 | #include "glsl/ir_optimization.h" |
||
50 | |||
/**
 * Allocate (or wire up special) storage for a GLSL IR variable and record
 * it in variable_ht so later ir_dereference_variable visits can find it.
 *
 * Inputs get interpolation setup, outputs are bound to the color/depth
 * output slots, uniforms get UNIFORM-file registers; anything else falls
 * through to a plain GRF allocation at the bottom.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   /* Already processed on an earlier visit — nothing to do. */
   if (variable_storage(ir))
      return;

   if (ir->mode == ir_var_shader_in) {
      /* Built-in FS inputs have dedicated setup paths; everything else is
       * interpolated generically.
       */
      if (!strcmp(ir->name, "gl_FragCoord")) {
         reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = emit_frontfacing_interpolation(ir);
      } else {
         reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   } else if (ir->mode == ir_var_shader_out) {
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

      if (ir->index > 0) {
         /* index==1 is the second source of dual-source blending. */
         assert(ir->location == FRAG_RESULT_DATA0);
         assert(ir->index == 1);
         this->dual_src_output = *reg;
      } else if (ir->location == FRAG_RESULT_COLOR) {
         /* Writing gl_FragColor outputs to all color regions. */
         for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
            this->outputs[i] = *reg;
            this->output_components[i] = 4;
         }
      } else if (ir->location == FRAG_RESULT_DEPTH) {
         this->frag_depth = *reg;
      } else {
         /* gl_FragData or a user-defined FS output */
         assert(ir->location >= FRAG_RESULT_DATA0 &&
                ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);

         /* For an array output, each element's component count comes from
          * the element type rather than the array type itself.
          */
         int vector_elements =
            ir->type->is_array() ? ir->type->fields.array->vector_elements
                                 : ir->type->vector_elements;

         /* General color output.  Arrays map to consecutive output slots,
          * each offset into the same backing register allocation.
          */
         for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
            int output = ir->location - FRAG_RESULT_DATA0 + i;
            this->outputs[output] = *reg;
            this->outputs[output].reg_offset += vector_elements * i;
            this->output_components[output] = vector_elements;
         }
      }
   } else if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      /* Thanks to the lower_ubo_reference pass, we will see only
       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
       * variables, so no need for them to be in variable_ht.
       */
      if (ir->is_in_uniform_block())
         return;

      if (dispatch_width == 16) {
         /* In 16-wide mode uniforms were already laid out by the 8-wide
          * pass; we only verify the storage exists.
          */
         if (!variable_storage(ir)) {
            fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
         }
         return;
      }

      param_size[param_index] = type_size(ir->type);
      /* "gl_" prefix identifies built-in uniforms (state variables). */
      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
      reg->type = brw_type_for_base_type(ir->type);
   }

   /* Locals and anything not handled above get a fresh GRF allocation. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}
||
135 | |||
136 | void |
||
137 | fs_visitor::visit(ir_dereference_variable *ir) |
||
138 | { |
||
139 | fs_reg *reg = variable_storage(ir->var); |
||
140 | this->result = *reg; |
||
141 | } |
||
142 | |||
143 | void |
||
144 | fs_visitor::visit(ir_dereference_record *ir) |
||
145 | { |
||
146 | const glsl_type *struct_type = ir->record->type; |
||
147 | |||
148 | ir->record->accept(this); |
||
149 | |||
150 | unsigned int offset = 0; |
||
151 | for (unsigned int i = 0; i < struct_type->length; i++) { |
||
152 | if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) |
||
153 | break; |
||
154 | offset += type_size(struct_type->fields.structure[i].type); |
||
155 | } |
||
156 | this->result.reg_offset += offset; |
||
157 | this->result.type = brw_type_for_base_type(ir->type); |
||
158 | } |
||
159 | |||
/**
 * Resolve an array dereference.  A constant index is folded directly into
 * reg_offset; a variable index is attached to the register via reladdr,
 * to be resolved by a later lowering pass.
 */
void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   fs_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->as_constant();

   ir->array->accept(this);
   src = this->result;
   src.type = brw_type_for_base_type(ir->type);

   if (constant_index) {
      assert(src.file == UNIFORM || src.file == GRF);
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  We attach the variable index
       * component to the reg as a pointer to a register containing the
       * offset.  Currently only uniform arrays are supported in this patch,
       * and that reladdr pointer is resolved by
       * move_uniform_array_access_to_pull_constants().  All other array types
       * are lowered by lower_variable_index_to_cond_assign().
       */
      ir->array_index->accept(this);

      /* Scale the element index into a register-offset value. */
      fs_reg index_reg;
      index_reg = fs_reg(this, glsl_type::int_type);
      emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));

      /* Nested variable indexing: fold the outer reladdr into this one. */
      if (src.reladdr) {
         emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
      }

      /* Heap-copy the index reg so reladdr survives past this frame. */
      src.reladdr = ralloc(mem_ctx, fs_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }
   this->result = src;
}
||
199 | |||
200 | void |
||
201 | fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a) |
||
202 | { |
||
203 | if (brw->gen < 6 || |
||
204 | !x.is_valid_3src() || |
||
205 | !y.is_valid_3src() || |
||
206 | !a.is_valid_3src()) { |
||
207 | /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */ |
||
208 | fs_reg y_times_a = fs_reg(this, glsl_type::float_type); |
||
209 | fs_reg one_minus_a = fs_reg(this, glsl_type::float_type); |
||
210 | fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type); |
||
211 | |||
212 | emit(MUL(y_times_a, y, a)); |
||
213 | |||
214 | a.negate = !a.negate; |
||
215 | emit(ADD(one_minus_a, a, fs_reg(1.0f))); |
||
216 | emit(MUL(x_times_one_minus_a, x, one_minus_a)); |
||
217 | |||
218 | emit(ADD(dst, x_times_one_minus_a, y_times_a)); |
||
219 | } else { |
||
220 | /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so |
||
221 | * we need to reorder the operands. |
||
222 | */ |
||
223 | emit(LRP(dst, a, y, x)); |
||
224 | } |
||
225 | } |
||
226 | |||
227 | void |
||
228 | fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst, |
||
229 | fs_reg src0, fs_reg src1) |
||
230 | { |
||
231 | fs_inst *inst; |
||
232 | |||
233 | if (brw->gen >= 6) { |
||
234 | inst = emit(BRW_OPCODE_SEL, dst, src0, src1); |
||
235 | inst->conditional_mod = conditionalmod; |
||
236 | } else { |
||
237 | emit(CMP(reg_null_d, src0, src1, conditionalmod)); |
||
238 | |||
239 | inst = emit(BRW_OPCODE_SEL, dst, src0, src1); |
||
240 | inst->predicate = BRW_PREDICATE_NORMAL; |
||
241 | } |
||
242 | } |
||
243 | |||
/* Instruction selection: Produce a MOV.sat instead of
 * MIN(MAX(val, 0), 1) when possible.
 *
 * Returns true (with this->result set) when the expression was a
 * saturate pattern and has been fully emitted here; false means the
 * caller should emit it normally.
 */
bool
fs_visitor::try_emit_saturate(ir_expression *ir)
{
   ir_rvalue *sat_val = ir->as_rvalue_to_saturate();

   if (!sat_val)
      return false;

   /* Remember the instruction-stream tail so we can tell which
    * instructions the accept() below generated.
    */
   fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();

   sat_val->accept(this);
   fs_reg src = this->result;

   fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();

   /* If the last instruction from our accept() didn't generate our
    * src, generate a saturated MOV
    */
   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
   if (!modify || modify->regs_written != 1) {
      this->result = fs_reg(this, ir->type);
      fs_inst *inst = emit(MOV(this->result, src));
      inst->saturate = true;
   } else {
      /* Otherwise fold the saturate into the producing instruction. */
      modify->saturate = true;
      this->result = src;
   }

   return true;
}
||
278 | |||
279 | bool |
||
280 | fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) |
||
281 | { |
||
282 | /* 3-src instructions were introduced in gen6. */ |
||
283 | if (brw->gen < 6) |
||
284 | return false; |
||
285 | |||
286 | /* MAD can only handle floating-point data. */ |
||
287 | if (ir->type != glsl_type::float_type) |
||
288 | return false; |
||
289 | |||
290 | ir_rvalue *nonmul = ir->operands[1 - mul_arg]; |
||
291 | ir_expression *mul = ir->operands[mul_arg]->as_expression(); |
||
292 | |||
293 | if (!mul || mul->operation != ir_binop_mul) |
||
294 | return false; |
||
295 | |||
296 | if (nonmul->as_constant() || |
||
297 | mul->operands[0]->as_constant() || |
||
298 | mul->operands[1]->as_constant()) |
||
299 | return false; |
||
300 | |||
301 | nonmul->accept(this); |
||
302 | fs_reg src0 = this->result; |
||
303 | |||
304 | mul->operands[0]->accept(this); |
||
305 | fs_reg src1 = this->result; |
||
306 | |||
307 | mul->operands[1]->accept(this); |
||
308 | fs_reg src2 = this->result; |
||
309 | |||
310 | this->result = fs_reg(this, ir->type); |
||
311 | emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); |
||
312 | |||
313 | return true; |
||
314 | } |
||
315 | |||
/**
 * Emit LIR for a (scalarized) GLSL IR expression.
 *
 * Operands are evaluated into op[]; the result register is allocated up
 * front and written by the per-opcode cases below.  Many opcodes assert
 * unreachable because earlier lowering passes are expected to have
 * removed them.
 */
void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[3], temp;
   fs_inst *inst;

   assert(ir->get_num_operands() <= 3);

   /* Peephole patterns that consume the whole expression. */
   if (try_emit_saturate(ir))
      return;
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
         return;
   }

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         fail("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         printf("\n");
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(XOR(this->result, op[0], fs_reg(1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      emit(MOV(this->result, op[0]));
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      emit(MOV(this->result, op[0]));
      break;
   case ir_unop_sign:
      /* sign(x): start at 0, predicated-overwrite with +1 or -1. */
      temp = fs_reg(this, ir->type);

      emit(MOV(this->result, fs_reg(0.0f)));

      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_G));
      inst = emit(MOV(this->result, fs_reg(1.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;

      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_L));
      inst = emit(MOV(this->result, fs_reg(-1.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;
   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
      break;

   case ir_unop_dFdx:
      emit(FS_OPCODE_DDX, this->result, op[0]);
      break;
   case ir_unop_dFdy:
      emit(FS_OPCODE_DDY, this->result, op[0]);
      break;

   case ir_binop_add:
      emit(ADD(this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      if (ir->type->is_integer()) {
         /* For integer multiplication, the MUL uses the low 16 bits
          * of one of the operands (src0 on gen6, src1 on gen7).  The
          * MACH accumulates in the contribution of the upper 16 bits
          * of that operand.
          *
          * FINISHME: Emit just the MUL if we know an operand is small
          * enough.
          */
         if (brw->gen >= 7 && dispatch_width == 16)
            fail("16-wide explicit accumulator operands unsupported\n");

         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);

         emit(MUL(acc, op[0], op[1]));
         emit(MACH(reg_null_d, op[0], op[1]));
         emit(MOV(this->result, fs_reg(acc)));
      } else {
         emit(MUL(this->result, op[0], op[1]));
      }
      break;
   case ir_binop_div:
      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
      break;
   case ir_binop_mod:
      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_all_equal:
   case ir_binop_nequal:
   case ir_binop_any_nequal:
      resolve_bool_comparison(ir->operands[0], &op[0]);
      resolve_bool_comparison(ir->operands[1], &op[1]);

      emit(CMP(this->result, op[0], op[1],
               brw_conditional_for_comparison(ir->operation)));
      break;

   case ir_binop_logic_xor:
      emit(XOR(this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(OR(this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(AND(this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;

   case ir_binop_vector_extract:
      assert(!"not reached: should be handled by lower_vec_index_to_cond_assign()");
      break;

   case ir_triop_vector_insert:
      assert(!"not reached: should be handled by lower_vector_insert()");
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
      break;

   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
      break;

   /* Bitcasts are free: just reinterpret the register's type. */
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_u2f:
      op[0].type = BRW_REGISTER_TYPE_F;
      this->result = op[0];
      break;
   case ir_unop_i2u:
   case ir_unop_bitcast_f2u:
      op[0].type = BRW_REGISTER_TYPE_UD;
      this->result = op[0];
      break;
   case ir_unop_u2i:
   case ir_unop_bitcast_f2i:
      op[0].type = BRW_REGISTER_TYPE_D;
      this->result = op[0];
      break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_f2i:
   case ir_unop_f2u:
      /* Value-converting casts: MOV performs the conversion because the
       * source and destination register types differ.
       */
      emit(MOV(this->result, op[0]));
      break;

   case ir_unop_b2i:
      emit(AND(this->result, op[0], fs_reg(1)));
      break;
   case ir_unop_b2f:
      /* Mask to 0/1 first, then int->float convert via MOV. */
      temp = fs_reg(this, glsl_type::int_type);
      emit(AND(temp, op[0], fs_reg(1)));
      emit(MOV(this->result, temp));
      break;

   case ir_unop_f2b:
      emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
      break;
   case ir_unop_i2b:
      emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
      break;

   case ir_unop_trunc:
      emit(RNDZ(this->result, op[0]));
      break;
   case ir_unop_ceil:
      /* ceil(x) = -floor(-x) */
      op[0].negate = !op[0].negate;
      emit(RNDD(this->result, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      emit(RNDD(this->result, op[0]));
      break;
   case ir_unop_fract:
      emit(FRC(this->result, op[0]));
      break;
   case ir_unop_round_even:
      emit(RNDE(this->result, op[0]));
      break;

   case ir_binop_min:
   case ir_binop_max:
      resolve_ud_negate(&op[0]);
      resolve_ud_negate(&op[1]);
      emit_minmax(ir->operation == ir_binop_min ?
                  BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
                  this->result, op[0], op[1]);
      break;
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_unop_pack_half_2x16:
      assert(!"not reached: should be handled by lower_packing_builtins");
      break;
   case ir_unop_unpack_half_2x16_split_x:
      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
      break;
   case ir_unop_unpack_half_2x16_split_y:
      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
      break;
   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
      break;

   case ir_unop_bitfield_reverse:
      emit(BFREV(this->result, op[0]));
      break;
   case ir_unop_bit_count:
      emit(CBIT(this->result, op[0]));
      break;
   case ir_unop_find_msb:
      temp = fs_reg(this, glsl_type::uint_type);
      emit(FBH(temp, op[0]));

      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
       * from the LSB side.  If FBH didn't return an error (0xFFFFFFFF), then
       * subtract the result from 31 to convert the MSB count into an LSB count.
       */

      /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
      emit(MOV(this->result, temp));
      emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));

      temp.negate = true;
      inst = emit(ADD(this->result, temp, fs_reg(31)));
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_unop_find_lsb:
      emit(FBL(this->result, op[0]));
      break;
   case ir_triop_bitfield_extract:
      /* Note that the instruction's argument order is reversed from GLSL
       * and the IR.
       */
      emit(BFE(this->result, op[2], op[1], op[0]));
      break;
   case ir_binop_bfm:
      emit(BFI1(this->result, op[0], op[1]));
      break;
   case ir_triop_bfi:
      emit(BFI2(this->result, op[0], op[1], op[2]));
      break;
   case ir_quadop_bitfield_insert:
      assert(!"not reached: should be handled by "
             "lower_instructions::bitfield_insert_to_bfm_bfi");
      break;

   case ir_unop_bit_not:
      emit(NOT(this->result, op[0]));
      break;
   case ir_binop_bit_and:
      emit(AND(this->result, op[0], op[1]));
      break;
   case ir_binop_bit_xor:
      emit(XOR(this->result, op[0], op[1]));
      break;
   case ir_binop_bit_or:
      emit(OR(this->result, op[0], op[1]));
      break;

   case ir_binop_lshift:
      emit(SHL(this->result, op[0], op[1]));
      break;

   case ir_binop_rshift:
      /* Arithmetic shift for signed, logical shift for unsigned. */
      if (ir->type->base_type == GLSL_TYPE_INT)
         emit(ASR(this->result, op[0], op[1]));
      else
         emit(SHR(this->result, op[0], op[1]));
      break;
   case ir_binop_pack_half_2x16_split:
      emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
      break;
   case ir_binop_ubo_load: {
      /* This IR node takes a constant uniform block and a constant or
       * variable byte offset within the block and loads a vector from that.
       */
      ir_constant *uniform_block = ir->operands[0]->as_constant();
      ir_constant *const_offset = ir->operands[1]->as_constant();
      fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_WM_UBO(uniform_block->value.u[0]));
      if (const_offset) {
         fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
         packed_consts.type = result.type;

         /* Load the 16-byte-aligned chunk containing the offset. */
         fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
         emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                      packed_consts, surf_index, const_offset_reg));

         /* smear selects the dword within the loaded chunk. */
         packed_consts.smear = const_offset->value.u[0] % 16 / 4;
         for (int i = 0; i < ir->type->vector_elements; i++) {
            /* UBO bools are any nonzero value.  We consider bools to be
             * values with the low bit set to 1.  Convert them using CMP.
             */
            if (ir->type->base_type == GLSL_TYPE_BOOL) {
               emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
            } else {
               emit(MOV(result, packed_consts));
            }

            packed_consts.smear++;
            result.reg_offset++;

            /* The std140 packing rules don't allow vectors to cross 16-byte
             * boundaries, and a reg is 32 bytes.
             */
            assert(packed_consts.smear < 8);
         }
      } else {
         /* Turn the byte offset into a dword offset. */
         fs_reg base_offset = fs_reg(this, glsl_type::int_type);
         emit(SHR(base_offset, op[1], fs_reg(2)));

         for (int i = 0; i < ir->type->vector_elements; i++) {
            emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
                                            base_offset, i));

            if (ir->type->base_type == GLSL_TYPE_BOOL)
               emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));

            result.reg_offset++;
         }
      }

      result.reg_offset = 0;
      break;
   }

   case ir_triop_lrp:
      emit_lrp(this->result, op[0], op[1], op[2]);
      break;
   }
}
||
725 | |||
726 | void |
||
727 | fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, |
||
728 | const glsl_type *type, bool predicated) |
||
729 | { |
||
730 | switch (type->base_type) { |
||
731 | case GLSL_TYPE_FLOAT: |
||
732 | case GLSL_TYPE_UINT: |
||
733 | case GLSL_TYPE_INT: |
||
734 | case GLSL_TYPE_BOOL: |
||
735 | for (unsigned int i = 0; i < type->components(); i++) { |
||
736 | l.type = brw_type_for_base_type(type); |
||
737 | r.type = brw_type_for_base_type(type); |
||
738 | |||
739 | if (predicated || !l.equals(r)) { |
||
740 | fs_inst *inst = emit(MOV(l, r)); |
||
741 | inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE; |
||
742 | } |
||
743 | |||
744 | l.reg_offset++; |
||
745 | r.reg_offset++; |
||
746 | } |
||
747 | break; |
||
748 | case GLSL_TYPE_ARRAY: |
||
749 | for (unsigned int i = 0; i < type->length; i++) { |
||
750 | emit_assignment_writes(l, r, type->fields.array, predicated); |
||
751 | } |
||
752 | break; |
||
753 | |||
754 | case GLSL_TYPE_STRUCT: |
||
755 | for (unsigned int i = 0; i < type->length; i++) { |
||
756 | emit_assignment_writes(l, r, type->fields.structure[i].type, |
||
757 | predicated); |
||
758 | } |
||
759 | break; |
||
760 | |||
761 | case GLSL_TYPE_SAMPLER: |
||
762 | break; |
||
763 | |||
764 | case GLSL_TYPE_VOID: |
||
765 | case GLSL_TYPE_ERROR: |
||
766 | case GLSL_TYPE_INTERFACE: |
||
767 | assert(!"not reached"); |
||
768 | break; |
||
769 | } |
||
770 | } |
||
771 | |||
772 | /* If the RHS processing resulted in an instruction generating a |
||
773 | * temporary value, and it would be easy to rewrite the instruction to |
||
774 | * generate its result right into the LHS instead, do so. This ends |
||
775 | * up reliably removing instructions where it can be tricky to do so |
||
776 | * later without real UD chain information. |
||
777 | */ |
||
778 | bool |
||
779 | fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, |
||
780 | fs_reg dst, |
||
781 | fs_reg src, |
||
782 | fs_inst *pre_rhs_inst, |
||
783 | fs_inst *last_rhs_inst) |
||
784 | { |
||
785 | /* Only attempt if we're doing a direct assignment. */ |
||
786 | if (ir->condition || |
||
787 | !(ir->lhs->type->is_scalar() || |
||
788 | (ir->lhs->type->is_vector() && |
||
789 | ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) |
||
790 | return false; |
||
791 | |||
792 | /* Make sure the last instruction generated our source reg. */ |
||
793 | fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, |
||
794 | last_rhs_inst, |
||
795 | src); |
||
796 | if (!modify) |
||
797 | return false; |
||
798 | |||
799 | /* If last_rhs_inst wrote a different number of components than our LHS, |
||
800 | * we can't safely rewrite it. |
||
801 | */ |
||
802 | if (virtual_grf_sizes[dst.reg] != modify->regs_written) |
||
803 | return false; |
||
804 | |||
805 | /* Success! Rewrite the instruction. */ |
||
806 | modify->dst = dst; |
||
807 | |||
808 | return true; |
||
809 | } |
||
810 | |||
811 | void |
||
812 | fs_visitor::visit(ir_assignment *ir) |
||
813 | { |
||
814 | fs_reg l, r; |
||
815 | fs_inst *inst; |
||
816 | |||
817 | /* FINISHME: arrays on the lhs */ |
||
818 | ir->lhs->accept(this); |
||
819 | l = this->result; |
||
820 | |||
821 | fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); |
||
822 | |||
823 | ir->rhs->accept(this); |
||
824 | r = this->result; |
||
825 | |||
826 | fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); |
||
827 | |||
828 | assert(l.file != BAD_FILE); |
||
829 | assert(r.file != BAD_FILE); |
||
830 | |||
831 | if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) |
||
832 | return; |
||
833 | |||
834 | if (ir->condition) { |
||
835 | emit_bool_to_cond_code(ir->condition); |
||
836 | } |
||
837 | |||
838 | if (ir->lhs->type->is_scalar() || |
||
839 | ir->lhs->type->is_vector()) { |
||
840 | for (int i = 0; i < ir->lhs->type->vector_elements; i++) { |
||
841 | if (ir->write_mask & (1 << i)) { |
||
842 | inst = emit(MOV(l, r)); |
||
843 | if (ir->condition) |
||
844 | inst->predicate = BRW_PREDICATE_NORMAL; |
||
845 | r.reg_offset++; |
||
846 | } |
||
847 | l.reg_offset++; |
||
848 | } |
||
849 | } else { |
||
850 | emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); |
||
851 | } |
||
852 | } |
||
853 | |||
/**
 * Emit the MRF payload setup and SEND instruction for a texture operation
 * on Gen4 hardware.
 *
 * Gen4's SIMD8 sampler messages always carry all three u/v/r coordinate
 * slots, and several operations (txb/txl/txf without shadow compare, and
 * txs) have no SIMD8 message at all, forcing a SIMD16 message whose
 * interleaved return payload must be repacked afterwards.
 *
 * \param dst        destination for the 4-component sample result
 * \param coordinate texture coordinate; consumed component-by-component
 *                   via reg_offset increments
 * \param shadow_c   shadow comparison reference value (if any)
 * \param lod        LOD/bias for txb/txl/txs/txf; doubles as dPdx for txd
 * \param dPdy       y-derivatives for txd
 *
 * Returns the SEND instruction so the caller (visit(ir_texture*)) can fill
 * in sampler index, texture offsets, and shadow_compare.
 */
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg dPdy)
{
   int mlen;
   int base_mrf = 1;
   bool simd16 = false;
   fs_reg orig_dst;

   /* g0 header. */
   mlen = 1;

   if (ir->shadow_comparitor) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }

      /* gen4's SIMD8 sampler always has the slots for u,v,r present.
       * the unused slots must be zeroed.
       */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
      }
      mlen += 3;

      if (ir->op == ir_tex) {
         /* There's no plain shadow compare message, so we use shadow
          * compare with a bias of 0.0.
          */
         emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
         mlen++;
      } else if (ir->op == ir_txb || ir->op == ir_txl) {
         /* Bias or explicit LOD occupies the slot before the reference. */
         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
         mlen++;
      } else {
         /* No other op combines with a shadow comparitor in this path. */
         assert(!"Should not get here.");
      }

      /* The shadow reference value goes last in the payload. */
      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }
      /* zero the others. */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;
   } else if (ir->op == ir_txd) {
      /* For txd, the 'lod' argument actually carries dPdx. */
      fs_reg &dPdx = lod;

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }
      /* the slots for u and v are always present, but r is optional */
      mlen += MAX2(ir->coordinate->type->vector_elements, 2);

      /* P = u, v, r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * 1-arg: Does not exist.
       *
       * 2-arg: dudx   dvdx   dudy   dvdy
       *        dPdx.x dPdx.y dPdy.x dPdy.y
       *        m4     m5     m6     m7
       *
       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
       *        m5     m6     m7     m8     m9     m10
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
         dPdx.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);

      for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
         dPdy.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
   } else if (ir->op == ir_txs) {
      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
      simd16 = true;
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += 2;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      simd16 = true;
      assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);

      /* SIMD16 payloads use two registers per coordinate component,
       * hence the i * 2 stride.
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
                  coordinate));
         coordinate.reg_offset++;
      }

      /* Initialize the rest of u/v/r with 0.0.  Empirically, this seems to
       * be necessary for TXF (ld), but seems wise to do for all messages.
       */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
      }

      /* lod/bias appears after u/v/r. */
      mlen += 6;

      emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
      mlen++;

      /* The unused upper half. */
      mlen++;
   }

   if (simd16) {
      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk. We'll need to move
       * this weirdness around to the expected layout.
       */
      orig_dst = dst;
      dst = fs_reg(GRF, virtual_grf_alloc(8),
                   (brw->is_g4x ?
                    brw_type_for_base_type(ir->type) :
                    BRW_REGISTER_TYPE_F));
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(SHADER_OPCODE_TEX, dst);
      break;
   case ir_txb:
      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      inst = emit(SHADER_OPCODE_TXL, dst);
      break;
   case ir_txd:
      inst = emit(SHADER_OPCODE_TXD, dst);
      break;
   case ir_txs:
      inst = emit(SHADER_OPCODE_TXS, dst);
      break;
   case ir_txf:
      inst = emit(SHADER_OPCODE_TXF, dst);
      break;
   default:
      fail("unrecognized texture opcode");
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = true;
   inst->regs_written = simd16 ? 8 : 4;

   if (simd16) {
      /* De-interleave the SIMD16 return: keep the even-indexed vec4
       * components, skipping the junk odd-indexed ones.
       */
      for (int i = 0; i < 4; i++) {
         emit(MOV(orig_dst, dst));
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}
||
1026 | |||
/* gen5's sampler has slots for u, v, r, array index, then optional
 * parameters like shadow comparitor or LOD bias.  If optional
 * parameters aren't present, those base slots are optional and don't
 * need to be included in the message.
 *
 * We don't fill in the unnecessary slots regardless, which may look
 * surprising in the disassembly.
 */
/**
 * Emit the MRF payload setup and SEND instruction for a texture operation
 * on Gen5/Gen6 hardware.
 *
 * \param dst          destination for the 4-component sample result
 * \param coordinate   texture coordinate; consumed per-component via
 *                     reg_offset increments
 * \param shadow_c     shadow comparison reference value (if any)
 * \param lod          LOD/bias for txb/txl/txs/txf; dPdx for txd
 * \param lod2         dPdy for txd
 * \param sample_index sample index for txf_ms
 *
 * Returns the SEND instruction for the caller to finish (sampler index,
 * offsets, shadow_compare).
 */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg lod2,
                              fs_reg sample_index)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = dispatch_width / 8;  /* MRF regs per logical component */
   bool header_present = false;
   const int vector_elements =
      ir->coordinate ? ir->coordinate->type->vector_elements : 0;

   if (ir->offset != NULL && ir->op == ir_txf) {
      /* It appears that the ld instruction used for txf does its
       * address bounds check before adding in the offset.  To work
       * around this, just add the integer offset to the integer texel
       * coordinate, and don't put the offset in the header.
       */
      ir_constant *offset = ir->offset->as_constant();
      for (int i = 0; i < vector_elements; i++) {
         emit(ADD(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
                  coordinate,
                  offset->value.i[i]));
         coordinate.reg_offset++;
      }
   } else {
      if (ir->offset) {
         /* The offsets set up by the ir_texture visitor are in the
          * m1 header, so we can't go headerless.
          */
         header_present = true;
         mlen++;
         base_mrf--;
      }

      for (int i = 0; i < vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
                  coordinate));
         coordinate.reg_offset++;
      }
   }
   mlen += vector_elements * reg_width;

   if (ir->shadow_comparitor) {
      /* Skip over unfilled u/v/r/ai slots; the reference value has a
       * fixed position after the four coordinate slots.
       */
      mlen = MAX2(mlen, header_present + 4 * reg_width);

      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen += reg_width;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(SHADER_OPCODE_TEX, dst);
      break;
   case ir_txb:
      /* Bias goes after the four coordinate slots. */
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;

      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      /* Explicit LOD goes after the four coordinate slots. */
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;

      inst = emit(SHADER_OPCODE_TXL, dst);
      break;
   case ir_txd: {
      mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */

      /**
       * P = u, v, r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * Load up these values:
       * - dudx   dudy   dvdx   dvdy   drdx   drdy
       * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
         lod.reg_offset++;
         mlen += reg_width;

         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
         lod2.reg_offset++;
         mlen += reg_width;
      }

      inst = emit(SHADER_OPCODE_TXD, dst);
      break;
   }
   case ir_txs:
      /* resinfo takes only the (integer) LOD. */
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += reg_width;
      inst = emit(SHADER_OPCODE_TXS, dst);
      break;
   case ir_txf:
      /* The ld message wants the LOD in the slot before position 4;
       * coordinates were already written above.
       */
      mlen = header_present + 4 * reg_width;
      emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), lod));
      inst = emit(SHADER_OPCODE_TXF, dst);
      break;
   case ir_txf_ms:
      mlen = header_present + 4 * reg_width;

      /* lod */
      emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), fs_reg(0)));
      /* sample index */
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
      mlen += reg_width;
      inst = emit(SHADER_OPCODE_TXF_MS, dst);
      break;
   case ir_lod:
      inst = emit(SHADER_OPCODE_LOD, dst);
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}
||
1164 | |||
/**
 * Emit the MRF payload setup and SEND instruction for a texture operation
 * on Gen7 hardware.
 *
 * Gen7 sampler payloads interleave parameters with the coordinate for
 * some ops (txd, txf), so coordinate setup is split between the per-op
 * switch below and the common loop after it.
 *
 * \param dst          destination for the 4-component sample result
 * \param coordinate   texture coordinate; consumed per-component via
 *                     reg_offset increments
 * \param shadow_c     shadow comparison reference value (if any)
 * \param lod          LOD/bias for txb/txl/txs/txf; dPdx for txd
 * \param lod2         dPdy for txd
 * \param sample_index sample index for txf_ms
 */
fs_inst *
fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg lod2,
                              fs_reg sample_index)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = dispatch_width / 8;  /* MRF regs per logical component */
   bool header_present = false;
   int offsets[3];

   if (ir->offset && ir->op != ir_txf) {
      /* The offsets set up by the ir_texture visitor are in the
       * m1 header, so we can't go headerless.
       */
      header_present = true;
      mlen++;
      base_mrf--;
   }

   if (ir->shadow_comparitor) {
      /* The shadow reference value leads the payload. */
      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen += reg_width;
   }

   /* Set up the LOD info */
   switch (ir->op) {
   case ir_tex:
   case ir_lod:
      break;
   case ir_txb:
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;
      break;
   case ir_txl:
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;
      break;
   case ir_txd: {
      if (dispatch_width == 16)
         fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");

      /* Load dPdx and the coordinate together:
       * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;

         /* For cube map array, the coordinate is (u,v,r,ai) but there are
          * only derivatives for (u, v, r).
          */
         if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
            emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
            lod.reg_offset++;
            mlen += reg_width;

            emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
            lod2.reg_offset++;
            mlen += reg_width;
         }
      }
      break;
   }
   case ir_txs:
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += reg_width;
      break;
   case ir_txf:
      /* It appears that the ld instruction used for txf does its
       * address bounds check before adding in the offset.  To work
       * around this, just add the integer offset to the integer texel
       * coordinate, and don't put the offset in the header.
       */
      if (ir->offset) {
         ir_constant *offset = ir->offset->as_constant();
         /* NOTE(review): all three components are read regardless of the
          * offset's vector width — presumably safe because ir_constant's
          * value storage is fixed-size; only offsets[i] for valid i are
          * actually used below.
          */
         offsets[0] = offset->value.i[0];
         offsets[1] = offset->value.i[1];
         offsets[2] = offset->value.i[2];
      } else {
         memset(offsets, 0, sizeof(offsets));
      }

      /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
      emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
               coordinate, offsets[0]));
      coordinate.reg_offset++;
      mlen += reg_width;

      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
      mlen += reg_width;

      for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
         emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
                  coordinate, offsets[i]));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
      break;
   case ir_txf_ms:
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
      mlen += reg_width;

      /* constant zero MCS; we arrange to never actually have a compressed
       * multisample surface here for now. TODO: issue ld_mcs to get this first,
       * if we ever support texturing from compressed multisample surfaces
       */
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
      mlen += reg_width;

      /* there is no offsetting for this message; just copy in the integer
       * texture coordinates
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
                  coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
      break;
   }

   /* Set up the coordinate (except for cases where it was done above) */
   if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
   }

   /* Generate the SEND */
   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
   case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
   case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
   case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
   case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
   case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}
||
1320 | |||
/**
 * Fix up a texture coordinate for GL_TEXTURE_RECTANGLE sampling and for
 * GL_CLAMP wrap modes, returning the (possibly replaced) coordinate.
 *
 * \param coordinate the evaluated texture coordinate
 * \param is_rect    true for rectangle textures
 * \param sampler    sampler index (used against the program key's clamp
 *                   masks)
 * \param texunit    texture unit, used to reference the TEXRECT scale
 *                   state parameters
 */
fs_reg
fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
                             bool is_rect, int sampler, int texunit)
{
   fs_inst *inst = NULL;
   bool needs_gl_clamp = true;
   fs_reg scale_x, scale_y;

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (is_rect &&
       (brw->gen < 6 ||
        (brw->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) ||
                           c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) {
      struct gl_program_parameter_list *params = fp->Base.Parameters;
      /* State token identifying the 1/width, 1/height scale parameter
       * for this texture unit.
       */
      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         texunit,
         0,
      };

      if (dispatch_width == 16) {
         fail("rectangle scale uniform setup not supported on 16-wide\n");
         return coordinate;
      }

      /* Reserve the next two uniform slots for the scale factors; the
       * parameter pointers are appended immediately below.
       */
      scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
      scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);

      GLuint index = _mesa_add_state_reference(params,
                                               (gl_state_index *)tokens);
      c->prog_data.param[c->prog_data.nr_params++] =
         &fp->Base.Parameters->ParameterValues[index][0].f;
      c->prog_data.param[c->prog_data.nr_params++] =
         &fp->Base.Parameters->ParameterValues[index][1].f;
   }

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (brw->gen < 6 && is_rect) {
      /* Scale (u, v) by (1/width, 1/height) into a fresh register. */
      fs_reg dst = fs_reg(this, ir->coordinate->type);
      fs_reg src = coordinate;
      coordinate = dst;

      emit(MUL(dst, src, scale_x));
      dst.reg_offset++;
      src.reg_offset++;
      emit(MUL(dst, src, scale_y));
   } else if (is_rect) {
      /* On gen6+, the sampler handles the rectangle coordinates
       * natively, without needing rescaling.  But that means we have
       * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
       * not [0, 1] like the default case below.
       */
      needs_gl_clamp = false;

      for (int i = 0; i < 2; i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            /* chan = max(chan, 0.0) */
            inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0));
            inst->conditional_mod = BRW_CONDITIONAL_G;

            /* Our parameter comes in as 1.0/width or 1.0/height,
             * because that's what people normally want for doing
             * texture rectangle handling.  We need width or height
             * for clamping, but we don't care enough to make a new
             * parameter type, so just invert back.
             */
            fs_reg limit = fs_reg(this, glsl_type::float_type);
            emit(MOV(limit, i == 0 ? scale_x : scale_y));
            emit(SHADER_OPCODE_RCP, limit, limit);

            /* chan = min(chan, width-or-height) */
            inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
            inst->conditional_mod = BRW_CONDITIONAL_L;
         }
      }
   }

   if (ir->coordinate && needs_gl_clamp) {
      /* GL_CLAMP on normalized coordinates is just a saturate; only the
       * first three (u, v, r) components can be clamped.
       */
      for (unsigned int i = 0;
           i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            fs_inst *inst = emit(MOV(chan, chan));
            inst->saturate = true;
         }
      }
   }
   return coordinate;
}
||
1421 | |||
/**
 * Generate code for an ir_texture operation: evaluate all operands,
 * dispatch to the per-generation payload emitter, then apply the
 * post-sample fixups (texture offsets, cube-array txs depth, swizzle).
 */
void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   int sampler =
      _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, &fp->Base);
   /* FINISHME: We're failing to recompile our programs when the sampler is
    * updated.  This only matters for the texture rectangle scale parameters
    * (pre-gen6, or gen6+ with GL_CLAMP).
    */
   int texunit = fp->Base.SamplerUnits[sampler];

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Generate code to compute all the subexpression trees.  This has to be
    * done before loading any values into MRFs for the sampler message since
    * generating these values may involve SEND messages that need the MRFs.
    */
   fs_reg coordinate;
   if (ir->coordinate) {
      ir->coordinate->accept(this);

      coordinate = rescale_texcoord(ir, this->result,
                                    ir->sampler->type->sampler_dimensionality ==
                                    GLSL_SAMPLER_DIM_RECT,
                                    sampler, texunit);
   }

   fs_reg shadow_comparitor;
   if (ir->shadow_comparitor) {
      ir->shadow_comparitor->accept(this);
      shadow_comparitor = this->result;
   }

   /* Evaluate the op-specific LOD/gradient/sample-index operand(s). */
   fs_reg lod, lod2, sample_index;
   switch (ir->op) {
   case ir_tex:
   case ir_lod:
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      lod = this->result;
      break;
   case ir_txd:
      ir->lod_info.grad.dPdx->accept(this);
      lod = this->result;

      ir->lod_info.grad.dPdy->accept(this);
      lod2 = this->result;
      break;
   case ir_txf:
   case ir_txl:
   case ir_txs:
      ir->lod_info.lod->accept(this);
      lod = this->result;
      break;
   case ir_txf_ms:
      ir->lod_info.sample_index->accept(this);
      sample_index = this->result;
      break;
   };

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));

   if (brw->gen >= 7) {
      inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2, sample_index);
   } else if (brw->gen >= 5) {
      inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2, sample_index);
   } else {
      inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2);
   }

   /* The header is set up by generate_tex() when necessary. */
   inst->src[0] = reg_undef;

   /* txf folds its offset into the coordinate in the gen-specific
    * emitters instead of using the message header.
    */
   if (ir->offset != NULL && ir->op != ir_txf)
      inst->texture_offset = brw_texture_offset(ir->offset->as_constant());

   inst->sampler = sampler;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   /* fixup #layers for cube map arrays */
   if (ir->op == ir_txs) {
      glsl_type const *type = ir->sampler->type;
      if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
          type->sampler_array) {
         /* The hardware reports layer-faces; divide by 6 to get layers. */
         fs_reg depth = dst;
         depth.reg_offset = 2;
         emit_math(SHADER_OPCODE_INT_QUOTIENT, depth, depth, fs_reg(6));
      }
   }

   swizzle_result(ir, dst, sampler);
}
||
1526 | |||
1527 | /** |
||
1528 | * Swizzle the result of a texture result. This is necessary for |
||
1529 | * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. |
||
1530 | */ |
||
1531 | void |
||
1532 | fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) |
||
1533 | { |
||
1534 | this->result = orig_val; |
||
1535 | |||
1536 | if (ir->op == ir_txs || ir->op == ir_lod) |
||
1537 | return; |
||
1538 | |||
1539 | if (ir->type == glsl_type::float_type) { |
||
1540 | /* Ignore DEPTH_TEXTURE_MODE swizzling. */ |
||
1541 | assert(ir->sampler->type->sampler_shadow); |
||
1542 | } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) { |
||
1543 | fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); |
||
1544 | |||
1545 | for (int i = 0; i < 4; i++) { |
||
1546 | int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i); |
||
1547 | fs_reg l = swizzled_result; |
||
1548 | l.reg_offset += i; |
||
1549 | |||
1550 | if (swiz == SWIZZLE_ZERO) { |
||
1551 | emit(MOV(l, fs_reg(0.0f))); |
||
1552 | } else if (swiz == SWIZZLE_ONE) { |
||
1553 | emit(MOV(l, fs_reg(1.0f))); |
||
1554 | } else { |
||
1555 | fs_reg r = orig_val; |
||
1556 | r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i); |
||
1557 | emit(MOV(l, r)); |
||
1558 | } |
||
1559 | } |
||
1560 | this->result = swizzled_result; |
||
1561 | } |
||
1562 | } |
||
1563 | |||
1564 | void |
||
1565 | fs_visitor::visit(ir_swizzle *ir) |
||
1566 | { |
||
1567 | ir->val->accept(this); |
||
1568 | fs_reg val = this->result; |
||
1569 | |||
1570 | if (ir->type->vector_elements == 1) { |
||
1571 | this->result.reg_offset += ir->mask.x; |
||
1572 | return; |
||
1573 | } |
||
1574 | |||
1575 | fs_reg result = fs_reg(this, ir->type); |
||
1576 | this->result = result; |
||
1577 | |||
1578 | for (unsigned int i = 0; i < ir->type->vector_elements; i++) { |
||
1579 | fs_reg channel = val; |
||
1580 | int swiz = 0; |
||
1581 | |||
1582 | switch (i) { |
||
1583 | case 0: |
||
1584 | swiz = ir->mask.x; |
||
1585 | break; |
||
1586 | case 1: |
||
1587 | swiz = ir->mask.y; |
||
1588 | break; |
||
1589 | case 2: |
||
1590 | swiz = ir->mask.z; |
||
1591 | break; |
||
1592 | case 3: |
||
1593 | swiz = ir->mask.w; |
||
1594 | break; |
||
1595 | } |
||
1596 | |||
1597 | channel.reg_offset += swiz; |
||
1598 | emit(MOV(result, channel)); |
||
1599 | result.reg_offset++; |
||
1600 | } |
||
1601 | } |
||
1602 | |||
/**
 * Generate code for an ir_discard: kill the currently-live channels by
 * clearing their bits in the discard flag register, and on gen6+ jump to
 * the end of the shader when a whole subspan has been discarded.
 */
void
fs_visitor::visit(ir_discard *ir)
{
   assert(ir->condition == NULL); /* FINISHME */

   /* We track our discarded pixels in f0.1.  By predicating on it, we can
    * update just the flag bits that aren't yet discarded.  By emitting a
    * CMP of g0 != g0, all our currently executing channels will get turned
    * off.
    */
   fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                   BRW_REGISTER_TYPE_UW));
   fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
                           BRW_CONDITIONAL_NZ));
   cmp->predicate = BRW_PREDICATE_NORMAL;
   /* Use f0.1, not f0.0, so ordinary predication is untouched. */
   cmp->flag_subreg = 1;

   if (brw->gen >= 6) {
      /* For performance, after a discard, jump to the end of the shader.
       * However, many people will do foliage by discarding based on a
       * texture's alpha mask, and then continue on to texture with the
       * remaining pixels.  To avoid trashing the derivatives for those
       * texture samples, we'll only jump if all of the pixels in the subspan
       * have been discarded.
       */
      fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
      discard_jump->flag_subreg = 1;
      /* "not any live in each group of 4" == whole subspan discarded. */
      discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
      discard_jump->predicate_inverse = true;
   }
}
||
1634 | |||
1635 | void |
||
1636 | fs_visitor::visit(ir_constant *ir) |
||
1637 | { |
||
1638 | /* Set this->result to reg at the bottom of the function because some code |
||
1639 | * paths will cause this visitor to be applied to other fields. This will |
||
1640 | * cause the value stored in this->result to be modified. |
||
1641 | * |
||
1642 | * Make reg constant so that it doesn't get accidentally modified along the |
||
1643 | * way. Yes, I actually had this problem. :( |
||
1644 | */ |
||
1645 | const fs_reg reg(this, ir->type); |
||
1646 | fs_reg dst_reg = reg; |
||
1647 | |||
1648 | if (ir->type->is_array()) { |
||
1649 | const unsigned size = type_size(ir->type->fields.array); |
||
1650 | |||
1651 | for (unsigned i = 0; i < ir->type->length; i++) { |
||
1652 | ir->array_elements[i]->accept(this); |
||
1653 | fs_reg src_reg = this->result; |
||
1654 | |||
1655 | dst_reg.type = src_reg.type; |
||
1656 | for (unsigned j = 0; j < size; j++) { |
||
1657 | emit(MOV(dst_reg, src_reg)); |
||
1658 | src_reg.reg_offset++; |
||
1659 | dst_reg.reg_offset++; |
||
1660 | } |
||
1661 | } |
||
1662 | } else if (ir->type->is_record()) { |
||
1663 | foreach_list(node, &ir->components) { |
||
1664 | ir_constant *const field = (ir_constant *) node; |
||
1665 | const unsigned size = type_size(field->type); |
||
1666 | |||
1667 | field->accept(this); |
||
1668 | fs_reg src_reg = this->result; |
||
1669 | |||
1670 | dst_reg.type = src_reg.type; |
||
1671 | for (unsigned j = 0; j < size; j++) { |
||
1672 | emit(MOV(dst_reg, src_reg)); |
||
1673 | src_reg.reg_offset++; |
||
1674 | dst_reg.reg_offset++; |
||
1675 | } |
||
1676 | } |
||
1677 | } else { |
||
1678 | const unsigned size = type_size(ir->type); |
||
1679 | |||
1680 | for (unsigned i = 0; i < size; i++) { |
||
1681 | switch (ir->type->base_type) { |
||
1682 | case GLSL_TYPE_FLOAT: |
||
1683 | emit(MOV(dst_reg, fs_reg(ir->value.f[i]))); |
||
1684 | break; |
||
1685 | case GLSL_TYPE_UINT: |
||
1686 | emit(MOV(dst_reg, fs_reg(ir->value.u[i]))); |
||
1687 | break; |
||
1688 | case GLSL_TYPE_INT: |
||
1689 | emit(MOV(dst_reg, fs_reg(ir->value.i[i]))); |
||
1690 | break; |
||
1691 | case GLSL_TYPE_BOOL: |
||
1692 | emit(MOV(dst_reg, fs_reg((int)ir->value.b[i]))); |
||
1693 | break; |
||
1694 | default: |
||
1695 | assert(!"Non-float/uint/int/bool constant"); |
||
1696 | } |
||
1697 | dst_reg.reg_offset++; |
||
1698 | } |
||
1699 | } |
||
1700 | |||
1701 | this->result = reg; |
||
1702 | } |
||
1703 | |||
/**
 * Emit an instruction whose conditional_mod leaves the flag register set
 * according to the boolean rvalue \p ir, so a following predicated
 * instruction (e.g. a conditional assignment) can use it.
 *
 * Comparisons are folded directly into a CMP where possible; anything
 * else falls back to evaluating the expression to 0/1 and testing bit 0.
 */
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;

         resolve_ud_negate(&op[i]);
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* Booleans are 0/1; test bit 0 and flip with the Z condition. */
         inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
      case ir_binop_logic_or:
      case ir_binop_logic_and:
         /* No single flag-setting instruction for these; evaluate the
          * whole expression and test its 0/1 result below.
          */
         goto out;

      case ir_unop_f2b:
         if (brw->gen >= 6) {
            emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
         } else {
            /* Pre-gen6 sets the flag from a float MOV's result instead. */
            inst = emit(MOV(reg_null_f, op[0]));
            inst->conditional_mod = BRW_CONDITIONAL_NZ;
         }
         break;

      case ir_unop_i2b:
         if (brw->gen >= 6) {
            emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
         } else {
            inst = emit(MOV(reg_null_d, op[0]));
            inst->conditional_mod = BRW_CONDITIONAL_NZ;
         }
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         /* Fold the comparison straight into a flag-setting CMP. */
         resolve_bool_comparison(expr->operands[0], &op[0]);
         resolve_bool_comparison(expr->operands[1], &op[1]);

         emit(CMP(reg_null_d, op[0], op[1],
                  brw_conditional_for_comparison(expr->operation)));
         break;

      default:
         assert(!"not reached");
         fail("bad cond code\n");
         break;
      }
      return;
   }

out:
   /* Fallback: evaluate the boolean to 0/1 and set the flag from bit 0. */
   ir->accept(this);

   fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
||
1781 | |||
1782 | /** |
||
1783 | * Emit a gen6 IF statement with the comparison folded into the IF |
||
1784 | * instruction. |
||
1785 | */ |
||
1786 | void |
||
1787 | fs_visitor::emit_if_gen6(ir_if *ir) |
||
1788 | { |
||
1789 | ir_expression *expr = ir->condition->as_expression(); |
||
1790 | |||
1791 | if (expr) { |
||
1792 | fs_reg op[2]; |
||
1793 | fs_inst *inst; |
||
1794 | fs_reg temp; |
||
1795 | |||
1796 | assert(expr->get_num_operands() <= 2); |
||
1797 | for (unsigned int i = 0; i < expr->get_num_operands(); i++) { |
||
1798 | assert(expr->operands[i]->type->is_scalar()); |
||
1799 | |||
1800 | expr->operands[i]->accept(this); |
||
1801 | op[i] = this->result; |
||
1802 | } |
||
1803 | |||
1804 | switch (expr->operation) { |
||
1805 | case ir_unop_logic_not: |
||
1806 | case ir_binop_logic_xor: |
||
1807 | case ir_binop_logic_or: |
||
1808 | case ir_binop_logic_and: |
||
1809 | /* For operations on bool arguments, only the low bit of the bool is |
||
1810 | * valid, and the others are undefined. Fall back to the condition |
||
1811 | * code path. |
||
1812 | */ |
||
1813 | break; |
||
1814 | |||
1815 | case ir_unop_f2b: |
||
1816 | inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); |
||
1817 | inst->conditional_mod = BRW_CONDITIONAL_NZ; |
||
1818 | return; |
||
1819 | |||
1820 | case ir_unop_i2b: |
||
1821 | emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); |
||
1822 | return; |
||
1823 | |||
1824 | case ir_binop_greater: |
||
1825 | case ir_binop_gequal: |
||
1826 | case ir_binop_less: |
||
1827 | case ir_binop_lequal: |
||
1828 | case ir_binop_equal: |
||
1829 | case ir_binop_all_equal: |
||
1830 | case ir_binop_nequal: |
||
1831 | case ir_binop_any_nequal: |
||
1832 | resolve_bool_comparison(expr->operands[0], &op[0]); |
||
1833 | resolve_bool_comparison(expr->operands[1], &op[1]); |
||
1834 | |||
1835 | emit(IF(op[0], op[1], |
||
1836 | brw_conditional_for_comparison(expr->operation))); |
||
1837 | return; |
||
1838 | default: |
||
1839 | assert(!"not reached"); |
||
1840 | emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); |
||
1841 | fail("bad condition\n"); |
||
1842 | return; |
||
1843 | } |
||
1844 | } |
||
1845 | |||
1846 | emit_bool_to_cond_code(ir->condition); |
||
1847 | fs_inst *inst = emit(BRW_OPCODE_IF); |
||
1848 | inst->predicate = BRW_PREDICATE_NORMAL; |
||
1849 | } |
||
1850 | |||
1851 | void |
||
1852 | fs_visitor::visit(ir_if *ir) |
||
1853 | { |
||
1854 | if (brw->gen < 6 && dispatch_width == 16) { |
||
1855 | fail("Can't support (non-uniform) control flow on 16-wide\n"); |
||
1856 | } |
||
1857 | |||
1858 | /* Don't point the annotation at the if statement, because then it plus |
||
1859 | * the then and else blocks get printed. |
||
1860 | */ |
||
1861 | this->base_ir = ir->condition; |
||
1862 | |||
1863 | if (brw->gen == 6) { |
||
1864 | emit_if_gen6(ir); |
||
1865 | } else { |
||
1866 | emit_bool_to_cond_code(ir->condition); |
||
1867 | |||
1868 | emit(IF(BRW_PREDICATE_NORMAL)); |
||
1869 | } |
||
1870 | |||
1871 | foreach_list(node, &ir->then_instructions) { |
||
1872 | ir_instruction *ir = (ir_instruction *)node; |
||
1873 | this->base_ir = ir; |
||
1874 | |||
1875 | ir->accept(this); |
||
1876 | } |
||
1877 | |||
1878 | if (!ir->else_instructions.is_empty()) { |
||
1879 | emit(BRW_OPCODE_ELSE); |
||
1880 | |||
1881 | foreach_list(node, &ir->else_instructions) { |
||
1882 | ir_instruction *ir = (ir_instruction *)node; |
||
1883 | this->base_ir = ir; |
||
1884 | |||
1885 | ir->accept(this); |
||
1886 | } |
||
1887 | } |
||
1888 | |||
1889 | emit(BRW_OPCODE_ENDIF); |
||
1890 | } |
||
1891 | |||
/**
 * Emit a DO...WHILE loop for a GLSL loop, including the optional
 * counter initialization, bound check (emitted as a predicated BREAK at
 * the top of the body), body, and increment.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (brw->gen < 6 && dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

   if (ir->counter) {
      /* Declare the counter variable and, if a starting value was given,
       * initialize it before entering the loop.
       */
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(MOV(counter, this->result));
      }
   }

   this->base_ir = NULL;
   emit(BRW_OPCODE_DO);

   if (ir->to) {
      /* Re-evaluate the bound each iteration and break when the
       * counter-vs-bound comparison ir->cmp is satisfied.
       */
      this->base_ir = ir->to;
      ir->to->accept(this);

      emit(CMP(reg_null_d, counter, this->result,
               brw_conditional_for_comparison(ir->cmp)));

      fs_inst *inst = emit(BRW_OPCODE_BREAK);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   foreach_list(node, &ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)node;

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      /* Step the counter at the bottom of the body, before WHILE. */
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(ADD(counter, counter, this->result));
   }

   this->base_ir = NULL;
   emit(BRW_OPCODE_WHILE);
}
||
1944 | |||
1945 | void |
||
1946 | fs_visitor::visit(ir_loop_jump *ir) |
||
1947 | { |
||
1948 | switch (ir->mode) { |
||
1949 | case ir_loop_jump::jump_break: |
||
1950 | emit(BRW_OPCODE_BREAK); |
||
1951 | break; |
||
1952 | case ir_loop_jump::jump_continue: |
||
1953 | emit(BRW_OPCODE_CONTINUE); |
||
1954 | break; |
||
1955 | } |
||
1956 | } |
||
1957 | |||
1958 | void |
||
1959 | fs_visitor::visit(ir_call *ir) |
||
1960 | { |
||
1961 | assert(!"FINISHME"); |
||
1962 | } |
||
1963 | |||
1964 | void |
||
1965 | fs_visitor::visit(ir_return *ir) |
||
1966 | { |
||
1967 | assert(!"FINISHME"); |
||
1968 | } |
||
1969 | |||
1970 | void |
||
1971 | fs_visitor::visit(ir_function *ir) |
||
1972 | { |
||
1973 | /* Ignore function bodies other than main() -- we shouldn't see calls to |
||
1974 | * them since they should all be inlined before we get to ir_to_mesa. |
||
1975 | */ |
||
1976 | if (strcmp(ir->name, "main") == 0) { |
||
1977 | const ir_function_signature *sig; |
||
1978 | exec_list empty; |
||
1979 | |||
1980 | sig = ir->matching_signature(&empty); |
||
1981 | |||
1982 | assert(sig); |
||
1983 | |||
1984 | foreach_list(node, &sig->body) { |
||
1985 | ir_instruction *ir = (ir_instruction *)node; |
||
1986 | this->base_ir = ir; |
||
1987 | |||
1988 | ir->accept(this); |
||
1989 | } |
||
1990 | } |
||
1991 | } |
||
1992 | |||
1993 | void |
||
1994 | fs_visitor::visit(ir_function_signature *ir) |
||
1995 | { |
||
1996 | assert(!"not reached"); |
||
1997 | (void)ir; |
||
1998 | } |
||
1999 | |||
2000 | fs_inst * |
||
2001 | fs_visitor::emit(fs_inst inst) |
||
2002 | { |
||
2003 | fs_inst *list_inst = new(mem_ctx) fs_inst; |
||
2004 | *list_inst = inst; |
||
2005 | emit(list_inst); |
||
2006 | return list_inst; |
||
2007 | } |
||
2008 | |||
2009 | fs_inst * |
||
2010 | fs_visitor::emit(fs_inst *inst) |
||
2011 | { |
||
2012 | if (force_uncompressed_stack > 0) |
||
2013 | inst->force_uncompressed = true; |
||
2014 | else if (force_sechalf_stack > 0) |
||
2015 | inst->force_sechalf = true; |
||
2016 | |||
2017 | inst->annotation = this->current_annotation; |
||
2018 | inst->ir = this->base_ir; |
||
2019 | |||
2020 | this->instructions.push_tail(inst); |
||
2021 | |||
2022 | return inst; |
||
2023 | } |
||
2024 | |||
/**
 * Move every instruction on \p list onto the end of the visitor's main
 * instruction stream.
 *
 * NOTE(review): \p list is taken by value, not by reference — whether the
 * copied exec_list's sentinel nodes still reach the caller's elements
 * depends on exec_list's copy semantics; confirm this is intentional.
 */
void
fs_visitor::emit(exec_list list)
{
   /* _safe variant: each node is unlinked inside the loop. */
   foreach_list_safe(node, &list) {
      fs_inst *inst = (fs_inst *)node;
      /* A node can only live on one exec_list; detach before emit()
       * does push_tail onto this->instructions.
       */
      inst->remove();
      emit(inst);
   }
}
||
2034 | |||
2035 | /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ |
||
2036 | void |
||
2037 | fs_visitor::emit_dummy_fs() |
||
2038 | { |
||
2039 | int reg_width = dispatch_width / 8; |
||
2040 | |||
2041 | /* Everyone's favorite color. */ |
||
2042 | emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f))); |
||
2043 | emit(MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f))); |
||
2044 | emit(MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f))); |
||
2045 | emit(MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f))); |
||
2046 | |||
2047 | fs_inst *write; |
||
2048 | write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); |
||
2049 | write->base_mrf = 2; |
||
2050 | write->mlen = 4 * reg_width; |
||
2051 | write->eot = true; |
||
2052 | } |
||
2053 | |||
2054 | /* The register location here is relative to the start of the URB |
||
2055 | * data. It will get adjusted to be a real location before |
||
2056 | * generate_code() time. |
||
2057 | */ |
||
2058 | struct brw_reg |
||
2059 | fs_visitor::interp_reg(int location, int channel) |
||
2060 | { |
||
2061 | int regnr = urb_setup[location] * 2 + channel / 2; |
||
2062 | int stride = (channel & 1) * 4; |
||
2063 | |||
2064 | assert(urb_setup[location] != -1); |
||
2065 | |||
2066 | return brw_vec1_grf(regnr, stride); |
||
2067 | } |
||
2068 | |||
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   /* Integer pixel centers come out of the PIXEL_X/PIXEL_Y opcodes as
    * unsigned words.
    */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;

   emit(FS_OPCODE_PIXEL_X, this->pixel_x);
   emit(FS_OPCODE_PIXEL_Y, this->pixel_y);

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* Allocate delta_x/delta_y as one vec2 so the pair lands in
       * consecutive registers (delta_y is delta_x with reg_offset + 1),
       * the layout the PLN instruction path consumes.
       */
      this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::vec2_type);
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
   } else {
      /* Without PLN the two deltas can live anywhere. */
      this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::float_type);
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::float_type);
   }
   /* delta = pixel center - v0 position (g1.0/g1.1 hold the vertex 0
    * screen position in the gen4 payload — TODO confirm against payload
    * layout docs).
    */
   emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
            this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
            this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(FS_OPCODE_LINTERP, wpos_w,
        this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        interp_reg(VARYING_SLOT_POS, 3));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}
||
2114 | |||
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Expand the per-subspan X/Y origins in g1 into per-pixel centers by
    * adding the immediate-vector pixel offsets (0x10101010 / 0x11001100
    * encode the 2x2 subspan layout — TODO confirm against the PRM's
    * imm-vector encoding).
    */
   emit(ADD(int_pixel_x,
            fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
            fs_reg(brw_imm_v(0x10101010))));
   emit(ADD(int_pixel_y,
            fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
            fs_reg(brw_imm_v(0x11001100))));

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(MOV(this->pixel_x, int_pixel_x));
   emit(MOV(this->pixel_y, int_pixel_y));

   this->current_annotation = "compute pos.w";
   /* On gen6 the thread payload already delivers W; only 1/W needs
    * computing.
    */
   this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);

   /* Barycentric coordinates are likewise delivered in the payload, two
    * registers (x then y) per interpolation mode.
    */
   for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
      uint8_t reg = c->barycentric_coord_reg[i];
      this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
      this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
   }

   this->current_annotation = NULL;
}
||
2156 | |||
/**
 * Move one color component (\p index of output \p target) into its MRF
 * slot for an FB write message starting at \p first_color_mrf.
 *
 * Handles the three hardware layouts: SIMD8 / gen6+ SIMD16 (two regs
 * per component), pre-gen6 SIMD16 with COMPR4, and pre-gen6 SIMD16
 * emitted as two forced SIMD8 halves.
 */
void
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
{
   int reg_width = dispatch_width / 8;
   fs_inst *inst;
   fs_reg color = outputs[target];
   fs_reg mrf;

   /* If there's no color data to be written, skip it. */
   if (color.file == BAD_FILE)
      return;

   /* Step to the requested component within the output. */
   color.reg_offset += index;

   if (dispatch_width == 8 || brw->gen >= 6) {
      /* SIMD8 write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       *
       * gen6 SIMD16 DP write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       * m + 4: b0
       * m + 5: b1
       * m + 6: a0
       * m + 7: a1
       */
      inst = emit(MOV(fs_reg(MRF, first_color_mrf + index * reg_width,
                             color.type),
                      color));
      /* GL clamp-fragment-color state is applied via MOV.sat. */
      inst->saturate = c->key.clamp_fragment_color;
   } else {
      /* pre-gen6 SIMD16 single source DP write looks like:
       * m + 0: r0
       * m + 1: g0
       * m + 2: b0
       * m + 3: a0
       * m + 4: r1
       * m + 5: g1
       * m + 6: b1
       * m + 7: a1
       */
      if (brw->has_compr4) {
         /* By setting the high bit of the MRF register number, we
          * indicate that we want COMPR4 mode - instead of doing the
          * usual destination + 1 for the second half we get
          * destination + 4.
          */
         inst = emit(MOV(fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
                                color.type),
                         color));
         inst->saturate = c->key.clamp_fragment_color;
      } else {
         /* No COMPR4: emit the two SIMD8 halves separately — first half
          * uncompressed to m + index, second half to m + index + 4.
          */
         push_force_uncompressed();
         inst = emit(MOV(fs_reg(MRF, first_color_mrf + index, color.type),
                         color));
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_uncompressed();

         push_force_sechalf();
         color.sechalf = true;
         inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type),
                         color));
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_sechalf();
         color.sechalf = false;
      }
   }
}
||
2230 | |||
4401 | Serge | 2231 | static int |
2232 | cond_for_alpha_func(GLenum func) |
||
2233 | { |
||
2234 | switch(func) { |
||
2235 | case GL_GREATER: |
||
2236 | return BRW_CONDITIONAL_G; |
||
2237 | case GL_GEQUAL: |
||
2238 | return BRW_CONDITIONAL_GE; |
||
2239 | case GL_LESS: |
||
2240 | return BRW_CONDITIONAL_L; |
||
2241 | case GL_LEQUAL: |
||
2242 | return BRW_CONDITIONAL_LE; |
||
2243 | case GL_EQUAL: |
||
2244 | return BRW_CONDITIONAL_EQ; |
||
2245 | case GL_NOTEQUAL: |
||
2246 | return BRW_CONDITIONAL_NEQ; |
||
2247 | default: |
||
2248 | assert(!"Not reached"); |
||
2249 | return 0; |
||
2250 | } |
||
2251 | } |
||
2252 | |||
/**
 * Alpha test support for when we compile it into the shader instead
 * of using the normal fixed-function alpha test.
 */
void
fs_visitor::emit_alpha_test()
{
   this->current_annotation = "Alpha test";

   fs_inst *cmp;
   /* GL_ALWAYS: every fragment passes; nothing to emit. */
   if (c->key.alpha_test_func == GL_ALWAYS)
      return;

   if (c->key.alpha_test_func == GL_NEVER) {
      /* f0.1 = 0 — compare a register against itself with NEQ, which
       * can never hold.
       */
      fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                      BRW_REGISTER_TYPE_UW));
      cmp = emit(CMP(reg_null_f, some_reg, some_reg,
                     BRW_CONDITIONAL_NEQ));
   } else {
      /* RT0 alpha (component 3 of output 0). */
      fs_reg color = outputs[0];
      color.reg_offset += 3;

      /* f0.1 &= func(color, ref) */
      cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref),
                     cond_for_alpha_func(c->key.alpha_test_func)));
   }
   /* Predicating the CMP and writing flag subregister 1 accumulates the
    * result into f0.1 (the pixel-enable mask used by the FB write)
    * rather than clobbering f0.0 — see the "&=" comments above.
    */
   cmp->predicate = BRW_PREDICATE_NORMAL;
   cmp->flag_subreg = 1;
}
||
2284 | |||
2285 | void |
||
4358 | Serge | 2286 | fs_visitor::emit_fb_writes() |
2287 | { |
||
2288 | this->current_annotation = "FB write header"; |
||
2289 | bool header_present = true; |
||
2290 | /* We can potentially have a message length of up to 15, so we have to set |
||
2291 | * base_mrf to either 0 or 1 in order to fit in m0..m15. |
||
2292 | */ |
||
2293 | int base_mrf = 1; |
||
2294 | int nr = base_mrf; |
||
2295 | int reg_width = dispatch_width / 8; |
||
2296 | bool do_dual_src = this->dual_src_output.file != BAD_FILE; |
||
2297 | bool src0_alpha_to_render_target = false; |
||
2298 | |||
2299 | if (dispatch_width == 16 && do_dual_src) { |
||
2300 | fail("GL_ARB_blend_func_extended not yet supported in 16-wide."); |
||
2301 | do_dual_src = false; |
||
2302 | } |
||
2303 | |||
2304 | /* From the Sandy Bridge PRM, volume 4, page 198: |
||
2305 | * |
||
2306 | * "Dispatched Pixel Enables. One bit per pixel indicating |
||
2307 | * which pixels were originally enabled when the thread was |
||
2308 | * dispatched. This field is only required for the end-of- |
||
2309 | * thread message and on all dual-source messages." |
||
2310 | */ |
||
2311 | if (brw->gen >= 6 && |
||
2312 | !this->fp->UsesKill && |
||
2313 | !do_dual_src && |
||
2314 | c->key.nr_color_regions == 1) { |
||
2315 | header_present = false; |
||
2316 | } |
||
2317 | |||
2318 | if (header_present) { |
||
2319 | src0_alpha_to_render_target = brw->gen >= 6 && |
||
2320 | !do_dual_src && |
||
2321 | c->key.replicate_alpha; |
||
2322 | /* m2, m3 header */ |
||
2323 | nr += 2; |
||
2324 | } |
||
2325 | |||
2326 | if (c->aa_dest_stencil_reg) { |
||
2327 | push_force_uncompressed(); |
||
2328 | emit(MOV(fs_reg(MRF, nr++), |
||
2329 | fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); |
||
2330 | pop_force_uncompressed(); |
||
2331 | } |
||
2332 | |||
2333 | /* Reserve space for color. It'll be filled in per MRT below. */ |
||
2334 | int color_mrf = nr; |
||
2335 | nr += 4 * reg_width; |
||
2336 | if (do_dual_src) |
||
2337 | nr += 4; |
||
2338 | if (src0_alpha_to_render_target) |
||
2339 | nr += reg_width; |
||
2340 | |||
2341 | if (c->source_depth_to_render_target) { |
||
2342 | if (brw->gen == 6 && dispatch_width == 16) { |
||
2343 | /* For outputting oDepth on gen6, SIMD8 writes have to be |
||
2344 | * used. This would require 8-wide moves of each half to |
||
2345 | * message regs, kind of like pre-gen5 SIMD16 FB writes. |
||
2346 | * Just bail on doing so for now. |
||
2347 | */ |
||
2348 | fail("Missing support for simd16 depth writes on gen6\n"); |
||
2349 | } |
||
2350 | |||
2351 | if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { |
||
2352 | /* Hand over gl_FragDepth. */ |
||
2353 | assert(this->frag_depth.file != BAD_FILE); |
||
2354 | emit(MOV(fs_reg(MRF, nr), this->frag_depth)); |
||
2355 | } else { |
||
2356 | /* Pass through the payload depth. */ |
||
2357 | emit(MOV(fs_reg(MRF, nr), |
||
2358 | fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); |
||
2359 | } |
||
2360 | nr += reg_width; |
||
2361 | } |
||
2362 | |||
2363 | if (c->dest_depth_reg) { |
||
2364 | emit(MOV(fs_reg(MRF, nr), |
||
2365 | fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); |
||
2366 | nr += reg_width; |
||
2367 | } |
||
2368 | |||
2369 | if (do_dual_src) { |
||
2370 | fs_reg src0 = this->outputs[0]; |
||
2371 | fs_reg src1 = this->dual_src_output; |
||
2372 | |||
2373 | this->current_annotation = ralloc_asprintf(this->mem_ctx, |
||
2374 | "FB write src0"); |
||
2375 | for (int i = 0; i < 4; i++) { |
||
2376 | fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0)); |
||
2377 | src0.reg_offset++; |
||
2378 | inst->saturate = c->key.clamp_fragment_color; |
||
2379 | } |
||
2380 | |||
2381 | this->current_annotation = ralloc_asprintf(this->mem_ctx, |
||
2382 | "FB write src1"); |
||
2383 | for (int i = 0; i < 4; i++) { |
||
2384 | fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type), |
||
2385 | src1)); |
||
2386 | src1.reg_offset++; |
||
2387 | inst->saturate = c->key.clamp_fragment_color; |
||
2388 | } |
||
2389 | |||
2390 | if (INTEL_DEBUG & DEBUG_SHADER_TIME) |
||
2391 | emit_shader_time_end(); |
||
2392 | |||
2393 | fs_inst *inst = emit(FS_OPCODE_FB_WRITE); |
||
2394 | inst->target = 0; |
||
2395 | inst->base_mrf = base_mrf; |
||
2396 | inst->mlen = nr - base_mrf; |
||
2397 | inst->eot = true; |
||
2398 | inst->header_present = header_present; |
||
2399 | |||
2400 | c->prog_data.dual_src_blend = true; |
||
2401 | this->current_annotation = NULL; |
||
2402 | return; |
||
2403 | } |
||
2404 | |||
2405 | for (int target = 0; target < c->key.nr_color_regions; target++) { |
||
2406 | this->current_annotation = ralloc_asprintf(this->mem_ctx, |
||
2407 | "FB write target %d", |
||
2408 | target); |
||
2409 | /* If src0_alpha_to_render_target is true, include source zero alpha |
||
2410 | * data in RenderTargetWrite message for targets > 0. |
||
2411 | */ |
||
2412 | int write_color_mrf = color_mrf; |
||
2413 | if (src0_alpha_to_render_target && target != 0) { |
||
2414 | fs_inst *inst; |
||
2415 | fs_reg color = outputs[0]; |
||
2416 | color.reg_offset += 3; |
||
2417 | |||
2418 | inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type), |
||
2419 | color)); |
||
2420 | inst->saturate = c->key.clamp_fragment_color; |
||
2421 | write_color_mrf = color_mrf + reg_width; |
||
2422 | } |
||
2423 | |||
2424 | for (unsigned i = 0; i < this->output_components[target]; i++) |
||
2425 | emit_color_write(target, i, write_color_mrf); |
||
2426 | |||
2427 | bool eot = false; |
||
2428 | if (target == c->key.nr_color_regions - 1) { |
||
2429 | eot = true; |
||
2430 | |||
2431 | if (INTEL_DEBUG & DEBUG_SHADER_TIME) |
||
2432 | emit_shader_time_end(); |
||
2433 | } |
||
2434 | |||
2435 | fs_inst *inst = emit(FS_OPCODE_FB_WRITE); |
||
2436 | inst->target = target; |
||
2437 | inst->base_mrf = base_mrf; |
||
2438 | if (src0_alpha_to_render_target && target == 0) |
||
2439 | inst->mlen = nr - base_mrf - reg_width; |
||
2440 | else |
||
2441 | inst->mlen = nr - base_mrf; |
||
2442 | inst->eot = eot; |
||
2443 | inst->header_present = header_present; |
||
2444 | } |
||
2445 | |||
2446 | if (c->key.nr_color_regions == 0) { |
||
2447 | /* Even if there's no color buffers enabled, we still need to send |
||
2448 | * alpha out the pipeline to our null renderbuffer to support |
||
2449 | * alpha-testing, alpha-to-coverage, and so on. |
||
2450 | */ |
||
2451 | emit_color_write(0, 3, color_mrf); |
||
2452 | |||
2453 | if (INTEL_DEBUG & DEBUG_SHADER_TIME) |
||
2454 | emit_shader_time_end(); |
||
2455 | |||
2456 | fs_inst *inst = emit(FS_OPCODE_FB_WRITE); |
||
2457 | inst->base_mrf = base_mrf; |
||
2458 | inst->mlen = nr - base_mrf; |
||
2459 | inst->eot = true; |
||
2460 | inst->header_present = header_present; |
||
2461 | } |
||
2462 | |||
2463 | this->current_annotation = NULL; |
||
2464 | } |
||
2465 | |||
2466 | void |
||
2467 | fs_visitor::resolve_ud_negate(fs_reg *reg) |
||
2468 | { |
||
2469 | if (reg->type != BRW_REGISTER_TYPE_UD || |
||
2470 | !reg->negate) |
||
2471 | return; |
||
2472 | |||
2473 | fs_reg temp = fs_reg(this, glsl_type::uint_type); |
||
2474 | emit(MOV(temp, *reg)); |
||
2475 | *reg = temp; |
||
2476 | } |
||
2477 | |||
2478 | void |
||
2479 | fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) |
||
2480 | { |
||
2481 | if (rvalue->type != glsl_type::bool_type) |
||
2482 | return; |
||
2483 | |||
2484 | fs_reg temp = fs_reg(this, glsl_type::bool_type); |
||
2485 | emit(AND(temp, *reg, fs_reg(1))); |
||
2486 | *reg = temp; |
||
2487 | } |
||
2488 | |||
/**
 * Construct an FS visitor for one shader at one dispatch width
 * (8 or 16 channels).
 */
fs_visitor::fs_visitor(struct brw_context *brw,
                       struct brw_wm_compile *c,
                       struct gl_shader_program *shader_prog,
                       struct gl_fragment_program *fp,
                       unsigned dispatch_width)
   : dispatch_width(dispatch_width)
{
   this->c = c;
   this->brw = brw;
   this->fp = fp;
   this->shader_prog = shader_prog;
   this->ctx = &brw->ctx;
   /* All per-compile allocations hang off this context and are freed in
    * one shot by the destructor.
    */
   this->mem_ctx = ralloc_context(NULL);
   /* shader_prog is NULL for fixed-function / ARB programs. */
   if (shader_prog)
      shader = (struct brw_shader *)
         shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
   else
      shader = NULL;
   this->failed = false;
   /* Maps ir_variable * -> fs_reg storage. */
   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);

   memset(this->outputs, 0, sizeof(this->outputs));
   memset(this->output_components, 0, sizeof(this->output_components));
   this->first_non_payload_grf = 0;
   /* On gen7+ the top of the GRF space is reserved to stand in for the
    * missing MRF file.
    */
   this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;

   this->current_annotation = NULL;
   this->base_ir = NULL;

   /* Virtual-GRF bookkeeping starts empty and grows on demand. */
   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;
   this->virtual_grf_start = NULL;
   this->virtual_grf_end = NULL;
   this->live_intervals_valid = false;

   this->params_remap = NULL;
   this->nr_params_remap = 0;

   this->force_uncompressed_stack = 0;
   this->force_sechalf_stack = 0;

   memset(&this->param_size, 0, sizeof(this->param_size));
}
||
2535 | |||
2536 | fs_visitor::~fs_visitor() |
||
2537 | { |
||
2538 | ralloc_free(this->mem_ctx); |
||
2539 | hash_table_dtor(this->variable_ht); |
||
2540 | }>>>>>>>>=>>=>>>>>>>><>>><>>>><>><>>>>>>>>>>>>>>>>3;>>>>><>>><>>>>>>>>=>>>>>>> |