Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright © 2010 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | #include "main/macros.h" |
||
25 | #include "brw_context.h" |
||
26 | #include "brw_vs.h" |
||
27 | #include "brw_gs.h" |
||
28 | #include "brw_fs.h" |
||
29 | #include "brw_cfg.h" |
||
30 | #include "brw_nir.h" |
||
31 | #include "glsl/ir_optimization.h" |
||
32 | #include "glsl/glsl_parser_extras.h" |
||
33 | #include "main/shaderapi.h" |
||
34 | |||
35 | struct brw_compiler * |
||
36 | brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) |
||
37 | { |
||
38 | struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); |
||
39 | |||
40 | compiler->devinfo = devinfo; |
||
41 | |||
42 | brw_fs_alloc_reg_sets(compiler); |
||
43 | brw_vec4_alloc_reg_set(compiler); |
||
44 | |||
45 | return compiler; |
||
46 | } |
||
47 | |||
48 | struct gl_shader * |
||
49 | brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) |
||
50 | { |
||
51 | struct brw_shader *shader; |
||
52 | |||
53 | shader = rzalloc(NULL, struct brw_shader); |
||
54 | if (shader) { |
||
55 | shader->base.Type = type; |
||
56 | shader->base.Stage = _mesa_shader_enum_to_shader_stage(type); |
||
57 | shader->base.Name = name; |
||
58 | _mesa_init_shader(ctx, &shader->base); |
||
59 | } |
||
60 | |||
61 | return &shader->base; |
||
62 | } |
||
63 | |||
64 | /** |
||
65 | * Performs a compile of the shader stages even when we don't know |
||
66 | * what non-orthogonal state will be set, in the hope that it reflects |
||
67 | * the eventual NOS used, and thus allows us to produce link failures. |
||
68 | */ |
||
69 | static bool |
||
70 | brw_shader_precompile(struct gl_context *ctx, |
||
71 | struct gl_shader_program *sh_prog) |
||
72 | { |
||
73 | struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX]; |
||
74 | struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; |
||
75 | struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; |
||
76 | struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE]; |
||
77 | |||
78 | if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program)) |
||
79 | return false; |
||
80 | |||
81 | if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program)) |
||
82 | return false; |
||
83 | |||
84 | if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program)) |
||
85 | return false; |
||
86 | |||
87 | if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program)) |
||
88 | return false; |
||
89 | |||
90 | return true; |
||
91 | } |
||
92 | |||
93 | static inline bool |
||
94 | is_scalar_shader_stage(struct brw_context *brw, int stage) |
||
95 | { |
||
96 | switch (stage) { |
||
97 | case MESA_SHADER_FRAGMENT: |
||
98 | return true; |
||
99 | case MESA_SHADER_VERTEX: |
||
100 | return brw->scalar_vs; |
||
101 | default: |
||
102 | return false; |
||
103 | } |
||
104 | } |
||
105 | |||
/**
 * Lower the GLSL pack/unpack built-ins that the hardware cannot (or should
 * not) execute directly, based on backend (scalar vs. vec4) and gen.
 */
static void
brw_lower_packing_builtins(struct brw_context *brw,
                           gl_shader_stage shader_type,
                           exec_list *ir)
{
   /* The [un]packSnorm2x16 / [un]packUnorm2x16 built-ins are always
    * lowered to arithmetic.
    */
   int ops = LOWER_PACK_SNORM_2x16
           | LOWER_UNPACK_SNORM_2x16
           | LOWER_PACK_UNORM_2x16
           | LOWER_UNPACK_UNORM_2x16;

   /* The 4x8 variants are additionally lowered only for scalar backends. */
   if (is_scalar_shader_stage(brw, shader_type)) {
      ops |= LOWER_UNPACK_UNORM_4x8
           | LOWER_UNPACK_SNORM_4x8
           | LOWER_PACK_UNORM_4x8
           | LOWER_PACK_SNORM_4x8;
   }

   if (brw->gen >= 7) {
      /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
       * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
       * lowering is needed. For SOA code, the Half2x16 ops must be
       * scalarized.
       */
      if (is_scalar_shader_stage(brw, shader_type)) {
         ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
              | LOWER_UNPACK_HALF_2x16_TO_SPLIT;
      }
   } else {
      /* Pre-gen7 has no f32to16/f16to32, so lower Half2x16 fully. */
      ops |= LOWER_PACK_HALF_2x16
           | LOWER_UNPACK_HALF_2x16;
   }

   lower_packing_builtins(ir, ops);
}
||
140 | |||
/**
 * Run the i965-specific GLSL IR lowering and optimization pipeline on a
 * linked shader.  Pass ordering here is significant: several passes insert
 * IR that later passes must see.
 */
static void
process_glsl_ir(struct brw_context *brw,
                struct gl_shader_program *shader_prog,
                struct gl_shader *shader)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gl_shader_compiler_options *options =
      &ctx->Const.ShaderCompilerOptions[shader->Stage];

   /* Temporary memory context for any new IR. */
   void *mem_ctx = ralloc_context(NULL);

   /* Move the existing IR onto the temporary context so everything that is
    * dead after optimization gets freed in one shot below.
    */
   ralloc_adopt(mem_ctx, shader->ir);

   /* lower_packing_builtins() inserts arithmetic instructions, so it
    * must precede lower_instructions().
    */
   brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
   do_mat_op_to_vec(shader->ir);
   /* BFM/BFI only exist on gen7+; elsewhere keep bitfieldInsert as-is. */
   const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
   lower_instructions(shader->ir,
                      MOD_TO_FLOOR |
                      DIV_TO_MUL_RCP |
                      SUB_TO_ADD_NEG |
                      EXP_TO_EXP2 |
                      LOG_TO_LOG2 |
                      bitfield_insert |
                      LDEXP_TO_ARITH);

   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
    * if-statements need to be flattened.
    */
   if (brw->gen < 6)
      lower_if_to_cond_assign(shader->ir, 16);

   do_lower_texture_projection(shader->ir);
   brw_lower_texture_gradients(brw, shader->ir);
   do_vec_index_to_cond_assign(shader->ir);
   lower_vector_insert(shader->ir, true);
   /* When NIR handles this stage, cubemap normalization happens there. */
   if (options->NirOptions == NULL)
      brw_do_cubemap_normalize(shader->ir);
   lower_offset_arrays(shader->ir);
   brw_do_lower_unnormalized_offset(shader->ir);
   lower_noise(shader->ir);
   lower_quadop_vector(shader->ir, false);

   /* Turn disallowed indirect addressing into conditional-assign chains,
    * per the per-stage compiler options.
    */
   bool lowered_variable_indexing =
      lower_variable_index_to_cond_assign(shader->ir,
                                          options->EmitNoIndirectInput,
                                          options->EmitNoIndirectOutput,
                                          options->EmitNoIndirectTemp,
                                          options->EmitNoIndirectUniform);

   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
      perf_debug("Unsupported form of variable indexing in FS; falling "
                 "back to very inefficient code generation\n");
   }

   lower_ubo_reference(shader, shader->ir);

   /* Iterate lowering + common optimization to a fixed point. */
   bool progress;
   do {
      progress = false;

      if (is_scalar_shader_stage(brw, shader->Stage)) {
         brw_do_channel_expressions(shader->ir);
         brw_do_vector_splitting(shader->ir);
      }

      progress = do_lower_jumps(shader->ir, true, true,
                                true, /* main return */
                                false, /* continue */
                                false /* loops */
                                ) || progress;

      progress = do_common_optimization(shader->ir, true, true,
                                        options, ctx->Const.NativeIntegers) || progress;
   } while (progress);

   if (options->NirOptions != NULL)
      lower_output_reads(shader->ir);

   validate_ir_tree(shader->ir);

   /* Now that we've finished altering the linked IR, reparent any live IR back
    * to the permanent memory context, and free the temporary one (discarding any
    * junk we optimized away).
    */
   reparent_ir(shader->ir, shader->ir);
   ralloc_free(mem_ctx);

   /* Optional debug dump of the post-lowering IR (MESA_GLSL=dump). */
   if (ctx->_Shader->Flags & GLSL_DUMP) {
      fprintf(stderr, "\n");
      fprintf(stderr, "GLSL IR for linked %s program %d:\n",
              _mesa_shader_stage_to_string(shader->Stage),
              shader_prog->Name);
      _mesa_print_ir(stderr, shader->ir, NULL);
      fprintf(stderr, "\n");
   }
}
||
241 | |||
/**
 * Driver LinkShader hook: for each linked stage, create a gl_program,
 * run the IR lowering pipeline, collect built-in uniform state references,
 * and optionally build NIR and precompile.
 *
 * Returns false on allocation failure or precompile failure.
 */
GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
   struct brw_context *brw = brw_context(ctx);
   unsigned int stage;

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_shader *shader = shProg->_LinkedShaders[stage];
      const struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[stage];

      if (!shader)
         continue;

      struct gl_program *prog =
         ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
                                shader->Name);
      if (!prog)
         return false;
      prog->Parameters = _mesa_new_parameter_list();

      _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);

      process_glsl_ir(brw, shProg, shader);

      /* Make a pass over the IR to add state references for any built-in
       * uniforms that are used.  This has to be done now (during linking).
       * Code generation doesn't happen until the first time this shader is
       * used for rendering.  Waiting until then to generate the parameters is
       * too late.  At that point, the values for the built-in uniforms won't
       * get sent to the shader.
       */
      foreach_in_list(ir_instruction, node, shader->ir) {
         ir_variable *var = node->as_variable();

         /* Only built-in ("gl_"-prefixed) uniforms carry state slots. */
         if ((var == NULL) || (var->data.mode != ir_var_uniform)
             || (strncmp(var->name, "gl_", 3) != 0))
            continue;

         const ir_state_slot *const slots = var->get_state_slots();
         assert(slots != NULL);

         for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
            _mesa_add_state_reference(prog->Parameters,
                                      (gl_state_index *) slots[i].tokens);
         }
      }

      do_set_program_inouts(shader->ir, prog, shader->Stage);

      prog->SamplersUsed = shader->active_samplers;
      prog->ShadowSamplers = shader->shadow_samplers;
      _mesa_update_shader_textures_used(shProg, prog);

      _mesa_reference_program(ctx, &shader->Program, prog);

      brw_add_texrect_params(prog);

      /* Stages with NIR options get their NIR built at link time. */
      if (options->NirOptions)
         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);

      /* Drop our local reference; shader->Program keeps the program alive. */
      _mesa_reference_program(ctx, &prog, NULL);
   }

   /* Dump the pre-link sources when GLSL dumping is enabled (skip the
    * internal name-0 programs).
    */
   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
      for (unsigned i = 0; i < shProg->NumShaders; i++) {
         const struct gl_shader *sh = shProg->Shaders[i];
         if (!sh)
            continue;

         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
                 _mesa_shader_stage_to_string(sh->Stage),
                 i, shProg->Name);
         fprintf(stderr, "%s", sh->Source);
         fprintf(stderr, "\n");
      }
   }

   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
      return false;

   return true;
}
||
325 | |||
326 | |||
/**
 * Map a GLSL base type onto the hardware register type used to hold it.
 * Arrays recurse on the element type.
 */
enum brw_reg_type
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Booleans are stored as 0/~0 in a signed dword. */
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
      return brw_type_for_base_type(type->fields.array);
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_ATOMIC_UINT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_IMAGE:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
   case GLSL_TYPE_INTERFACE:
   case GLSL_TYPE_DOUBLE:
      /* These never reach register allocation. */
      unreachable("not reached");
   }

   return BRW_REGISTER_TYPE_F;
}
||
359 | |||
360 | enum brw_conditional_mod |
||
361 | brw_conditional_for_comparison(unsigned int op) |
||
362 | { |
||
363 | switch (op) { |
||
364 | case ir_binop_less: |
||
365 | return BRW_CONDITIONAL_L; |
||
366 | case ir_binop_greater: |
||
367 | return BRW_CONDITIONAL_G; |
||
368 | case ir_binop_lequal: |
||
369 | return BRW_CONDITIONAL_LE; |
||
370 | case ir_binop_gequal: |
||
371 | return BRW_CONDITIONAL_GE; |
||
372 | case ir_binop_equal: |
||
373 | case ir_binop_all_equal: /* same as equal for scalars */ |
||
374 | return BRW_CONDITIONAL_Z; |
||
375 | case ir_binop_nequal: |
||
376 | case ir_binop_any_nequal: /* same as nequal for scalars */ |
||
377 | return BRW_CONDITIONAL_NZ; |
||
378 | default: |
||
379 | unreachable("not reached: bad operation for comparison"); |
||
380 | } |
||
381 | } |
||
382 | |||
383 | uint32_t |
||
384 | brw_math_function(enum opcode op) |
||
385 | { |
||
386 | switch (op) { |
||
387 | case SHADER_OPCODE_RCP: |
||
388 | return BRW_MATH_FUNCTION_INV; |
||
389 | case SHADER_OPCODE_RSQ: |
||
390 | return BRW_MATH_FUNCTION_RSQ; |
||
391 | case SHADER_OPCODE_SQRT: |
||
392 | return BRW_MATH_FUNCTION_SQRT; |
||
393 | case SHADER_OPCODE_EXP2: |
||
394 | return BRW_MATH_FUNCTION_EXP; |
||
395 | case SHADER_OPCODE_LOG2: |
||
396 | return BRW_MATH_FUNCTION_LOG; |
||
397 | case SHADER_OPCODE_POW: |
||
398 | return BRW_MATH_FUNCTION_POW; |
||
399 | case SHADER_OPCODE_SIN: |
||
400 | return BRW_MATH_FUNCTION_SIN; |
||
401 | case SHADER_OPCODE_COS: |
||
402 | return BRW_MATH_FUNCTION_COS; |
||
403 | case SHADER_OPCODE_INT_QUOTIENT: |
||
404 | return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; |
||
405 | case SHADER_OPCODE_INT_REMAINDER: |
||
406 | return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; |
||
407 | default: |
||
408 | unreachable("not reached: unknown math function"); |
||
409 | } |
||
410 | } |
||
411 | |||
/**
 * Pack constant texel offsets into the message-header dword format.
 *
 * Returns 0 for a nonconstant offset (the caller handles that case).
 * Each component is truncated to a 4-bit two's-complement nibble.
 *
 * Fix: the original computed (offsets[i] << shift) and masked afterward,
 * which left-shifts a negative int — undefined behavior in C/C++.
 * Masking to the nibble first yields the identical packed value without
 * the UB.
 */
uint32_t
brw_texture_offset(int *offsets, unsigned num_components)
{
   if (!offsets) return 0;  /* nonconstant offset; caller will handle it. */

   /* Combine all three offsets into a single unsigned dword:
    *
    *    bits 11:8 - U Offset (X component)
    *    bits  7:4 - V Offset (Y component)
    *    bits  3:0 - R Offset (Z component)
    */
   unsigned offset_bits = 0;
   for (unsigned i = 0; i < num_components; i++) {
      const unsigned shift = 4 * (2 - i);
      offset_bits |= ((unsigned)offsets[i] & 0xF) << shift;
   }
   return offset_bits;
}
||
430 | |||
/**
 * Return a human-readable mnemonic for an opcode, for disassembly and
 * debug dumps.  Hardware opcodes come straight from opcode_descs[];
 * virtual (IR-level) opcodes get hand-written names here.
 */
const char *
brw_instruction_name(enum opcode op)
{
   switch (op) {
   /* Real hardware opcodes: use the shared descriptor table.
    * (Case ranges are a GNU extension used throughout this driver.)
    */
   case BRW_OPCODE_MOV ... BRW_OPCODE_NOP:
      assert(opcode_descs[op].name);
      return opcode_descs[op].name;
   case FS_OPCODE_FB_WRITE:
      return "fb_write";
   case FS_OPCODE_BLORP_FB_WRITE:
      return "blorp_fb_write";
   case FS_OPCODE_REP_FB_WRITE:
      return "rep_fb_write";

   /* Math-box functions. */
   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   /* Texturing. */
   case SHADER_OPCODE_TEX:
      return "tex";
   case SHADER_OPCODE_TXD:
      return "txd";
   case SHADER_OPCODE_TXF:
      return "txf";
   case SHADER_OPCODE_TXL:
      return "txl";
   case SHADER_OPCODE_TXS:
      return "txs";
   case FS_OPCODE_TXB:
      return "txb";
   case SHADER_OPCODE_TXF_CMS:
      return "txf_cms";
   case SHADER_OPCODE_TXF_UMS:
      return "txf_ums";
   case SHADER_OPCODE_TXF_MCS:
      return "txf_mcs";
   case SHADER_OPCODE_LOD:
      return "lod";
   case SHADER_OPCODE_TG4:
      return "tg4";
   case SHADER_OPCODE_TG4_OFFSET:
      return "tg4_offset";
   case SHADER_OPCODE_SHADER_TIME_ADD:
      return "shader_time_add";

   /* Surface / atomic messages. */
   case SHADER_OPCODE_UNTYPED_ATOMIC:
      return "untyped_atomic";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      return "untyped_surface_read";
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
      return "untyped_surface_write";
   case SHADER_OPCODE_TYPED_ATOMIC:
      return "typed_atomic";
   case SHADER_OPCODE_TYPED_SURFACE_READ:
      return "typed_surface_read";
   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
      return "typed_surface_write";
   case SHADER_OPCODE_MEMORY_FENCE:
      return "memory_fence";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";

   /* Scratch-space spills/fills and URB writes. */
   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      return "gen4_scratch_read";
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      return "gen4_scratch_write";
   case SHADER_OPCODE_GEN7_SCRATCH_READ:
      return "gen7_scratch_read";
   case SHADER_OPCODE_URB_WRITE_SIMD8:
      return "gen8_urb_write_simd8";

   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
      return "find_live_channel";
   case SHADER_OPCODE_BROADCAST:
      return "broadcast";

   case VEC4_OPCODE_MOV_BYTES:
      return "mov_bytes";
   case VEC4_OPCODE_PACK_BYTES:
      return "pack_bytes";
   case VEC4_OPCODE_UNPACK_UNIFORM:
      return "unpack_uniform";

   /* Derivatives. */
   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_CINTERP:
      return "cinterp";
   case FS_OPCODE_LINTERP:
      return "linterp";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   /* Pull-constant loads. */
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
      return "uniform_pull_const_gen7";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
      return "varying_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
      return "varying_pull_const_gen7";

   case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
      return "mov_dispatch_to_flags";
   case FS_OPCODE_DISCARD_JUMP:
      return "discard_jump";

   case FS_OPCODE_SET_OMASK:
      return "set_omask";
   case FS_OPCODE_SET_SAMPLE_ID:
      return "set_sample_id";
   case FS_OPCODE_SET_SIMD4X2_OFFSET:
      return "set_simd4x2_offset";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
      return "unpack_half_2x16_split_x";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
      return "unpack_half_2x16_split_y";

   case FS_OPCODE_PLACEHOLDER_HALT:
      return "placeholder_halt";

   /* Interpolation messages. */
   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
      return "interp_centroid";
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   /* Vertex-shader virtual opcodes. */
   case VS_OPCODE_URB_WRITE:
      return "vs_urb_write";
   case VS_OPCODE_PULL_CONSTANT_LOAD:
      return "pull_constant_load";
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      return "pull_constant_load_gen7";

   case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
      return "set_simd4x2_header_gen9";

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      return "unpack_flags_simd4x2";

   /* Geometry-shader virtual opcodes. */
   case GS_OPCODE_URB_WRITE:
      return "gs_urb_write";
   case GS_OPCODE_URB_WRITE_ALLOCATE:
      return "gs_urb_write_allocate";
   case GS_OPCODE_THREAD_END:
      return "gs_thread_end";
   case GS_OPCODE_SET_WRITE_OFFSET:
      return "set_write_offset";
   case GS_OPCODE_SET_VERTEX_COUNT:
      return "set_vertex_count";
   case GS_OPCODE_SET_DWORD_2:
      return "set_dword_2";
   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      return "prepare_channel_masks";
   case GS_OPCODE_SET_CHANNEL_MASKS:
      return "set_channel_masks";
   case GS_OPCODE_GET_INSTANCE_ID:
      return "get_instance_id";
   case GS_OPCODE_FF_SYNC:
      return "ff_sync";
   case GS_OPCODE_SET_PRIMITIVE_ID:
      return "set_primitive_id";
   case GS_OPCODE_SVB_WRITE:
      return "gs_svb_write";
   case GS_OPCODE_SVB_SET_DST_INDEX:
      return "gs_svb_set_dst_index";
   case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
      return "gs_ff_sync_set_primitives";
   case CS_OPCODE_CS_TERMINATE:
      return "cs_terminate";
   }

   unreachable("not reached");
}
||
638 | |||
/**
 * Clamp an immediate to its type's saturated range in place.
 *
 * Returns true if the stored value changed, false if saturation is a
 * no-op for this type/value.  Dword-sized integer types need no work
 * because saturation cannot narrow them further.
 */
bool
brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   /* Reinterpret the raw immediate bits as unsigned/signed/float. */
   union {
      unsigned ud;
      int d;
      float f;
   } imm = { reg->dw1.ud }, sat_imm = { 0 };

   switch (type) {
   case BRW_REGISTER_TYPE_UD:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_Q:
      /* Nothing to do. */
      return false;
   case BRW_REGISTER_TYPE_UW:
      sat_imm.ud = CLAMP(imm.ud, 0, USHRT_MAX);
      break;
   case BRW_REGISTER_TYPE_W:
      sat_imm.d = CLAMP(imm.d, SHRT_MIN, SHRT_MAX);
      break;
   case BRW_REGISTER_TYPE_F:
      /* Float saturation clamps to [0, 1]. */
      sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
      break;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_VF:
      unreachable("unimplemented: saturate vector immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      unreachable("unimplemented: saturate DF/HF immediate");
   }

   /* Compare raw bits so the check works uniformly for all types. */
   if (imm.ud != sat_imm.ud) {
      reg->dw1.ud = sat_imm.ud;
      return true;
   }
   return false;
}
||
682 | |||
/**
 * Negate an immediate value in place, if this backend knows how.
 *
 * Returns true on success.  The unimplemented cases use assert(!"...")
 * (not unreachable()), so in release builds they fall through the switch
 * and return false, leaving the immediate untouched.
 */
bool
brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   switch (type) {
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      /* Two's-complement negate covers both signed and unsigned dwords. */
      reg->dw1.d = -reg->dw1.d;
      return true;
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
      /* Sign-extend the low word before negating. */
      reg->dw1.d = -(int16_t)reg->dw1.ud;
      return true;
   case BRW_REGISTER_TYPE_F:
      reg->dw1.f = -reg->dw1.f;
      return true;
   case BRW_REGISTER_TYPE_VF:
      /* A VF immediate packs four 8-bit floats; flip each sign bit. */
      reg->dw1.ud ^= 0x80808080;
      return true;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_V:
      assert(!"unimplemented: negate UV/V immediate");
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_Q:
      assert(!"unimplemented: negate UQ/Q immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      assert(!"unimplemented: negate DF/HF immediate");
   }

   return false;
}
||
717 | |||
/**
 * Take the absolute value of an immediate in place, if supported.
 *
 * Returns true on success.  Like brw_negate_immediate(), unimplemented
 * cases assert and fall through to return false in release builds.
 */
bool
brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   switch (type) {
   case BRW_REGISTER_TYPE_D:
      reg->dw1.d = abs(reg->dw1.d);
      return true;
   case BRW_REGISTER_TYPE_W:
      /* Sign-extend the low word before taking the absolute value. */
      reg->dw1.d = abs((int16_t)reg->dw1.ud);
      return true;
   case BRW_REGISTER_TYPE_F:
      reg->dw1.f = fabsf(reg->dw1.f);
      return true;
   case BRW_REGISTER_TYPE_VF:
      /* Clear the sign bit of each packed 8-bit float. */
      reg->dw1.ud &= ~0x80808080;
      return true;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_UD:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_UV:
      /* Presumably the absolute value modifier on an unsigned source is a
       * nop, but it would be nice to confirm.
       */
      assert(!"unimplemented: abs unsigned immediate");
   case BRW_REGISTER_TYPE_V:
      assert(!"unimplemented: abs V immediate");
   case BRW_REGISTER_TYPE_Q:
      assert(!"unimplemented: abs Q immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      assert(!"unimplemented: abs DF/HF immediate");
   }

   return false;
}
||
756 | |||
/**
 * Common constructor for the per-stage code-generation visitors.
 *
 * shader_prog may be NULL (e.g. for fixed-function or ARB programs), in
 * which case the brw_shader pointer is NULL too.
 */
backend_visitor::backend_visitor(struct brw_context *brw,
                                 struct gl_shader_program *shader_prog,
                                 struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data,
                                 gl_shader_stage stage)
   : brw(brw),
     devinfo(brw->intelScreen->devinfo),
     ctx(&brw->ctx),
     shader(shader_prog ?
        (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
     shader_prog(shader_prog),
     prog(prog),
     stage_prog_data(stage_prog_data),
     cfg(NULL),
     stage(stage)
{
   /* Cache per-stage debug state and names so hot paths don't recompute. */
   debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
   stage_name = _mesa_shader_stage_to_string(stage);
   stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
}
||
777 | |||
778 | bool |
||
779 | backend_reg::is_zero() const |
||
780 | { |
||
781 | if (file != IMM) |
||
782 | return false; |
||
783 | |||
784 | return fixed_hw_reg.dw1.d == 0; |
||
785 | } |
||
786 | |||
787 | bool |
||
788 | backend_reg::is_one() const |
||
789 | { |
||
790 | if (file != IMM) |
||
791 | return false; |
||
792 | |||
793 | return type == BRW_REGISTER_TYPE_F |
||
794 | ? fixed_hw_reg.dw1.f == 1.0 |
||
795 | : fixed_hw_reg.dw1.d == 1; |
||
796 | } |
||
797 | |||
798 | bool |
||
799 | backend_reg::is_negative_one() const |
||
800 | { |
||
801 | if (file != IMM) |
||
802 | return false; |
||
803 | |||
804 | switch (type) { |
||
805 | case BRW_REGISTER_TYPE_F: |
||
806 | return fixed_hw_reg.dw1.f == -1.0; |
||
807 | case BRW_REGISTER_TYPE_D: |
||
808 | return fixed_hw_reg.dw1.d == -1; |
||
809 | default: |
||
810 | return false; |
||
811 | } |
||
812 | } |
||
813 | |||
814 | bool |
||
815 | backend_reg::is_null() const |
||
816 | { |
||
817 | return file == HW_REG && |
||
818 | fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE && |
||
819 | fixed_hw_reg.nr == BRW_ARF_NULL; |
||
820 | } |
||
821 | |||
822 | |||
823 | bool |
||
824 | backend_reg::is_accumulator() const |
||
825 | { |
||
826 | return file == HW_REG && |
||
827 | fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE && |
||
828 | fixed_hw_reg.nr == BRW_ARF_ACCUMULATOR; |
||
829 | } |
||
830 | |||
831 | bool |
||
832 | backend_reg::in_range(const backend_reg &r, unsigned n) const |
||
833 | { |
||
834 | return (file == r.file && |
||
835 | reg == r.reg && |
||
836 | reg_offset >= r.reg_offset && |
||
837 | reg_offset < r.reg_offset + n); |
||
838 | } |
||
839 | |||
840 | bool |
||
841 | backend_instruction::is_commutative() const |
||
842 | { |
||
843 | switch (opcode) { |
||
844 | case BRW_OPCODE_AND: |
||
845 | case BRW_OPCODE_OR: |
||
846 | case BRW_OPCODE_XOR: |
||
847 | case BRW_OPCODE_ADD: |
||
848 | case BRW_OPCODE_MUL: |
||
849 | return true; |
||
850 | case BRW_OPCODE_SEL: |
||
851 | /* MIN and MAX are commutative. */ |
||
852 | if (conditional_mod == BRW_CONDITIONAL_GE || |
||
853 | conditional_mod == BRW_CONDITIONAL_L) { |
||
854 | return true; |
||
855 | } |
||
856 | /* fallthrough */ |
||
857 | default: |
||
858 | return false; |
||
859 | } |
||
860 | } |
||
861 | |||
862 | bool |
||
863 | backend_instruction::is_3src() const |
||
864 | { |
||
865 | return opcode < ARRAY_SIZE(opcode_descs) && opcode_descs[opcode].nsrc == 3; |
||
866 | } |
||
867 | |||
868 | bool |
||
869 | backend_instruction::is_tex() const |
||
870 | { |
||
871 | return (opcode == SHADER_OPCODE_TEX || |
||
872 | opcode == FS_OPCODE_TXB || |
||
873 | opcode == SHADER_OPCODE_TXD || |
||
874 | opcode == SHADER_OPCODE_TXF || |
||
875 | opcode == SHADER_OPCODE_TXF_CMS || |
||
876 | opcode == SHADER_OPCODE_TXF_UMS || |
||
877 | opcode == SHADER_OPCODE_TXF_MCS || |
||
878 | opcode == SHADER_OPCODE_TXL || |
||
879 | opcode == SHADER_OPCODE_TXS || |
||
880 | opcode == SHADER_OPCODE_LOD || |
||
881 | opcode == SHADER_OPCODE_TG4 || |
||
882 | opcode == SHADER_OPCODE_TG4_OFFSET); |
||
883 | } |
||
884 | |||
885 | bool |
||
886 | backend_instruction::is_math() const |
||
887 | { |
||
888 | return (opcode == SHADER_OPCODE_RCP || |
||
889 | opcode == SHADER_OPCODE_RSQ || |
||
890 | opcode == SHADER_OPCODE_SQRT || |
||
891 | opcode == SHADER_OPCODE_EXP2 || |
||
892 | opcode == SHADER_OPCODE_LOG2 || |
||
893 | opcode == SHADER_OPCODE_SIN || |
||
894 | opcode == SHADER_OPCODE_COS || |
||
895 | opcode == SHADER_OPCODE_INT_QUOTIENT || |
||
896 | opcode == SHADER_OPCODE_INT_REMAINDER || |
||
897 | opcode == SHADER_OPCODE_POW); |
||
898 | } |
||
899 | |||
900 | bool |
||
901 | backend_instruction::is_control_flow() const |
||
902 | { |
||
903 | switch (opcode) { |
||
904 | case BRW_OPCODE_DO: |
||
905 | case BRW_OPCODE_WHILE: |
||
906 | case BRW_OPCODE_IF: |
||
907 | case BRW_OPCODE_ELSE: |
||
908 | case BRW_OPCODE_ENDIF: |
||
909 | case BRW_OPCODE_BREAK: |
||
910 | case BRW_OPCODE_CONTINUE: |
||
911 | return true; |
||
912 | default: |
||
913 | return false; |
||
914 | } |
||
915 | } |
||
916 | |||
917 | bool |
||
918 | backend_instruction::can_do_source_mods() const |
||
919 | { |
||
920 | switch (opcode) { |
||
921 | case BRW_OPCODE_ADDC: |
||
922 | case BRW_OPCODE_BFE: |
||
923 | case BRW_OPCODE_BFI1: |
||
924 | case BRW_OPCODE_BFI2: |
||
925 | case BRW_OPCODE_BFREV: |
||
926 | case BRW_OPCODE_CBIT: |
||
927 | case BRW_OPCODE_FBH: |
||
928 | case BRW_OPCODE_FBL: |
||
929 | case BRW_OPCODE_SUBB: |
||
930 | return false; |
||
931 | default: |
||
932 | return true; |
||
933 | } |
||
934 | } |
||
935 | |||
/**
 * Returns true if the instruction's opcode supports the destination
 * saturate modifier.  Opcodes not in this table must not have .sat set.
 */
bool
backend_instruction::can_do_saturate() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MATH:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SEL:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case FS_OPCODE_LINTERP:
   case SHADER_OPCODE_COS:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_SQRT:
      return true;
   default:
      return false;
   }
}
||
979 | |||
/**
 * Returns true if the instruction's opcode supports a conditional
 * modifier (e.g. .z, .ge) on its result.
 */
bool
backend_instruction::can_do_cmod() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ADDC:
   case BRW_OPCODE_AND:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_CMP:
   case BRW_OPCODE_CMPN:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_FRC:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_LZD:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_NOT:
   case BRW_OPCODE_OR:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SAD2:
   case BRW_OPCODE_SADA2:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case BRW_OPCODE_SUBB:
   case BRW_OPCODE_XOR:
   case FS_OPCODE_CINTERP:
   case FS_OPCODE_LINTERP:
      return true;
   default:
      return false;
   }
}
||
1026 | |||
1027 | bool |
||
1028 | backend_instruction::reads_accumulator_implicitly() const |
||
1029 | { |
||
1030 | switch (opcode) { |
||
1031 | case BRW_OPCODE_MAC: |
||
1032 | case BRW_OPCODE_MACH: |
||
1033 | case BRW_OPCODE_SADA2: |
||
1034 | return true; |
||
1035 | default: |
||
1036 | return false; |
||
1037 | } |
||
1038 | } |
||
1039 | |||
1040 | bool |
||
1041 | backend_instruction::writes_accumulator_implicitly(const struct brw_device_info *devinfo) const |
||
1042 | { |
||
1043 | return writes_accumulator || |
||
1044 | (devinfo->gen < 6 && |
||
1045 | ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || |
||
1046 | (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP && |
||
1047 | opcode != FS_OPCODE_CINTERP))); |
||
1048 | } |
||
1049 | |||
1050 | bool |
||
1051 | backend_instruction::has_side_effects() const |
||
1052 | { |
||
1053 | switch (opcode) { |
||
1054 | case SHADER_OPCODE_UNTYPED_ATOMIC: |
||
1055 | case SHADER_OPCODE_GEN4_SCRATCH_WRITE: |
||
1056 | case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: |
||
1057 | case SHADER_OPCODE_TYPED_ATOMIC: |
||
1058 | case SHADER_OPCODE_TYPED_SURFACE_WRITE: |
||
1059 | case SHADER_OPCODE_MEMORY_FENCE: |
||
1060 | case SHADER_OPCODE_URB_WRITE_SIMD8: |
||
1061 | case FS_OPCODE_FB_WRITE: |
||
1062 | return true; |
||
1063 | default: |
||
1064 | return false; |
||
1065 | } |
||
1066 | } |
||
1067 | |||
1068 | #ifndef NDEBUG |
||
1069 | static bool |
||
1070 | inst_is_in_block(const bblock_t *block, const backend_instruction *inst) |
||
1071 | { |
||
1072 | bool found = false; |
||
1073 | foreach_inst_in_block (backend_instruction, i, block) { |
||
1074 | if (inst == i) { |
||
1075 | found = true; |
||
1076 | } |
||
1077 | } |
||
1078 | return found; |
||
1079 | } |
||
1080 | #endif |
||
1081 | |||
1082 | static void |
||
1083 | adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) |
||
1084 | { |
||
1085 | for (bblock_t *block_iter = start_block->next(); |
||
1086 | !block_iter->link.is_tail_sentinel(); |
||
1087 | block_iter = block_iter->next()) { |
||
1088 | block_iter->start_ip += ip_adjustment; |
||
1089 | block_iter->end_ip += ip_adjustment; |
||
1090 | } |
||
1091 | } |
||
1092 | |||
/**
 * Inserts \p inst immediately after this instruction, keeping the CFG's
 * per-block instruction-pointer bookkeeping consistent.
 */
void
backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
{
   /* A head sentinel is not a real instruction, so only real instructions
    * are checked for block membership (debug builds only).
    */
   if (!this->is_head_sentinel())
      assert(inst_is_in_block(block, this) || !"Instruction not in block");

   /* This block grows by one instruction... */
   block->end_ip++;

   /* ...and every later block's IPs shift up by one. */
   adjust_later_block_ips(block, 1);

   exec_node::insert_after(inst);
}
||
1105 | |||
/**
 * Inserts \p inst immediately before this instruction, keeping the CFG's
 * per-block instruction-pointer bookkeeping consistent.
 */
void
backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
{
   /* A tail sentinel is not a real instruction, so only real instructions
    * are checked for block membership (debug builds only).
    */
   if (!this->is_tail_sentinel())
      assert(inst_is_in_block(block, this) || !"Instruction not in block");

   /* This block grows by one instruction... */
   block->end_ip++;

   /* ...and every later block's IPs shift up by one. */
   adjust_later_block_ips(block, 1);

   exec_node::insert_before(inst);
}
||
1118 | |||
/**
 * Inserts every instruction in \p list immediately before this
 * instruction, keeping the CFG's per-block instruction-pointer
 * bookkeeping consistent.
 */
void
backend_instruction::insert_before(bblock_t *block, exec_list *list)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   unsigned num_inst = list->length();

   /* This block grows by num_inst instructions... */
   block->end_ip += num_inst;

   /* ...and every later block's IPs shift up by the same amount. */
   adjust_later_block_ips(block, num_inst);

   exec_node::insert_before(list);
}
||
1132 | |||
/**
 * Removes this instruction from \p block, updating the CFG's
 * instruction-pointer bookkeeping and deleting the block itself if this
 * was its last instruction.
 */
void
backend_instruction::remove(bblock_t *block)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   /* Every later block's IPs shift down by one. */
   adjust_later_block_ips(block, -1);

   if (block->start_ip == block->end_ip) {
      /* This was the block's only instruction: drop the whole block. */
      block->cfg->remove_block(block);
   } else {
      block->end_ip--;
   }

   exec_node::remove();
}
||
1148 | |||
/**
 * Convenience overload: a NULL name makes the named variant dump to
 * stderr instead of a file.
 */
void
backend_visitor::dump_instructions()
{
   dump_instructions(NULL);
}
||
1154 | |||
1155 | void |
||
1156 | backend_visitor::dump_instructions(const char *name) |
||
1157 | { |
||
1158 | FILE *file = stderr; |
||
1159 | if (name && geteuid() != 0) { |
||
1160 | file = fopen(name, "w"); |
||
1161 | if (!file) |
||
1162 | file = stderr; |
||
1163 | } |
||
1164 | |||
1165 | if (cfg) { |
||
1166 | int ip = 0; |
||
1167 | foreach_block_and_inst(block, backend_instruction, inst, cfg) { |
||
1168 | fprintf(file, "%4d: ", ip++); |
||
1169 | dump_instruction(inst, file); |
||
1170 | } |
||
1171 | } else { |
||
1172 | int ip = 0; |
||
1173 | foreach_in_list(backend_instruction, inst, &instructions) { |
||
1174 | fprintf(file, "%4d: ", ip++); |
||
1175 | dump_instruction(inst, file); |
||
1176 | } |
||
1177 | } |
||
1178 | |||
1179 | if (file != stderr) { |
||
1180 | fclose(file); |
||
1181 | } |
||
1182 | } |
||
1183 | |||
1184 | void |
||
1185 | backend_visitor::calculate_cfg() |
||
1186 | { |
||
1187 | if (this->cfg) |
||
1188 | return; |
||
1189 | cfg = new(mem_ctx) cfg_t(&this->instructions); |
||
1190 | } |
||
1191 | |||
1192 | void |
||
1193 | backend_visitor::invalidate_cfg() |
||
1194 | { |
||
1195 | ralloc_free(this->cfg); |
||
1196 | this->cfg = NULL; |
||
1197 | } |
||
1198 | |||
/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused but also make sure that addition of small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
void
backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
{
   int num_textures = _mesa_fls(prog->SamplersUsed);

   /* Textures come first. */
   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   /* UBO surfaces, only when a GLSL shader is attached. */
   if (shader) {
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += shader->base.NumUniformBlocks;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   /* One extra surface for shader-time profiling, debug builds only. */
   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->UsesGather) {
      if (devinfo->gen >= 8) {
         /* Gen8+ can reuse the regular texture surfaces for gather. */
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         /* Older gens need a separate set of gather surfaces. */
         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   /* Atomic buffer objects. */
   if (shader_prog && shader_prog->NumAtomicBuffers) {
      stage_prog_data->binding_table.abo_start = next_binding_table_offset;
      next_binding_table_offset += shader_prog->NumAtomicBuffers;
   } else {
      stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
   }

   /* Image surfaces. */
   if (shader && shader->base.NumImages) {
      stage_prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += shader->base.NumImages;
   } else {
      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);

   /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
}