/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/macros.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_gs.h"
#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_nir.h"
#include "glsl/ir_optimization.h"
#include "glsl/glsl_parser_extras.h"
#include "main/shaderapi.h"

struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
{
   struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);

   compiler->devinfo = devinfo;

   brw_fs_alloc_reg_sets(compiler);
   brw_vec4_alloc_reg_set(compiler);

   return compiler;
}

struct gl_shader *
brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = rzalloc(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Stage = _mesa_shader_enum_to_shader_stage(type);
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

/**
 * Performs a compile of the shader stages even when we don't know
 * what non-orthogonal state will be set, in the hope that it reflects
 * the eventual NOS used, and thus allows us to produce link failures.
 */
static bool
brw_shader_precompile(struct gl_context *ctx,
                      struct gl_shader_program *sh_prog)
{
   struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
   struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
   struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
   struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];

   if (fs && !brw_fs_precompile(ctx, sh_prog, fs->Program))
      return false;

   if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
      return false;

   if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
      return false;

   if (cs && !brw_cs_precompile(ctx, sh_prog, cs->Program))
      return false;

   return true;
}

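/* Background (an illustrative gloss, not from the original source):
 * "non-orthogonal state" (NOS) is GL state that changes the machine code
 * the driver must generate; for example, texture swizzles end up baked
 * into the fragment program key. The precompile above guesses a default
 * key so that most compile failures surface at link time rather than at
 * the first draw call.
 */
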
static inline bool
is_scalar_shader_stage(struct brw_context *brw, int stage)
{
   switch (stage) {
   case MESA_SHADER_FRAGMENT:
      return true;
   case MESA_SHADER_VERTEX:
      return brw->scalar_vs;
   default:
      return false;
   }
}

static void
brw_lower_packing_builtins(struct brw_context *brw,
                           gl_shader_stage shader_type,
                           exec_list *ir)
{
   int ops = LOWER_PACK_SNORM_2x16
           | LOWER_UNPACK_SNORM_2x16
           | LOWER_PACK_UNORM_2x16
           | LOWER_UNPACK_UNORM_2x16;

   if (is_scalar_shader_stage(brw, shader_type)) {
      ops |= LOWER_UNPACK_UNORM_4x8
           | LOWER_UNPACK_SNORM_4x8
           | LOWER_PACK_UNORM_4x8
           | LOWER_PACK_SNORM_4x8;
   }

   if (brw->gen >= 7) {
      /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
       * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
       * lowering is needed. For SOA code, the Half2x16 ops must be
       * scalarized.
       */
      if (is_scalar_shader_stage(brw, shader_type)) {
         ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
             |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
      }
   } else {
      ops |= LOWER_PACK_HALF_2x16
          |  LOWER_UNPACK_HALF_2x16;
   }

   lower_packing_builtins(ir, ops);
}

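/* For illustration (not part of the driver): LOWER_PACK_HALF_2x16_TO_SPLIT
 * rewrites the GLSL IR roughly as
 *
 *    packHalf2x16(v)  ->  pack_half_2x16_split(v.x, v.y)
 *
 * so that the scalar (SOA) backend can later emit it as the two-source
 * FS_OPCODE_PACK_HALF_2x16_SPLIT instruction named further below.
 */
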
static void
process_glsl_ir(struct brw_context *brw,
                struct gl_shader_program *shader_prog,
                struct gl_shader *shader)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gl_shader_compiler_options *options =
      &ctx->Const.ShaderCompilerOptions[shader->Stage];

   /* Temporary memory context for any new IR. */
   void *mem_ctx = ralloc_context(NULL);

   ralloc_adopt(mem_ctx, shader->ir);

   /* lower_packing_builtins() inserts arithmetic instructions, so it
    * must precede lower_instructions().
    */
   brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
   do_mat_op_to_vec(shader->ir);
   const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
   lower_instructions(shader->ir,
                      MOD_TO_FLOOR |
                      DIV_TO_MUL_RCP |
                      SUB_TO_ADD_NEG |
                      EXP_TO_EXP2 |
                      LOG_TO_LOG2 |
                      bitfield_insert |
                      LDEXP_TO_ARITH);

   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
    * if-statements need to be flattened.
    */
   if (brw->gen < 6)
      lower_if_to_cond_assign(shader->ir, 16);

   do_lower_texture_projection(shader->ir);
   brw_lower_texture_gradients(brw, shader->ir);
   do_vec_index_to_cond_assign(shader->ir);
   lower_vector_insert(shader->ir, true);
   if (options->NirOptions == NULL)
      brw_do_cubemap_normalize(shader->ir);
   lower_offset_arrays(shader->ir);
   brw_do_lower_unnormalized_offset(shader->ir);
   lower_noise(shader->ir);
   lower_quadop_vector(shader->ir, false);

   bool lowered_variable_indexing =
      lower_variable_index_to_cond_assign(shader->ir,
                                          options->EmitNoIndirectInput,
                                          options->EmitNoIndirectOutput,
                                          options->EmitNoIndirectTemp,
                                          options->EmitNoIndirectUniform);

   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
      perf_debug("Unsupported form of variable indexing in FS; falling "
                 "back to very inefficient code generation\n");
   }

   lower_ubo_reference(shader, shader->ir);

   bool progress;
   do {
      progress = false;

      if (is_scalar_shader_stage(brw, shader->Stage)) {
         brw_do_channel_expressions(shader->ir);
         brw_do_vector_splitting(shader->ir);
      }

      progress = do_lower_jumps(shader->ir, true, true,
                                true, /* main return */
                                false, /* continue */
                                false /* loops */
                                ) || progress;

      progress = do_common_optimization(shader->ir, true, true,
                                        options, ctx->Const.NativeIntegers) || progress;
   } while (progress);

   if (options->NirOptions != NULL)
      lower_output_reads(shader->ir);

   validate_ir_tree(shader->ir);

   /* Now that we've finished altering the linked IR, reparent any live IR back
    * to the permanent memory context, and free the temporary one (discarding any
    * junk we optimized away).
    */
   reparent_ir(shader->ir, shader->ir);
   ralloc_free(mem_ctx);

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      fprintf(stderr, "\n");
      fprintf(stderr, "GLSL IR for linked %s program %d:\n",
              _mesa_shader_stage_to_string(shader->Stage),
              shader_prog->Name);
      _mesa_print_ir(stderr, shader->ir, NULL);
      fprintf(stderr, "\n");
   }
}

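/* Memory-lifetime note (an illustrative gloss, not from the original
 * source): ralloc_adopt() above moves everything currently parented to
 * shader->ir onto the temporary mem_ctx, the lowering passes then allocate
 * their new IR there as well, and reparent_ir() pulls only the IR still
 * reachable from the instruction list back onto shader->ir, so the final
 * ralloc_free(mem_ctx) discards all the optimized-away nodes in one shot.
 */
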
GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
   struct brw_context *brw = brw_context(ctx);
   unsigned int stage;

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_shader *shader = shProg->_LinkedShaders[stage];
      const struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[stage];

      if (!shader)
         continue;

      struct gl_program *prog =
         ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
                                shader->Name);
      if (!prog)
         return false;
      prog->Parameters = _mesa_new_parameter_list();

      _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);

      process_glsl_ir(brw, shProg, shader);

      /* Make a pass over the IR to add state references for any built-in
       * uniforms that are used.  This has to be done now (during linking).
       * Code generation doesn't happen until the first time this shader is
       * used for rendering.  Waiting until then to generate the parameters is
       * too late.  At that point, the values for the built-in uniforms won't
       * get sent to the shader.
       */
      foreach_in_list(ir_instruction, node, shader->ir) {
         ir_variable *var = node->as_variable();

         if ((var == NULL) || (var->data.mode != ir_var_uniform)
             || (strncmp(var->name, "gl_", 3) != 0))
            continue;

         const ir_state_slot *const slots = var->get_state_slots();
         assert(slots != NULL);

         for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
            _mesa_add_state_reference(prog->Parameters,
                                      (gl_state_index *) slots[i].tokens);
         }
      }

      do_set_program_inouts(shader->ir, prog, shader->Stage);

      prog->SamplersUsed = shader->active_samplers;
      prog->ShadowSamplers = shader->shadow_samplers;
      _mesa_update_shader_textures_used(shProg, prog);

      _mesa_reference_program(ctx, &shader->Program, prog);

      brw_add_texrect_params(prog);

      if (options->NirOptions)
         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);

      _mesa_reference_program(ctx, &prog, NULL);
   }

   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
      for (unsigned i = 0; i < shProg->NumShaders; i++) {
         const struct gl_shader *sh = shProg->Shaders[i];
         if (!sh)
            continue;

         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
                 _mesa_shader_stage_to_string(sh->Stage),
                 i, shProg->Name);
         fprintf(stderr, "%s", sh->Source);
         fprintf(stderr, "\n");
      }
   }

   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
      return false;

   return true;
}


enum brw_reg_type
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
      return brw_type_for_base_type(type->fields.array);
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_ATOMIC_UINT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_IMAGE:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
   case GLSL_TYPE_INTERFACE:
   case GLSL_TYPE_DOUBLE:
      unreachable("not reached");
   }

   return BRW_REGISTER_TYPE_F;
}

enum brw_conditional_mod
brw_conditional_for_comparison(unsigned int op)
{
   switch (op) {
   case ir_binop_less:
      return BRW_CONDITIONAL_L;
   case ir_binop_greater:
      return BRW_CONDITIONAL_G;
   case ir_binop_lequal:
      return BRW_CONDITIONAL_LE;
   case ir_binop_gequal:
      return BRW_CONDITIONAL_GE;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      return BRW_CONDITIONAL_Z;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      return BRW_CONDITIONAL_NZ;
   default:
      unreachable("not reached: bad operation for comparison");
   }
}

uint32_t
brw_math_function(enum opcode op)
{
   switch (op) {
   case SHADER_OPCODE_RCP:
      return BRW_MATH_FUNCTION_INV;
   case SHADER_OPCODE_RSQ:
      return BRW_MATH_FUNCTION_RSQ;
   case SHADER_OPCODE_SQRT:
      return BRW_MATH_FUNCTION_SQRT;
   case SHADER_OPCODE_EXP2:
      return BRW_MATH_FUNCTION_EXP;
   case SHADER_OPCODE_LOG2:
      return BRW_MATH_FUNCTION_LOG;
   case SHADER_OPCODE_POW:
      return BRW_MATH_FUNCTION_POW;
   case SHADER_OPCODE_SIN:
      return BRW_MATH_FUNCTION_SIN;
   case SHADER_OPCODE_COS:
      return BRW_MATH_FUNCTION_COS;
   case SHADER_OPCODE_INT_QUOTIENT:
      return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
   case SHADER_OPCODE_INT_REMAINDER:
      return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
   default:
      unreachable("not reached: unknown math function");
   }
}

uint32_t
brw_texture_offset(int *offsets, unsigned num_components)
{
   if (!offsets) return 0;  /* nonconstant offset; caller will handle it. */

   /* Combine all three offsets into a single unsigned dword:
    *
    *    bits 11:8 - U Offset (X component)
    *    bits  7:4 - V Offset (Y component)
    *    bits  3:0 - R Offset (Z component)
    */
   unsigned offset_bits = 0;
   for (unsigned i = 0; i < num_components; i++) {
      const unsigned shift = 4 * (2 - i);
      offset_bits |= (offsets[i] << shift) & (0xF << shift);
   }
   return offset_bits;
}

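/* Worked example (illustrative only): for textureOffset() offsets of
 * {1, -2} with num_components == 2, the loop above computes
 *
 *    i = 0: shift = 8, (1 << 8)  & 0xF00 = 0x100
 *    i = 1: shift = 4, (-2 << 4) & 0x0F0 = 0x0E0   (-2 wraps to 0xE)
 *
 * so brw_texture_offset() returns 0x1E0: each component becomes a 4-bit
 * two's-complement field at the bit positions documented above.
 */
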
const char *
brw_instruction_name(enum opcode op)
{
   switch (op) {
   case BRW_OPCODE_MOV ... BRW_OPCODE_NOP:
      assert(opcode_descs[op].name);
      return opcode_descs[op].name;
   case FS_OPCODE_FB_WRITE:
      return "fb_write";
   case FS_OPCODE_BLORP_FB_WRITE:
      return "blorp_fb_write";
   case FS_OPCODE_REP_FB_WRITE:
      return "rep_fb_write";

   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   case SHADER_OPCODE_TEX:
      return "tex";
   case SHADER_OPCODE_TXD:
      return "txd";
   case SHADER_OPCODE_TXF:
      return "txf";
   case SHADER_OPCODE_TXL:
      return "txl";
   case SHADER_OPCODE_TXS:
      return "txs";
   case FS_OPCODE_TXB:
      return "txb";
   case SHADER_OPCODE_TXF_CMS:
      return "txf_cms";
   case SHADER_OPCODE_TXF_UMS:
      return "txf_ums";
   case SHADER_OPCODE_TXF_MCS:
      return "txf_mcs";
   case SHADER_OPCODE_LOD:
      return "lod";
   case SHADER_OPCODE_TG4:
      return "tg4";
   case SHADER_OPCODE_TG4_OFFSET:
      return "tg4_offset";
   case SHADER_OPCODE_SHADER_TIME_ADD:
      return "shader_time_add";

   case SHADER_OPCODE_UNTYPED_ATOMIC:
      return "untyped_atomic";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      return "untyped_surface_read";
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
      return "untyped_surface_write";
   case SHADER_OPCODE_TYPED_ATOMIC:
      return "typed_atomic";
   case SHADER_OPCODE_TYPED_SURFACE_READ:
      return "typed_surface_read";
   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
      return "typed_surface_write";
   case SHADER_OPCODE_MEMORY_FENCE:
      return "memory_fence";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      return "gen4_scratch_read";
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      return "gen4_scratch_write";
   case SHADER_OPCODE_GEN7_SCRATCH_READ:
      return "gen7_scratch_read";
   case SHADER_OPCODE_URB_WRITE_SIMD8:
      return "gen8_urb_write_simd8";

   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
      return "find_live_channel";
   case SHADER_OPCODE_BROADCAST:
      return "broadcast";

   case VEC4_OPCODE_MOV_BYTES:
      return "mov_bytes";
   case VEC4_OPCODE_PACK_BYTES:
      return "pack_bytes";
   case VEC4_OPCODE_UNPACK_UNIFORM:
      return "unpack_uniform";

   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

   case FS_OPCODE_CINTERP:
      return "cinterp";
   case FS_OPCODE_LINTERP:
      return "linterp";

   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";

   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
      return "uniform_pull_const_gen7";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
      return "varying_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
      return "varying_pull_const_gen7";

   case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
      return "mov_dispatch_to_flags";
   case FS_OPCODE_DISCARD_JUMP:
      return "discard_jump";

   case FS_OPCODE_SET_OMASK:
      return "set_omask";
   case FS_OPCODE_SET_SAMPLE_ID:
      return "set_sample_id";
   case FS_OPCODE_SET_SIMD4X2_OFFSET:
      return "set_simd4x2_offset";

   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
      return "unpack_half_2x16_split_x";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
      return "unpack_half_2x16_split_y";

   case FS_OPCODE_PLACEHOLDER_HALT:
      return "placeholder_halt";

   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
      return "interp_centroid";
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   case VS_OPCODE_URB_WRITE:
      return "vs_urb_write";
   case VS_OPCODE_PULL_CONSTANT_LOAD:
      return "pull_constant_load";
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      return "pull_constant_load_gen7";

   case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
      return "set_simd4x2_header_gen9";

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      return "unpack_flags_simd4x2";

   case GS_OPCODE_URB_WRITE:
      return "gs_urb_write";
   case GS_OPCODE_URB_WRITE_ALLOCATE:
      return "gs_urb_write_allocate";
   case GS_OPCODE_THREAD_END:
      return "gs_thread_end";
   case GS_OPCODE_SET_WRITE_OFFSET:
      return "set_write_offset";
   case GS_OPCODE_SET_VERTEX_COUNT:
      return "set_vertex_count";
   case GS_OPCODE_SET_DWORD_2:
      return "set_dword_2";
   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      return "prepare_channel_masks";
   case GS_OPCODE_SET_CHANNEL_MASKS:
      return "set_channel_masks";
   case GS_OPCODE_GET_INSTANCE_ID:
      return "get_instance_id";
   case GS_OPCODE_FF_SYNC:
      return "ff_sync";
   case GS_OPCODE_SET_PRIMITIVE_ID:
      return "set_primitive_id";
   case GS_OPCODE_SVB_WRITE:
      return "gs_svb_write";
   case GS_OPCODE_SVB_SET_DST_INDEX:
      return "gs_svb_set_dst_index";
   case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
      return "gs_ff_sync_set_primitives";
   case CS_OPCODE_CS_TERMINATE:
      return "cs_terminate";
   }

   unreachable("not reached");
}

bool
brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   union {
      unsigned ud;
      int d;
      float f;
   } imm = { reg->dw1.ud }, sat_imm = { 0 };

   switch (type) {
   case BRW_REGISTER_TYPE_UD:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_Q:
      /* Nothing to do. */
      return false;
   case BRW_REGISTER_TYPE_UW:
      sat_imm.ud = CLAMP(imm.ud, 0, USHRT_MAX);
      break;
   case BRW_REGISTER_TYPE_W:
      sat_imm.d = CLAMP(imm.d, SHRT_MIN, SHRT_MAX);
      break;
   case BRW_REGISTER_TYPE_F:
      sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f);
      break;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_VF:
      unreachable("unimplemented: saturate vector immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      unreachable("unimplemented: saturate DF/HF immediate");
   }

   if (imm.ud != sat_imm.ud) {
      reg->dw1.ud = sat_imm.ud;
      return true;
   }
   return false;
}

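/* Illustrative example (not part of the driver): for a BRW_REGISTER_TYPE_F
 * immediate of 1.5f, brw_saturate_immediate() clamps the payload to 1.0f
 * and returns true; for 0.5f it changes nothing and returns false. The
 * return value tells callers whether folding a saturate modifier into the
 * constant actually altered it.
 */
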
bool
brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   switch (type) {
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      reg->dw1.d = -reg->dw1.d;
      return true;
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
      reg->dw1.d = -(int16_t)reg->dw1.ud;
      return true;
   case BRW_REGISTER_TYPE_F:
      reg->dw1.f = -reg->dw1.f;
      return true;
   case BRW_REGISTER_TYPE_VF:
      reg->dw1.ud ^= 0x80808080;
      return true;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_V:
      assert(!"unimplemented: negate UV/V immediate");
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_Q:
      assert(!"unimplemented: negate UQ/Q immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      assert(!"unimplemented: negate DF/HF immediate");
   }

   return false;
}

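/* Note on the VF case above (an illustrative gloss, not from the original
 * source): a VF immediate packs four 8-bit restricted floats into one
 * dword, each with its sign in bit 7 of its byte, so XORing with
 * 0x80808080 negates all four packed values at once. The abs variant
 * below clears the same four sign bits instead.
 */
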
bool
brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
{
   switch (type) {
   case BRW_REGISTER_TYPE_D:
      reg->dw1.d = abs(reg->dw1.d);
      return true;
   case BRW_REGISTER_TYPE_W:
      reg->dw1.d = abs((int16_t)reg->dw1.ud);
      return true;
   case BRW_REGISTER_TYPE_F:
      reg->dw1.f = fabsf(reg->dw1.f);
      return true;
   case BRW_REGISTER_TYPE_VF:
      reg->dw1.ud &= ~0x80808080;
      return true;
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_B:
      unreachable("no UB/B immediates");
   case BRW_REGISTER_TYPE_UQ:
   case BRW_REGISTER_TYPE_UD:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_UV:
      /* Presumably the absolute value modifier on an unsigned source is a
       * nop, but it would be nice to confirm.
       */
      assert(!"unimplemented: abs unsigned immediate");
   case BRW_REGISTER_TYPE_V:
      assert(!"unimplemented: abs V immediate");
   case BRW_REGISTER_TYPE_Q:
      assert(!"unimplemented: abs Q immediate");
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_HF:
      assert(!"unimplemented: abs DF/HF immediate");
   }

   return false;
}

backend_visitor::backend_visitor(struct brw_context *brw,
                                 struct gl_shader_program *shader_prog,
                                 struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data,
                                 gl_shader_stage stage)
   : brw(brw),
     devinfo(brw->intelScreen->devinfo),
     ctx(&brw->ctx),
     shader(shader_prog ?
        (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
     shader_prog(shader_prog),
     prog(prog),
     stage_prog_data(stage_prog_data),
     cfg(NULL),
     stage(stage)
{
   debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
   stage_name = _mesa_shader_stage_to_string(stage);
   stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
}

bool
backend_reg::is_zero() const
{
   if (file != IMM)
      return false;

   return fixed_hw_reg.dw1.d == 0;
}

bool
backend_reg::is_one() const
{
   if (file != IMM)
      return false;

   return type == BRW_REGISTER_TYPE_F
          ? fixed_hw_reg.dw1.f == 1.0
          : fixed_hw_reg.dw1.d == 1;
}

bool
backend_reg::is_negative_one() const
{
   if (file != IMM)
      return false;

   switch (type) {
   case BRW_REGISTER_TYPE_F:
      return fixed_hw_reg.dw1.f == -1.0;
   case BRW_REGISTER_TYPE_D:
      return fixed_hw_reg.dw1.d == -1;
   default:
      return false;
   }
}

bool
backend_reg::is_null() const
{
   return file == HW_REG &&
          fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          fixed_hw_reg.nr == BRW_ARF_NULL;
}


bool
backend_reg::is_accumulator() const
{
   return file == HW_REG &&
          fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          fixed_hw_reg.nr == BRW_ARF_ACCUMULATOR;
}

bool
backend_reg::in_range(const backend_reg &r, unsigned n) const
{
   return (file == r.file &&
           reg == r.reg &&
           reg_offset >= r.reg_offset &&
           reg_offset < r.reg_offset + n);
}

bool
backend_instruction::is_commutative() const
{
   switch (opcode) {
   case BRW_OPCODE_AND:
   case BRW_OPCODE_OR:
   case BRW_OPCODE_XOR:
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_MUL:
      return true;
   case BRW_OPCODE_SEL:
      /* MIN and MAX are commutative. */
      if (conditional_mod == BRW_CONDITIONAL_GE ||
          conditional_mod == BRW_CONDITIONAL_L) {
         return true;
      }
      /* fallthrough */
   default:
      return false;
   }
}

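/* Background note (not from the original source): Gen hardware has no
 * dedicated MIN/MAX opcodes; the compiler emits SEL with the .l or .ge
 * conditional modifier for them, which is why is_commutative() above
 * special-cases BRW_OPCODE_SEL on exactly those two modifiers.
 */
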
bool
backend_instruction::is_3src() const
{
   return opcode < ARRAY_SIZE(opcode_descs) && opcode_descs[opcode].nsrc == 3;
}

bool
backend_instruction::is_tex() const
{
   return (opcode == SHADER_OPCODE_TEX ||
           opcode == FS_OPCODE_TXB ||
           opcode == SHADER_OPCODE_TXD ||
           opcode == SHADER_OPCODE_TXF ||
           opcode == SHADER_OPCODE_TXF_CMS ||
           opcode == SHADER_OPCODE_TXF_UMS ||
           opcode == SHADER_OPCODE_TXF_MCS ||
           opcode == SHADER_OPCODE_TXL ||
           opcode == SHADER_OPCODE_TXS ||
           opcode == SHADER_OPCODE_LOD ||
           opcode == SHADER_OPCODE_TG4 ||
           opcode == SHADER_OPCODE_TG4_OFFSET);
}

bool
backend_instruction::is_math() const
{
   return (opcode == SHADER_OPCODE_RCP ||
           opcode == SHADER_OPCODE_RSQ ||
           opcode == SHADER_OPCODE_SQRT ||
           opcode == SHADER_OPCODE_EXP2 ||
           opcode == SHADER_OPCODE_LOG2 ||
           opcode == SHADER_OPCODE_SIN ||
           opcode == SHADER_OPCODE_COS ||
           opcode == SHADER_OPCODE_INT_QUOTIENT ||
           opcode == SHADER_OPCODE_INT_REMAINDER ||
           opcode == SHADER_OPCODE_POW);
}

bool
backend_instruction::is_control_flow() const
{
   switch (opcode) {
   case BRW_OPCODE_DO:
   case BRW_OPCODE_WHILE:
   case BRW_OPCODE_IF:
   case BRW_OPCODE_ELSE:
   case BRW_OPCODE_ENDIF:
   case BRW_OPCODE_BREAK:
   case BRW_OPCODE_CONTINUE:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::can_do_source_mods() const
{
   switch (opcode) {
   case BRW_OPCODE_ADDC:
   case BRW_OPCODE_BFE:
   case BRW_OPCODE_BFI1:
   case BRW_OPCODE_BFI2:
   case BRW_OPCODE_BFREV:
   case BRW_OPCODE_CBIT:
   case BRW_OPCODE_FBH:
   case BRW_OPCODE_FBL:
   case BRW_OPCODE_SUBB:
      return false;
   default:
      return true;
   }
}

bool
backend_instruction::can_do_saturate() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MATH:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SEL:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case FS_OPCODE_LINTERP:
   case SHADER_OPCODE_COS:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_SQRT:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::can_do_cmod() const
{
   switch (opcode) {
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_ADDC:
   case BRW_OPCODE_AND:
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_AVG:
   case BRW_OPCODE_CMP:
   case BRW_OPCODE_CMPN:
   case BRW_OPCODE_DP2:
   case BRW_OPCODE_DP3:
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DPH:
   case BRW_OPCODE_F16TO32:
   case BRW_OPCODE_F32TO16:
   case BRW_OPCODE_FRC:
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_LRP:
   case BRW_OPCODE_LZD:
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_MAD:
   case BRW_OPCODE_MOV:
   case BRW_OPCODE_MUL:
   case BRW_OPCODE_NOT:
   case BRW_OPCODE_OR:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_RNDD:
   case BRW_OPCODE_RNDE:
   case BRW_OPCODE_RNDU:
   case BRW_OPCODE_RNDZ:
   case BRW_OPCODE_SAD2:
   case BRW_OPCODE_SADA2:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case BRW_OPCODE_SUBB:
   case BRW_OPCODE_XOR:
   case FS_OPCODE_CINTERP:
   case FS_OPCODE_LINTERP:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::reads_accumulator_implicitly() const
{
   switch (opcode) {
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_SADA2:
      return true;
   default:
      return false;
   }
}

bool
backend_instruction::writes_accumulator_implicitly(const struct brw_device_info *devinfo) const
{
   return writes_accumulator ||
          (devinfo->gen < 6 &&
           ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
            (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
             opcode != FS_OPCODE_CINTERP)));
}

bool
backend_instruction::has_side_effects() const
{
   switch (opcode) {
   case SHADER_OPCODE_UNTYPED_ATOMIC:
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
   case SHADER_OPCODE_TYPED_ATOMIC:
   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
   case SHADER_OPCODE_MEMORY_FENCE:
   case SHADER_OPCODE_URB_WRITE_SIMD8:
   case FS_OPCODE_FB_WRITE:
      return true;
   default:
      return false;
   }
}

#ifndef NDEBUG
static bool
inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
{
   bool found = false;
   foreach_inst_in_block (backend_instruction, i, block) {
      if (inst == i) {
         found = true;
      }
   }
   return found;
}
#endif

static void
adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
{
   for (bblock_t *block_iter = start_block->next();
        !block_iter->link.is_tail_sentinel();
        block_iter = block_iter->next()) {
      block_iter->start_ip += ip_adjustment;
      block_iter->end_ip += ip_adjustment;
   }
}

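/* Illustration (not driver code) of the IP bookkeeping used by the
 * insert/remove helpers below: if a block spans instruction indices
 * [10, 20] and one instruction is inserted into it, its end_ip becomes 21
 * and every later block has both start_ip and end_ip shifted by +1, so
 * instruction indices stay dense across the whole CFG.
 */
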
void
backend_instruction::insert_after(bblock_t *block, backend_instruction *inst)
{
   if (!this->is_head_sentinel())
      assert(inst_is_in_block(block, this) || !"Instruction not in block");

   block->end_ip++;

   adjust_later_block_ips(block, 1);

   exec_node::insert_after(inst);
}

void
backend_instruction::insert_before(bblock_t *block, backend_instruction *inst)
{
   if (!this->is_tail_sentinel())
      assert(inst_is_in_block(block, this) || !"Instruction not in block");

   block->end_ip++;

   adjust_later_block_ips(block, 1);

   exec_node::insert_before(inst);
}

void
backend_instruction::insert_before(bblock_t *block, exec_list *list)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   unsigned num_inst = list->length();

   block->end_ip += num_inst;

   adjust_later_block_ips(block, num_inst);

   exec_node::insert_before(list);
}

void
backend_instruction::remove(bblock_t *block)
{
   assert(inst_is_in_block(block, this) || !"Instruction not in block");

   adjust_later_block_ips(block, -1);

   if (block->start_ip == block->end_ip) {
      block->cfg->remove_block(block);
   } else {
      block->end_ip--;
   }

   exec_node::remove();
}

void
backend_visitor::dump_instructions()
{
   dump_instructions(NULL);
}

void
backend_visitor::dump_instructions(const char *name)
{
   FILE *file = stderr;
   if (name && geteuid() != 0) {
      file = fopen(name, "w");
      if (!file)
         file = stderr;
   }

   if (cfg) {
      int ip = 0;
      foreach_block_and_inst(block, backend_instruction, inst, cfg) {
         fprintf(file, "%4d: ", ip++);
         dump_instruction(inst, file);
      }
   } else {
      int ip = 0;
      foreach_in_list(backend_instruction, inst, &instructions) {
         fprintf(file, "%4d: ", ip++);
         dump_instruction(inst, file);
      }
   }

   if (file != stderr) {
      fclose(file);
   }
}

void
backend_visitor::calculate_cfg()
{
   if (this->cfg)
      return;
   cfg = new(mem_ctx) cfg_t(&this->instructions);
}

void
backend_visitor::invalidate_cfg()
{
   ralloc_free(this->cfg);
   this->cfg = NULL;
}

/**
 * Sets up the starting offsets for the groups of binding table entries
 * common to all pipeline stages.
 *
 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
 * unused but also make sure that addition of small offsets to them will
 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
 */
void
backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
{
   int num_textures = _mesa_fls(prog->SamplersUsed);

   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
   next_binding_table_offset += num_textures;

   if (shader) {
      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
      next_binding_table_offset += shader->base.NumUniformBlocks;
   } else {
      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
   }

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
      next_binding_table_offset++;
   } else {
      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
   }

   if (prog->UsesGather) {
      if (devinfo->gen >= 8) {
         stage_prog_data->binding_table.gather_texture_start =
            stage_prog_data->binding_table.texture_start;
      } else {
         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
         next_binding_table_offset += num_textures;
      }
   } else {
      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
   }

   if (shader_prog && shader_prog->NumAtomicBuffers) {
      stage_prog_data->binding_table.abo_start = next_binding_table_offset;
      next_binding_table_offset += shader_prog->NumAtomicBuffers;
   } else {
      stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
   }

   if (shader && shader->base.NumImages) {
      stage_prog_data->binding_table.image_start = next_binding_table_offset;
      next_binding_table_offset += shader->base.NumImages;
   } else {
      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
   }

   /* This may or may not be used depending on how the compile goes. */
   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
   next_binding_table_offset++;

   assert(next_binding_table_offset <= BRW_MAX_SURFACES);

   /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
}
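
/* Worked example (illustrative, with made-up counts): for a shader using
 * 2 textures and 1 UBO, with no shader_time, no gather workaround, no
 * atomic buffers and no images, starting from
 * next_binding_table_offset == 0 the layout comes out as
 *
 *    texture_start        = 0   (entries 0..1)
 *    ubo_start            = 2
 *    pull_constants_start = 3
 *
 * with every unused group left at the 0xd0d0d0d0 poison value described in
 * the comment above the function.
 */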