Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2009 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *
  26.  */
  27.  
  28. #include "brw_context.h"
  29. #include "brw_state.h"
  30. #include "brw_defines.h"
  31. #include "brw_util.h"
  32. #include "program/prog_parameter.h"
  33. #include "program/prog_statevars.h"
  34. #include "intel_batchbuffer.h"
  35. #include "glsl/glsl_parser_extras.h"
  36.  
  37. /**
  38.  * Creates a streamed BO containing the push constants for the VS or GS on
  39.  * gen6+.
  40.  *
  41.  * Push constants are constant values (such as GLSL uniforms) that are
  42.  * pre-loaded into a shader stage's register space at thread spawn time.
  43.  *
  44.  * Not all GLSL uniforms will be uploaded as push constants: The hardware has
  45.  * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
  46.  * uploaded as push constants, while GL 4.4 requires at least 1024 components
  47.  * to be usable for the VS.  Plus, currently we always use pull constants
  48.  * instead of push constants when doing variable-index array access.
  49.  *
  50.  * See brw_curbe.c for the equivalent gen4/5 code.
  51.  */
  52. void
  53. gen6_upload_push_constants(struct brw_context *brw,
  54.                            const struct gl_program *prog,
  55.                            const struct brw_stage_prog_data *prog_data,
  56.                            struct brw_stage_state *stage_state,
  57.                            enum aub_state_struct_type type)
  58. {
  59.    struct gl_context *ctx = &brw->ctx;
  60.  
  61.    if (prog_data->nr_params == 0) {
  62.       stage_state->push_const_size = 0;
  63.    } else {
  64.       /* Updates the ParamaterValues[i] pointers for all parameters of the
  65.        * basic type of PROGRAM_STATE_VAR.
  66.        */
  67.       /* XXX: Should this happen somewhere before to get our state flag set? */
  68.       _mesa_load_state_parameters(ctx, prog->Parameters);
  69.  
  70.       gl_constant_value *param;
  71.       int i;
  72.  
  73.       param = brw_state_batch(brw, type,
  74.                               prog_data->nr_params * sizeof(gl_constant_value),
  75.                               32, &stage_state->push_const_offset);
  76.  
  77.       STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
  78.  
  79.       /* _NEW_PROGRAM_CONSTANTS
  80.        *
  81.        * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
  82.        * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
  83.        * wouldn't be set for them.
  84.       */
  85.       for (i = 0; i < prog_data->nr_params; i++) {
  86.          param[i] = *prog_data->param[i];
  87.       }
  88.  
  89.       if (0) {
  90.          fprintf(stderr, "%s constants:\n",
  91.                  _mesa_shader_stage_to_string(stage_state->stage));
  92.          for (i = 0; i < prog_data->nr_params; i++) {
  93.             if ((i & 7) == 0)
  94.                fprintf(stderr, "g%d: ",
  95.                        prog_data->dispatch_grf_start_reg + i / 8);
  96.             fprintf(stderr, "%8f ", param[i].f);
  97.             if ((i & 7) == 7)
  98.                fprintf(stderr, "\n");
  99.          }
  100.          if ((i & 7) != 0)
  101.             fprintf(stderr, "\n");
  102.          fprintf(stderr, "\n");
  103.       }
  104.  
  105.       stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
  106.       /* We can only push 32 registers of constants at a time. */
  107.  
  108.       /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS:
  109.        *
  110.        *     "The sum of all four read length fields (each incremented to
  111.        *      represent the actual read length) must be less than or equal to
  112.        *      32"
  113.        *
  114.        * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS:
  115.        *
  116.        *     "The sum of all four read length fields must be less than or
  117.        *      equal to the size of 64"
  118.        *
  119.        * The other shader stages all match the VS's limits.
  120.        */
  121.       assert(stage_state->push_const_size <= 32);
  122.    }
  123. }
  124.  
  125. static void
  126. gen6_upload_vs_push_constants(struct brw_context *brw)
  127. {
  128.    struct brw_stage_state *stage_state = &brw->vs.base;
  129.  
  130.    /* _BRW_NEW_VERTEX_PROGRAM */
  131.    const struct brw_vertex_program *vp =
  132.       brw_vertex_program_const(brw->vertex_program);
  133.    /* BRW_NEW_VS_PROG_DATA */
  134.    const struct brw_stage_prog_data *prog_data = &brw->vs.prog_data->base.base;
  135.  
  136.    gen6_upload_push_constants(brw, &vp->program.Base, prog_data,
  137.                               stage_state, AUB_TRACE_VS_CONSTANTS);
  138.  
  139.    if (brw->gen >= 7) {
  140.       if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail)
  141.          gen7_emit_vs_workaround_flush(brw);
  142.  
  143.       gen7_upload_constant_state(brw, stage_state, true /* active */,
  144.                                  _3DSTATE_CONSTANT_VS);
  145.    }
  146. }
  147.  
  148. const struct brw_tracked_state gen6_vs_push_constants = {
  149.    .dirty = {
  150.       .mesa  = _NEW_PROGRAM_CONSTANTS |
  151.                _NEW_TRANSFORM,
  152.       .brw   = BRW_NEW_BATCH |
  153.                BRW_NEW_PUSH_CONSTANT_ALLOCATION |
  154.                BRW_NEW_VERTEX_PROGRAM |
  155.                BRW_NEW_VS_PROG_DATA,
  156.    },
  157.    .emit = gen6_upload_vs_push_constants,
  158. };
  159.  
  160. static void
  161. upload_vs_state(struct brw_context *brw)
  162. {
  163.    const struct brw_stage_state *stage_state = &brw->vs.base;
  164.    uint32_t floating_point_mode = 0;
  165.  
  166.    /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
  167.     * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
  168.     *
  169.     *   [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
  170.     *   command that causes the VS Function Enable to toggle. Pipeline
  171.     *   flush can be executed by sending a PIPE_CONTROL command with CS
  172.     *   stall bit set and a post sync operation.
  173.     *
  174.     * We've already done such a flush at the start of state upload, so we
  175.     * don't need to do another one here.
  176.     */
  177.  
  178.    if (stage_state->push_const_size == 0) {
  179.       /* Disable the push constant buffers. */
  180.       BEGIN_BATCH(5);
  181.       OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
  182.       OUT_BATCH(0);
  183.       OUT_BATCH(0);
  184.       OUT_BATCH(0);
  185.       OUT_BATCH(0);
  186.       ADVANCE_BATCH();
  187.    } else {
  188.       BEGIN_BATCH(5);
  189.       OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
  190.                 GEN6_CONSTANT_BUFFER_0_ENABLE |
  191.                 (5 - 2));
  192.       /* Pointer to the VS constant buffer.  Covered by the set of
  193.        * state flags from gen6_upload_vs_constants
  194.        */
  195.       OUT_BATCH(stage_state->push_const_offset +
  196.                 stage_state->push_const_size - 1);
  197.       OUT_BATCH(0);
  198.       OUT_BATCH(0);
  199.       OUT_BATCH(0);
  200.       ADVANCE_BATCH();
  201.    }
  202.  
  203.    if (brw->vs.prog_data->base.base.use_alt_mode)
  204.       floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT;
  205.  
  206.    BEGIN_BATCH(6);
  207.    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
  208.    OUT_BATCH(stage_state->prog_offset);
  209.    OUT_BATCH(floating_point_mode |
  210.              ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) |
  211.              ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
  212.               GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
  213.  
  214.    if (brw->vs.prog_data->base.base.total_scratch) {
  215.       OUT_RELOC(stage_state->scratch_bo,
  216.                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  217.                 ffs(brw->vs.prog_data->base.base.total_scratch) - 11);
  218.    } else {
  219.       OUT_BATCH(0);
  220.    }
  221.  
  222.    OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg <<
  223.               GEN6_VS_DISPATCH_START_GRF_SHIFT) |
  224.              (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
  225.              (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
  226.  
  227.    OUT_BATCH(((brw->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
  228.              GEN6_VS_STATISTICS_ENABLE |
  229.              GEN6_VS_ENABLE);
  230.    ADVANCE_BATCH();
  231.  
  232.    /* Based on my reading of the simulator, the VS constants don't get
  233.     * pulled into the VS FF unit until an appropriate pipeline flush
  234.     * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
  235.     * references to them into a little FIFO.  The flushes are common,
  236.     * but don't reliably happen between this and a 3DPRIMITIVE, causing
  237.     * the primitive to use the wrong constants.  Then the FIFO
  238.     * containing the constant setup gets added to again on the next
  239.     * constants change, and eventually when a flush does happen the
  240.     * unit is overwhelmed by constant changes and dies.
  241.     *
  242.     * To avoid this, send a PIPE_CONTROL down the line that will
  243.     * update the unit immediately loading the constants.  The flush
  244.     * type bits here were those set by the STATE_BASE_ADDRESS whose
  245.     * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
  246.     * bug reports that led to this workaround, and may be more than
  247.     * what is strictly required to avoid the issue.
  248.     */
  249.    brw_emit_pipe_control_flush(brw,
  250.                                PIPE_CONTROL_DEPTH_STALL |
  251.                                PIPE_CONTROL_INSTRUCTION_INVALIDATE |
  252.                                PIPE_CONTROL_STATE_CACHE_INVALIDATE);
  253. }
  254.  
  255. const struct brw_tracked_state gen6_vs_state = {
  256.    .dirty = {
  257.       .mesa  = _NEW_PROGRAM_CONSTANTS |
  258.                _NEW_TRANSFORM,
  259.       .brw   = BRW_NEW_BATCH |
  260.                BRW_NEW_CONTEXT |
  261.                BRW_NEW_PUSH_CONSTANT_ALLOCATION |
  262.                BRW_NEW_VERTEX_PROGRAM |
  263.                BRW_NEW_VS_PROG_DATA,
  264.    },
  265.    .emit = upload_vs_state,
  266. };
  267.