Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. /**
  25.  * @file gen7_sol_state.c
  26.  *
  27.  * Controls the stream output logic (SOL) stage of the gen7 hardware, which is
  28.  * used to implement GL_EXT_transform_feedback.
  29.  */
  30.  
  31. #include "brw_context.h"
  32. #include "brw_state.h"
  33. #include "brw_defines.h"
  34. #include "intel_batchbuffer.h"
  35. #include "intel_buffer_objects.h"
  36. #include "main/transformfeedback.h"
  37.  
  38. static void
  39. upload_3dstate_so_buffers(struct brw_context *brw)
  40. {
  41.    struct gl_context *ctx = &brw->ctx;
  42.    /* BRW_NEW_VERTEX_PROGRAM */
  43.    const struct gl_shader_program *vs_prog =
  44.       ctx->Shader.CurrentVertexProgram;
  45.    const struct gl_transform_feedback_info *linked_xfb_info =
  46.       &vs_prog->LinkedTransformFeedback;
  47.    /* BRW_NEW_TRANSFORM_FEEDBACK */
  48.    struct gl_transform_feedback_object *xfb_obj =
  49.       ctx->TransformFeedback.CurrentObject;
  50.    int i;
  51.  
  52.    /* Set up the up to 4 output buffers.  These are the ranges defined in the
  53.     * gl_transform_feedback_object.
  54.     */
  55.    for (i = 0; i < 4; i++) {
  56.       struct intel_buffer_object *bufferobj =
  57.          intel_buffer_object(xfb_obj->Buffers[i]);
  58.       drm_intel_bo *bo;
  59.       uint32_t start, end;
  60.       uint32_t stride;
  61.  
  62.       if (!xfb_obj->Buffers[i]) {
  63.          /* The pitch of 0 in this command indicates that the buffer is
  64.           * unbound and won't be written to.
  65.           */
  66.          BEGIN_BATCH(4);
  67.          OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
  68.          OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
  69.          OUT_BATCH(0);
  70.          OUT_BATCH(0);
  71.          ADVANCE_BATCH();
  72.  
  73.          continue;
  74.       }
  75.  
  76.       bo = intel_bufferobj_buffer(brw, bufferobj, INTEL_WRITE_PART);
  77.       stride = linked_xfb_info->BufferStride[i] * 4;
  78.  
  79.       start = xfb_obj->Offset[i];
  80.       assert(start % 4 == 0);
  81.       end = ALIGN(start + xfb_obj->Size[i], 4);
  82.       assert(end <= bo->size);
  83.  
  84.       BEGIN_BATCH(4);
  85.       OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
  86.       OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT) | stride);
  87.       OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
  88.       OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, end);
  89.       ADVANCE_BATCH();
  90.    }
  91. }
  92.  
  93. /**
  94.  * Outputs the 3DSTATE_SO_DECL_LIST command.
  95.  *
  96.  * The data output is a series of 64-bit entries containing a SO_DECL per
  97.  * stream.  We only have one stream of rendering coming out of the GS unit, so
  98.  * we only emit stream 0 (low 16 bits) SO_DECLs.
  99.  */
  100. static void
  101. upload_3dstate_so_decl_list(struct brw_context *brw,
  102.                             const struct brw_vue_map *vue_map)
  103. {
  104.    struct gl_context *ctx = &brw->ctx;
  105.    /* BRW_NEW_VERTEX_PROGRAM */
  106.    const struct gl_shader_program *vs_prog =
  107.       ctx->Shader.CurrentVertexProgram;
  108.    /* BRW_NEW_TRANSFORM_FEEDBACK */
  109.    const struct gl_transform_feedback_info *linked_xfb_info =
  110.       &vs_prog->LinkedTransformFeedback;
  111.    int i;
  112.    uint16_t so_decl[128];
  113.    int buffer_mask = 0;
  114.    int next_offset[4] = {0, 0, 0, 0};
  115.  
  116.    STATIC_ASSERT(ARRAY_SIZE(so_decl) >= MAX_PROGRAM_OUTPUTS);
  117.  
  118.    /* Construct the list of SO_DECLs to be emitted.  The formatting of the
  119.     * command is feels strange -- each dword pair contains a SO_DECL per stream.
  120.     */
  121.    for (i = 0; i < linked_xfb_info->NumOutputs; i++) {
  122.       int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
  123.       uint16_t decl = 0;
  124.       int varying = linked_xfb_info->Outputs[i].OutputRegister;
  125.       unsigned component_mask =
  126.          (1 << linked_xfb_info->Outputs[i].NumComponents) - 1;
  127.  
  128.       /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
  129.       if (varying == VARYING_SLOT_PSIZ) {
  130.          assert(linked_xfb_info->Outputs[i].NumComponents == 1);
  131.          component_mask <<= 3;
  132.       } else {
  133.          component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
  134.       }
  135.  
  136.       buffer_mask |= 1 << buffer;
  137.  
  138.       decl |= buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
  139.       decl |= vue_map->varying_to_slot[varying] <<
  140.          SO_DECL_REGISTER_INDEX_SHIFT;
  141.       decl |= component_mask << SO_DECL_COMPONENT_MASK_SHIFT;
  142.  
  143.       /* This assert should be true until GL_ARB_transform_feedback_instanced
  144.        * is added and we start using the hole flag.
  145.        */
  146.       assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
  147.  
  148.       next_offset[buffer] += linked_xfb_info->Outputs[i].NumComponents;
  149.  
  150.       so_decl[i] = decl;
  151.    }
  152.  
  153.    BEGIN_BATCH(linked_xfb_info->NumOutputs * 2 + 3);
  154.    OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 |
  155.              (linked_xfb_info->NumOutputs * 2 + 1));
  156.  
  157.    OUT_BATCH((buffer_mask << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |
  158.              (0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |
  159.              (0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |
  160.              (0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));
  161.  
  162.    OUT_BATCH((linked_xfb_info->NumOutputs << SO_NUM_ENTRIES_0_SHIFT) |
  163.              (0 << SO_NUM_ENTRIES_1_SHIFT) |
  164.              (0 << SO_NUM_ENTRIES_2_SHIFT) |
  165.              (0 << SO_NUM_ENTRIES_3_SHIFT));
  166.  
  167.    for (i = 0; i < linked_xfb_info->NumOutputs; i++) {
  168.       OUT_BATCH(so_decl[i]);
  169.       OUT_BATCH(0);
  170.    }
  171.  
  172.    ADVANCE_BATCH();
  173. }
  174.  
  175. static void
  176. upload_3dstate_streamout(struct brw_context *brw, bool active,
  177.                          const struct brw_vue_map *vue_map)
  178. {
  179.    struct gl_context *ctx = &brw->ctx;
  180.    /* BRW_NEW_TRANSFORM_FEEDBACK */
  181.    struct gl_transform_feedback_object *xfb_obj =
  182.       ctx->TransformFeedback.CurrentObject;
  183.    uint32_t dw1 = 0, dw2 = 0;
  184.    int i;
  185.  
  186.    if (active) {
  187.       int urb_entry_read_offset = 0;
  188.       int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
  189.          urb_entry_read_offset;
  190.  
  191.       dw1 |= SO_FUNCTION_ENABLE;
  192.       dw1 |= SO_STATISTICS_ENABLE;
  193.  
  194.       /* _NEW_LIGHT */
  195.       if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
  196.          dw1 |= SO_REORDER_TRAILING;
  197.  
  198.       for (i = 0; i < 4; i++) {
  199.          if (xfb_obj->Buffers[i]) {
  200.             dw1 |= SO_BUFFER_ENABLE(i);
  201.          }
  202.       }
  203.  
  204.       /* We always read the whole vertex.  This could be reduced at some
  205.        * point by reading less and offsetting the register index in the
  206.        * SO_DECLs.
  207.        */
  208.       dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT;
  209.       dw2 |= (urb_entry_read_length - 1) <<
  210.          SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
  211.    }
  212.  
  213.    BEGIN_BATCH(3);
  214.    OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
  215.    OUT_BATCH(dw1);
  216.    OUT_BATCH(dw2);
  217.    ADVANCE_BATCH();
  218. }
  219.  
  220. static void
  221. upload_sol_state(struct brw_context *brw)
  222. {
  223.    struct gl_context *ctx = &brw->ctx;
  224.    /* BRW_NEW_TRANSFORM_FEEDBACK */
  225.    bool active = _mesa_is_xfb_active_and_unpaused(ctx);
  226.  
  227.    if (active) {
  228.       upload_3dstate_so_buffers(brw);
  229.       /* BRW_NEW_VUE_MAP_GEOM_OUT */
  230.       upload_3dstate_so_decl_list(brw, &brw->vue_map_geom_out);
  231.    }
  232.  
  233.    /* Finally, set up the SOL stage.  This command must always follow updates to
  234.     * the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
  235.     * MMIO register updates (current performed by the kernel at each batch
  236.     * emit).
  237.     */
  238.    upload_3dstate_streamout(brw, active, &brw->vue_map_geom_out);
  239. }
  240.  
  241. const struct brw_tracked_state gen7_sol_state = {
  242.    .dirty = {
  243.       .mesa  = (_NEW_LIGHT),
  244.       .brw   = (BRW_NEW_BATCH |
  245.                 BRW_NEW_VERTEX_PROGRAM |
  246.                 BRW_NEW_VUE_MAP_GEOM_OUT |
  247.                 BRW_NEW_TRANSFORM_FEEDBACK)
  248.    },
  249.    .emit = upload_sol_state,
  250. };
  251.  
  252. void
  253. gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
  254.                               struct gl_transform_feedback_object *obj)
  255. {
  256.    struct brw_context *brw = brw_context(ctx);
  257.  
  258.    intel_batchbuffer_flush(brw);
  259.    brw->batch.needs_sol_reset = true;
  260. }
  261.  
  /**
   * Hook run when transform feedback ends.
   *
   * Because we have to rely on the kernel to reset our SO write offsets, and
   * that only happens once per batchbuffer, the batch is flushed after
   * feedback so that a subsequent transform feedback can get the write offset
   * reset it needs.  The flush also covers any cache flushing required.
   *
   * \param ctx  GL context.
   * \param obj  transform feedback object being ended (unused here).
   */
  void
  gen7_end_transform_feedback(struct gl_context *ctx,
                              struct gl_transform_feedback_object *obj)
  {
     struct brw_context *brw = brw_context(ctx);

     (void) obj;

     intel_batchbuffer_flush(brw);
  }
  276.