Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. #include "main/macros.h"
  25. #include "intel_batchbuffer.h"
  26. #include "brw_context.h"
  27. #include "brw_state.h"
  28. #include "brw_defines.h"
  29.  
  30. /**
  31.  * The following diagram shows how we partition the URB:
  32.  *
  33.  *        16kB or 32kB               Rest of the URB space
  34.  *   __________-__________   _________________-_________________
  35.  *  /                     \ /                                   \
  36.  * +-------------------------------------------------------------+
  37.  * |     VS/FS/GS Push     |              VS/GS URB              |
  38.  * |       Constants       |               Entries               |
  39.  * +-------------------------------------------------------------+
  40.  *
  41.  * Notably, push constants must be stored at the beginning of the URB
  42.  * space, while entries can be stored anywhere.  Ivybridge and Haswell
  43.  * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
  44.  * doubles this (32kB).
  45.  *
  46.  * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and
  47.  * sized) in increments of 1kB.  Haswell GT3 requires them to be located and
  48.  * sized in increments of 2kB.
  49.  *
  50.  * Currently we split the constant buffer space evenly among whatever stages
  51.  * are active.  This is probably not ideal, but simple.
  52.  *
  53.  * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
  54.  * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
  55.  * Haswell GT3 has 512kB of URB space.
  56.  *
  57.  * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
  58.  * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
  59.  */
  60. static void
  61. gen7_allocate_push_constants(struct brw_context *brw)
  62. {
  63.    unsigned avail_size = 16;
  64.    unsigned multiplier =
  65.       (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 2 : 1;
  66.  
  67.    /* BRW_NEW_GEOMETRY_PROGRAM */
  68.    bool gs_present = brw->geometry_program;
  69.  
  70.    unsigned vs_size, gs_size;
  71.    if (gs_present) {
  72.       vs_size = avail_size / 3;
  73.       avail_size -= vs_size;
  74.       gs_size = avail_size / 2;
  75.       avail_size -= gs_size;
  76.    } else {
  77.       vs_size = avail_size / 2;
  78.       avail_size -= vs_size;
  79.       gs_size = 0;
  80.    }
  81.    unsigned fs_size = avail_size;
  82.  
  83.    gen7_emit_push_constant_state(brw, multiplier * vs_size,
  84.                                  multiplier * gs_size, multiplier * fs_size);
  85.  
  86.    /* From p115 of the Ivy Bridge PRM (3.2.1.4 3DSTATE_PUSH_CONSTANT_ALLOC_VS):
  87.     *
  88.     *     Programming Restriction:
  89.     *
  90.     *     The 3DSTATE_CONSTANT_VS must be reprogrammed prior to the next
  91.     *     3DPRIMITIVE command after programming the
  92.     *     3DSTATE_PUSH_CONSTANT_ALLOC_VS.
  93.     *
  94.     * Similar text exists for the other 3DSTATE_PUSH_CONSTANT_ALLOC_*
  95.     * commands.
  96.     */
  97.    brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
  98. }
  99.  
  100. void
  101. gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
  102.                               unsigned gs_size, unsigned fs_size)
  103. {
  104.    unsigned offset = 0;
  105.  
  106.    BEGIN_BATCH(6);
  107.    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
  108.    OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
  109.    offset += vs_size;
  110.  
  111.    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2));
  112.    OUT_BATCH(gs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
  113.    offset += gs_size;
  114.  
  115.    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
  116.    OUT_BATCH(fs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
  117.    ADVANCE_BATCH();
  118.  
  119.    /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
  120.     *
  121.     *     A PIPE_CONTOL command with the CS Stall bit set must be programmed
  122.     *     in the ring after this instruction.
  123.     *
  124.     * No such restriction exists for Haswell or Baytrail.
  125.     */
  126.    if (brw->gen < 8 && !brw->is_haswell && !brw->is_baytrail)
  127.       gen7_emit_cs_stall_flush(brw);
  128. }
  129.  
/* Re-divide the push constant space whenever a new context is bound or the
 * set of active stages changes (GS appearing/disappearing alters the split).
 */
const struct brw_tracked_state gen7_push_constant_space = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM,
   },
   .emit = gen7_allocate_push_constants,
};
  137.  
/**
 * Partition the URB between push constants and the VS/GS entry pools, then
 * emit the 3DSTATE_URB_* packets.
 *
 * Strategy: reserve a fixed chunk for push constants, give each active stage
 * its minimum requirement, then distribute any leftover chunks in proportion
 * to how much extra each stage could actually use ("wants").
 */
static void
gen7_upload_urb(struct brw_context *brw)
{
   /* Push constant area: 32kB on Gen8+ and Haswell GT3, 16kB otherwise
    * (must agree with the multiplier in gen7_allocate_push_constants).
    */
   const int push_size_kB =
      (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16;

   /* BRW_NEW_VS_PROG_DATA */
   /* Entry sizes are in 64-byte (512-bit) units; clamp to at least 1. */
   unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
   unsigned vs_entry_size_bytes = vs_size * 64;
   /* BRW_NEW_GEOMETRY_PROGRAM, BRW_NEW_GS_PROG_DATA */
   bool gs_present = brw->geometry_program;
   /* gs_size stays 1 (not 0) when no GS is bound, so the nr_gs_entries
    * division below never divides by zero.
    */
   unsigned gs_size = gs_present ? brw->gs.prog_data->base.urb_entry_size : 1;
   unsigned gs_entry_size_bytes = gs_size * 64;

   /* If we're just switching between programs with the same URB requirements,
    * skip the rest of the logic.
    */
   if (!(brw->ctx.NewDriverState & BRW_NEW_CONTEXT) &&
       brw->urb.vsize == vs_size &&
       brw->urb.gs_present == gs_present &&
       brw->urb.gsize == gs_size) {
      return;
   }
   /* Cache the inputs so the early-out above works next time around. */
   brw->urb.vsize = vs_size;
   brw->urb.gs_present = gs_present;
   brw->urb.gsize = gs_size;

   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
    *
    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
    *     Allocation Size is less than 9 512-bit URB entries.
    *
    * Similar text exists for GS.
    */
   unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
   unsigned gs_granularity = (gs_size < 9) ? 8 : 1;

   /* URB allocations must be done in 8k chunks. */
   unsigned chunk_size_bytes = 8192;

   /* Determine the size of the URB in chunks.
    */
   unsigned urb_chunks = brw->urb.size * 1024 / chunk_size_bytes;

   /* Reserve space for push constants */
   unsigned push_constant_bytes = 1024 * push_size_kB;
   unsigned push_constant_chunks =
      push_constant_bytes / chunk_size_bytes;

   /* Initially, assign each stage the minimum amount of URB space it needs,
    * and make a note of how much additional space it "wants" (the amount of
    * additional space it could actually make use of).
    */

   /* VS has a lower limit on the number of URB entries */
   unsigned vs_chunks =
      ALIGN(brw->urb.min_vs_entries * vs_entry_size_bytes, chunk_size_bytes) /
      chunk_size_bytes;
   unsigned vs_wants =
      ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes,
            chunk_size_bytes) / chunk_size_bytes - vs_chunks;

   unsigned gs_chunks = 0;
   unsigned gs_wants = 0;
   if (gs_present) {
      /* There are two constraints on the minimum amount of URB space we can
       * allocate:
       *
       * (1) We need room for at least 2 URB entries, since we always operate
       * the GS in DUAL_OBJECT mode.
       *
       * (2) We can't allocate less than nr_gs_entries_granularity.
       */
      gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
                        chunk_size_bytes) / chunk_size_bytes;
      gs_wants =
         ALIGN(brw->urb.max_gs_entries * gs_entry_size_bytes,
               chunk_size_bytes) / chunk_size_bytes - gs_chunks;
   }

   /* There should always be enough URB space to satisfy the minimum
    * requirements of each stage.
    */
   unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
   assert(total_needs <= urb_chunks);

   /* Mete out remaining space (if any) in proportion to "wants". */
   unsigned total_wants = vs_wants + gs_wants;
   unsigned remaining_space = urb_chunks - total_needs;
   if (remaining_space > total_wants)
      remaining_space = total_wants;
   if (remaining_space > 0) {
      /* VS gets its proportional share (rounded); GS gets the rest. */
      unsigned vs_additional = (unsigned)
         round(vs_wants * (((double) remaining_space) / total_wants));
      vs_chunks += vs_additional;
      remaining_space -= vs_additional;
      gs_chunks += remaining_space;
   }

   /* Sanity check that we haven't over-allocated. */
   assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);

   /* Finally, compute the number of entries that can fit in the space
    * allocated to each stage.
    */
   unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
   unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;

   /* Since we rounded up when computing *_wants, this may be slightly more
    * than the maximum allowed amount, so correct for that.
    */
   nr_vs_entries = MIN2(nr_vs_entries, brw->urb.max_vs_entries);
   nr_gs_entries = MIN2(nr_gs_entries, brw->urb.max_gs_entries);

   /* Ensure that we program a multiple of the granularity. */
   nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
   nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);

   /* Finally, sanity check to make sure we have at least the minimum number
    * of entries needed for each stage.
    */
   assert(nr_vs_entries >= brw->urb.min_vs_entries);
   if (gs_present)
      assert(nr_gs_entries >= 2);

   /* Gen7 doesn't actually use brw->urb.nr_{vs,gs}_entries, but it seems
    * better to put reasonable data in there rather than leave them
    * uninitialized.
    */
   brw->urb.nr_vs_entries = nr_vs_entries;
   brw->urb.nr_gs_entries = nr_gs_entries;

   /* Lay out the URB in the following order:
    * - push constants
    * - VS
    * - GS
    */
   brw->urb.vs_start = push_constant_chunks;
   brw->urb.gs_start = push_constant_chunks + vs_chunks;

   /* NOTE(review): the workaround flush is skipped on Haswell/Baytrail,
    * matching the condition used after push-constant allocation above.
    */
   if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail)
      gen7_emit_vs_workaround_flush(brw);
   gen7_emit_urb_state(brw,
                       brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start,
                       brw->urb.nr_gs_entries, gs_size, brw->urb.gs_start);
}
  284.  
  285. void
  286. gen7_emit_urb_state(struct brw_context *brw,
  287.                     unsigned nr_vs_entries, unsigned vs_size,
  288.                     unsigned vs_start, unsigned nr_gs_entries,
  289.                     unsigned gs_size, unsigned gs_start)
  290. {
  291.    BEGIN_BATCH(8);
  292.    OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
  293.    OUT_BATCH(nr_vs_entries |
  294.              ((vs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
  295.              (vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
  296.  
  297.    OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
  298.    OUT_BATCH(nr_gs_entries |
  299.              ((gs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
  300.              (gs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
  301.  
  302.    /* Allocate the HS and DS zero space - we don't use them. */
  303.    OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
  304.    OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  305.              (vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
  306.  
  307.    OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
  308.    OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  309.              (vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
  310.    ADVANCE_BATCH();
  311. }
  312.  
/* Re-run the URB partitioning whenever a new context is bound or anything
 * that feeds gen7_upload_urb's inputs changes: the set of active stages
 * (geometry program) or either stage's compiled URB entry size.
 */
const struct brw_tracked_state gen7_urb = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = gen7_upload_urb,
};
  323.