Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2014 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "genhw/genhw.h"
  29. #include "core/ilo_builder_media.h"
  30. #include "core/ilo_builder_mi.h"
  31. #include "core/ilo_builder_render.h"
  32.  
  33. #include "ilo_state.h"
  34. #include "ilo_render_gen.h"
  35.  
  36. struct gen7_l3_config {
  37.    int slm;
  38.    int urb;
  39.    int rest;
  40.    int dc;
  41.    int ro;
  42.    int is;
  43.    int c;
  44.    int t;
  45. };
  46.  
  47. /*
  48.  * From the Ivy Bridge PRM, volume 1 part 7, page 10:
  49.  *
  50.  *     "Normal L3/URB mode (non-SLM mode), uses all 4 banks of L3 equally to
  51.  *      distribute cycles. The following allocation is a suggested programming
  52.  *      model. Note all numbers below are given in KBytes."
  53.  *
  54.  * From the Haswell PRM, volume 7, page 662:
  55.  *
  56.  *     "The configuration for {SLM = 0,URB = 224,DC = 32,RO = 256,IS = 0,C =
  57.  *      0,T =0, SUM 512} was validated as a later supported configuration and
  58.  *      can be utilized if desired."
  59.  */
  60. static const struct gen7_l3_config gen7_l3_non_slm_configs[] = {
  61.    /*       SLM   URB  Rest    DC    RO   I/S     C     T */
  62.    [0] = {    0,  256,    0,    0,  256,    0,    0,    0, },
  63.    [1] = {    0,  256,    0,  128,  128,    0,    0,    0, },
  64.    [2] = {    0,  256,    0,   32,    0,   64,   32,  128, },
  65.    [3] = {    0,  224,    0,   64,    0,   64,   32,  128, },
  66.    [4] = {    0,  224,    0,  128,    0,   64,   32,   64, },
  67.    [5] = {    0,  224,    0,   64,    0,  128,   32,   64, },
  68.    [6] = {    0,  224,    0,    0,    0,  128,   32,  128, },
  69.    [7] = {    0,  256,    0,    0,    0,  128,    0,  128, },
  70.  
  71.    [8] = {    0,  224,    0,   32,  256,    0,    0,    0, },
  72. };
  73.  
  74. /*
  75.  * From the Ivy Bridge PRM, volume 1 part 7, page 11:
  76.  *
  77.  *     "With the existence of Shared Local Memory, a 64KB chunk from each of
  78.  *      the 2 L3 banks will be reserved for SLM usage. The remaining cache
  79.  *      space is divided between the remaining clients. SLM allocation is done
  80.  *      via reducing the number of ways on the two banks from 64 to 32."
  81.  *
  82.  * From the Haswell PRM, volume 7, page 662:
  83.  *
  84.  *     "The configuration for {SLM = 128,URB = 128,DC = 0,RO = 256,IS = 0,C =
  85.  *      0,T =0, SUM 512} was validated as a later supported configuration and
  86.  *      can be utilized if desired. For this configuration, global atomics
  87.  *      must be programmed to be in GTI."
  88.  */
  89. static const struct gen7_l3_config gen7_l3_slm_configs[] = {
  90.    /*       SLM   URB  Rest    DC    RO   I/S     C     T */
  91.    [0] = {  128,  128,    0,  128,  128,    0,    0,    0, },
  92.    [1] = {  128,  128,    0,   64,    0,   64,   64,   64, },
  93.    [2] = {  128,  128,    0,   32,    0,   64,   32,  128, },
  94.    [3] = {  128,  128,    0,   32,    0,  128,   32,   64, },
  95.  
  96.    [4] = {  128,  128,    0,    0,  256,    0,    0,    0, },
  97. };
  98.  
  99. static void
  100. gen7_launch_grid_l3(struct ilo_render *r, bool use_slm)
  101. {
  102.    uint32_t l3sqcreg1, l3cntlreg2, l3cntlreg3;
  103.    const struct gen7_l3_config *conf;
  104.  
  105.    /*
  106.     * This function mostly follows what beignet does.  I do not know why, for
  107.     * example, CON4DCUNC should be reset.  I do not know if it should be set
  108.     * again after launch_grid().
  109.     */
  110.  
  111.    ILO_DEV_ASSERT(r->dev, 7, 7.5);
  112.  
  113.    if (use_slm)
  114.       conf = &gen7_l3_slm_configs[1];
  115.    else
  116.       conf = &gen7_l3_non_slm_configs[4];
  117.  
  118.    /* unset GEN7_REG_L3SQCREG1_CON4DCUNC (without readback first) */
  119.    if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) {
  120.       l3sqcreg1 = GEN75_REG_L3SQCREG1_SQGPCI_24 |
  121.                   GEN75_REG_L3SQCREG1_SQHPCI_8;
  122.    } else {
  123.       l3sqcreg1 = GEN7_REG_L3SQCREG1_SQGHPCI_18_6;
  124.    }
  125.  
  126.    l3cntlreg2 = (conf->dc / 8) << GEN7_REG_L3CNTLREG2_DCWASS__SHIFT |
  127.                 (conf->ro / 8) << GEN7_REG_L3CNTLREG2_RDOCPL__SHIFT |
  128.                 (conf->urb / 8) << GEN7_REG_L3CNTLREG2_URBALL__SHIFT;
  129.  
  130.    l3cntlreg3 = (conf->t / 8) << GEN7_REG_L3CNTLREG3_TXWYALL__SHIFT |
  131.                 (conf->c / 8) << GEN7_REG_L3CNTLREG3_CTWYALL__SHIFT |
  132.                 (conf->is / 8) << GEN7_REG_L3CNTLREG3_ISWYALL__SHIFT;
  133.  
  134.    if (conf->slm) {
  135.       /*
  136.        * From the Ivy Bridge PRM, volume 1 part 7, page 11:
  137.        *
  138.        *     "Note that URB needs to be set as low b/w client in SLM mode,
  139.        *      else the hash will fail. This is a required s/w model."
  140.        */
  141.       l3cntlreg2 |= GEN7_REG_L3CNTLREG2_URBSLMB |
  142.                     GEN7_REG_L3CNTLREG2_SLMMENB;
  143.    }
  144.  
  145.    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3SQCREG1, l3sqcreg1);
  146.    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG2, l3cntlreg2);
  147.    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG3, l3cntlreg3);
  148. }
  149.  
  150. int
  151. ilo_render_get_launch_grid_commands_len(const struct ilo_render *render,
  152.                                         const struct ilo_state_vector *vec)
  153. {
  154.    static int len;
  155.  
  156.    ILO_DEV_ASSERT(render->dev, 7, 7.5);
  157.  
  158.    if (!len) {
  159.       len +=
  160.          GEN6_PIPELINE_SELECT__SIZE +
  161.          GEN6_STATE_BASE_ADDRESS__SIZE +
  162.          GEN6_MEDIA_VFE_STATE__SIZE +
  163.          GEN6_MEDIA_CURBE_LOAD__SIZE +
  164.          GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD__SIZE +
  165.          GEN6_MEDIA_STATE_FLUSH__SIZE;
  166.  
  167.       len += ilo_render_get_flush_len(render) * 3;
  168.  
  169.       if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
  170.          len += GEN6_MI_LOAD_REGISTER_IMM__SIZE * 3 * 2;
  171.          len += GEN7_GPGPU_WALKER__SIZE;
  172.       }
  173.    }
  174.  
  175.    return len;
  176. }
  177.  
  178. void
  179. ilo_render_emit_launch_grid_commands(struct ilo_render *render,
  180.                                      const struct ilo_state_vector *vec,
  181.                                      const struct ilo_render_launch_grid_session *session)
  182. {
  183.    const unsigned batch_used = ilo_builder_batch_used(render->builder);
  184.    const uint32_t pcb = render->state.cs.PUSH_CONSTANT_BUFFER;
  185.    const int pcb_size = render->state.cs.PUSH_CONSTANT_BUFFER_size;
  186.    int simd_size;
  187.    bool use_slm;
  188.  
  189.    ILO_DEV_ASSERT(render->dev, 7, 7.5);
  190.  
  191.    simd_size = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_SIMD_SIZE);
  192.    use_slm = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_LOCAL_SIZE);
  193.  
  194.    ilo_render_emit_flush(render);
  195.  
  196.    if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
  197.       gen7_launch_grid_l3(render, use_slm);
  198.       ilo_render_emit_flush(render);
  199.  
  200.       gen6_PIPELINE_SELECT(render->builder,
  201.             GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU);
  202.    } else {
  203.       gen6_PIPELINE_SELECT(render->builder,
  204.             GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA);
  205.    }
  206.  
  207.    gen6_state_base_address(render->builder, true);
  208.  
  209.    gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm);
  210.  
  211.    if (pcb_size)
  212.       gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size);
  213.  
  214.    gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(render->builder,
  215.          session->idrt, session->idrt_size);
  216.  
  217.    gen7_GPGPU_WALKER(render->builder, session->thread_group_offset,
  218.          session->thread_group_dim, session->thread_group_size, simd_size);
  219.  
  220.    gen6_MEDIA_STATE_FLUSH(render->builder);
  221.  
  222.    if (ilo_dev_gen(render->dev) >= ILO_GEN(7) && use_slm) {
  223.       ilo_render_emit_flush(render);
  224.       gen7_launch_grid_l3(render, false);
  225.    }
  226.  
  227.    assert(ilo_builder_batch_used(render->builder) <= batch_used +
  228.          ilo_render_get_launch_grid_commands_len(render, vec));
  229. }
  230.