Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2014 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #ifndef ILO_BUILDER_MEDIA_H
  29. #define ILO_BUILDER_MEDIA_H
  30.  
  31. #include "genhw/genhw.h"
  32. #include "../ilo_shader.h"
  33. #include "intel_winsys.h"
  34.  
  35. #include "ilo_core.h"
  36. #include "ilo_dev.h"
  37. #include "ilo_builder.h"
  38.  
  39. struct gen6_idrt_data {
  40.    const struct ilo_shader_state *cs;
  41.  
  42.    uint32_t sampler_offset;
  43.    uint32_t binding_table_offset;
  44.  
  45.    unsigned curbe_size;
  46.    unsigned thread_group_size;
  47. };
  48.  
  49. static inline void
  50. gen6_MEDIA_VFE_STATE(struct ilo_builder *builder,
  51.                      unsigned curbe_alloc, bool use_slm)
  52. {
  53.    const uint8_t cmd_len = 8;
  54.    const unsigned idrt_alloc =
  55.       ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
  56.    int max_threads;
  57.    uint32_t *dw;
  58.  
  59.    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
  60.  
  61.    max_threads = builder->dev->thread_count;
  62.  
  63.    curbe_alloc = align(curbe_alloc, 32);
  64.    assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1));
  65.  
  66.    ilo_builder_batch_pointer(builder, cmd_len, &dw);
  67.  
  68.    dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2);
  69.    dw[1] = 0; /* scratch */
  70.  
  71.    dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
  72.            0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
  73.            GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
  74.            GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
  75.    if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
  76.       dw[2] |= GEN7_VFE_DW2_GPGPU_MODE;
  77.  
  78.    dw[3] = 0;
  79.  
  80.    dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
  81.            (curbe_alloc / 32);
  82.  
  83.    dw[5] = 0;
  84.    dw[6] = 0;
  85.    dw[7] = 0;
  86. }
  87.  
  88. static inline void
  89. gen6_MEDIA_CURBE_LOAD(struct ilo_builder *builder,
  90.                       uint32_t offset, unsigned size)
  91. {
  92.    const uint8_t cmd_len = 4;
  93.    uint32_t *dw;
  94.  
  95.    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
  96.  
  97.    assert(offset % 32 == 0 && size % 32 == 0);
  98.    /* GPU hangs if size is zero */
  99.    assert(size);
  100.  
  101.    ilo_builder_batch_pointer(builder, cmd_len, &dw);
  102.  
  103.    dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_CURBE_LOAD) | (cmd_len - 2);
  104.    dw[1] = 0;
  105.    dw[2] = size;
  106.    dw[3] = offset;
  107. }
  108.  
  109. static inline void
  110. gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(struct ilo_builder *builder,
  111.                                      uint32_t offset, unsigned size)
  112. {
  113.    const uint8_t cmd_len = 4;
  114.    const unsigned idrt_alloc =
  115.       ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
  116.    uint32_t *dw;
  117.  
  118.    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
  119.  
  120.    assert(offset % 32 == 0 && size % 32 == 0);
  121.    assert(size && size <= idrt_alloc);
  122.  
  123.    ilo_builder_batch_pointer(builder, cmd_len, &dw);
  124.  
  125.    dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_INTERFACE_DESCRIPTOR_LOAD) |
  126.            (cmd_len - 2);
  127.    dw[1] = 0;
  128.    dw[2] = size;
  129.    dw[3] = offset;
  130. }
  131.  
  132. static inline void
  133. gen6_MEDIA_STATE_FLUSH(struct ilo_builder *builder)
  134. {
  135.    const uint8_t cmd_len = 2;
  136.    uint32_t *dw;
  137.  
  138.    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
  139.  
  140.    ilo_builder_batch_pointer(builder, cmd_len, &dw);
  141.  
  142.    dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_STATE_FLUSH) | (cmd_len - 2);
  143.    dw[1] = 0;
  144. }
  145.  
  146. static inline void
  147. gen7_GPGPU_WALKER(struct ilo_builder *builder,
  148.                   const unsigned thread_group_offset[3],
  149.                   const unsigned thread_group_dim[3],
  150.                   unsigned thread_group_size,
  151.                   unsigned simd_size)
  152. {
  153.    const uint8_t cmd_len = 11;
  154.    uint32_t right_execmask, bottom_execmask;
  155.    unsigned thread_count;
  156.    uint32_t *dw;
  157.  
  158.    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
  159.  
  160.    assert(simd_size == 16 || simd_size == 8);
  161.  
  162.    thread_count = (thread_group_size + simd_size - 1) / simd_size;
  163.    assert(thread_count <= 64);
  164.  
  165.    right_execmask = thread_group_size % simd_size;
  166.    if (right_execmask)
  167.       right_execmask = (1 << right_execmask) - 1;
  168.    else
  169.       right_execmask = (1 << simd_size) - 1;
  170.  
  171.    bottom_execmask = 0xffffffff;
  172.  
  173.    ilo_builder_batch_pointer(builder, cmd_len, &dw);
  174.  
  175.    dw[0] = GEN7_RENDER_CMD(MEDIA, GPGPU_WALKER) | (cmd_len - 2);
  176.    dw[1] = 0; /* always first IDRT */
  177.  
  178.    dw[2] = (thread_count - 1) << GEN7_GPGPU_DW2_THREAD_MAX_X__SHIFT;
  179.    if (simd_size == 16)
  180.       dw[2] |= GEN7_GPGPU_DW2_SIMD_SIZE_SIMD16;
  181.    else
  182.       dw[2] |= GEN7_GPGPU_DW2_SIMD_SIZE_SIMD8;
  183.  
  184.    dw[3] = thread_group_offset[0];
  185.    dw[4] = thread_group_dim[0];
  186.    dw[5] = thread_group_offset[1];
  187.    dw[6] = thread_group_dim[1];
  188.    dw[7] = thread_group_offset[2];
  189.    dw[8] = thread_group_dim[2];
  190.  
  191.    dw[9] = right_execmask;
  192.    dw[10] = bottom_execmask;
  193. }
  194.  
  195. static inline uint32_t
  196. gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
  197.                                const struct gen6_idrt_data *data,
  198.                                int idrt_count)
  199. {
  200.    /*
  201.     * From the Sandy Bridge PRM, volume 2 part 2, page 34:
  202.     *
  203.     *     "(Interface Descriptor Total Length) This field must have the same
  204.     *      alignment as the Interface Descriptor Data Start Address.
  205.     *
  206.     *      It must be DQWord (32-byte) aligned..."
  207.     *
  208.     * From the Sandy Bridge PRM, volume 2 part 2, page 35:
  209.     *
  210.     *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
  211.     *      aligned address of the Interface Descriptor data."
  212.     */
  213.    const int state_align = 32;
  214.    const int state_len = (32 / 4) * idrt_count;
  215.    uint32_t state_offset, *dw;
  216.    int i;
  217.  
  218.    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
  219.  
  220.    state_offset = ilo_builder_dynamic_pointer(builder,
  221.          ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw);
  222.  
  223.    for (i = 0; i < idrt_count; i++) {
  224.       const struct gen6_idrt_data *idrt = &data[i];
  225.       const struct ilo_shader_state *cs = idrt->cs;
  226.       unsigned sampler_count, bt_size, slm_size;
  227.  
  228.       sampler_count =
  229.          ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
  230.       assert(sampler_count <= 16);
  231.       sampler_count = (sampler_count + 3) / 4;
  232.  
  233.       bt_size =
  234.          ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
  235.       if (bt_size > 31)
  236.          bt_size = 31;
  237.  
  238.       slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
  239.  
  240.       assert(idrt->curbe_size / 32 <= 63);
  241.  
  242.       dw[0] = ilo_shader_get_kernel_offset(idrt->cs);
  243.       dw[1] = 0;
  244.       dw[2] = idrt->sampler_offset |
  245.               sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
  246.       dw[3] = idrt->binding_table_offset |
  247.               bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
  248.  
  249.       dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
  250.               0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
  251.  
  252.       if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
  253.          dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
  254.  
  255.          if (slm_size) {
  256.             assert(slm_size <= 64 * 1024);
  257.             slm_size = util_next_power_of_two((slm_size + 4095) / 4096);
  258.  
  259.             dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE |
  260.                      slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT |
  261.                      idrt->thread_group_size <<
  262.                         GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
  263.          }
  264.       } else {
  265.          dw[5] = 0;
  266.       }
  267.  
  268.       dw[6] = 0;
  269.       dw[7] = 0;
  270.  
  271.       dw += 8;
  272.    }
  273.  
  274.    return state_offset;
  275. }
  276.  
  277. #endif /* ILO_BUILDER_MEDIA_H */
  278.