Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2013 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "util/u_resource.h"
  29. #include "brw_defines.h"
  30. #include "intel_reg.h"
  31.  
  32. #include "ilo_cp.h"
  33. #include "ilo_format.h"
  34. #include "ilo_resource.h"
  35. #include "ilo_shader.h"
  36. #include "ilo_gpe_gen7.h"
  37.  
  /**
   * Placeholder for the GPGPU_WALKER command.
   *
   * Nothing is written to \p cp; the function only trips an assertion to flag
   * that the command is not supported.  Both parameters are unused.
   */
  static void
  gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                         struct ilo_cp *cp)
  {
     (void) dev;
     (void) cp;

     assert(!"GPGPU_WALKER unsupported");
  }
  44.  
  /**
   * Emit 3DSTATE_CLEAR_PARAMS.
   *
   * The command is three dwords long: the header, \p clear_val, and a final
   * dword holding the constant 1.
   * NOTE(review): the trailing 1 is presumably the "clear value valid" flag;
   * confirm against the PRM.
   */
  static void
  gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                                 uint32_t clear_val,
                                 struct ilo_cp *cp)
  {
     const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x04);
     const uint8_t num_dwords = 3;
     const uint32_t header = opcode | (num_dwords - 2);

     ILO_GPE_VALID_GEN(dev, 7, 7);

     ilo_cp_begin(cp, num_dwords);
     ilo_cp_write(cp, header);
     ilo_cp_write(cp, clear_val);
     ilo_cp_write(cp, 1);
     ilo_cp_end(cp);
  }
  61.  
  /**
   * Emit a two-dword 3DSTATE command: a header built from \p subop, followed
   * by \p pointer as the only payload dword.
   *
   * \param subop    sub-opcode passed to ILO_GPE_CMD()
   * \param pointer  value written verbatim after the header
   */
  static void
  gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                            int subop, uint32_t pointer,
                            struct ilo_cp *cp)
  {
     const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, subop);
     const uint8_t num_dwords = 2;
     const uint32_t header = opcode | (num_dwords - 2);

     ILO_GPE_VALID_GEN(dev, 7, 7);

     ilo_cp_begin(cp, num_dwords);
     ilo_cp_write(cp, header);
     ilo_cp_write(cp, pointer);
     ilo_cp_end(cp);
  }
  77.  
  /**
   * Emit 3DSTATE_CC_STATE_POINTERS (sub-opcode 0x0e) with
   * \p color_calc_state as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                      uint32_t color_calc_state,
                                      struct ilo_cp *cp)
  {
     const int subop = 0x0e;

     gen7_emit_3dstate_pointer(dev, subop, color_calc_state, cp);
  }
  85.  
  86. void
  87. ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
  88.                          const struct ilo_shader_state *gs,
  89.                          struct ilo_shader_cso *cso)
  90. {
  91.    int start_grf, vue_read_len, max_threads;
  92.    uint32_t dw2, dw4, dw5;
  93.  
  94.    ILO_GPE_VALID_GEN(dev, 7, 7);
  95.  
  96.    start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
  97.    vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
  98.  
  99.    /* in pairs */
  100.    vue_read_len = (vue_read_len + 1) / 2;
  101.  
  102.    switch (dev->gen) {
  103.    case ILO_GEN(7):
  104.       max_threads = (dev->gt == 2) ? 128 : 36;
  105.       break;
  106.    default:
  107.       max_threads = 1;
  108.       break;
  109.    }
  110.  
  111.    dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
  112.  
  113.    dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
  114.          GEN7_GS_INCLUDE_VERTEX_HANDLES |
  115.          0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
  116.          start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
  117.  
  118.    dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
  119.          GEN6_GS_STATISTICS_ENABLE |
  120.          GEN6_GS_ENABLE;
  121.  
  122.    STATIC_ASSERT(Elements(cso->payload) >= 3);
  123.    cso->payload[0] = dw2;
  124.    cso->payload[1] = dw4;
  125.    cso->payload[2] = dw5;
  126. }
  127.  
  128. static void
  129. gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
  130.                      const struct ilo_shader_state *gs,
  131.                      int num_samplers,
  132.                      struct ilo_cp *cp)
  133. {
  134.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
  135.    const uint8_t cmd_len = 7;
  136.    const struct ilo_shader_cso *cso;
  137.    uint32_t dw2, dw4, dw5;
  138.  
  139.    ILO_GPE_VALID_GEN(dev, 7, 7);
  140.  
  141.    if (!gs) {
  142.       ilo_cp_begin(cp, cmd_len);
  143.       ilo_cp_write(cp, cmd | (cmd_len - 2));
  144.       ilo_cp_write(cp, 0);
  145.       ilo_cp_write(cp, 0);
  146.       ilo_cp_write(cp, 0);
  147.       ilo_cp_write(cp, 0);
  148.       ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
  149.       ilo_cp_write(cp, 0);
  150.       ilo_cp_end(cp);
  151.       return;
  152.    }
  153.  
  154.    cso = ilo_shader_get_kernel_cso(gs);
  155.    dw2 = cso->payload[0];
  156.    dw4 = cso->payload[1];
  157.    dw5 = cso->payload[2];
  158.  
  159.    dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
  160.  
  161.    ilo_cp_begin(cp, cmd_len);
  162.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  163.    ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
  164.    ilo_cp_write(cp, dw2);
  165.    ilo_cp_write(cp, 0); /* scratch */
  166.    ilo_cp_write(cp, dw4);
  167.    ilo_cp_write(cp, dw5);
  168.    ilo_cp_write(cp, 0);
  169.    ilo_cp_end(cp);
  170. }
  171.  
  172. static void
  173. gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
  174.                      const struct ilo_rasterizer_state *rasterizer,
  175.                      const struct pipe_surface *zs_surf,
  176.                      struct ilo_cp *cp)
  177. {
  178.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
  179.    const uint8_t cmd_len = 7;
  180.    const int num_samples = 1;
  181.    uint32_t payload[6];
  182.  
  183.    ILO_GPE_VALID_GEN(dev, 7, 7);
  184.  
  185.    ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
  186.          rasterizer, num_samples,
  187.          (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
  188.          payload, Elements(payload));
  189.  
  190.    ilo_cp_begin(cp, cmd_len);
  191.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  192.    ilo_cp_write_multi(cp, payload, 6);
  193.    ilo_cp_end(cp);
  194. }
  195.  
  196. void
  197. ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
  198.                                 const struct pipe_rasterizer_state *state,
  199.                                 struct ilo_rasterizer_wm *wm)
  200. {
  201.    uint32_t dw1, dw2;
  202.  
  203.    ILO_GPE_VALID_GEN(dev, 7, 7);
  204.  
  205.    dw1 = GEN7_WM_POSITION_ZW_PIXEL |
  206.          GEN7_WM_LINE_AA_WIDTH_2_0 |
  207.          GEN7_WM_MSRAST_OFF_PIXEL;
  208.  
  209.    /* same value as in 3DSTATE_SF */
  210.    if (state->line_smooth)
  211.       dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
  212.  
  213.    if (state->poly_stipple_enable)
  214.       dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
  215.    if (state->line_stipple_enable)
  216.       dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
  217.  
  218.    if (state->bottom_edge_rule)
  219.       dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
  220.  
  221.    dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
  222.  
  223.    /*
  224.     * assertion that makes sure
  225.     *
  226.     *   dw1 |= wm->dw_msaa_rast;
  227.     *   dw2 |= wm->dw_msaa_disp;
  228.     *
  229.     * is valid
  230.     */
  231.    STATIC_ASSERT(GEN7_WM_MSRAST_OFF_PIXEL == 0 &&
  232.                  GEN7_WM_MSDISPMODE_PERSAMPLE == 0);
  233.  
  234.    wm->dw_msaa_rast =
  235.       (state->multisample) ? GEN7_WM_MSRAST_ON_PATTERN : 0;
  236.    wm->dw_msaa_disp = GEN7_WM_MSDISPMODE_PERPIXEL;
  237.  
  238.    STATIC_ASSERT(Elements(wm->payload) >= 2);
  239.    wm->payload[0] = dw1;
  240.    wm->payload[1] = dw2;
  241. }
  242.  
  243. void
  244. ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
  245.                          const struct ilo_shader_state *fs,
  246.                          struct ilo_shader_cso *cso)
  247. {
  248.    int start_grf, max_threads;
  249.    uint32_t dw2, dw4, dw5;
  250.    uint32_t wm_interps, wm_dw1;
  251.  
  252.    ILO_GPE_VALID_GEN(dev, 7, 7);
  253.  
  254.    start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
  255.    /* see brwCreateContext() */
  256.    max_threads = (dev->gt == 2) ? 172 : 48;
  257.  
  258.    dw2 = (true) ? 0 : GEN7_PS_FLOATING_POINT_MODE_ALT;
  259.  
  260.    dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
  261.          GEN7_PS_POSOFFSET_NONE;
  262.  
  263.    if (false)
  264.       dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
  265.  
  266.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
  267.       dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
  268.  
  269.    assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
  270.    dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
  271.  
  272.    dw5 = start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
  273.          0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
  274.          0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
  275.  
  276.    /* FS affects 3DSTATE_WM too */
  277.    wm_dw1 = 0;
  278.  
  279.    /*
  280.     * TODO set this bit only when
  281.     *
  282.     *  a) fs writes colors and color is not masked, or
  283.     *  b) fs writes depth, or
  284.     *  c) fs or cc kills
  285.     */
  286.    wm_dw1 |= GEN7_WM_DISPATCH_ENABLE;
  287.  
  288.    /*
  289.     * From the Ivy Bridge PRM, volume 2 part 1, page 278:
  290.     *
  291.     *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
  292.     *      the PS kernel or color calculator has the ability to kill
  293.     *      (discard) pixels or samples, other than due to depth or stencil
  294.     *      testing. This bit is required to be ENABLED in the following
  295.     *      situations:
  296.     *
  297.     *      - The API pixel shader program contains "killpix" or "discard"
  298.     *        instructions, or other code in the pixel shader kernel that
  299.     *        can cause the final pixel mask to differ from the pixel mask
  300.     *        received on dispatch.
  301.     *
  302.     *      - A sampler with chroma key enabled with kill pixel mode is used
  303.     *        by the pixel shader.
  304.     *
  305.     *      - Any render target has Alpha Test Enable or AlphaToCoverage
  306.     *        Enable enabled.
  307.     *
  308.     *      - The pixel shader kernel generates and outputs oMask.
  309.     *
  310.     *      Note: As ClipDistance clipping is fully supported in hardware
  311.     *      and therefore not via PS instructions, there should be no need
  312.     *      to ENABLE this bit due to ClipDistance clipping."
  313.     */
  314.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
  315.       wm_dw1 |= GEN7_WM_KILL_ENABLE;
  316.  
  317.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
  318.       wm_dw1 |= GEN7_WM_PSCDEPTH_ON;
  319.  
  320.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
  321.       wm_dw1 |= GEN7_WM_USES_SOURCE_DEPTH;
  322.  
  323.    if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
  324.       wm_dw1 |= GEN7_WM_USES_SOURCE_W;
  325.  
  326.    wm_interps = ilo_shader_get_kernel_param(fs,
  327.          ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
  328.  
  329.    wm_dw1 |= wm_interps << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
  330.  
  331.    STATIC_ASSERT(Elements(cso->payload) >= 4);
  332.    cso->payload[0] = dw2;
  333.    cso->payload[1] = dw4;
  334.    cso->payload[2] = dw5;
  335.    cso->payload[3] = wm_dw1;
  336. }
  337.  
  338. static void
  339. gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
  340.                      const struct ilo_shader_state *fs,
  341.                      const struct ilo_rasterizer_state *rasterizer,
  342.                      bool cc_may_kill,
  343.                      struct ilo_cp *cp)
  344. {
  345.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
  346.    const uint8_t cmd_len = 3;
  347.    const int num_samples = 1;
  348.    uint32_t dw1, dw2;
  349.  
  350.    ILO_GPE_VALID_GEN(dev, 7, 7);
  351.  
  352.    /* see ilo_gpe_init_rasterizer_wm() */
  353.    dw1 = rasterizer->wm.payload[0];
  354.    dw2 = rasterizer->wm.payload[1];
  355.  
  356.    dw1 |= GEN7_WM_STATISTICS_ENABLE;
  357.  
  358.    if (false) {
  359.       dw1 |= GEN7_WM_DEPTH_CLEAR;
  360.       dw1 |= GEN7_WM_DEPTH_RESOLVE;
  361.       dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
  362.    }
  363.  
  364.    if (fs) {
  365.       const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
  366.  
  367.       dw1 |= fs_cso->payload[3];
  368.    }
  369.  
  370.    if (cc_may_kill) {
  371.       dw1 |= GEN7_WM_DISPATCH_ENABLE |
  372.              GEN7_WM_KILL_ENABLE;
  373.    }
  374.  
  375.    if (num_samples > 1) {
  376.       dw1 |= rasterizer->wm.dw_msaa_rast;
  377.       dw2 |= rasterizer->wm.dw_msaa_disp;
  378.    }
  379.  
  380.    ilo_cp_begin(cp, cmd_len);
  381.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  382.    ilo_cp_write(cp, dw1);
  383.    ilo_cp_write(cp, dw2);
  384.    ilo_cp_end(cp);
  385. }
  386.  
  /**
   * Emit one of the 3DSTATE_CONSTANT_* commands (seven dwords).
   *
   * \param subop     sub-opcode; asserted to be in 0x15..0x1a and not 0x18
   * \param bufs      buffer dwords, each asserted to be a multiple of 32
   * \param sizes     buffer sizes, converted to read lengths of 32 units each
   *                  (rounded up)
   * \param num_bufs  number of entries in \p bufs / \p sizes, at most 4
   *
   * Payload layout: dwords 0-1 pack the four 16-bit read lengths (two per
   * dword, even slots in the low half), dwords 2-5 hold the buffer values.
   * Slots from the first disabled buffer (index >= \p num_bufs or zero size)
   * onwards are written as zero.
   */
  static void
  gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                             int subop,
                             const uint32_t *bufs, const int *sizes,
                             int num_bufs,
                             struct ilo_cp *cp)
  {
     const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, subop);
     const uint8_t num_dwords = 7;
     uint32_t payload[6];
     int total_read_length = 0;
     int i;

     ILO_GPE_VALID_GEN(dev, 7, 7);

     /* VS, HS, DS, GS, and PS variants */
     assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

     assert(num_bufs <= 4);

     payload[0] = 0;
     payload[1] = 0;

     /*
      * From the Ivy Bridge PRM, volume 2 part 1, page 112:
      *
      *     "Constant buffers must be enabled in order from Constant Buffer 0
      *      to Constant Buffer 3 within this command.  For example, it is
      *      not allowed to enable Constant Buffer 1 by programming a
      *      non-zero value in the VS Constant Buffer 1 Read Length without a
      *      non-zero value in VS Constant Buffer 0 Read Length."
      *
      * So walk the leading run of enabled buffers first ...
      */
     for (i = 0; i < 4 && i < num_bufs && sizes[i]; i++) {
        /* read lengths are in 256-bit units */
        const int len = (sizes[i] + 31) / 32;
        const int shift = (i & 1) ? 16 : 0;

        /* the lower 5 bits are used for memory object control state */
        assert(bufs[i] % 32 == 0);

        payload[i / 2] |= len << shift;
        payload[2 + i] = bufs[i];

        total_read_length += len;
     }

     /* ... and require every remaining slot to be disabled */
     for (; i < 4; i++) {
        assert(i >= num_bufs || !sizes[i]);
        payload[2 + i] = 0;
     }

     /*
      * From the Ivy Bridge PRM, volume 2 part 1, page 113:
      *
      *     "The sum of all four read length fields must be less than or equal
      *      to the size of 64"
      */
     assert(total_read_length <= 64);

     ilo_cp_begin(cp, num_dwords);
     ilo_cp_write(cp, opcode | (num_dwords - 2));
     ilo_cp_write_multi(cp, payload, num_dwords - 1);
     ilo_cp_end(cp);
  }
  454.  
  /**
   * Emit 3DSTATE_CONSTANT_VS (sub-opcode 0x15); all arguments are forwarded
   * unchanged to gen7_emit_3dstate_constant().
   */
  static void
  gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                                const uint32_t *bufs, const int *sizes,
                                int num_bufs,
                                struct ilo_cp *cp)
  {
     const int subop = 0x15;

     gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
  }
  463.  
  /**
   * Emit 3DSTATE_CONSTANT_GS (sub-opcode 0x16); all arguments are forwarded
   * unchanged to gen7_emit_3dstate_constant().
   */
  static void
  gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                                const uint32_t *bufs, const int *sizes,
                                int num_bufs,
                                struct ilo_cp *cp)
  {
     const int subop = 0x16;

     gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
  }
  472.  
  /**
   * Emit 3DSTATE_CONSTANT_PS (sub-opcode 0x17); all arguments are forwarded
   * unchanged to gen7_emit_3dstate_constant().
   */
  static void
  gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                                const uint32_t *bufs, const int *sizes,
                                int num_bufs,
                                struct ilo_cp *cp)
  {
     const int subop = 0x17;

     gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
  }
  481.  
  /**
   * Emit 3DSTATE_SAMPLE_MASK (two dwords).
   *
   * \p sample_mask is clipped to its low \p num_samples bits before being
   * written; bit 0 is always allowed through.
   */
  static void
  gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                                unsigned sample_mask,
                                int num_samples,
                                struct ilo_cp *cp)
  {
     const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x18);
     const uint8_t num_dwords = 2;
     /*
      * From the Ivy Bridge PRM, volume 2 part 1, page 294:
      *
      *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
      *      (Sample Mask) must be zero.
      *
      *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
      *      must be zero."
      */
     const unsigned allowed_bits = ((1 << num_samples) - 1) | 0x1;

     ILO_GPE_VALID_GEN(dev, 7, 7);

     ilo_cp_begin(cp, num_dwords);
     ilo_cp_write(cp, opcode | (num_dwords - 2));
     ilo_cp_write(cp, sample_mask & allowed_bits);
     ilo_cp_end(cp);
  }
  510.  
  /**
   * Emit 3DSTATE_CONSTANT_HS (sub-opcode 0x19); all arguments are forwarded
   * unchanged to gen7_emit_3dstate_constant().
   */
  static void
  gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                                const uint32_t *bufs, const int *sizes,
                                int num_bufs,
                                struct ilo_cp *cp)
  {
     const int subop = 0x19;

     gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
  }
  519.  
  /**
   * Emit 3DSTATE_CONSTANT_DS (sub-opcode 0x1a); all arguments are forwarded
   * unchanged to gen7_emit_3dstate_constant().
   */
  static void
  gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                                const uint32_t *bufs, const int *sizes,
                                int num_bufs,
                                struct ilo_cp *cp)
  {
     const int subop = 0x1a;

     gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
  }
  528.  
  /**
   * Emit an all-zero 3DSTATE_HS (header plus six zero dwords).
   *
   * \p hs is asserted to be NULL and \p num_samplers is unused.
   */
  static void
  gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
                       const struct ilo_shader_state *hs,
                       int num_samplers,
                       struct ilo_cp *cp)
  {
     const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1b);
     const uint8_t num_dwords = 7;
     int dw;

     ILO_GPE_VALID_GEN(dev, 7, 7);

     assert(!hs);

     ilo_cp_begin(cp, num_dwords);
     ilo_cp_write(cp, opcode | (num_dwords - 2));
     /* every dword after the header is zero */
     for (dw = 1; dw < num_dwords; dw++)
        ilo_cp_write(cp, 0);
     ilo_cp_end(cp);
  }
  552.  
  /**
   * Emit an all-zero 3DSTATE_TE (header plus three zero dwords).
   */
  static void
  gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
  {
     const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1c);
     const uint8_t num_dwords = 4;
     int dw;

     ILO_GPE_VALID_GEN(dev, 7, 7);

     ilo_cp_begin(cp, num_dwords);
     ilo_cp_write(cp, opcode | (num_dwords - 2));
     /* every dword after the header is zero */
     for (dw = 1; dw < num_dwords; dw++)
        ilo_cp_write(cp, 0);
     ilo_cp_end(cp);
  }
  569.  
  /**
   * Emit an all-zero 3DSTATE_DS (header plus five zero dwords).
   *
   * \p ds is asserted to be NULL and \p num_samplers is unused.
   */
  static void
  gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
                       const struct ilo_shader_state *ds,
                       int num_samplers,
                       struct ilo_cp *cp)
  {
     const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1d);
     const uint8_t num_dwords = 6;
     int dw;

     ILO_GPE_VALID_GEN(dev, 7, 7);

     assert(!ds);

     ilo_cp_begin(cp, num_dwords);
     ilo_cp_write(cp, opcode | (num_dwords - 2));
     /* every dword after the header is zero */
     for (dw = 1; dw < num_dwords; dw++)
        ilo_cp_write(cp, 0);
     ilo_cp_end(cp);
  }
  593.  
  594. static void
  595. gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
  596.                             unsigned buffer_mask,
  597.                             int vertex_attrib_count,
  598.                             bool rasterizer_discard,
  599.                             struct ilo_cp *cp)
  600. {
  601.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
  602.    const uint8_t cmd_len = 3;
  603.    const bool enable = (buffer_mask != 0);
  604.    uint32_t dw1, dw2;
  605.    int read_len;
  606.  
  607.    ILO_GPE_VALID_GEN(dev, 7, 7);
  608.  
  609.    if (!enable) {
  610.       dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
  611.       if (rasterizer_discard)
  612.          dw1 |= SO_RENDERING_DISABLE;
  613.  
  614.       dw2 = 0;
  615.  
  616.       ilo_cp_begin(cp, cmd_len);
  617.       ilo_cp_write(cp, cmd | (cmd_len - 2));
  618.       ilo_cp_write(cp, dw1);
  619.       ilo_cp_write(cp, dw2);
  620.       ilo_cp_end(cp);
  621.       return;
  622.    }
  623.  
  624.    read_len = (vertex_attrib_count + 1) / 2;
  625.    if (!read_len)
  626.       read_len = 1;
  627.  
  628.    dw1 = SO_FUNCTION_ENABLE |
  629.          0 << SO_RENDER_STREAM_SELECT_SHIFT |
  630.          SO_STATISTICS_ENABLE |
  631.          buffer_mask << 8;
  632.  
  633.    if (rasterizer_discard)
  634.       dw1 |= SO_RENDERING_DISABLE;
  635.  
  636.    /* API_OPENGL */
  637.    if (true)
  638.       dw1 |= SO_REORDER_TRAILING;
  639.  
  640.    dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
  641.          0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
  642.          0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
  643.          0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
  644.          0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
  645.          0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
  646.          0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
  647.          (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
  648.  
  649.    ilo_cp_begin(cp, cmd_len);
  650.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  651.    ilo_cp_write(cp, dw1);
  652.    ilo_cp_write(cp, dw2);
  653.    ilo_cp_end(cp);
  654. }
  655.  
  /**
   * Emit 3DSTATE_SBE (fourteen dwords).
   *
   * The thirteen payload dwords are produced by
   * ilo_gpe_gen6_fill_3dstate_sf_sbe() from \p rasterizer, \p fs and
   * \p last_sh.
   */
  static void
  gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                        const struct ilo_rasterizer_state *rasterizer,
                        const struct ilo_shader_state *fs,
                        const struct ilo_shader_state *last_sh,
                        struct ilo_cp *cp)
  {
     const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1f);
     const uint8_t num_dwords = 14;
     uint32_t sbe_dw[13];

     ILO_GPE_VALID_GEN(dev, 7, 7);

     ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, fs, last_sh,
           sbe_dw, Elements(sbe_dw));

     ilo_cp_begin(cp, num_dwords);
     ilo_cp_write(cp, opcode | (num_dwords - 2));
     /* everything after the header comes from sbe_dw */
     ilo_cp_write_multi(cp, sbe_dw, num_dwords - 1);
     ilo_cp_end(cp);
  }
  677.  
  678. static void
  679. gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
  680.                      const struct ilo_shader_state *fs,
  681.                      int num_samplers, bool dual_blend,
  682.                      struct ilo_cp *cp)
  683. {
  684.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
  685.    const uint8_t cmd_len = 8;
  686.    const struct ilo_shader_cso *cso;
  687.    uint32_t dw2, dw4, dw5;
  688.  
  689.    ILO_GPE_VALID_GEN(dev, 7, 7);
  690.  
  691.    if (!fs) {
  692.       /* see brwCreateContext() */
  693.       const int max_threads = (dev->gt == 2) ? 172 : 48;
  694.  
  695.       ilo_cp_begin(cp, cmd_len);
  696.       ilo_cp_write(cp, cmd | (cmd_len - 2));
  697.       ilo_cp_write(cp, 0);
  698.       ilo_cp_write(cp, 0);
  699.       ilo_cp_write(cp, 0);
  700.       /* GPU hangs if none of the dispatch enable bits is set */
  701.       ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
  702.                        GEN7_PS_8_DISPATCH_ENABLE);
  703.       ilo_cp_write(cp, 0);
  704.       ilo_cp_write(cp, 0);
  705.       ilo_cp_write(cp, 0);
  706.       ilo_cp_end(cp);
  707.  
  708.       return;
  709.    }
  710.  
  711.    cso = ilo_shader_get_kernel_cso(fs);
  712.    dw2 = cso->payload[0];
  713.    dw4 = cso->payload[1];
  714.    dw5 = cso->payload[2];
  715.  
  716.    dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT;
  717.  
  718.    if (dual_blend)
  719.       dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
  720.  
  721.    ilo_cp_begin(cp, cmd_len);
  722.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  723.    ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
  724.    ilo_cp_write(cp, dw2);
  725.    ilo_cp_write(cp, 0); /* scratch */
  726.    ilo_cp_write(cp, dw4);
  727.    ilo_cp_write(cp, dw5);
  728.    ilo_cp_write(cp, 0); /* kernel 1 */
  729.    ilo_cp_write(cp, 0); /* kernel 2 */
  730.    ilo_cp_end(cp);
  731. }
  732.  
  /**
   * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP (sub-opcode 0x21) with
   * \p sf_clip_viewport as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                    uint32_t sf_clip_viewport,
                                                    struct ilo_cp *cp)
  {
     const int subop = 0x21;

     gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
  }
  740.  
  /**
   * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC (sub-opcode 0x23) with
   * \p cc_viewport as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                               uint32_t cc_viewport,
                                               struct ilo_cp *cp)
  {
     const int subop = 0x23;

     gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
  }
  748.  
  /**
   * Emit 3DSTATE_BLEND_STATE_POINTERS (sub-opcode 0x24) with \p blend_state
   * as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t blend_state,
                                         struct ilo_cp *cp)
  {
     const int subop = 0x24;

     gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
  }
  756.  
  /**
   * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS (sub-opcode 0x25) with
   * \p depth_stencil_state as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                                 uint32_t depth_stencil_state,
                                                 struct ilo_cp *cp)
  {
     const int subop = 0x25;

     gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
  }
  764.  
  /**
   * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS (sub-opcode 0x26) with
   * \p binding_table as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                              uint32_t binding_table,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x26;

     gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
  }
  772.  
  /**
   * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS (sub-opcode 0x27) with
   * \p binding_table as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                              uint32_t binding_table,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x27;

     gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
  }
  780.  
  /**
   * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS (sub-opcode 0x28) with
   * \p binding_table as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                              uint32_t binding_table,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x28;

     gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
  }
  788.  
  /**
   * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS (sub-opcode 0x29) with
   * \p binding_table as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                              uint32_t binding_table,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x29;

     gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
  }
  796.  
  /**
   * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS (sub-opcode 0x2a) with
   * \p binding_table as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                              uint32_t binding_table,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x2a;

     gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
  }
  804.  
  /**
   * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS (sub-opcode 0x2b) with
   * \p sampler_state as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                              uint32_t sampler_state,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x2b;

     gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
  }
  812.  
  /**
   * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS (sub-opcode 0x2c) with
   * \p sampler_state as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                              uint32_t sampler_state,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x2c;

     gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
  }
  820.  
  /**
   * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS (sub-opcode 0x2d) with
   * \p sampler_state as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                              uint32_t sampler_state,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x2d;

     gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
  }
  828.  
  /**
   * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS (sub-opcode 0x2e) with
   * \p sampler_state as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                              uint32_t sampler_state,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x2e;

     gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
  }
  836.  
  /**
   * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS (sub-opcode 0x2f) with
   * \p sampler_state as its payload dword.
   */
  static void
  gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                              uint32_t sampler_state,
                                              struct ilo_cp *cp)
  {
     const int subop = 0x2f;

     gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
  }
  844.  
  845. static void
  846. gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
  847.                       int subop, int offset, int size,
  848.                       int entry_size,
  849.                       struct ilo_cp *cp)
  850. {
  851.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
  852.    const uint8_t cmd_len = 2;
  853.    const int row_size = 64; /* 512 bits */
  854.    int alloc_size, num_entries, min_entries, max_entries;
  855.  
  856.    ILO_GPE_VALID_GEN(dev, 7, 7);
  857.  
  858.    /* VS, HS, DS, and GS variants */
  859.    assert(subop >= 0x30 && subop <= 0x33);
  860.  
  861.    /* in multiples of 8KB */
  862.    assert(offset % 8192 == 0);
  863.    offset /= 8192;
  864.  
  865.    /* in multiple of 512-bit rows */
  866.    alloc_size = (entry_size + row_size - 1) / row_size;
  867.    if (!alloc_size)
  868.       alloc_size = 1;
  869.  
  870.    /*
  871.     * From the Ivy Bridge PRM, volume 2 part 1, page 34:
  872.     *
  873.     *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
  874.     *      cause performance to decrease due to banking in the URB. Element
  875.     *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
  876.     */
  877.    if (subop == 0x30 && alloc_size == 5)
  878.       alloc_size = 6;
  879.  
  880.    /* in multiples of 8 */
  881.    num_entries = (size / row_size / alloc_size) & ~7;
  882.  
  883.    switch (subop) {
  884.    case 0x30: /* 3DSTATE_URB_VS */
  885.       min_entries = 32;
  886.       max_entries = (dev->gt == 2) ? 704 : 512;
  887.  
  888.       assert(num_entries >= min_entries);
  889.       if (num_entries > max_entries)
  890.          num_entries = max_entries;
  891.       break;
  892.    case 0x31: /* 3DSTATE_URB_HS */
  893.       max_entries = (dev->gt == 2) ? 64 : 32;
  894.       if (num_entries > max_entries)
  895.          num_entries = max_entries;
  896.       break;
  897.    case 0x32: /* 3DSTATE_URB_DS */
  898.       if (num_entries)
  899.          assert(num_entries >= 138);
  900.       break;
  901.    case 0x33: /* 3DSTATE_URB_GS */
  902.       max_entries = (dev->gt == 2) ? 320 : 192;
  903.       if (num_entries > max_entries)
  904.          num_entries = max_entries;
  905.       break;
  906.    default:
  907.       break;
  908.    }
  909.  
  910.    ilo_cp_begin(cp, cmd_len);
  911.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  912.    ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
  913.                     (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
  914.                     num_entries);
  915.    ilo_cp_end(cp);
  916. }
  917.  
  918. static void
  919. gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
  920.                          int offset, int size, int entry_size,
  921.                          struct ilo_cp *cp)
  922. {
  923.    gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
  924. }
  925.  
  926. static void
  927. gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
  928.                          int offset, int size, int entry_size,
  929.                          struct ilo_cp *cp)
  930. {
  931.    gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
  932. }
  933.  
  934. static void
  935. gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
  936.                          int offset, int size, int entry_size,
  937.                          struct ilo_cp *cp)
  938. {
  939.    gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
  940. }
  941.  
  942. static void
  943. gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
  944.                          int offset, int size, int entry_size,
  945.                          struct ilo_cp *cp)
  946. {
  947.    gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
  948. }
  949.  
  950. static void
  951. gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
  952.                                       int subop, int offset, int size,
  953.                                       struct ilo_cp *cp)
  954. {
  955.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
  956.    const uint8_t cmd_len = 2;
  957.    int end;
  958.  
  959.    ILO_GPE_VALID_GEN(dev, 7, 7);
  960.  
  961.    /* VS, HS, DS, GS, and PS variants */
  962.    assert(subop >= 0x12 && subop <= 0x16);
  963.  
  964.    /*
  965.     * From the Ivy Bridge PRM, volume 2 part 1, page 68:
  966.     *
  967.     *     "(A table that says the maximum size of each constant buffer is
  968.     *      16KB")
  969.     *
  970.     * From the Ivy Bridge PRM, volume 2 part 1, page 115:
  971.     *
  972.     *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
  973.     *      may not exceed the maximum value of the Constant Buffer Size."
  974.     *
  975.     * Thus, the valid range of buffer end is [0KB, 16KB].
  976.     */
  977.    end = (offset + size) / 1024;
  978.    if (end > 16) {
  979.       assert(!"invalid constant buffer end");
  980.       end = 16;
  981.    }
  982.  
  983.    /* the valid range of buffer offset is [0KB, 15KB] */
  984.    offset = (offset + 1023) / 1024;
  985.    if (offset > 15) {
  986.       assert(!"invalid constant buffer offset");
  987.       offset = 15;
  988.    }
  989.  
  990.    if (offset > end) {
  991.       assert(!size);
  992.       offset = end;
  993.    }
  994.  
  995.    /* the valid range of buffer size is [0KB, 15KB] */
  996.    size = end - offset;
  997.    if (size > 15) {
  998.       assert(!"invalid constant buffer size");
  999.       size = 15;
  1000.    }
  1001.  
  1002.    ilo_cp_begin(cp, cmd_len);
  1003.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1004.    ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
  1005.                     size);
  1006.    ilo_cp_end(cp);
  1007. }
  1008.  
  1009. static void
  1010. gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
  1011.                                          int offset, int size,
  1012.                                          struct ilo_cp *cp)
  1013. {
  1014.    gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
  1015. }
  1016.  
  1017. static void
  1018. gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
  1019.                                          int offset, int size,
  1020.                                          struct ilo_cp *cp)
  1021. {
  1022.    gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
  1023. }
  1024.  
  1025. static void
  1026. gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
  1027.                                          int offset, int size,
  1028.                                          struct ilo_cp *cp)
  1029. {
  1030.    gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
  1031. }
  1032.  
  1033. static void
  1034. gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
  1035.                                          int offset, int size,
  1036.                                          struct ilo_cp *cp)
  1037. {
  1038.    gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
  1039. }
  1040.  
  1041. static void
  1042. gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
  1043.                                          int offset, int size,
  1044.                                          struct ilo_cp *cp)
  1045. {
  1046.    gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
  1047. }
  1048.  
  1049. static void
  1050. gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
  1051.                                const struct pipe_stream_output_info *so_info,
  1052.                                struct ilo_cp *cp)
  1053. {
  1054.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
  1055.    uint16_t cmd_len;
  1056.    int buffer_selects, num_entries, i;
  1057.    uint16_t so_decls[128];
  1058.  
  1059.    ILO_GPE_VALID_GEN(dev, 7, 7);
  1060.  
  1061.    buffer_selects = 0;
  1062.    num_entries = 0;
  1063.  
  1064.    if (so_info) {
  1065.       int buffer_offsets[PIPE_MAX_SO_BUFFERS];
  1066.  
  1067.       memset(buffer_offsets, 0, sizeof(buffer_offsets));
  1068.  
  1069.       for (i = 0; i < so_info->num_outputs; i++) {
  1070.          unsigned decl, buf, reg, mask;
  1071.  
  1072.          buf = so_info->output[i].output_buffer;
  1073.  
  1074.          /* pad with holes */
  1075.          assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
  1076.          while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
  1077.             int num_dwords;
  1078.  
  1079.             num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
  1080.             if (num_dwords > 4)
  1081.                num_dwords = 4;
  1082.  
  1083.             decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
  1084.                    SO_DECL_HOLE_FLAG |
  1085.                    ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
  1086.  
  1087.             so_decls[num_entries++] = decl;
  1088.             buffer_offsets[buf] += num_dwords;
  1089.          }
  1090.  
  1091.          reg = so_info->output[i].register_index;
  1092.          mask = ((1 << so_info->output[i].num_components) - 1) <<
  1093.             so_info->output[i].start_component;
  1094.  
  1095.          decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
  1096.                 reg << SO_DECL_REGISTER_INDEX_SHIFT |
  1097.                 mask << SO_DECL_COMPONENT_MASK_SHIFT;
  1098.  
  1099.          so_decls[num_entries++] = decl;
  1100.          buffer_selects |= 1 << buf;
  1101.          buffer_offsets[buf] += so_info->output[i].num_components;
  1102.       }
  1103.    }
  1104.  
  1105.    /*
  1106.     * From the Ivy Bridge PRM, volume 2 part 1, page 201:
  1107.     *
  1108.     *     "Errata: All 128 decls for all four streams must be included
  1109.     *      whenever this command is issued. The "Num Entries [n]" fields still
  1110.     *      contain the actual numbers of valid decls."
  1111.     *
  1112.     * Also note that "DWord Length" has 9 bits for this command, and the type
  1113.     * of cmd_len is thus uint16_t.
  1114.     */
  1115.    cmd_len = 2 * 128 + 3;
  1116.  
  1117.    ilo_cp_begin(cp, cmd_len);
  1118.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1119.    ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
  1120.                     0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
  1121.                     0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
  1122.                     buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
  1123.    ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
  1124.                     0 << SO_NUM_ENTRIES_2_SHIFT |
  1125.                     0 << SO_NUM_ENTRIES_1_SHIFT |
  1126.                     num_entries << SO_NUM_ENTRIES_0_SHIFT);
  1127.  
  1128.    for (i = 0; i < num_entries; i++) {
  1129.       ilo_cp_write(cp, so_decls[i]);
  1130.       ilo_cp_write(cp, 0);
  1131.    }
  1132.    for (; i < 128; i++) {
  1133.       ilo_cp_write(cp, 0);
  1134.       ilo_cp_write(cp, 0);
  1135.    }
  1136.  
  1137.    ilo_cp_end(cp);
  1138. }
  1139.  
  1140. static void
  1141. gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
  1142.                             int index, int base, int stride,
  1143.                             const struct pipe_stream_output_target *so_target,
  1144.                             struct ilo_cp *cp)
  1145. {
  1146.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
  1147.    const uint8_t cmd_len = 4;
  1148.    struct ilo_buffer *buf;
  1149.    int end;
  1150.  
  1151.    ILO_GPE_VALID_GEN(dev, 7, 7);
  1152.  
  1153.    if (!so_target || !so_target->buffer) {
  1154.       ilo_cp_begin(cp, cmd_len);
  1155.       ilo_cp_write(cp, cmd | (cmd_len - 2));
  1156.       ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
  1157.       ilo_cp_write(cp, 0);
  1158.       ilo_cp_write(cp, 0);
  1159.       ilo_cp_end(cp);
  1160.       return;
  1161.    }
  1162.  
  1163.    buf = ilo_buffer(so_target->buffer);
  1164.  
  1165.    /* DWord-aligned */
  1166.    assert(stride % 4 == 0 && base % 4 == 0);
  1167.    assert(so_target->buffer_offset % 4 == 0);
  1168.  
  1169.    stride &= ~3;
  1170.    base = (base + so_target->buffer_offset) & ~3;
  1171.    end = (base + so_target->buffer_size) & ~3;
  1172.  
  1173.    ilo_cp_begin(cp, cmd_len);
  1174.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1175.    ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
  1176.                     stride);
  1177.    ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
  1178.    ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
  1179.    ilo_cp_end(cp);
  1180. }
  1181.  
  1182. static void
  1183. gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
  1184.                       const struct pipe_draw_info *info,
  1185.                       const struct ilo_ib_state *ib,
  1186.                       bool rectlist,
  1187.                       struct ilo_cp *cp)
  1188. {
  1189.    const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
  1190.    const uint8_t cmd_len = 7;
  1191.    const int prim = (rectlist) ?
  1192.       _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
  1193.    const int vb_access = (info->indexed) ?
  1194.       GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
  1195.       GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
  1196.    const uint32_t vb_start = info->start +
  1197.       ((info->indexed) ? ib->draw_start_offset : 0);
  1198.  
  1199.    ILO_GPE_VALID_GEN(dev, 7, 7);
  1200.  
  1201.    ilo_cp_begin(cp, cmd_len);
  1202.    ilo_cp_write(cp, cmd | (cmd_len - 2));
  1203.    ilo_cp_write(cp, vb_access | prim);
  1204.    ilo_cp_write(cp, info->count);
  1205.    ilo_cp_write(cp, vb_start);
  1206.    ilo_cp_write(cp, info->instance_count);
  1207.    ilo_cp_write(cp, info->start_instance);
  1208.    ilo_cp_write(cp, info->index_bias);
  1209.    ilo_cp_end(cp);
  1210. }
  1211.  
  1212. static uint32_t
  1213. gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
  1214.                            const struct ilo_viewport_cso *viewports,
  1215.                            unsigned num_viewports,
  1216.                            struct ilo_cp *cp)
  1217. {
  1218.    const int state_align = 64 / 4;
  1219.    const int state_len = 16 * num_viewports;
  1220.    uint32_t state_offset, *dw;
  1221.    unsigned i;
  1222.  
  1223.    ILO_GPE_VALID_GEN(dev, 7, 7);
  1224.  
  1225.    /*
  1226.     * From the Ivy Bridge PRM, volume 2 part 1, page 270:
  1227.     *
  1228.     *     "The viewport-specific state used by both the SF and CL units
  1229.     *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
  1230.     *      of which contains the DWords described below. The start of each
  1231.     *      element is spaced 16 DWords apart. The location of first element of
  1232.     *      the array, as specified by both Pointer to SF_VIEWPORT and Pointer
  1233.     *      to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
  1234.     */
  1235.    assert(num_viewports && num_viewports <= 16);
  1236.  
  1237.    dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
  1238.          state_len, state_align, &state_offset);
  1239.  
  1240.    for (i = 0; i < num_viewports; i++) {
  1241.       const struct ilo_viewport_cso *vp = &viewports[i];
  1242.  
  1243.       dw[0] = fui(vp->m00);
  1244.       dw[1] = fui(vp->m11);
  1245.       dw[2] = fui(vp->m22);
  1246.       dw[3] = fui(vp->m30);
  1247.       dw[4] = fui(vp->m31);
  1248.       dw[5] = fui(vp->m32);
  1249.       dw[6] = 0;
  1250.       dw[7] = 0;
  1251.       dw[8] = fui(vp->min_gbx);
  1252.       dw[9] = fui(vp->max_gbx);
  1253.       dw[10] = fui(vp->min_gby);
  1254.       dw[11] = fui(vp->max_gby);
  1255.       dw[12] = 0;
  1256.       dw[13] = 0;
  1257.       dw[14] = 0;
  1258.       dw[15] = 0;
  1259.  
  1260.       dw += 16;
  1261.    }
  1262.  
  1263.    return state_offset;
  1264. }
  1265.  
  1266. void
  1267. ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
  1268.                                     unsigned width, unsigned height,
  1269.                                     unsigned depth, unsigned level,
  1270.                                     struct ilo_view_surface *surf)
  1271. {
  1272.    uint32_t *dw;
  1273.  
  1274.    ILO_GPE_VALID_GEN(dev, 7, 7);
  1275.  
  1276.    /*
  1277.     * From the Ivy Bridge PRM, volume 4 part 1, page 62:
  1278.     *
  1279.     *     "A null surface is used in instances where an actual surface is not
  1280.     *      bound. When a write message is generated to a null surface, no
  1281.     *      actual surface is written to. When a read message (including any
  1282.     *      sampling engine message) is generated to a null surface, the result
  1283.     *      is all zeros.  Note that a null surface type is allowed to be used
  1284.     *      with all messages, even if it is not specificially indicated as
  1285.     *      supported. All of the remaining fields in surface state are ignored
  1286.     *      for null surfaces, with the following exceptions:
  1287.     *
  1288.     *      * Width, Height, Depth, LOD, and Render Target View Extent fields
  1289.     *        must match the depth buffer's corresponding state for all render
  1290.     *        target surfaces, including null.
  1291.     *      * All sampling engine and data port messages support null surfaces
  1292.     *        with the above behavior, even if not mentioned as specifically
  1293.     *        supported, except for the following:
  1294.     *        * Data Port Media Block Read/Write messages.
  1295.     *      * The Surface Type of a surface used as a render target (accessed
  1296.     *        via the Data Port's Render Target Write message) must be the same
  1297.     *        as the Surface Type of all other render targets and of the depth
  1298.     *        buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
  1299.     *        buffer or render targets are SURFTYPE_NULL."
  1300.     *
  1301.     * From the Ivy Bridge PRM, volume 4 part 1, page 65:
  1302.     *
  1303.     *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
  1304.     *      true"
  1305.     */
  1306.  
  1307.    STATIC_ASSERT(Elements(surf->payload) >= 8);
  1308.    dw = surf->payload;
  1309.  
  1310.    dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
  1311.            BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
  1312.            BRW_SURFACE_TILED << 13;
  1313.  
  1314.    dw[1] = 0;
  1315.  
  1316.    dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
  1317.            SET_FIELD(width  - 1, GEN7_SURFACE_WIDTH);
  1318.  
  1319.    dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
  1320.  
  1321.    dw[4] = 0;
  1322.    dw[5] = level;
  1323.  
  1324.    dw[6] = 0;
  1325.    dw[7] = 0;
  1326.  
  1327.    surf->bo = NULL;
  1328. }
  1329.  
  1330. void
  1331. ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
  1332.                                           const struct ilo_buffer *buf,
  1333.                                           unsigned offset, unsigned size,
  1334.                                           unsigned struct_size,
  1335.                                           enum pipe_format elem_format,
  1336.                                           bool is_rt, bool render_cache_rw,
  1337.                                           struct ilo_view_surface *surf)
  1338. {
  1339.    const bool typed = (elem_format != PIPE_FORMAT_NONE);
  1340.    const bool structured = (!typed && struct_size > 1);
  1341.    const int elem_size = (typed) ?
  1342.       util_format_get_blocksize(elem_format) : 1;
  1343.    int width, height, depth, pitch;
  1344.    int surface_type, surface_format, num_entries;
  1345.    uint32_t *dw;
  1346.  
  1347.    ILO_GPE_VALID_GEN(dev, 7, 7);
  1348.  
  1349.    surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
  1350.  
  1351.    surface_format = (typed) ?
  1352.       ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
  1353.  
  1354.    num_entries = size / struct_size;
  1355.    /* see if there is enough space to fit another element */
  1356.    if (size % struct_size >= elem_size && !structured)
  1357.       num_entries++;
  1358.  
  1359.    /*
  1360.     * From the Ivy Bridge PRM, volume 4 part 1, page 67:
  1361.     *
  1362.     *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
  1363.     *      Address) specifies the base address of first element of the
  1364.     *      surface. The surface is interpreted as a simple array of that
  1365.     *      single element type. The address must be naturally-aligned to the
  1366.     *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
  1367.     *      must be 16-byte aligned)
  1368.     *
  1369.     *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
  1370.     *      the base address of the first element of the surface, computed in
  1371.     *      software by adding the surface base address to the byte offset of
  1372.     *      the element in the buffer."
  1373.     */
  1374.    if (is_rt)
  1375.       assert(offset % elem_size == 0);
  1376.  
  1377.    /*
  1378.     * From the Ivy Bridge PRM, volume 4 part 1, page 68:
  1379.     *
  1380.     *     "For typed buffer and structured buffer surfaces, the number of
  1381.     *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
  1382.     *      surfaces, the number of entries in the buffer is the number of
  1383.     *      bytes which can range from 1 to 2^30."
  1384.     */
  1385.    assert(num_entries >= 1 &&
  1386.           num_entries <= 1 << ((typed || structured) ? 27 : 30));
  1387.  
  1388.    /*
  1389.     * From the Ivy Bridge PRM, volume 4 part 1, page 69:
  1390.     *
  1391.     *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
  1392.     *      11 if the Surface Format is RAW (the size of the buffer must be a
  1393.     *      multiple of 4 bytes)."
  1394.     *
  1395.     * From the Ivy Bridge PRM, volume 4 part 1, page 70:
  1396.     *
  1397.     *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
  1398.     *      field (Surface Pitch) indicates the size of the structure."
  1399.     *
  1400.     *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
  1401.     *      must be a multiple of 4 bytes."
  1402.     */
  1403.    if (structured)
  1404.       assert(struct_size % 4 == 0);
  1405.    else if (!typed)
  1406.       assert(num_entries % 4 == 0);
  1407.  
  1408.    pitch = struct_size;
  1409.  
  1410.    pitch--;
  1411.    num_entries--;
  1412.    /* bits [6:0] */
  1413.    width  = (num_entries & 0x0000007f);
  1414.    /* bits [20:7] */
  1415.    height = (num_entries & 0x001fff80) >> 7;
  1416.    /* bits [30:21] */
  1417.    depth  = (num_entries & 0x7fe00000) >> 21;
  1418.    /* limit to [26:21] */
  1419.    if (typed || structured)
  1420.       depth &= 0x3f;
  1421.  
  1422.    STATIC_ASSERT(Elements(surf->payload) >= 8);
  1423.    dw = surf->payload;
  1424.  
  1425.    dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
  1426.            surface_format << BRW_SURFACE_FORMAT_SHIFT;
  1427.    if (render_cache_rw)
  1428.       dw[0] |= BRW_SURFACE_RC_READ_WRITE;
  1429.  
  1430.    dw[1] = offset;
  1431.  
  1432.    dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
  1433.            SET_FIELD(width, GEN7_SURFACE_WIDTH);
  1434.  
  1435.    dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
  1436.            pitch;
  1437.  
  1438.    dw[4] = 0;
  1439.    dw[5] = 0;
  1440.  
  1441.    dw[6] = 0;
  1442.    dw[7] = 0;
  1443.  
  1444.    /* do not increment reference count */
  1445.    surf->bo = buf->bo;
  1446. }
  1447.  
/**
 * Initialize \p surf as a view of a texture.
 *
 * Fills the first 8 dwords of surf->payload and stores tex->bo in surf->bo
 * without taking a reference.
 *
 * \param tex              texture being viewed
 * \param format           view format; Z32_FLOAT_S8X24_UINT is replaced by
 *                         Z32_FLOAT when tex->separate_s8 is set
 * \param first_level      first mip level of the view
 * \param num_levels       number of mip levels; asserted to be 1 for render
 *                         targets
 * \param first_layer      first array layer of the view
 * \param num_layers       number of array layers; asserted to be 1 for
 *                         render targets
 * \param is_rt            true when the view is used as a render target; the
 *                         slice offset is then computed in software and the
 *                         LOD and first layer are programmed as 0
 * \param render_cache_rw  when true, BRW_SURFACE_RC_READ_WRITE is set
 * \param surf             view surface to fill
 */
void
ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
                                           const struct ilo_texture *tex,
                                           enum pipe_format format,
                                           unsigned first_level,
                                           unsigned num_levels,
                                           unsigned first_layer,
                                           unsigned num_layers,
                                           bool is_rt, bool render_cache_rw,
                                           struct ilo_view_surface *surf)
{
   int surface_type, surface_format;
   int width, height, depth, pitch, lod;
   unsigned layer_offset, x_offset, y_offset;
   uint32_t *dw;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
   assert(surface_type != BRW_SURFACE_BUFFER);

   /* when tex->separate_s8 is set, view the combined format as Z32_FLOAT */
   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
      format = PIPE_FORMAT_Z32_FLOAT;

   if (is_rt)
      surface_format = ilo_translate_render_format(format);
   else
      surface_format = ilo_translate_texture_format(format);
   assert(surface_format >= 0);

   width = tex->base.width0;
   height = tex->base.height0;
   /* 3D textures use their own depth; everything else uses the layer count */
   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
      tex->base.depth0 : num_layers;
   pitch = tex->bo_stride;

   if (surface_type == BRW_SURFACE_CUBE) {
      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 70:
       *
       *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
       *      this field is [0,340], indicating the number of cube array
       *      elements (equal to the number of underlying 2D array elements
       *      divided by 6). For other surfaces, this field must be zero."
       *
       * When is_rt is true, we treat the texture as a 2D one to avoid the
       * restriction.
       */
      if (is_rt) {
         surface_type = BRW_SURFACE_2D;
      }
      else {
         assert(num_layers % 6 == 0);
         depth = num_layers / 6;
      }
   }

   /* sanity check the size */
   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
   assert(first_layer < 2048 && num_layers <= 2048);
   switch (surface_type) {
   case BRW_SURFACE_1D:
      assert(width <= 16384 && height == 1 && depth <= 2048);
      break;
   case BRW_SURFACE_2D:
      assert(width <= 16384 && height <= 16384 && depth <= 2048);
      break;
   case BRW_SURFACE_3D:
      assert(width <= 2048 && height <= 2048 && depth <= 2048);
      if (!is_rt)
         assert(first_layer == 0);
      break;
   case BRW_SURFACE_CUBE:
      assert(width <= 16384 && height <= 16384 && depth <= 86);
      assert(width == height);
      /*
       * NOTE(review): render-target cubes were retyped to 2D above, so is_rt
       * cannot be true in this case as written -- confirm the intent.
       */
      if (is_rt)
         assert(first_layer == 0);
      break;
   default:
      assert(!"unexpected surface type");
      break;
   }

   if (is_rt) {
      /*
       * Compute the offset to the layer manually.
       *
       * For rendering, the hardware requires LOD to be the same for all
       * render targets and the depth buffer.  We need to compute the offset
       * to the layer manually and always set LOD to 0.
       */
      if (true) {
         /* we lose the capability for layered rendering */
         assert(num_layers == 1);

         layer_offset = ilo_texture_get_slice_offset(tex,
               first_level, first_layer, &x_offset, &y_offset);

         /* the offsets are programmed in units of 4 (x) and 2 (y) */
         assert(x_offset % 4 == 0);
         assert(y_offset % 2 == 0);
         x_offset /= 4;
         y_offset /= 2;

         /* derive the size for the LOD */
         width = u_minify(width, first_level);
         height = u_minify(height, first_level);
         if (surface_type == BRW_SURFACE_3D)
            depth = u_minify(depth, first_level);
         else
            depth = 1;

         /* the slice is now addressed directly, so reset level and layer */
         first_level = 0;
         first_layer = 0;
         lod = 0;
      }
      else {
         /* currently unreachable: the condition above is hard-coded true */
         layer_offset = 0;
         x_offset = 0;
         y_offset = 0;
      }

      assert(num_levels == 1);
      /* first_level was reset to 0 above, so this stores 0 as well */
      lod = first_level;
   }
   else {
      layer_offset = 0;
      x_offset = 0;
      y_offset = 0;

      /* dw[5] carries first_level as MIN_LOD and this value in its low bits */
      lod = num_levels - 1;
   }

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
    *
    *     "The Base Address for linear render target surfaces and surfaces
    *      accessed with the typed surface read/write data port messages must
    *      be element-size aligned, for non-YUV surface formats, or a multiple
    *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
    *      have no alignment requirements (byte alignment is sufficient)."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
    *
    *     "For linear render target surfaces and surfaces accessed with the
    *      typed data port messages, the pitch must be a multiple of the
    *      element size for non-YUV surface formats. Pitch must be a multiple
    *      of 2 * element size for YUV surface formats. For linear surfaces
    *      with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
    *      of 4 bytes.For other linear surfaces, the pitch can be any multiple
    *      of bytes."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 74:
    *
    *     "For linear surfaces, this field (X Offset) must be zero."
    */
   if (tex->tiling == INTEL_TILING_NONE) {
      if (is_rt) {
         const int elem_size = util_format_get_blocksize(format);
         assert(layer_offset % elem_size == 0);
         assert(pitch % elem_size == 0);
      }

      assert(!x_offset);
   }

   STATIC_ASSERT(Elements(surf->payload) >= 8);
   dw = surf->payload;

   dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
           surface_format << BRW_SURFACE_FORMAT_SHIFT |
           ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "If this field (Surface Array) is enabled, the Surface Type must be
    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
    *      SURFTYPE_CUBE, the Depth field must be set to zero."
    *
    * For non-3D sampler surfaces, resinfo (the sampler message) always
    * returns zero for the number of layers when this field is not set.
    */
   if (surface_type != BRW_SURFACE_3D) {
      if (util_resource_is_array_texture(&tex->base))
         dw[0] |= GEN7_SURFACE_IS_ARRAY;
      else
         assert(depth == 1);
   }

   /* alignment and array spacing flags mirror the texture's layout fields */
   if (tex->valign_4)
      dw[0] |= GEN7_SURFACE_VALIGN_4;

   if (tex->halign_8)
      dw[0] |= GEN7_SURFACE_HALIGN_8;

   if (tex->array_spacing_full)
      dw[0] |= GEN7_SURFACE_ARYSPC_FULL;
   else
      dw[0] |= GEN7_SURFACE_ARYSPC_LOD0;

   if (render_cache_rw)
      dw[0] |= BRW_SURFACE_RC_READ_WRITE;

   if (surface_type == BRW_SURFACE_CUBE && !is_rt)
      dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;

   dw[1] = layer_offset;

   /* width, height, depth, and pitch are all encoded minus one */
   dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
           SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);

   dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
           (pitch - 1);

   dw[4] = first_layer << 18 |
           (num_layers - 1) << 7;

   /*
    * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
    * means the samples are interleaved.  The layouts are the same when the
    * number of samples is 1.
    */
   if (tex->interleaved && tex->base.nr_samples > 1) {
      assert(!is_rt);
      dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
   }
   else {
      dw[4] |= GEN7_SURFACE_MSFMT_MSS;
   }

   /* sample counts above 4 select 8x, above 2 select 4x, otherwise 1x */
   if (tex->base.nr_samples > 4)
      dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
   else if (tex->base.nr_samples > 2)
      dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
   else
      dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;

   dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
           y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
           SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
           lod;

   dw[6] = 0;
   dw[7] = 0;

   /* do not increment reference count */
   surf->bo = tex->bo;
}
  1697.  
  1698. static int
  1699. gen7_estimate_command_size(const struct ilo_dev_info *dev,
  1700.                            enum ilo_gpe_gen7_command cmd,
  1701.                            int arg)
  1702. {
  1703.    static const struct {
  1704.       int header;
  1705.       int body;
  1706.    } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
  1707.       [ILO_GPE_GEN7_STATE_BASE_ADDRESS]                       = { 0,  10 },
  1708.       [ILO_GPE_GEN7_STATE_SIP]                                = { 0,  2  },
  1709.       [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS]                    = { 0,  1  },
  1710.       [ILO_GPE_GEN7_PIPELINE_SELECT]                          = { 0,  1  },
  1711.       [ILO_GPE_GEN7_MEDIA_VFE_STATE]                          = { 0,  8  },
  1712.       [ILO_GPE_GEN7_MEDIA_CURBE_LOAD]                         = { 0,  4  },
  1713.       [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD]          = { 0,  4  },
  1714.       [ILO_GPE_GEN7_MEDIA_STATE_FLUSH]                        = { 0,  2  },
  1715.       [ILO_GPE_GEN7_GPGPU_WALKER]                             = { 0,  11 },
  1716.       [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS]                     = { 0,  3  },
  1717.       [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER]                     = { 0,  7  },
  1718.       [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER]                   = { 0,  3  },
  1719.       [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER]                = { 0,  3  },
  1720.       [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS]                   = { 1,  4  },
  1721.       [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS]                  = { 1,  2  },
  1722.       [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER]                     = { 0,  3  },
  1723.       [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS]                = { 0,  2  },
  1724.       [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS]           = { 0,  2  },
  1725.       [ILO_GPE_GEN7_3DSTATE_VS]                               = { 0,  6  },
  1726.       [ILO_GPE_GEN7_3DSTATE_GS]                               = { 0,  7  },
  1727.       [ILO_GPE_GEN7_3DSTATE_CLIP]                             = { 0,  4  },
  1728.       [ILO_GPE_GEN7_3DSTATE_SF]                               = { 0,  7  },
  1729.       [ILO_GPE_GEN7_3DSTATE_WM]                               = { 0,  3  },
  1730.       [ILO_GPE_GEN7_3DSTATE_CONSTANT_VS]                      = { 0,  7  },
  1731.       [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS]                      = { 0,  7  },
  1732.       [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS]                      = { 0,  7  },
  1733.       [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK]                      = { 0,  2  },
  1734.       [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS]                      = { 0,  7  },
  1735.       [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS]                      = { 0,  7  },
  1736.       [ILO_GPE_GEN7_3DSTATE_HS]                               = { 0,  7  },
  1737.       [ILO_GPE_GEN7_3DSTATE_TE]                               = { 0,  4  },
  1738.       [ILO_GPE_GEN7_3DSTATE_DS]                               = { 0,  6  },
  1739.       [ILO_GPE_GEN7_3DSTATE_STREAMOUT]                        = { 0,  3  },
  1740.       [ILO_GPE_GEN7_3DSTATE_SBE]                              = { 0,  14 },
  1741.       [ILO_GPE_GEN7_3DSTATE_PS]                               = { 0,  8  },
  1742.       [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP]  = { 0,  2  },
  1743.       [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC]       = { 0,  2  },
  1744.       [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS]             = { 0,  2  },
  1745.       [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS]     = { 0,  2  },
  1746.       [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS]        = { 0,  2  },
  1747.       [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS]        = { 0,  2  },
  1748.       [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS]        = { 0,  2  },
  1749.       [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS]        = { 0,  2  },
  1750.       [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS]        = { 0,  2  },
  1751.       [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS]        = { 0,  2  },
  1752.       [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS]        = { 0,  2  },
  1753.       [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS]        = { 0,  2  },
  1754.       [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS]        = { 0,  2  },
  1755.       [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS]        = { 0,  2  },
  1756.       [ILO_GPE_GEN7_3DSTATE_URB_VS]                           = { 0,  2  },
  1757.       [ILO_GPE_GEN7_3DSTATE_URB_HS]                           = { 0,  2  },
  1758.       [ILO_GPE_GEN7_3DSTATE_URB_DS]                           = { 0,  2  },
  1759.       [ILO_GPE_GEN7_3DSTATE_URB_GS]                           = { 0,  2  },
  1760.       [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE]                = { 0,  4  },
  1761.       [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET]              = { 0,  2  },
  1762.       [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN]             = { 0,  33, },
  1763.       [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE]                     = { 0,  3  },
  1764.       [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS]               = { 0,  3  },
  1765.       [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE]                      = { 0,  4  },
  1766.       [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS]           = { 0,  2  },
  1767.       [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS]           = { 0,  2  },
  1768.       [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS]           = { 0,  2  },
  1769.       [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS]           = { 0,  2  },
  1770.       [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS]           = { 0,  2  },
  1771.       [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST]                     = { 3,  2  },
  1772.       [ILO_GPE_GEN7_3DSTATE_SO_BUFFER]                        = { 0,  4  },
  1773.       [ILO_GPE_GEN7_PIPE_CONTROL]                             = { 0,  5  },
  1774.       [ILO_GPE_GEN7_3DPRIMITIVE]                              = { 0,  7  },
  1775.    };
  1776.    const int header = gen7_command_size_table[cmd].header;
  1777.    const int body = gen7_command_size_table[cmd].body;
  1778.    const int count = arg;
  1779.  
  1780.    ILO_GPE_VALID_GEN(dev, 7, 7);
  1781.    assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
  1782.  
  1783.    return (likely(count)) ? header + body * count : 0;
  1784. }
  1785.  
  1786. static int
  1787. gen7_estimate_state_size(const struct ilo_dev_info *dev,
  1788.                          enum ilo_gpe_gen7_state state,
  1789.                          int arg)
  1790. {
  1791.    static const struct {
  1792.       int alignment;
  1793.       int body;
  1794.       bool is_array;
  1795.    } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = {
  1796.       [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA]          = { 8,  8,  true },
  1797.       [ILO_GPE_GEN7_SF_CLIP_VIEWPORT]                   = { 16, 16, true },
  1798.       [ILO_GPE_GEN7_CC_VIEWPORT]                        = { 8,  2,  true },
  1799.       [ILO_GPE_GEN7_COLOR_CALC_STATE]                   = { 16, 6,  false },
  1800.       [ILO_GPE_GEN7_BLEND_STATE]                        = { 16, 2,  true },
  1801.       [ILO_GPE_GEN7_DEPTH_STENCIL_STATE]                = { 16, 3,  false },
  1802.       [ILO_GPE_GEN7_SCISSOR_RECT]                       = { 8,  2,  true },
  1803.       [ILO_GPE_GEN7_BINDING_TABLE_STATE]                = { 8,  1,  true },
  1804.       [ILO_GPE_GEN7_SURFACE_STATE]                      = { 8,  8,  false },
  1805.       [ILO_GPE_GEN7_SAMPLER_STATE]                      = { 8,  4,  true },
  1806.       [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE]         = { 8,  4,  false },
  1807.       [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER]               = { 8,  1,  true },
  1808.    };
  1809.    const int alignment = gen7_state_size_table[state].alignment;
  1810.    const int body = gen7_state_size_table[state].body;
  1811.    const bool is_array = gen7_state_size_table[state].is_array;
  1812.    const int count = arg;
  1813.    int estimate;
  1814.  
  1815.    ILO_GPE_VALID_GEN(dev, 7, 7);
  1816.    assert(state < ILO_GPE_GEN7_STATE_COUNT);
  1817.  
  1818.    if (likely(count)) {
  1819.       if (is_array) {
  1820.          estimate = (alignment - 1) + body * count;
  1821.       }
  1822.       else {
  1823.          estimate = (alignment - 1) + body;
  1824.          /* all states are aligned */
  1825.          if (count > 1)
  1826.             estimate += util_align_npot(body, alignment) * (count - 1);
  1827.       }
  1828.    }
  1829.    else {
  1830.       estimate = 0;
  1831.    }
  1832.  
  1833.    return estimate;
  1834. }
  1835.  
  1836. static void
  1837. gen7_init(struct ilo_gpe_gen7 *gen7)
  1838. {
  1839.    const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
  1840.  
  1841.    gen7->estimate_command_size = gen7_estimate_command_size;
  1842.    gen7->estimate_state_size = gen7_estimate_state_size;
  1843.  
  1844. #define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
  1845. #define GEN7_SET(gen7, name)       gen7->emit_ ## name = gen7_emit_ ## name
  1846.    GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
  1847.    GEN7_USE(gen7, STATE_SIP, gen6);
  1848.    GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
  1849.    GEN7_USE(gen7, PIPELINE_SELECT, gen6);
  1850.    GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
  1851.    GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
  1852.    GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
  1853.    GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
  1854.    GEN7_SET(gen7, GPGPU_WALKER);
  1855.    GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
  1856.    GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
  1857.    GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
  1858.    GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
  1859.    GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
  1860.    GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
  1861.    GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
  1862.    GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
  1863.    GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
  1864.    GEN7_USE(gen7, 3DSTATE_VS, gen6);
  1865.    GEN7_SET(gen7, 3DSTATE_GS);
  1866.    GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
  1867.    GEN7_SET(gen7, 3DSTATE_SF);
  1868.    GEN7_SET(gen7, 3DSTATE_WM);
  1869.    GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
  1870.    GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
  1871.    GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
  1872.    GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
  1873.    GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
  1874.    GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
  1875.    GEN7_SET(gen7, 3DSTATE_HS);
  1876.    GEN7_SET(gen7, 3DSTATE_TE);
  1877.    GEN7_SET(gen7, 3DSTATE_DS);
  1878.    GEN7_SET(gen7, 3DSTATE_STREAMOUT);
  1879.    GEN7_SET(gen7, 3DSTATE_SBE);
  1880.    GEN7_SET(gen7, 3DSTATE_PS);
  1881.    GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
  1882.    GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
  1883.    GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
  1884.    GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
  1885.    GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
  1886.    GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
  1887.    GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
  1888.    GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
  1889.    GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
  1890.    GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
  1891.    GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
  1892.    GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
  1893.    GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
  1894.    GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
  1895.    GEN7_SET(gen7, 3DSTATE_URB_VS);
  1896.    GEN7_SET(gen7, 3DSTATE_URB_HS);
  1897.    GEN7_SET(gen7, 3DSTATE_URB_DS);
  1898.    GEN7_SET(gen7, 3DSTATE_URB_GS);
  1899.    GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
  1900.    GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
  1901.    GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
  1902.    GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
  1903.    GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
  1904.    GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
  1905.    GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
  1906.    GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
  1907.    GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
  1908.    GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
  1909.    GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
  1910.    GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
  1911.    GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
  1912.    GEN7_USE(gen7, PIPE_CONTROL, gen6);
  1913.    GEN7_SET(gen7, 3DPRIMITIVE);
  1914.    GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
  1915.    GEN7_SET(gen7, SF_CLIP_VIEWPORT);
  1916.    GEN7_USE(gen7, CC_VIEWPORT, gen6);
  1917.    GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
  1918.    GEN7_USE(gen7, BLEND_STATE, gen6);
  1919.    GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
  1920.    GEN7_USE(gen7, SCISSOR_RECT, gen6);
  1921.    GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
  1922.    GEN7_USE(gen7, SURFACE_STATE, gen6);
  1923.    GEN7_USE(gen7, SAMPLER_STATE, gen6);
  1924.    GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
  1925.    GEN7_USE(gen7, push_constant_buffer, gen6);
  1926. #undef GEN7_USE
  1927. #undef GEN7_SET
  1928. }
  1929.  
  1930. static struct ilo_gpe_gen7 gen7_gpe;
  1931.  
  1932. const struct ilo_gpe_gen7 *
  1933. ilo_gpe_gen7_get(void)
  1934. {
  1935.    if (!gen7_gpe.estimate_command_size)
  1936.       gen7_init(&gen7_gpe);
  1937.  
  1938.    return &gen7_gpe;
  1939. }
  1940.