Subversion Repositories Kolibri OS

Rev

Rev 4358 | Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. #include <assert.h>
  25.  
  26. #include "intel_batchbuffer.h"
  27. #include "intel_fbo.h"
  28. #include "intel_mipmap_tree.h"
  29.  
  30. #include "brw_context.h"
  31. #include "brw_defines.h"
  32. #include "brw_state.h"
  33.  
  34. #include "brw_blorp.h"
  35. #include "gen7_blorp.h"
  36.  
  37.  
  38. /* 3DSTATE_URB_VS
  39.  * 3DSTATE_URB_HS
  40.  * 3DSTATE_URB_DS
  41.  * 3DSTATE_URB_GS
  42.  *
  43.  * If the 3DSTATE_URB_VS is emitted, than the others must be also.
  44.  * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
  45.  *
  46.  *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
  47.  *     programmed in order for the programming of this state to be
  48.  *     valid.
  49.  */
  50. static void
  51. gen7_blorp_emit_urb_config(struct brw_context *brw,
  52.                            const brw_blorp_params *params)
  53. {
  54.    /* The minimum valid value is 32. See 3DSTATE_URB_VS,
  55.     * Dword 1.15:0 "VS Number of URB Entries".
  56.     */
  57.    int num_vs_entries = 32;
  58.    int vs_size = 2;
  59.    int vs_start = 2; /* skip over push constants */
  60.  
  61.    gen7_emit_urb_state(brw, num_vs_entries, vs_size, vs_start);
  62. }
  63.  
  64.  
  65. /* 3DSTATE_BLEND_STATE_POINTERS */
  66. static void
  67. gen7_blorp_emit_blend_state_pointer(struct brw_context *brw,
  68.                                     const brw_blorp_params *params,
  69.                                     uint32_t cc_blend_state_offset)
  70. {
  71.    BEGIN_BATCH(2);
  72.    OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2));
  73.    OUT_BATCH(cc_blend_state_offset | 1);
  74.    ADVANCE_BATCH();
  75. }
  76.  
  77.  
  78. /* 3DSTATE_CC_STATE_POINTERS */
  79. static void
  80. gen7_blorp_emit_cc_state_pointer(struct brw_context *brw,
  81.                                  const brw_blorp_params *params,
  82.                                  uint32_t cc_state_offset)
  83. {
  84.    BEGIN_BATCH(2);
  85.    OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
  86.    OUT_BATCH(cc_state_offset | 1);
  87.    ADVANCE_BATCH();
  88. }
  89.  
  90. static void
  91. gen7_blorp_emit_cc_viewport(struct brw_context *brw,
  92.                             const brw_blorp_params *params)
  93. {
  94.    struct brw_cc_viewport *ccv;
  95.    uint32_t cc_vp_offset;
  96.  
  97.    ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
  98.                                                    sizeof(*ccv), 32,
  99.                                                    &cc_vp_offset);
  100.    ccv->min_depth = 0.0;
  101.    ccv->max_depth = 1.0;
  102.  
  103.    BEGIN_BATCH(2);
  104.    OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2));
  105.    OUT_BATCH(cc_vp_offset);
  106.    ADVANCE_BATCH();
  107. }
  108.  
  109.  
  110. /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
  111.  *
  112.  * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
  113.  */
  114. static void
  115. gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw,
  116.                                              const brw_blorp_params *params,
  117.                                              uint32_t depthstencil_offset)
  118. {
  119.    BEGIN_BATCH(2);
  120.    OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
  121.    OUT_BATCH(depthstencil_offset | 1);
  122.    ADVANCE_BATCH();
  123. }
  124.  
  125.  
  126. /* SURFACE_STATE for renderbuffer or texture surface (see
  127.  * brw_update_renderbuffer_surface and brw_update_texture_surface)
  128.  */
  129. static uint32_t
  130. gen7_blorp_emit_surface_state(struct brw_context *brw,
  131.                               const brw_blorp_params *params,
  132.                               const brw_blorp_surface_info *surface,
  133.                               uint32_t read_domains, uint32_t write_domain,
  134.                               bool is_render_target)
  135. {
  136.    uint32_t wm_surf_offset;
  137.    uint32_t width = surface->width;
  138.    uint32_t height = surface->height;
  139.    /* Note: since gen7 uses INTEL_MSAA_LAYOUT_CMS or INTEL_MSAA_LAYOUT_UMS for
  140.     * color surfaces, width and height are measured in pixels; we don't need
  141.     * to divide them by 2 as we do for Gen6 (see
  142.     * gen6_blorp_emit_surface_state).
  143.     */
  144.    struct intel_region *region = surface->mt->region;
  145.    uint32_t tile_x, tile_y;
  146.    uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
  147.  
  148.    uint32_t tiling = surface->map_stencil_as_y_tiled
  149.       ? I915_TILING_Y : region->tiling;
  150.  
  151.    uint32_t *surf = (uint32_t *)
  152.       brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &wm_surf_offset);
  153.    memset(surf, 0, 8 * 4);
  154.  
  155.    surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
  156.              surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT |
  157.              gen7_surface_tiling_mode(tiling);
  158.  
  159.    if (surface->mt->align_h == 4)
  160.       surf[0] |= GEN7_SURFACE_VALIGN_4;
  161.    if (surface->mt->align_w == 8)
  162.       surf[0] |= GEN7_SURFACE_HALIGN_8;
  163.  
  164.    if (surface->array_spacing_lod0)
  165.       surf[0] |= GEN7_SURFACE_ARYSPC_LOD0;
  166.    else
  167.       surf[0] |= GEN7_SURFACE_ARYSPC_FULL;
  168.  
  169.    /* reloc */
  170.    surf[1] =
  171.       surface->compute_tile_offsets(&tile_x, &tile_y) + region->bo->offset;
  172.  
  173.    /* Note that the low bits of these fields are missing, so
  174.     * there's the possibility of getting in trouble.
  175.     */
  176.    assert(tile_x % 4 == 0);
  177.    assert(tile_y % 2 == 0);
  178.    surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
  179.              SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) |
  180.              SET_FIELD(mocs, GEN7_SURFACE_MOCS);
  181.  
  182.    surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
  183.              SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
  184.  
  185.    uint32_t pitch_bytes = region->pitch;
  186.    if (surface->map_stencil_as_y_tiled)
  187.       pitch_bytes *= 2;
  188.    surf[3] = pitch_bytes - 1;
  189.  
  190.    surf[4] = gen7_surface_msaa_bits(surface->num_samples, surface->msaa_layout);
  191.    if (surface->mt->mcs_mt) {
  192.       gen7_set_surface_mcs_info(brw, surf, wm_surf_offset, surface->mt->mcs_mt,
  193.                                 is_render_target);
  194.    }
  195.  
  196.    surf[7] = surface->mt->fast_clear_color_value;
  197.  
  198.    if (brw->is_haswell) {
  199.       surf[7] |= (SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
  200.                   SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
  201.                   SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
  202.                   SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
  203.    }
  204.  
  205.    /* Emit relocation to surface contents */
  206.    drm_intel_bo_emit_reloc(brw->batch.bo,
  207.                            wm_surf_offset + 4,
  208.                            region->bo,
  209.                            surf[1] - region->bo->offset,
  210.                            read_domains, write_domain);
  211.  
  212.    gen7_check_surface_setup(surf, is_render_target);
  213.  
  214.    return wm_surf_offset;
  215. }
  216.  
  217.  
  218. /**
  219.  * SAMPLER_STATE.  See gen7_update_sampler_state().
  220.  */
  221. static uint32_t
  222. gen7_blorp_emit_sampler_state(struct brw_context *brw,
  223.                               const brw_blorp_params *params)
  224. {
  225.    uint32_t sampler_offset;
  226.  
  227.    struct gen7_sampler_state *sampler = (struct gen7_sampler_state *)
  228.       brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
  229.                       sizeof(struct gen7_sampler_state),
  230.                       32, &sampler_offset);
  231.    memset(sampler, 0, sizeof(*sampler));
  232.  
  233.    sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
  234.    sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
  235.    sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
  236.  
  237.    sampler->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
  238.    sampler->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
  239.    sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
  240.  
  241.    //   sampler->ss0.min_mag_neq = 1;
  242.  
  243.    /* Set LOD bias:
  244.     */
  245.    sampler->ss0.lod_bias = 0;
  246.  
  247.    sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
  248.    sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
  249.  
  250.    /* Set BaseMipLevel, MaxLOD, MinLOD:
  251.     *
  252.     * XXX: I don't think that using firstLevel, lastLevel works,
  253.     * because we always setup the surface state as if firstLevel ==
  254.     * level zero.  Probably have to subtract firstLevel from each of
  255.     * these:
  256.     */
  257.    sampler->ss0.base_level = U_FIXED(0, 1);
  258.  
  259.    sampler->ss1.max_lod = U_FIXED(0, 8);
  260.    sampler->ss1.min_lod = U_FIXED(0, 8);
  261.  
  262.    sampler->ss3.non_normalized_coord = 1;
  263.  
  264.    sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
  265.       BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
  266.       BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
  267.    sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
  268.       BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
  269.       BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
  270.  
  271.    return sampler_offset;
  272. }
  273.  
  274.  
  275. /* 3DSTATE_VS
  276.  *
  277.  * Disable vertex shader.
  278.  */
  279. static void
  280. gen7_blorp_emit_vs_disable(struct brw_context *brw,
  281.                            const brw_blorp_params *params)
  282. {
  283.    BEGIN_BATCH(7);
  284.    OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
  285.    OUT_BATCH(0);
  286.    OUT_BATCH(0);
  287.    OUT_BATCH(0);
  288.    OUT_BATCH(0);
  289.    OUT_BATCH(0);
  290.    OUT_BATCH(0);
  291.    ADVANCE_BATCH();
  292.  
  293.    BEGIN_BATCH(6);
  294.    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
  295.    OUT_BATCH(0);
  296.    OUT_BATCH(0);
  297.    OUT_BATCH(0);
  298.    OUT_BATCH(0);
  299.    OUT_BATCH(0);
  300.    ADVANCE_BATCH();
  301. }
  302.  
  303.  
  304. /* 3DSTATE_HS
  305.  *
  306.  * Disable the hull shader.
  307.  */
  308. static void
  309. gen7_blorp_emit_hs_disable(struct brw_context *brw,
  310.                            const brw_blorp_params *params)
  311. {
  312.    BEGIN_BATCH(7);
  313.    OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
  314.    OUT_BATCH(0);
  315.    OUT_BATCH(0);
  316.    OUT_BATCH(0);
  317.    OUT_BATCH(0);
  318.    OUT_BATCH(0);
  319.    OUT_BATCH(0);
  320.    ADVANCE_BATCH();
  321.  
  322.    BEGIN_BATCH(7);
  323.    OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
  324.    OUT_BATCH(0);
  325.    OUT_BATCH(0);
  326.    OUT_BATCH(0);
  327.    OUT_BATCH(0);
  328.    OUT_BATCH(0);
  329.    OUT_BATCH(0);
  330.    ADVANCE_BATCH();
  331. }
  332.  
  333.  
  334. /* 3DSTATE_TE
  335.  *
  336.  * Disable the tesselation engine.
  337.  */
  338. static void
  339. gen7_blorp_emit_te_disable(struct brw_context *brw,
  340.                            const brw_blorp_params *params)
  341. {
  342.    BEGIN_BATCH(4);
  343.    OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
  344.    OUT_BATCH(0);
  345.    OUT_BATCH(0);
  346.    OUT_BATCH(0);
  347.    ADVANCE_BATCH();
  348. }
  349.  
  350.  
  351. /* 3DSTATE_DS
  352.  *
  353.  * Disable the domain shader.
  354.  */
  355. static void
  356. gen7_blorp_emit_ds_disable(struct brw_context *brw,
  357.                            const brw_blorp_params *params)
  358. {
  359.    BEGIN_BATCH(7);
  360.    OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2));
  361.    OUT_BATCH(0);
  362.    OUT_BATCH(0);
  363.    OUT_BATCH(0);
  364.    OUT_BATCH(0);
  365.    OUT_BATCH(0);
  366.    OUT_BATCH(0);
  367.    ADVANCE_BATCH();
  368.  
  369.    BEGIN_BATCH(6);
  370.    OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
  371.    OUT_BATCH(0);
  372.    OUT_BATCH(0);
  373.    OUT_BATCH(0);
  374.    OUT_BATCH(0);
  375.    OUT_BATCH(0);
  376.    ADVANCE_BATCH();
  377. }
  378.  
  379. /* 3DSTATE_GS
  380.  *
  381.  * Disable the geometry shader.
  382.  */
  383. static void
  384. gen7_blorp_emit_gs_disable(struct brw_context *brw,
  385.                            const brw_blorp_params *params)
  386. {
  387.    BEGIN_BATCH(7);
  388.    OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2));
  389.    OUT_BATCH(0);
  390.    OUT_BATCH(0);
  391.    OUT_BATCH(0);
  392.    OUT_BATCH(0);
  393.    OUT_BATCH(0);
  394.    OUT_BATCH(0);
  395.    ADVANCE_BATCH();
  396.  
  397.    BEGIN_BATCH(7);
  398.    OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
  399.    OUT_BATCH(0);
  400.    OUT_BATCH(0);
  401.    OUT_BATCH(0);
  402.    OUT_BATCH(0);
  403.    OUT_BATCH(0);
  404.    OUT_BATCH(0);
  405.    ADVANCE_BATCH();
  406. }
  407.  
  408. /* 3DSTATE_STREAMOUT
  409.  *
  410.  * Disable streamout.
  411.  */
  412. static void
  413. gen7_blorp_emit_streamout_disable(struct brw_context *brw,
  414.                                   const brw_blorp_params *params)
  415. {
  416.    BEGIN_BATCH(3);
  417.    OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
  418.    OUT_BATCH(0);
  419.    OUT_BATCH(0);
  420.    ADVANCE_BATCH();
  421. }
  422.  
  423.  
  424. static void
  425. gen7_blorp_emit_sf_config(struct brw_context *brw,
  426.                           const brw_blorp_params *params)
  427. {
  428.    /* 3DSTATE_SF
  429.     *
  430.     * Disable ViewportTransformEnable (dw1.1)
  431.     *
  432.     * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
  433.     * Primitives Overview":
  434.     *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
  435.     *     use of screen- space coordinates).
  436.     *
  437.     * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
  438.     * and BackFaceFillMode (dw1.4:3) to SOLID(0).
  439.     *
  440.     * From the Sandy Bridge PRM, Volume 2, Part 1, Section
  441.     * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
  442.     *     SOLID: Any triangle or rectangle object found to be front-facing
  443.     *     is rendered as a solid object. This setting is required when
  444.     *     (rendering rectangle (RECTLIST) objects.
  445.     */
  446.    {
  447.       BEGIN_BATCH(7);
  448.       OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
  449.       OUT_BATCH(params->depth_format <<
  450.                 GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
  451.       OUT_BATCH(params->num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
  452.       OUT_BATCH(0);
  453.       OUT_BATCH(0);
  454.       OUT_BATCH(0);
  455.       OUT_BATCH(0);
  456.       ADVANCE_BATCH();
  457.    }
  458.  
  459.    /* 3DSTATE_SBE */
  460.    {
  461.       BEGIN_BATCH(14);
  462.       OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
  463.       OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */
  464.                 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
  465.                 0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
  466.       for (int i = 0; i < 12; ++i)
  467.          OUT_BATCH(0);
  468.       ADVANCE_BATCH();
  469.    }
  470. }
  471.  
  472.  
  473. /**
  474.  * Disable thread dispatch (dw5.19) and enable the HiZ op.
  475.  */
  476. static void
  477. gen7_blorp_emit_wm_config(struct brw_context *brw,
  478.                           const brw_blorp_params *params,
  479.                           brw_blorp_prog_data *prog_data)
  480. {
  481.    uint32_t dw1 = 0, dw2 = 0;
  482.  
  483.    switch (params->hiz_op) {
  484.    case GEN6_HIZ_OP_DEPTH_CLEAR:
  485.       dw1 |= GEN7_WM_DEPTH_CLEAR;
  486.       break;
  487.    case GEN6_HIZ_OP_DEPTH_RESOLVE:
  488.       dw1 |= GEN7_WM_DEPTH_RESOLVE;
  489.       break;
  490.    case GEN6_HIZ_OP_HIZ_RESOLVE:
  491.       dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
  492.       break;
  493.    case GEN6_HIZ_OP_NONE:
  494.       break;
  495.    default:
  496.       assert(0);
  497.       break;
  498.    }
  499.    dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
  500.    dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
  501.    dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
  502.    if (params->use_wm_prog) {
  503.       dw1 |= GEN7_WM_KILL_ENABLE; /* TODO: temporarily smash on */
  504.       dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */
  505.    }
  506.  
  507.       if (params->num_samples > 1) {
  508.          dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
  509.          if (prog_data && prog_data->persample_msaa_dispatch)
  510.             dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
  511.          else
  512.             dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
  513.       } else {
  514.          dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
  515.          dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
  516.       }
  517.  
  518.    BEGIN_BATCH(3);
  519.    OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
  520.    OUT_BATCH(dw1);
  521.    OUT_BATCH(dw2);
  522.    ADVANCE_BATCH();
  523. }
  524.  
  525.  
  526. /**
  527.  * 3DSTATE_PS
  528.  *
  529.  * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
  530.  * that, thread dispatch info must still be specified.
  531.  *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
  532.  *       valid range for this field is [0x3, 0x2f].
  533.  *     - A dispatch mode must be given; that is, at least one of the
  534.  *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
  535.  *       discovered through simulator error messages.
  536.  */
  537. static void
  538. gen7_blorp_emit_ps_config(struct brw_context *brw,
  539.                           const brw_blorp_params *params,
  540.                           uint32_t prog_offset,
  541.                           brw_blorp_prog_data *prog_data)
  542. {
  543.    uint32_t dw2, dw4, dw5;
  544.    const int max_threads_shift = brw->is_haswell ?
  545.       HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
  546.  
  547.    dw2 = dw4 = dw5 = 0;
  548.    dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
  549.  
  550.    /* If there's a WM program, we need to do 16-pixel dispatch since that's
  551.     * what the program is compiled for.  If there isn't, then it shouldn't
  552.     * matter because no program is actually being run.  However, the hardware
  553.     * gets angry if we don't enable at least one dispatch mode, so just enable
  554.     * 16-pixel dispatch unconditionally.
  555.     */
  556.    dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
  557.  
  558.    if (brw->is_haswell)
  559.       dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
  560.    if (params->use_wm_prog) {
  561.       dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
  562.       dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
  563.       dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
  564.    }
  565.  
  566.    switch (params->fast_clear_op) {
  567.    case GEN7_FAST_CLEAR_OP_FAST_CLEAR:
  568.       dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
  569.       break;
  570.    case GEN7_FAST_CLEAR_OP_RESOLVE:
  571.       dw4 |= GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE;
  572.       break;
  573.    default:
  574.       break;
  575.    }
  576.  
  577.    BEGIN_BATCH(8);
  578.    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
  579.    OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
  580.    OUT_BATCH(dw2);
  581.    OUT_BATCH(0);
  582.    OUT_BATCH(dw4);
  583.    OUT_BATCH(dw5);
  584.    OUT_BATCH(0);
  585.    OUT_BATCH(0);
  586.    ADVANCE_BATCH();
  587. }
  588.  
  589.  
  590. static void
  591. gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw,
  592.                                           const brw_blorp_params *params,
  593.                                           uint32_t wm_bind_bo_offset)
  594. {
  595.    BEGIN_BATCH(2);
  596.    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
  597.    OUT_BATCH(wm_bind_bo_offset);
  598.    ADVANCE_BATCH();
  599. }
  600.  
  601.  
  602. static void
  603. gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw,
  604.                                           const brw_blorp_params *params,
  605.                                           uint32_t sampler_offset)
  606. {
  607.    BEGIN_BATCH(2);
  608.    OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
  609.    OUT_BATCH(sampler_offset);
  610.    ADVANCE_BATCH();
  611. }
  612.  
  613.  
  614. static void
  615. gen7_blorp_emit_constant_ps(struct brw_context *brw,
  616.                             const brw_blorp_params *params,
  617.                             uint32_t wm_push_const_offset)
  618. {
  619.    uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
  620.  
  621.    /* Make sure the push constants fill an exact integer number of
  622.     * registers.
  623.     */
  624.    assert(sizeof(brw_blorp_wm_push_constants) % 32 == 0);
  625.  
  626.    /* There must be at least one register worth of push constant data. */
  627.    assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0);
  628.  
  629.    /* Enable push constant buffer 0. */
  630.    BEGIN_BATCH(7);
  631.    OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
  632.              (7 - 2));
  633.    OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS);
  634.    OUT_BATCH(0);
  635.    OUT_BATCH(wm_push_const_offset | mocs);
  636.    OUT_BATCH(0);
  637.    OUT_BATCH(0);
  638.    OUT_BATCH(0);
  639.    ADVANCE_BATCH();
  640. }
  641.  
  642. static void
  643. gen7_blorp_emit_constant_ps_disable(struct brw_context *brw,
  644.                                     const brw_blorp_params *params)
  645. {
  646.    BEGIN_BATCH(7);
  647.    OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
  648.    OUT_BATCH(0);
  649.    OUT_BATCH(0);
  650.    OUT_BATCH(0);
  651.    OUT_BATCH(0);
  652.    OUT_BATCH(0);
  653.    OUT_BATCH(0);
  654.    ADVANCE_BATCH();
  655. }
  656.  
  657. static void
  658. gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
  659.                                      const brw_blorp_params *params)
  660. {
  661.    struct gl_context *ctx = &brw->ctx;
  662.    uint32_t draw_x = params->depth.x_offset;
  663.    uint32_t draw_y = params->depth.y_offset;
  664.    uint32_t tile_mask_x, tile_mask_y;
  665.    uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
  666.  
  667.    brw_get_depthstencil_tile_masks(params->depth.mt,
  668.                                    params->depth.level,
  669.                                    params->depth.layer,
  670.                                    NULL,
  671.                                    &tile_mask_x, &tile_mask_y);
  672.  
  673.    /* 3DSTATE_DEPTH_BUFFER */
  674.    {
  675.       uint32_t tile_x = draw_x & tile_mask_x;
  676.       uint32_t tile_y = draw_y & tile_mask_y;
  677.       uint32_t offset =
  678.          intel_region_get_aligned_offset(params->depth.mt->region,
  679.                                          draw_x & ~tile_mask_x,
  680.                                          draw_y & ~tile_mask_y, false);
  681.  
  682.       /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
  683.        * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
  684.        * Coordinate Offset X/Y":
  685.        *
  686.        *   "The 3 LSBs of both offsets must be zero to ensure correct
  687.        *   alignment"
  688.        *
  689.        * We have no guarantee that tile_x and tile_y are correctly aligned,
  690.        * since they are determined by the mipmap layout, which is only aligned
  691.        * to multiples of 4.
  692.        *
  693.        * So, to avoid hanging the GPU, just smash the low order 3 bits of
  694.        * tile_x and tile_y to 0.  This is a temporary workaround until we come
  695.        * up with a better solution.
  696.        */
  697.       WARN_ONCE((tile_x & 7) || (tile_y & 7),
  698.                 "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
  699.                 "Truncating offset, bad rendering may occur.\n");
  700.       tile_x &= ~7;
  701.       tile_y &= ~7;
  702.  
  703.       intel_emit_depth_stall_flushes(brw);
  704.  
  705.       BEGIN_BATCH(7);
  706.       OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
  707.       OUT_BATCH((params->depth.mt->region->pitch - 1) |
  708.                 params->depth_format << 18 |
  709.                 1 << 22 | /* hiz enable */
  710.                 1 << 28 | /* depth write */
  711.                 BRW_SURFACE_2D << 29);
  712.       OUT_RELOC(params->depth.mt->region->bo,
  713.                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  714.                 offset);
  715.       OUT_BATCH((params->depth.width + tile_x - 1) << 4 |
  716.                 (params->depth.height + tile_y - 1) << 18);
  717.       OUT_BATCH(mocs);
  718.       OUT_BATCH(tile_x |
  719.                 tile_y << 16);
  720.       OUT_BATCH(0);
  721.       ADVANCE_BATCH();
  722.    }
  723.  
  724.    /* 3DSTATE_HIER_DEPTH_BUFFER */
  725.    {
  726.       struct intel_region *hiz_region = params->depth.mt->hiz_mt->region;
  727.       uint32_t hiz_offset =
  728.          intel_region_get_aligned_offset(hiz_region,
  729.                                          draw_x & ~tile_mask_x,
  730.                                          (draw_y & ~tile_mask_y) / 2, false);
  731.  
  732.       BEGIN_BATCH(3);
  733.       OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
  734.       OUT_BATCH((mocs << 25) |
  735.                 (hiz_region->pitch - 1));
  736.       OUT_RELOC(hiz_region->bo,
  737.                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  738.                 hiz_offset);
  739.       ADVANCE_BATCH();
  740.    }
  741.  
  742.    /* 3DSTATE_STENCIL_BUFFER */
  743.    {
  744.       BEGIN_BATCH(3);
  745.       OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
  746.       OUT_BATCH(0);
  747.       OUT_BATCH(0);
  748.       ADVANCE_BATCH();
  749.    }
  750. }
  751.  
  752.  
  753. static void
  754. gen7_blorp_emit_depth_disable(struct brw_context *brw,
  755.                               const brw_blorp_params *params)
  756. {
  757.    BEGIN_BATCH(7);
  758.    OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
  759.    OUT_BATCH(BRW_DEPTHFORMAT_D32_FLOAT << 18 | (BRW_SURFACE_NULL << 29));
  760.    OUT_BATCH(0);
  761.    OUT_BATCH(0);
  762.    OUT_BATCH(0);
  763.    OUT_BATCH(0);
  764.    OUT_BATCH(0);
  765.    ADVANCE_BATCH();
  766.  
  767.    BEGIN_BATCH(3);
  768.    OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
  769.    OUT_BATCH(0);
  770.    OUT_BATCH(0);
  771.    ADVANCE_BATCH();
  772.  
  773.    BEGIN_BATCH(3);
  774.    OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
  775.    OUT_BATCH(0);
  776.    OUT_BATCH(0);
  777.    ADVANCE_BATCH();
  778. }
  779.  
  780.  
  781. /* 3DSTATE_CLEAR_PARAMS
  782.  *
  783.  * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4
  784.  * 3DSTATE_CLEAR_PARAMS:
  785.  *    3DSTATE_CLEAR_PARAMS must always be programmed in the along
  786.  *    with the other Depth/Stencil state commands(i.e.  3DSTATE_DEPTH_BUFFER,
  787.  *    3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
  788.  */
  789. static void
  790. gen7_blorp_emit_clear_params(struct brw_context *brw,
  791.                              const brw_blorp_params *params)
  792. {
  793.    BEGIN_BATCH(3);
  794.    OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
  795.    OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0);
  796.    OUT_BATCH(GEN7_DEPTH_CLEAR_VALID);
  797.    ADVANCE_BATCH();
  798. }
  799.  
  800.  
  801. /* 3DPRIMITIVE */
  802. static void
  803. gen7_blorp_emit_primitive(struct brw_context *brw,
  804.                           const brw_blorp_params *params)
  805. {
  806.    BEGIN_BATCH(7);
  807.    OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
  808.    OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
  809.              _3DPRIM_RECTLIST);
  810.    OUT_BATCH(3); /* vertex count per instance */
  811.    OUT_BATCH(0);
  812.    OUT_BATCH(1); /* instance count */
  813.    OUT_BATCH(0);
  814.    OUT_BATCH(0);
  815.    ADVANCE_BATCH();
  816. }
  817.  
  818.  
  819. /**
  820.  * \copydoc gen6_blorp_exec()
  821.  */
  822. void
  823. gen7_blorp_exec(struct brw_context *brw,
  824.                 const brw_blorp_params *params)
  825. {
  826.    brw_blorp_prog_data *prog_data = NULL;
  827.    uint32_t cc_blend_state_offset = 0;
  828.    uint32_t cc_state_offset = 0;
  829.    uint32_t depthstencil_offset;
  830.    uint32_t wm_push_const_offset = 0;
  831.    uint32_t wm_bind_bo_offset = 0;
  832.    uint32_t sampler_offset = 0;
  833.  
  834.    uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
  835.    gen6_blorp_emit_batch_head(brw, params);
  836.    gen6_emit_3dstate_multisample(brw, params->num_samples);
  837.    gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
  838.    gen6_blorp_emit_state_base_address(brw, params);
  839.    gen6_blorp_emit_vertices(brw, params);
  840.    gen7_blorp_emit_urb_config(brw, params);
  841.    if (params->use_wm_prog) {
  842.       cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
  843.       cc_state_offset = gen6_blorp_emit_cc_state(brw, params);
  844.       gen7_blorp_emit_blend_state_pointer(brw, params, cc_blend_state_offset);
  845.       gen7_blorp_emit_cc_state_pointer(brw, params, cc_state_offset);
  846.    }
  847.    depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
  848.    gen7_blorp_emit_depth_stencil_state_pointers(brw, params,
  849.                                                 depthstencil_offset);
  850.    if (params->use_wm_prog) {
  851.       uint32_t wm_surf_offset_renderbuffer;
  852.       uint32_t wm_surf_offset_texture = 0;
  853.       wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
  854.       intel_miptree_used_for_rendering(params->dst.mt);
  855.       wm_surf_offset_renderbuffer =
  856.          gen7_blorp_emit_surface_state(brw, params, &params->dst,
  857.                                        I915_GEM_DOMAIN_RENDER,
  858.                                        I915_GEM_DOMAIN_RENDER,
  859.                                        true /* is_render_target */);
  860.       if (params->src.mt) {
  861.          wm_surf_offset_texture =
  862.             gen7_blorp_emit_surface_state(brw, params, &params->src,
  863.                                           I915_GEM_DOMAIN_SAMPLER, 0,
  864.                                           false /* is_render_target */);
  865.       }
  866.       wm_bind_bo_offset =
  867.          gen6_blorp_emit_binding_table(brw, params,
  868.                                        wm_surf_offset_renderbuffer,
  869.                                        wm_surf_offset_texture);
  870.       sampler_offset = gen7_blorp_emit_sampler_state(brw, params);
  871.    }
  872.    gen7_blorp_emit_vs_disable(brw, params);
  873.    gen7_blorp_emit_hs_disable(brw, params);
  874.    gen7_blorp_emit_te_disable(brw, params);
  875.    gen7_blorp_emit_ds_disable(brw, params);
  876.    gen7_blorp_emit_gs_disable(brw, params);
  877.    gen7_blorp_emit_streamout_disable(brw, params);
  878.    gen6_blorp_emit_clip_disable(brw, params);
  879.    gen7_blorp_emit_sf_config(brw, params);
  880.    gen7_blorp_emit_wm_config(brw, params, prog_data);
  881.    if (params->use_wm_prog) {
  882.       gen7_blorp_emit_binding_table_pointers_ps(brw, params,
  883.                                                 wm_bind_bo_offset);
  884.       gen7_blorp_emit_sampler_state_pointers_ps(brw, params, sampler_offset);
  885.       gen7_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
  886.    } else {
  887.       gen7_blorp_emit_constant_ps_disable(brw, params);
  888.    }
  889.    gen7_blorp_emit_ps_config(brw, params, prog_offset, prog_data);
  890.    gen7_blorp_emit_cc_viewport(brw, params);
  891.  
  892.    if (params->depth.mt)
  893.       gen7_blorp_emit_depth_stencil_config(brw, params);
  894.    else
  895.       gen7_blorp_emit_depth_disable(brw, params);
  896.    gen7_blorp_emit_clear_params(brw, params);
  897.    gen6_blorp_emit_drawing_rectangle(brw, params);
  898.    gen7_blorp_emit_primitive(brw, params);
  899. }
  900.