/**************************************************************************
 *
 * Copyright 2006 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <GL/gl.h>
#include <GL/internal/dri_interface.h>

#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_resolve_map.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_fbo.h"

#include "brw_blorp.h"
#include "brw_context.h"

#include "main/enums.h"
#include "main/fbobject.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/texcompress_etc.h"
#include "main/teximage.h"
#include "main/streaming-load-memcpy.h"
#include "x86/common_x86_asm.h"

#define FILE_DEBUG_FLAG DEBUG_MIPTREE

static bool
intel_miptree_alloc_mcs(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
                        GLuint num_samples);

/**
 * Determine which MSAA layout should be used by the MSAA surface being
 * created, based on the chip generation and the surface type.
 */
static enum intel_msaa_layout
compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target,
                    bool disable_aux_buffers)
{
   /* Prior to Gen7, all MSAA surfaces used IMS layout. */
   if (brw->gen < 7)
      return INTEL_MSAA_LAYOUT_IMS;

   /* In Gen7, IMS layout is only used for depth and stencil buffers. */
   switch (_mesa_get_format_base_format(format)) {
   case GL_DEPTH_COMPONENT:
   case GL_STENCIL_INDEX:
   case GL_DEPTH_STENCIL:
      return INTEL_MSAA_LAYOUT_IMS;
   default:
      /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
       *
       *   This field must be set to 0 for all SINT MSRTs when all RT channels
       *   are not written
       *
       * In practice this means that we have to disable MCS for all signed
       * integer MSAA buffers.  The alternative, to disable MCS only when one
       * of the render target channels is disabled, is impractical because it
       * would require converting between CMS and UMS MSAA layouts on the fly,
       * which is expensive.
       */
      if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) {
         return INTEL_MSAA_LAYOUT_UMS;
      } else if (disable_aux_buffers) {
         /* We can't use the CMS layout because it uses an aux buffer, the
          * MCS buffer.  So fall back to UMS, which is identical to CMS
          * without the MCS.
          */
         return INTEL_MSAA_LAYOUT_UMS;
      } else {
         return INTEL_MSAA_LAYOUT_CMS;
      }
   }
}


/**
 * For single-sampled render targets ("non-MSRT"), the MCS buffer is a
 * scaled-down bitfield representation of the color buffer which is capable of
 * recording when blocks of the color buffer are equal to the clear value.
 * This function returns the block size that will be used by the MCS buffer
 * corresponding to a certain color miptree.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p327):
 *
 *     The following table describes the RT alignment
 *
 *                       Pixels  Lines
 *         TiledY RT CL
 *             bpp
 *              32          8      4
 *              64          4      4
 *             128          2      4
 *         TiledX RT CL
 *             bpp
 *              32         16      2
 *              64          8      2
 *             128          4      2
 *
 * This alignment has the following uses:
 *
 * - For figuring out the size of the MCS buffer.  Each 4k tile in the MCS
 *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 *
 * - For figuring out alignment restrictions for a fast clear operation.  Fast
 *   clear operations must always clear aligned multiples of 16 blocks
 *   horizontally and 32 blocks vertically.
 *
 * - For scaling down the coordinates sent through the render pipeline during
 *   a fast clear.  X coordinates must be scaled down by 8 times the block
 *   width, and Y coordinates by 16 times the block height.
 *
 * - For scaling down the coordinates sent through the render pipeline during
 *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 *   by half the block width, and Y coordinates by half the block height.
 */
void
intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
                                 unsigned *width_px, unsigned *height)
{
   switch (mt->tiling) {
   default:
      unreachable("Non-MSRT MCS requires X or Y tiling");
      /* In release builds, fall through */
   case I915_TILING_Y:
      *width_px = 32 / mt->cpp;
      *height = 4;
      break;
   case I915_TILING_X:
      *width_px = 64 / mt->cpp;
      *height = 2;
   }
}
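
/* A quick sanity check of intel_get_non_msrt_mcs_alignment() against the PRM
 * table above: a Y-tiled 32 bpp surface has cpp == 4, giving
 * *width_px = 32 / 4 = 8 and *height = 4 (the "TiledY, 32 bpp" row), while
 * an X-tiled 128 bpp surface has cpp == 16, giving 64 / 16 = 4 by 2
 * (the "TiledX, 128 bpp" row).
 */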


/**
 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 * can be used.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p326):
 *
 *     - Support is limited to tiled render targets.
 *     - Support is for non-mip-mapped and non-array surface types only.
 *
 * And then later, on p327:
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
 */
bool
intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
                                       struct intel_mipmap_tree *mt)
{
   /* MCS support does not exist prior to Gen7 */
   if (brw->gen < 7)
      return false;

   if (mt->disable_aux_buffers)
      return false;

   /* MCS is only supported for color buffers */
   switch (_mesa_get_format_base_format(mt->format)) {
   case GL_DEPTH_COMPONENT:
   case GL_DEPTH_STENCIL:
   case GL_STENCIL_INDEX:
      return false;
   }

   if (mt->tiling != I915_TILING_X &&
       mt->tiling != I915_TILING_Y)
      return false;
   if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
      return false;
   if (mt->first_level != 0 || mt->last_level != 0)
      return false;
   if (mt->physical_depth0 != 1)
      return false;

   /* There's no point in using an MCS buffer if the surface isn't in a
    * renderable format.
    */
   if (!brw->format_supported_as_render_target[mt->format])
      return false;

   return true;
}


/**
 * Determine depth format corresponding to a depth+stencil format,
 * for separate stencil.
 */
mesa_format
intel_depth_format_for_depthstencil_format(mesa_format format) {
   switch (format) {
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return MESA_FORMAT_Z24_UNORM_X8_UINT;
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MESA_FORMAT_Z_FLOAT32;
   default:
      return format;
   }
}


/**
 * @param for_bo Indicates that the caller is
 *        intel_miptree_create_for_bo(). If true, then do not create
 *        \c stencil_mt.
 */
static struct intel_mipmap_tree *
intel_miptree_create_layout(struct brw_context *brw,
                            GLenum target,
                            mesa_format format,
                            GLuint first_level,
                            GLuint last_level,
                            GLuint width0,
                            GLuint height0,
                            GLuint depth0,
                            bool for_bo,
                            GLuint num_samples,
                            bool force_all_slices_at_each_lod,
                            bool disable_aux_buffers)
{
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
   if (!mt)
      return NULL;

   DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__,
       _mesa_lookup_enum_by_nr(target),
       _mesa_get_format_name(format),
       first_level, last_level, depth0, mt);

   if (target == GL_TEXTURE_1D_ARRAY) {
      /* For a 1D Array texture the OpenGL API will treat the height0
       * parameter as the number of array slices. For Intel hardware, we treat
       * the 1D array as a 2D Array with a height of 1.
       *
       * So, when we first come through this path to create a 1D Array
       * texture, height0 stores the number of slices, and depth0 is 1. In
       * this case, we want to swap height0 and depth0.
       *
       * Since some miptrees will be created based on the base miptree, we may
       * come through this path and see height0 as 1 and depth0 being the
       * number of slices. In this case we don't need to do the swap.
       */
      assert(height0 == 1 || depth0 == 1);
      if (height0 > 1) {
         depth0 = height0;
         height0 = 1;
      }
   }

   mt->target = target;
   mt->format = format;
   mt->first_level = first_level;
   mt->last_level = last_level;
   mt->logical_width0 = width0;
   mt->logical_height0 = height0;
   mt->logical_depth0 = depth0;
   mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
   mt->disable_aux_buffers = disable_aux_buffers;
   exec_list_make_empty(&mt->hiz_map);

   /* The cpp is bytes per (1, blockheight)-sized block for compressed
    * textures.  This is why you'll see divides by blockheight all over.
    */
   unsigned bw, bh;
   _mesa_get_format_block_size(format, &bw, &bh);
   assert(_mesa_get_format_bytes(mt->format) % bw == 0);
   mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
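
   /* Worked example: DXT1/BC1 has a 4x4 block that occupies 8 bytes, so
    * bw == 4 and mt->cpp = 8 / 4 = 2, i.e. two bytes for each
    * one-pixel-wide, block-height-tall column of the image.
    */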

   mt->num_samples = num_samples;
   mt->compressed = _mesa_is_format_compressed(format);
   mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
   mt->refcount = 1;

   if (num_samples > 1) {
      /* Adjust width/height/depth for MSAA */
      mt->msaa_layout = compute_msaa_layout(brw, format,
                                            mt->target, mt->disable_aux_buffers);
      if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
         /* From the Ivybridge PRM, Volume 1, Part 1, page 108:
          * "If the surface is multisampled and it is a depth or stencil
          *  surface or Multisampled Surface StorageFormat in SURFACE_STATE is
          *  MSFMT_DEPTH_STENCIL, WL and HL must be adjusted as follows before
          *  proceeding:
          *
          *  +----------------------------------------------------------------+
          *  | Num Multisamples |        W_l =         |        H_l =         |
          *  +----------------------------------------------------------------+
          *  |         2        | ceiling(W_l / 2) * 4 | H_l (no adjustment)  |
          *  |         4        | ceiling(W_l / 2) * 4 | ceiling(H_l / 2) * 4 |
          *  |         8        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 4 |
          *  |        16        | ceiling(W_l / 2) * 8 | ceiling(H_l / 2) * 8 |
          *  +----------------------------------------------------------------+
          * "
          *
          * Note that MSFMT_DEPTH_STENCIL just means the IMS (interleaved)
          * format rather than UMS/CMS (array slices).  The Sandybridge PRM,
          * Volume 1, Part 1, Page 111 has the same formula for 4x MSAA.
          *
          * Another more complicated explanation for these adjustments comes
          * from the Sandybridge PRM, volume 4, part 1, page 31:
          *
          *     "Any of the other messages (sample*, LOD, load4) used with a
          *      (4x) multisampled surface will in-effect sample a surface with
          *      double the height and width as that indicated in the surface
          *      state. Each pixel position on the original-sized surface is
          *      replaced with a 2x2 of samples with the following arrangement:
          *
          *         sample 0 sample 2
          *         sample 1 sample 3"
          *
          * Thus, when sampling from a multisampled texture, it behaves as
          * though the layout in memory for (x,y,sample) is:
          *
          *      (0,0,0) (0,0,2)   (1,0,0) (1,0,2)
          *      (0,0,1) (0,0,3)   (1,0,1) (1,0,3)
          *
          *      (0,1,0) (0,1,2)   (1,1,0) (1,1,2)
          *      (0,1,1) (0,1,3)   (1,1,1) (1,1,3)
          *
          * However, the actual layout of multisampled data in memory is:
          *
          *      (0,0,0) (1,0,0)   (0,0,1) (1,0,1)
          *      (0,1,0) (1,1,0)   (0,1,1) (1,1,1)
          *
          *      (0,0,2) (1,0,2)   (0,0,3) (1,0,3)
          *      (0,1,2) (1,1,2)   (0,1,3) (1,1,3)
          *
          * This pattern repeats for each 2x2 pixel block.
          *
          * As a result, when calculating the size of our 4-sample buffer for
          * an odd width or height, we have to align before scaling up because
          * sample 3 is in that bottom right 2x2 block.
          */
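         /* Worked example: a 5x3 4x MSAA surface becomes
          * ALIGN(5, 2) * 2 = 12 wide by ALIGN(3, 2) * 2 = 8 high below,
          * which matches the PRM formula: ceiling(5/2) * 4 = 12 and
          * ceiling(3/2) * 4 = 8.
          */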
         switch (num_samples) {
         case 2:
            assert(brw->gen >= 8);
            width0 = ALIGN(width0, 2) * 2;
            height0 = ALIGN(height0, 2);
            break;
         case 4:
            width0 = ALIGN(width0, 2) * 2;
            height0 = ALIGN(height0, 2) * 2;
            break;
         case 8:
            width0 = ALIGN(width0, 2) * 4;
            height0 = ALIGN(height0, 2) * 2;
            break;
         default:
            /* num_samples should already have been quantized to 0, 1, 2, 4, or
             * 8.
             */
            unreachable("not reached");
         }
      } else {
         /* Non-interleaved */
         depth0 *= num_samples;
      }
   }

   /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 can
    * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces on
    * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is still
    * used on Gen8 to make it pick a qpitch value which doesn't include space
    * for the mipmaps. On Gen9 this is not necessary because it will
    * automatically pick a packed qpitch value whenever mt->first_level ==
    * mt->last_level.
    * TODO: can we use it elsewhere?
    * TODO: also disable this on Gen8 and pick the qpitch value like Gen9
    */
   if (brw->gen >= 9) {
      mt->array_layout = ALL_LOD_IN_EACH_SLICE;
   } else {
      switch (mt->msaa_layout) {
      case INTEL_MSAA_LAYOUT_NONE:
      case INTEL_MSAA_LAYOUT_IMS:
         mt->array_layout = ALL_LOD_IN_EACH_SLICE;
         break;
      case INTEL_MSAA_LAYOUT_UMS:
      case INTEL_MSAA_LAYOUT_CMS:
         mt->array_layout = ALL_SLICES_AT_EACH_LOD;
         break;
      }
   }

   if (target == GL_TEXTURE_CUBE_MAP) {
      assert(depth0 == 1);
      depth0 = 6;
   }

   mt->physical_width0 = width0;
   mt->physical_height0 = height0;
   mt->physical_depth0 = depth0;

   if (!for_bo &&
       _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
       (brw->must_use_separate_stencil ||
        (brw->has_separate_stencil &&
         intel_miptree_wants_hiz_buffer(brw, mt)))) {
      const bool force_all_slices_at_each_lod = brw->gen == 6;
      mt->stencil_mt = intel_miptree_create(brw,
                                            mt->target,
                                            MESA_FORMAT_S_UINT8,
                                            mt->first_level,
                                            mt->last_level,
                                            mt->logical_width0,
                                            mt->logical_height0,
                                            mt->logical_depth0,
                                            true,
                                            num_samples,
                                            INTEL_MIPTREE_TILING_ANY,
                                            force_all_slices_at_each_lod);
      if (!mt->stencil_mt) {
         intel_miptree_release(&mt);
         return NULL;
      }

      /* Fix up the Z miptree format for how we're splitting out separate
       * stencil.  Gen7 expects there to be no stencil bits in its depth buffer.
       */
      mt->format = intel_depth_format_for_depthstencil_format(mt->format);
      mt->cpp = 4;

      if (format == mt->format) {
         _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
                       _mesa_get_format_name(mt->format));
      }
   }

   if (force_all_slices_at_each_lod)
      mt->array_layout = ALL_SLICES_AT_EACH_LOD;

   brw_miptree_layout(brw, mt);

   if (mt->disable_aux_buffers)
      assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);

   return mt;
}

/**
 * \brief Helper function for intel_miptree_create().
 */
static uint32_t
intel_miptree_choose_tiling(struct brw_context *brw,
                            mesa_format format,
                            uint32_t width0,
                            uint32_t num_samples,
                            enum intel_miptree_tiling_mode requested,
                            struct intel_mipmap_tree *mt)
{
   if (format == MESA_FORMAT_S_UINT8) {
      /* The stencil buffer is W tiled. However, we request from the kernel a
       * non-tiled buffer because the GTT is incapable of W fencing.
       */
      return I915_TILING_NONE;
   }

   /* Some usages may want only one type of tiling, like depth miptrees (Y
    * tiled), or temporary BOs for uploading data once (linear).
    */
   switch (requested) {
   case INTEL_MIPTREE_TILING_ANY:
      break;
   case INTEL_MIPTREE_TILING_Y:
      return I915_TILING_Y;
   case INTEL_MIPTREE_TILING_NONE:
      return I915_TILING_NONE;
   }

   if (num_samples > 1) {
      /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
       * Surface"):
       *
       *   [DevSNB+]: For multi-sample render targets, this field must be
       *   1. MSRTs can only be tiled.
       *
       * Our usual reason for preferring X tiling (fast blits using the
       * blitting engine) doesn't apply to MSAA, since we'll generally be
       * downsampling or upsampling when blitting between the MSAA buffer
       * and another buffer, and the blitting engine doesn't support that.
       * So use Y tiling, since it makes better use of the cache.
       */
      return I915_TILING_Y;
   }

   GLenum base_format = _mesa_get_format_base_format(format);
   if (base_format == GL_DEPTH_COMPONENT ||
       base_format == GL_DEPTH_STENCIL_EXT)
      return I915_TILING_Y;

   /* 1D textures (and 1D array textures) don't get any benefit from tiling,
    * in fact it leads to a less efficient use of memory space and bandwidth
    * due to tile alignment.
    */
   if (mt->logical_height0 == 1)
      return I915_TILING_NONE;

   int minimum_pitch = mt->total_width * mt->cpp;

   /* If the width is much smaller than a tile, don't bother tiling. */
   if (minimum_pitch < 64)
      return I915_TILING_NONE;

   if (ALIGN(minimum_pitch, 512) >= 32768 ||
       mt->total_width >= 32768 || mt->total_height >= 32768) {
      perf_debug("%dx%d miptree too large to blit, falling back to untiled",
                 mt->total_width, mt->total_height);
      return I915_TILING_NONE;
   }

   /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
   if (brw->gen < 6)
      return I915_TILING_X;

   /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
    * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
    *  or Linear."
    * 128 bits per pixel translates to 16 bytes per pixel. This is necessary
    * all the way back to 965, but is permitted on Gen7+.
    */
   if (brw->gen < 7 && mt->cpp >= 16)
      return I915_TILING_X;

   /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
    * messages), on p64, under the heading "Surface Vertical Alignment":
    *
    *     This field must be set to VALIGN_4 for all tiled Y Render Target
    *     surfaces.
    *
    * So if the surface is renderable and uses a vertical alignment of 2,
    * force it to be X tiled.  This is somewhat conservative (it's possible
    * that the client won't ever render to this surface), but it's difficult
    * to know that ahead of time.  And besides, since we use a vertical
    * alignment of 4 as often as we can, this shouldn't happen very often.
    */
   if (brw->gen == 7 && mt->align_h == 2 &&
       brw->format_supported_as_render_target[format]) {
      return I915_TILING_X;
   }

   return I915_TILING_Y | I915_TILING_X;
}


/**
 * Choose an appropriate uncompressed format for a requested
 * compressed format, if unsupported.
 */
mesa_format
intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
{
   /* No need to lower ETC formats on these platforms;
    * they are supported natively.
    */
   if (brw->gen >= 8 || brw->is_baytrail)
      return format;

   switch (format) {
   case MESA_FORMAT_ETC1_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_RGB8:
      return MESA_FORMAT_R8G8B8X8_UNORM;
   case MESA_FORMAT_ETC2_SRGB8:
   case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_B8G8R8A8_SRGB;
   case MESA_FORMAT_ETC2_RGBA8_EAC:
   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
      return MESA_FORMAT_R8G8B8A8_UNORM;
   case MESA_FORMAT_ETC2_R11_EAC:
      return MESA_FORMAT_R_UNORM16;
   case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
      return MESA_FORMAT_R_SNORM16;
   case MESA_FORMAT_ETC2_RG11_EAC:
      return MESA_FORMAT_R16G16_UNORM;
   case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
      return MESA_FORMAT_R16G16_SNORM;
   default:
      /* Non-ETC1/ETC2 format */
      return format;
   }
}


struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
                     GLenum target,
                     mesa_format format,
                     GLuint first_level,
                     GLuint last_level,
                     GLuint width0,
                     GLuint height0,
                     GLuint depth0,
                     bool expect_accelerated_upload,
                     GLuint num_samples,
                     enum intel_miptree_tiling_mode requested_tiling,
                     bool force_all_slices_at_each_lod)
{
   struct intel_mipmap_tree *mt;
   mesa_format tex_format = format;
   mesa_format etc_format = MESA_FORMAT_NONE;
   GLuint total_width, total_height;

   format = intel_lower_compressed_format(brw, format);

   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;

   mt = intel_miptree_create_layout(brw, target, format,
                                    first_level, last_level, width0,
                                    height0, depth0,
                                    false, num_samples,
                                    force_all_slices_at_each_lod,
                                    false /*disable_aux_buffers*/);
   /*
    * pitch == 0 || height == 0  indicates the null texture
    */
   if (!mt || !mt->total_width || !mt->total_height) {
      intel_miptree_release(&mt);
      return NULL;
   }

   total_width = mt->total_width;
   total_height = mt->total_height;

   if (format == MESA_FORMAT_S_UINT8) {
      /* Align to size of W tile, 64x64. */
      total_width = ALIGN(total_width, 64);
      total_height = ALIGN(total_height, 64);
   }

   uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
                                                 num_samples, requested_tiling,
                                                 mt);
   bool y_or_x = false;

   if (tiling == (I915_TILING_Y | I915_TILING_X)) {
      y_or_x = true;
      mt->tiling = I915_TILING_Y;
   } else {
      mt->tiling = tiling;
   }

   unsigned long pitch;
   mt->etc_format = etc_format;
   mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
                                     total_width, total_height, mt->cpp,
                                     &mt->tiling, &pitch,
                                     (expect_accelerated_upload ?
                                      BO_ALLOC_FOR_RENDER : 0));
   mt->pitch = pitch;

   /* If the BO is too large to fit in the aperture, we need to use the
    * BLT engine to support it.  Prior to Sandybridge, the BLT paths can't
    * handle Y-tiling, so we need to fall back to X.
    */
   if (brw->gen < 6 && y_or_x && mt->bo->size >= brw->max_gtt_map_object_size) {
      perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
                 mt->total_width, mt->total_height);

      mt->tiling = I915_TILING_X;
      drm_intel_bo_unreference(mt->bo);
      mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
                                        total_width, total_height, mt->cpp,
                                        &mt->tiling, &pitch,
                                        (expect_accelerated_upload ?
                                         BO_ALLOC_FOR_RENDER : 0));
      mt->pitch = pitch;
   }

   mt->offset = 0;

   if (!mt->bo) {
      intel_miptree_release(&mt);
      return NULL;
   }

   if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
      if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   /* If this miptree is capable of supporting fast color clears, set
    * fast_clear_state appropriately to ensure that fast clears will occur.
    * Allocation of the MCS miptree will be deferred until the first fast
    * clear actually occurs.
    */
   if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
      mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;

   return mt;
}

struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
                            drm_intel_bo *bo,
                            mesa_format format,
                            uint32_t offset,
                            uint32_t width,
                            uint32_t height,
                            uint32_t depth,
                            int pitch,
                            bool disable_aux_buffers)
{
   struct intel_mipmap_tree *mt;
   uint32_t tiling, swizzle;
   GLenum target;

   drm_intel_bo_get_tiling(bo, &tiling, &swizzle);

   /* Nothing will be able to use this miptree with the BO if the offset isn't
    * aligned.
    */
   if (tiling != I915_TILING_NONE)
      assert(offset % 4096 == 0);

   /* miptrees can't handle negative pitch.  If you need flipping of images,
    * that's outside of the scope of the mt.
    */
   assert(pitch >= 0);

   target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;

   mt = intel_miptree_create_layout(brw, target, format,
                                    0, 0,
                                    width, height, depth,
                                    true, 0, false,
                                    disable_aux_buffers);
   if (!mt)
      return NULL;

   drm_intel_bo_reference(bo);
   mt->bo = bo;
   mt->pitch = pitch;
   mt->offset = offset;
   mt->tiling = tiling;

   return mt;
}

/**
 * For a singlesample renderbuffer, this simply wraps the given BO with a
 * miptree.
 *
 * For a multisample renderbuffer, this wraps the window system's
 * (singlesample) BO with a singlesample miptree attached to the
 * intel_renderbuffer, then creates a multisample miptree attached to irb->mt
 * that will contain the actual rendering (which is lazily resolved to
 * irb->singlesample_mt).
 */
void
intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
                                         struct intel_renderbuffer *irb,
                                         drm_intel_bo *bo,
                                         uint32_t width, uint32_t height,
                                         uint32_t pitch)
{
   struct intel_mipmap_tree *singlesample_mt = NULL;
   struct intel_mipmap_tree *multisample_mt = NULL;
   struct gl_renderbuffer *rb = &irb->Base.Base;
   mesa_format format = rb->Format;
   int num_samples = rb->NumSamples;

   /* Only the front and back buffers, which are color buffers, are allocated
    * through the image loader.
    */
   assert(_mesa_get_format_base_format(format) == GL_RGB ||
          _mesa_get_format_base_format(format) == GL_RGBA);

   singlesample_mt = intel_miptree_create_for_bo(intel,
                                                 bo,
                                                 format,
                                                 0,
                                                 width,
                                                 height,
                                                 1,
                                                 pitch,
                                                 false);
   if (!singlesample_mt)
      goto fail;

   /* If this miptree is capable of supporting fast color clears, set
    * mcs_state appropriately to ensure that fast clears will occur.
    * Allocation of the MCS miptree will be deferred until the first fast
    * clear actually occurs.
    */
   if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
      singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;

   if (num_samples == 0) {
      intel_miptree_release(&irb->mt);
      irb->mt = singlesample_mt;

      assert(!irb->singlesample_mt);
   } else {
      intel_miptree_release(&irb->singlesample_mt);
      irb->singlesample_mt = singlesample_mt;

      if (!irb->mt ||
          irb->mt->logical_width0 != width ||
          irb->mt->logical_height0 != height) {
         multisample_mt = intel_miptree_create_for_renderbuffer(intel,
                                                                format,
                                                                width,
                                                                height,
                                                                num_samples);
         if (!multisample_mt)
            goto fail;

         irb->need_downsample = false;
         intel_miptree_release(&irb->mt);
         irb->mt = multisample_mt;
      }
   }
   return;

fail:
   intel_miptree_release(&irb->singlesample_mt);
   intel_miptree_release(&irb->mt);
   return;
}

struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
                                      mesa_format format,
                                      uint32_t width,
                                      uint32_t height,
                                      uint32_t num_samples)
{
   struct intel_mipmap_tree *mt;
   uint32_t depth = 1;
   bool ok;
   GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;

   mt = intel_miptree_create(brw, target, format, 0, 0,
                             width, height, depth, true, num_samples,
                             INTEL_MIPTREE_TILING_ANY, false);
   if (!mt)
      goto fail;

   if (intel_miptree_wants_hiz_buffer(brw, mt)) {
      ok = intel_miptree_alloc_hiz(brw, mt);
      if (!ok)
         goto fail;
   }

   return mt;

fail:
   intel_miptree_release(&mt);
   return NULL;
}

void
intel_miptree_reference(struct intel_mipmap_tree **dst,
                        struct intel_mipmap_tree *src)
{
   if (*dst == src)
      return;

   intel_miptree_release(dst);

   if (src) {
      src->refcount++;
      DBG("%s %p refcount now %d\n", __func__, src, src->refcount);
   }

   *dst = src;
}


void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
   if (!*mt)
      return;

   DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1);
   if (--(*mt)->refcount <= 0) {
      GLuint i;

      DBG("%s deleting %p\n", __func__, *mt);

      drm_intel_bo_unreference((*mt)->bo);
      intel_miptree_release(&(*mt)->stencil_mt);
      if ((*mt)->hiz_buf) {
         if ((*mt)->hiz_buf->mt)
            intel_miptree_release(&(*mt)->hiz_buf->mt);
         else
            drm_intel_bo_unreference((*mt)->hiz_buf->bo);
         free((*mt)->hiz_buf);
      }
      intel_miptree_release(&(*mt)->mcs_mt);
      intel_resolve_map_clear(&(*mt)->hiz_map);

      for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
         free((*mt)->level[i].slice);
      }

      free(*mt);
   }
   *mt = NULL;
}

void
intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
                                       int *width, int *height, int *depth)
{
   switch (image->TexObject->Target) {
   case GL_TEXTURE_1D_ARRAY:
      *width = image->Width;
      *height = 1;
      *depth = image->Height;
      break;
   default:
      *width = image->Width;
      *height = image->Height;
      *depth = image->Depth;
      break;
   }
}

/**
 * Can the image be pulled into a unified mipmap tree?  This mirrors
 * the completeness test in a lot of ways.
 *
 * Not sure whether I want to pass gl_texture_image here.
 */
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
                          struct gl_texture_image *image)
{
   struct intel_texture_image *intelImage = intel_texture_image(image);
   GLuint level = intelImage->base.Base.Level;
   int width, height, depth;

   /* glTexImage* choose the texture object based on the target passed in, and
    * objects can't change targets over their lifetimes, so this should be
    * true.
    */
   assert(image->TexObject->Target == mt->target);

   mesa_format mt_format = mt->format;
   if (mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z24_UNORM_S8_UINT;
   if (mt->format == MESA_FORMAT_Z_FLOAT32 && mt->stencil_mt)
      mt_format = MESA_FORMAT_Z32_FLOAT_S8X24_UINT;
   if (mt->etc_format != MESA_FORMAT_NONE)
      mt_format = mt->etc_format;

   if (image->TexFormat != mt_format)
      return false;

   intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);

   if (mt->target == GL_TEXTURE_CUBE_MAP)
      depth = 6;

   int level_depth = mt->level[level].depth;
   if (mt->num_samples > 1) {
      switch (mt->msaa_layout) {
      case INTEL_MSAA_LAYOUT_NONE:
      case INTEL_MSAA_LAYOUT_IMS:
         break;
      case INTEL_MSAA_LAYOUT_UMS:
      case INTEL_MSAA_LAYOUT_CMS:
         level_depth /= mt->num_samples;
         break;
      }
   }

   /* Test image dimensions against the base level image adjusted for
    * minification.  This will also catch images not present in the
    * tree, changed targets, etc.
    */
   if (width != minify(mt->logical_width0, level - mt->first_level) ||
       height != minify(mt->logical_height0, level - mt->first_level) ||
       depth != level_depth) {
      return false;
   }

   if (image->NumSamples != mt->num_samples)
      return false;

   return true;
}


void
intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
                             GLuint level,
                             GLuint x, GLuint y, GLuint d)
{
   mt->level[level].depth = d;
   mt->level[level].level_x = x;
   mt->level[level].level_y = y;

   DBG("%s level %d, depth %d, offset %d,%d\n", __func__,
       level, d, x, y);

   assert(mt->level[level].slice == NULL);

   mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
   mt->level[level].slice[0].x_offset = mt->level[level].level_x;
   mt->level[level].slice[0].y_offset = mt->level[level].level_y;
}


void
intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
                               GLuint level, GLuint img,
                               GLuint x, GLuint y)
{
   if (img == 0 && level == 0)
      assert(x == 0 && y == 0);

   assert(img < mt->level[level].depth);

   mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
   mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;

   DBG("%s level %d img %d pos %d,%d\n",
       __func__, level, img,
       mt->level[level].slice[img].x_offset,
       mt->level[level].slice[img].y_offset);
}

void
intel_miptree_get_image_offset(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               GLuint *x, GLuint *y)
{
   assert(slice < mt->level[level].depth);

   *x = mt->level[level].slice[slice].x_offset;
   *y = mt->level[level].slice[slice].y_offset;
}

/**
 * This function computes masks that may be used to select the bits of the X
 * and Y coordinates that indicate the offset within a tile.  If the BO is
 * untiled, the masks are set to 0.
 */
void
intel_miptree_get_tile_masks(const struct intel_mipmap_tree *mt,
                             uint32_t *mask_x, uint32_t *mask_y,
                             bool map_stencil_as_y_tiled)
{
   int cpp = mt->cpp;
   uint32_t tiling = mt->tiling;

   if (map_stencil_as_y_tiled)
      tiling = I915_TILING_Y;

   switch (tiling) {
   default:
      unreachable("not reached");
   case I915_TILING_NONE:
      *mask_x = *mask_y = 0;
      break;
   case I915_TILING_X:
      *mask_x = 512 / cpp - 1;
      *mask_y = 7;
      break;
   case I915_TILING_Y:
      *mask_x = 128 / cpp - 1;
      *mask_y = 31;
      break;
   }
}
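
/* Example for intel_miptree_get_tile_masks(): a Y-tiled surface with
 * cpp == 4 yields *mask_x = 128 / 4 - 1 = 31 and *mask_y = 31, since a Y
 * tile is 128 bytes (32 pixels at 4 bpp) wide and 32 rows high; the low
 * five bits of each coordinate therefore address within the tile.
 */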

/**
 * Compute the offset (in bytes) from the start of the BO to the given x
 * and y coordinate.  For tiled BOs, caller must ensure that x and y are
 * multiples of the tile size.
 */
uint32_t
intel_miptree_get_aligned_offset(const struct intel_mipmap_tree *mt,
                                 uint32_t x, uint32_t y,
                                 bool map_stencil_as_y_tiled)
{
   int cpp = mt->cpp;
   uint32_t pitch = mt->pitch;
   uint32_t tiling = mt->tiling;

   if (map_stencil_as_y_tiled) {
      tiling = I915_TILING_Y;

      /* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile
       * gets transformed into a 32-high Y-tile.  Accordingly, the pitch of
       * the resulting surface is twice the pitch of the original miptree,
       * since each row in the Y-tiled view corresponds to two rows in the
       * actual W-tiled surface.  So we need to correct the pitch before
       * computing the offsets.
       */
      pitch *= 2;
   }

   switch (tiling) {
   default:
      unreachable("not reached");
   case I915_TILING_NONE:
      return y * pitch + x * cpp;
   case I915_TILING_X:
      assert((x % (512 / cpp)) == 0);
      assert((y % 8) == 0);
      return y * pitch + x / (512 / cpp) * 4096;
   case I915_TILING_Y:
      assert((x % (128 / cpp)) == 0);
      assert((y % 32) == 0);
      return y * pitch + x / (128 / cpp) * 4096;
   }
}
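
/* Example for intel_miptree_get_aligned_offset(): with X tiling and
 * cpp == 4, a tile is 512 bytes (128 pixels) wide and 8 rows high, so
 * (x = 256, y = 16) maps to 16 * pitch + 256 / 128 * 4096
 * = 16 * pitch + 8192 bytes.
 */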

/**
 * Rendering with tiled buffers requires that the base address of the buffer
 * be aligned to a page boundary.  For renderbuffers, and sometimes with
 * textures, we may want the surface to point at a texture image level that
 * isn't at a page boundary.
 *
 * This function returns an appropriately-aligned base offset
 * according to the tiling restrictions, plus any required x/y offset
 * from there.
 */
uint32_t
intel_miptree_get_tile_offsets(const struct intel_mipmap_tree *mt,
                               GLuint level, GLuint slice,
                               uint32_t *tile_x,
                               uint32_t *tile_y)
{
   uint32_t x, y;
   uint32_t mask_x, mask_y;

   intel_miptree_get_tile_masks(mt, &mask_x, &mask_y, false);
   intel_miptree_get_image_offset(mt, level, slice, &x, &y);

   *tile_x = x & mask_x;
   *tile_y = y & mask_y;

   return intel_miptree_get_aligned_offset(mt, x & ~mask_x, y & ~mask_y, false);
}
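
/* Example for intel_miptree_get_tile_offsets(): for a Y-tiled surface with
 * cpp == 4 (masks 31/31), a slice at image offset (40, 70) yields
 * *tile_x = 40 & 31 = 8 and *tile_y = 70 & 31 = 6, with the page-aligned
 * base offset computed at (32, 64).
 */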

static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
                            struct intel_mipmap_tree *dst_mt,
                            struct intel_mipmap_tree *src_mt,
                            int level,
                            int slice,
                            int width,
                            int height)
{
   void *src, *dst;
   ptrdiff_t src_stride, dst_stride;
   int cpp = dst_mt->cpp;

   intel_miptree_map(brw, src_mt,
                     level, slice,
                     0, 0,
                     width, height,
                     GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
                     &src, &src_stride);

   intel_miptree_map(brw, dst_mt,
                     level, slice,
                     0, 0,
                     width, height,
                     GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
                     BRW_MAP_DIRECT_BIT,
                     &dst, &dst_stride);

   DBG("sw blit %s mt %p %p/%"PRIdPTR" -> %s mt %p %p/%"PRIdPTR" (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src, src_stride,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst, dst_stride,
       width, height);

   int row_size = cpp * width;
   if (src_stride == row_size &&
       dst_stride == row_size) {
      memcpy(dst, src, row_size * height);
   } else {
      for (int i = 0; i < height; i++) {
         memcpy(dst, src, row_size);
         dst += dst_stride;
         src += src_stride;
      }
   }

   intel_miptree_unmap(brw, dst_mt, level, slice);
   intel_miptree_unmap(brw, src_mt, level, slice);

   /* Don't forget to copy the stencil data over, too.  We could have skipped
    * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
    * shuffling the two data sources in/out of temporary storage instead of
    * the direct mapping we get this way.
    */
   if (dst_mt->stencil_mt) {
      assert(src_mt->stencil_mt);
      intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
                                  level, slice, width, height);
   }
}

static void
intel_miptree_copy_slice(struct brw_context *brw,
                         struct intel_mipmap_tree *dst_mt,
                         struct intel_mipmap_tree *src_mt,
                         int level,
                         int face,
                         int depth)
{
   mesa_format format = src_mt->format;
   uint32_t width = minify(src_mt->physical_width0, level - src_mt->first_level);
   uint32_t height = minify(src_mt->physical_height0, level - src_mt->first_level);
   int slice;

   if (face > 0)
      slice = face;
   else
      slice = depth;

   assert(depth < src_mt->level[level].depth);
   assert(src_mt->format == dst_mt->format);

   if (dst_mt->compressed) {
      height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
      width = ALIGN(width, dst_mt->align_w);
   }

   /* If it's a packed depth/stencil buffer with separate stencil, the blit
    * below won't apply since we can't do the depth's Y tiling or the
    * stencil's W tiling in the blitter.
    */
   if (src_mt->stencil_mt) {
      intel_miptree_copy_slice_sw(brw,
                                  dst_mt, src_mt,
                                  level, slice,
                                  width, height);
      return;
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->pitch,
       width, height);

   if (!intel_miptree_blit(brw,
                           src_mt, level, slice, 0, 0, false,
                           dst_mt, level, slice, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
                                  width, height);
   }
}

/**
 * Copies the image's current data to the given miptree, and associates that
 * miptree with the image.
 *
 * If \c invalidate is true, then the actual image data does not need to be
 * copied, but the image still needs to be associated to the new miptree (this
 * is set to true if we're about to clear the image).
 */
void
intel_miptree_copy_teximage(struct brw_context *brw,
                            struct intel_texture_image *intelImage,
                            struct intel_mipmap_tree *dst_mt,
                            bool invalidate)
{
   struct intel_mipmap_tree *src_mt = intelImage->mt;
   struct intel_texture_object *intel_obj =
      intel_texture_object(intelImage->base.Base.TexObject);
   int level = intelImage->base.Base.Level;
   int face = intelImage->base.Base.Face;

   GLuint depth;
   if (intel_obj->base.Target == GL_TEXTURE_1D_ARRAY)
      depth = intelImage->base.Base.Height;
   else
      depth = intelImage->base.Base.Depth;

   if (!invalidate) {
      for (int slice = 0; slice < depth; slice++) {
         intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
      }
   }

   intel_miptree_reference(&intelImage->mt, dst_mt);
   intel_obj->needs_validate = true;
}

  1336. static bool
  1337. intel_miptree_alloc_mcs(struct brw_context *brw,
  1338.                         struct intel_mipmap_tree *mt,
  1339.                         GLuint num_samples)
  1340. {
  1341.    assert(brw->gen >= 7); /* MCS only used on Gen7+ */
  1342.    assert(mt->mcs_mt == NULL);
  1343.    assert(!mt->disable_aux_buffers);
  1344.  
  1345.    /* Choose the correct format for the MCS buffer.  All that really matters
  1346.     * is that we allocate the right buffer size, since we'll always be
  1347.     * accessing this miptree using MCS-specific hardware mechanisms, which
  1348.     * infer the correct format based on num_samples.
  1349.     */
  1350.    mesa_format format;
  1351.    switch (num_samples) {
  1352.    case 2:
  1353.    case 4:
  1354.       /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
  1355.        * each sample).
  1356.        */
  1357.       format = MESA_FORMAT_R_UNORM8;
  1358.       break;
  1359.    case 8:
  1360.       /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
  1361.        * for each sample, plus 8 padding bits).
  1362.        */
  1363.       format = MESA_FORMAT_R_UINT32;
  1364.       break;
  1365.    default:
  1366.       unreachable("Unrecognized sample count in intel_miptree_alloc_mcs");
  1367.    };
  1368.  
  1369.    /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
  1370.     *
  1371.     *     "The MCS surface must be stored as Tile Y."
  1372.     */
  1373.    mt->mcs_mt = intel_miptree_create(brw,
  1374.                                      mt->target,
  1375.                                      format,
  1376.                                      mt->first_level,
  1377.                                      mt->last_level,
  1378.                                      mt->logical_width0,
  1379.                                      mt->logical_height0,
  1380.                                      mt->logical_depth0,
  1381.                                      true,
  1382.                                      0 /* num_samples */,
  1383.                                      INTEL_MIPTREE_TILING_Y,
  1384.                                      false);
  1385.  
  1386.    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
  1387.     *
  1388.     *     When MCS buffer is enabled and bound to MSRT, it is required that it
  1389.     *     is cleared prior to any rendering.
  1390.     *
  1391.     * Since we don't use the MCS buffer for any purpose other than rendering,
  1392.     * it makes sense to just clear it immediately upon allocation.
  1393.     *
  1394.     * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
  1395.     */
  1396.    void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
  1397.    memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch);
  1398.    intel_miptree_unmap_raw(brw, mt->mcs_mt);
  1399.    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR;
  1400.  
   return mt->mcs_mt != NULL;
  1402. }
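
/* Worked example (editor's note): for 8x MSAA each pixel needs
 * 8 samples * 3 bits = 24 bits of MCS data, padded to 32 bits, which is why
 * MESA_FORMAT_R_UINT32 is chosen above.  A 256x256 8x color buffer therefore
 * gets a 256x256 R32 MCS miptree: 256 * 4 = 1024 bytes per row before tiling
 * alignment.  For 4x, 4 samples * 2 bits = 8 bits fit exactly in R8.
 */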
  1403.  
  1404.  
  1405. bool
  1406. intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
  1407.                                  struct intel_mipmap_tree *mt)
  1408. {
  1409.    assert(mt->mcs_mt == NULL);
  1410.    assert(!mt->disable_aux_buffers);
  1411.  
  1412.    /* The format of the MCS buffer is opaque to the driver; all that matters
  1413.     * is that we get its size and pitch right.  We'll pretend that the format
  1414.     * is R32.  Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
  1415.     * R32 buffer is 32 pixels across, we'll need to scale the width down by
  1416.     * the block width and then a further factor of 4.  Since an MCS tile
  1417.     * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
  1418.     * we'll need to scale the height down by the block height and then a
  1419.     * further factor of 8.
  1420.     */
  1421.    const mesa_format format = MESA_FORMAT_R_UINT32;
  1422.    unsigned block_width_px;
  1423.    unsigned block_height;
  1424.    intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
  1425.    unsigned width_divisor = block_width_px * 4;
  1426.    unsigned height_divisor = block_height * 8;
  1427.    unsigned mcs_width =
  1428.       ALIGN(mt->logical_width0, width_divisor) / width_divisor;
  1429.    unsigned mcs_height =
  1430.       ALIGN(mt->logical_height0, height_divisor) / height_divisor;
  1431.    assert(mt->logical_depth0 == 1);
  1432.    mt->mcs_mt = intel_miptree_create(brw,
  1433.                                      mt->target,
  1434.                                      format,
  1435.                                      mt->first_level,
  1436.                                      mt->last_level,
  1437.                                      mcs_width,
  1438.                                      mcs_height,
  1439.                                      mt->logical_depth0,
  1440.                                      true,
  1441.                                      0 /* num_samples */,
  1442.                                      INTEL_MIPTREE_TILING_Y,
  1443.                                      false);
  1444.  
   return mt->mcs_mt != NULL;
  1446. }
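
/* Worked example (editor's note; block dimensions assumed for illustration):
 * if the alignment helper reports an 8x4 block for a Y-tiled
 * 4-byte-per-pixel surface, then:
 *
 *    width_divisor  = 8 * 4 = 32
 *    height_divisor = 4 * 8 = 32
 *
 * and a 1920x1080 surface gets an MCS of
 * ALIGN(1920, 32) / 32 = 60 by ALIGN(1080, 32) / 32 = 34 R32 "pixels".
 */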
  1447.  
  1448.  
  1449. /**
  1450.  * Helper for intel_miptree_alloc_hiz() that sets
  1451.  * \c mt->level[level].has_hiz. Return true if and only if
  1452.  * \c has_hiz was set.
  1453.  */
  1454. static bool
  1455. intel_miptree_level_enable_hiz(struct brw_context *brw,
  1456.                                struct intel_mipmap_tree *mt,
  1457.                                uint32_t level)
  1458. {
  1459.    assert(mt->hiz_buf);
  1460.  
  1461.    if (brw->gen >= 8 || brw->is_haswell) {
  1462.       uint32_t width = minify(mt->physical_width0, level);
  1463.       uint32_t height = minify(mt->physical_height0, level);
  1464.  
  1465.       /* Disable HiZ for LOD > 0 unless the width is 8 aligned
  1466.        * and the height is 4 aligned. This allows our HiZ support
  1467.        * to fulfill Haswell restrictions for HiZ ops. For LOD == 0,
  1468.        * we can grow the width & height to allow the HiZ op to
  1469.        * force the proper size alignments.
  1470.        */
  1471.       if (level > 0 && ((width & 7) || (height & 3))) {
  1472.          DBG("mt %p level %d: HiZ DISABLED\n", mt, level);
  1473.          return false;
  1474.       }
  1475.    }
  1476.  
  1477.    DBG("mt %p level %d: HiZ enabled\n", mt, level);
  1478.    mt->level[level].has_hiz = true;
  1479.    return true;
  1480. }
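
/* Worked example (editor's note): with a 100x100 base level, level 1 is
 * 50x50.  Since 50 & 7 != 0, the width check above fails and HiZ is disabled
 * for that LOD on Gen8 and Haswell, while level 0 stays eligible because its
 * dimensions may be grown to meet the alignment.
 */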
  1481.  
  1482.  
  1483. /**
  1484.  * Helper for intel_miptree_alloc_hiz() that determines the required hiz
  1485.  * buffer dimensions and allocates a bo for the hiz buffer.
  1486.  */
  1487. static struct intel_miptree_aux_buffer *
  1488. intel_gen7_hiz_buf_create(struct brw_context *brw,
  1489.                           struct intel_mipmap_tree *mt)
  1490. {
  1491.    unsigned z_width = mt->logical_width0;
  1492.    unsigned z_height = mt->logical_height0;
  1493.    const unsigned z_depth = MAX2(mt->logical_depth0, 1);
  1494.    unsigned hz_width, hz_height;
   struct intel_miptree_aux_buffer *buf = calloc(1, sizeof(*buf));
  1496.  
  1497.    if (!buf)
  1498.       return NULL;
  1499.  
  1500.    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
  1501.     * adjustments required for Z_Height and Z_Width based on multisampling.
  1502.     */
  1503.    switch (mt->num_samples) {
  1504.    case 0:
  1505.    case 1:
  1506.       break;
  1507.    case 2:
  1508.    case 4:
  1509.       z_width *= 2;
  1510.       z_height *= 2;
  1511.       break;
  1512.    case 8:
  1513.       z_width *= 4;
  1514.       z_height *= 2;
  1515.       break;
  1516.    default:
  1517.       unreachable("unsupported sample count");
  1518.    }
  1519.  
  1520.    const unsigned vertical_align = 8; /* 'j' in the docs */
  1521.    const unsigned H0 = z_height;
  1522.    const unsigned h0 = ALIGN(H0, vertical_align);
  1523.    const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
  1524.    const unsigned Z0 = z_depth;
  1525.  
  1526.    /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
  1527.    hz_width = ALIGN(z_width, 16);
  1528.  
  1529.    if (mt->target == GL_TEXTURE_3D) {
  1530.       unsigned H_i = H0;
  1531.       unsigned Z_i = Z0;
  1532.       hz_height = 0;
  1533.       for (int level = mt->first_level; level <= mt->last_level; ++level) {
  1534.          unsigned h_i = ALIGN(H_i, vertical_align);
  1535.          /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
  1536.          hz_height += h_i * Z_i;
  1537.          H_i = minify(H_i, 1);
  1538.          Z_i = minify(Z_i, 1);
  1539.       }
  1540.       /* HZ_Height =
  1541.        *    (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i)))
  1542.        */
  1543.       hz_height = DIV_ROUND_UP(hz_height, 2);
  1544.    } else {
  1545.       const unsigned hz_qpitch = h0 + h1 + (12 * vertical_align);
  1546.       if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
  1547.           mt->target == GL_TEXTURE_CUBE_MAP) {
  1548.          /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth * 6/2) /8 ) * 8 */
  1549.          hz_height = DIV_ROUND_UP(hz_qpitch * Z0 * 6, 2 * 8) * 8;
  1550.       } else {
  1551.          /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
  1552.          hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
  1553.       }
  1554.    }
  1555.  
  1556.    unsigned long pitch;
  1557.    uint32_t tiling = I915_TILING_Y;
  1558.    buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
  1559.                                       hz_width, hz_height, 1,
  1560.                                       &tiling, &pitch,
  1561.                                       BO_ALLOC_FOR_RENDER);
  1562.    if (!buf->bo) {
  1563.       free(buf);
  1564.       return NULL;
  1565.    } else if (tiling != I915_TILING_Y) {
  1566.       drm_intel_bo_unreference(buf->bo);
  1567.       free(buf);
  1568.       return NULL;
  1569.    }
  1570.  
  1571.    buf->pitch = pitch;
  1572.  
  1573.    return buf;
  1574. }
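
/* Worked example (editor's note): a single-sampled 256x256 2D depth buffer
 * with a full mip chain has H0 = 256, so h0 = 256, h1 = 128 and
 * hz_qpitch = 256 + 128 + 12 * 8 = 480.  With Z_Depth = 1 this gives
 * HZ_Height = ceil(480 / 16) * 8 = 240 rows, and
 * HZ_Width = ALIGN(256, 16) = 256.
 */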
  1575.  
  1576.  
  1577. /**
  1578.  * Helper for intel_miptree_alloc_hiz() that determines the required hiz
  1579.  * buffer dimensions and allocates a bo for the hiz buffer.
  1580.  */
  1581. static struct intel_miptree_aux_buffer *
  1582. intel_gen8_hiz_buf_create(struct brw_context *brw,
  1583.                           struct intel_mipmap_tree *mt)
  1584. {
  1585.    unsigned z_width = mt->logical_width0;
  1586.    unsigned z_height = mt->logical_height0;
  1587.    const unsigned z_depth = MAX2(mt->logical_depth0, 1);
  1588.    unsigned hz_width, hz_height;
   struct intel_miptree_aux_buffer *buf = calloc(1, sizeof(*buf));
  1590.  
  1591.    if (!buf)
  1592.       return NULL;
  1593.  
  1594.    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
  1595.     * adjustments required for Z_Height and Z_Width based on multisampling.
  1596.     */
  1597.    switch (mt->num_samples) {
  1598.    case 0:
  1599.    case 1:
  1600.       break;
  1601.    case 2:
  1602.    case 4:
  1603.       z_width *= 2;
  1604.       z_height *= 2;
  1605.       break;
  1606.    case 8:
  1607.       z_width *= 4;
  1608.       z_height *= 2;
  1609.       break;
  1610.    default:
  1611.       unreachable("unsupported sample count");
  1612.    }
  1613.  
  1614.    const unsigned vertical_align = 8; /* 'j' in the docs */
  1615.    const unsigned H0 = z_height;
  1616.    const unsigned h0 = ALIGN(H0, vertical_align);
  1617.    const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
  1618.    const unsigned Z0 = z_depth;
  1619.  
  1620.    /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
  1621.    hz_width = ALIGN(z_width, 16);
  1622.  
  1623.    unsigned H_i = H0;
  1624.    unsigned Z_i = Z0;
  1625.    unsigned sum_h_i = 0;
  1626.    unsigned hz_height_3d_sum = 0;
  1627.    for (int level = mt->first_level; level <= mt->last_level; ++level) {
  1628.       unsigned i = level - mt->first_level;
  1629.       unsigned h_i = ALIGN(H_i, vertical_align);
  1630.       /* sum(i=2 to m; h_i) */
  1631.       if (i >= 2) {
  1632.          sum_h_i += h_i;
  1633.       }
  1634.       /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
  1635.       hz_height_3d_sum += h_i * Z_i;
  1636.       H_i = minify(H_i, 1);
  1637.       Z_i = minify(Z_i, 1);
  1638.    }
  1639.    /* HZ_QPitch = h0 + max(h1, sum(i=2 to m; h_i)) */
  1640.    buf->qpitch = h0 + MAX2(h1, sum_h_i);
  1641.  
  1642.    if (mt->target == GL_TEXTURE_3D) {
  1643.       /* (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
  1644.       hz_height = DIV_ROUND_UP(hz_height_3d_sum, 2);
  1645.    } else {
  1646.       /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * Z_Depth */
  1647.       hz_height = DIV_ROUND_UP(buf->qpitch, 2 * 8) * 8 * Z0;
  1648.       if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
  1649.           mt->target == GL_TEXTURE_CUBE_MAP) {
  1650.          /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * 6 * Z_Depth
  1651.           *
          * We can just take our hz_height calculation from above, and
  1653.           * multiply by 6 for the cube map and cube map array types.
  1654.           */
  1655.          hz_height *= 6;
  1656.       }
  1657.    }
  1658.  
  1659.    unsigned long pitch;
  1660.    uint32_t tiling = I915_TILING_Y;
  1661.    buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
  1662.                                       hz_width, hz_height, 1,
  1663.                                       &tiling, &pitch,
  1664.                                       BO_ALLOC_FOR_RENDER);
  1665.    if (!buf->bo) {
  1666.       free(buf);
  1667.       return NULL;
  1668.    } else if (tiling != I915_TILING_Y) {
  1669.       drm_intel_bo_unreference(buf->bo);
  1670.       free(buf);
  1671.       return NULL;
  1672.    }
  1673.  
  1674.    buf->pitch = pitch;
  1675.  
  1676.    return buf;
  1677. }
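
/* Worked example (editor's note): for the same 256x256 full mip chain, the
 * aligned level heights are 256, 128, 64, 32, 16, 8, 8, 8, 8, so
 * sum(i=2 to m; h_i) = 144 and HZ_QPitch = 256 + MAX2(128, 144) = 400.
 * For a non-array 2D target, HZ_Height = ceil((400 / 2) / 8) * 8 * 1
 * = 200 rows.
 */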
  1678.  
  1679.  
  1680. static struct intel_miptree_aux_buffer *
  1681. intel_hiz_miptree_buf_create(struct brw_context *brw,
  1682.                              struct intel_mipmap_tree *mt)
  1683. {
   struct intel_miptree_aux_buffer *buf = calloc(1, sizeof(*buf));
  1685.    const bool force_all_slices_at_each_lod = brw->gen == 6;
  1686.  
  1687.    if (!buf)
  1688.       return NULL;
  1689.  
  1690.    buf->mt = intel_miptree_create(brw,
  1691.                                   mt->target,
  1692.                                   mt->format,
  1693.                                   mt->first_level,
  1694.                                   mt->last_level,
  1695.                                   mt->logical_width0,
  1696.                                   mt->logical_height0,
  1697.                                   mt->logical_depth0,
  1698.                                   true,
  1699.                                   mt->num_samples,
  1700.                                   INTEL_MIPTREE_TILING_ANY,
  1701.                                   force_all_slices_at_each_lod);
  1702.    if (!buf->mt) {
  1703.       free(buf);
  1704.       return NULL;
  1705.    }
  1706.  
  1707.    buf->bo = buf->mt->bo;
  1708.    buf->pitch = buf->mt->pitch;
  1709.    buf->qpitch = buf->mt->qpitch;
  1710.  
  1711.    return buf;
  1712. }
  1713.  
  1714. bool
  1715. intel_miptree_wants_hiz_buffer(struct brw_context *brw,
  1716.                                struct intel_mipmap_tree *mt)
  1717. {
  1718.    if (!brw->has_hiz)
  1719.       return false;
  1720.  
  1721.    if (mt->hiz_buf != NULL)
  1722.       return false;
  1723.  
  1724.    if (mt->disable_aux_buffers)
  1725.       return false;
  1726.  
  1727.    switch (mt->format) {
  1728.    case MESA_FORMAT_Z_FLOAT32:
  1729.    case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
  1730.    case MESA_FORMAT_Z24_UNORM_X8_UINT:
  1731.    case MESA_FORMAT_Z24_UNORM_S8_UINT:
  1732.    case MESA_FORMAT_Z_UNORM16:
  1733.       return true;
  1734.    default:
  1735.       return false;
  1736.    }
  1737. }
  1738.  
  1739. bool
  1740. intel_miptree_alloc_hiz(struct brw_context *brw,
  1741.                         struct intel_mipmap_tree *mt)
  1742. {
  1743.    assert(mt->hiz_buf == NULL);
  1744.    assert(!mt->disable_aux_buffers);
  1745.  
  1746.    if (brw->gen == 7) {
  1747.       mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt);
  1748.    } else if (brw->gen >= 8) {
  1749.       mt->hiz_buf = intel_gen8_hiz_buf_create(brw, mt);
  1750.    } else {
  1751.       mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);
  1752.    }
  1753.  
  1754.    if (!mt->hiz_buf)
  1755.       return false;
  1756.  
  1757.    /* Mark that all slices need a HiZ resolve. */
  1758.    for (int level = mt->first_level; level <= mt->last_level; ++level) {
  1759.       if (!intel_miptree_level_enable_hiz(brw, mt, level))
  1760.          continue;
  1761.  
  1762.       for (int layer = 0; layer < mt->level[level].depth; ++layer) {
  1763.          struct intel_resolve_map *m = malloc(sizeof(struct intel_resolve_map));
  1764.          exec_node_init(&m->link);
  1765.          m->level = level;
  1766.          m->layer = layer;
  1767.          m->need = GEN6_HIZ_OP_HIZ_RESOLVE;
  1768.  
  1769.          exec_list_push_tail(&mt->hiz_map, &m->link);
  1770.       }
  1771.    }
  1772.  
  1773.    return true;
  1774. }
  1775.  
  1776. /**
  1777.  * Does the miptree slice have hiz enabled?
  1778.  */
  1779. bool
  1780. intel_miptree_level_has_hiz(struct intel_mipmap_tree *mt, uint32_t level)
  1781. {
  1782.    intel_miptree_check_level_layer(mt, level, 0);
  1783.    return mt->level[level].has_hiz;
  1784. }
  1785.  
  1786. void
  1787. intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
  1788.                                           uint32_t level,
  1789.                                           uint32_t layer)
  1790. {
  1791.    if (!intel_miptree_level_has_hiz(mt, level))
  1792.       return;
  1793.  
  1794.    intel_resolve_map_set(&mt->hiz_map,
  1795.                          level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
  1796. }
  1797.  
  1798.  
  1799. void
  1800. intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
  1801.                                             uint32_t level,
  1802.                                             uint32_t layer)
  1803. {
  1804.    if (!intel_miptree_level_has_hiz(mt, level))
  1805.       return;
  1806.  
  1807.    intel_resolve_map_set(&mt->hiz_map,
  1808.                          level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
  1809. }
  1810.  
  1811. void
  1812. intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt,
  1813.                                                 uint32_t level)
  1814. {
  1815.    uint32_t layer;
  1816.    uint32_t end_layer = mt->level[level].depth;
  1817.  
  1818.    for (layer = 0; layer < end_layer; layer++) {
  1819.       intel_miptree_slice_set_needs_depth_resolve(mt, level, layer);
  1820.    }
  1821. }
  1822.  
  1823. static bool
  1824. intel_miptree_slice_resolve(struct brw_context *brw,
  1825.                             struct intel_mipmap_tree *mt,
  1826.                             uint32_t level,
  1827.                             uint32_t layer,
  1828.                             enum gen6_hiz_op need)
  1829. {
  1830.    intel_miptree_check_level_layer(mt, level, layer);
  1831.  
  1832.    struct intel_resolve_map *item =
  1833.          intel_resolve_map_get(&mt->hiz_map, level, layer);
  1834.  
  1835.    if (!item || item->need != need)
  1836.       return false;
  1837.  
  1838.    intel_hiz_exec(brw, mt, level, layer, need);
  1839.    intel_resolve_map_remove(item);
  1840.    return true;
  1841. }
  1842.  
  1843. bool
  1844. intel_miptree_slice_resolve_hiz(struct brw_context *brw,
  1845.                                 struct intel_mipmap_tree *mt,
  1846.                                 uint32_t level,
  1847.                                 uint32_t layer)
  1848. {
  1849.    return intel_miptree_slice_resolve(brw, mt, level, layer,
  1850.                                       GEN6_HIZ_OP_HIZ_RESOLVE);
  1851. }
  1852.  
  1853. bool
  1854. intel_miptree_slice_resolve_depth(struct brw_context *brw,
  1855.                                   struct intel_mipmap_tree *mt,
  1856.                                   uint32_t level,
  1857.                                   uint32_t layer)
  1858. {
  1859.    return intel_miptree_slice_resolve(brw, mt, level, layer,
  1860.                                       GEN6_HIZ_OP_DEPTH_RESOLVE);
  1861. }
  1862.  
  1863. static bool
  1864. intel_miptree_all_slices_resolve(struct brw_context *brw,
  1865.                                  struct intel_mipmap_tree *mt,
  1866.                                  enum gen6_hiz_op need)
  1867. {
  1868.    bool did_resolve = false;
  1869.  
  1870.    foreach_list_typed_safe(struct intel_resolve_map, map, link, &mt->hiz_map) {
  1871.       if (map->need != need)
  1872.          continue;
  1873.  
  1874.       intel_hiz_exec(brw, mt, map->level, map->layer, need);
  1875.       intel_resolve_map_remove(map);
  1876.       did_resolve = true;
  1877.    }
  1878.  
  1879.    return did_resolve;
  1880. }
  1881.  
  1882. bool
  1883. intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
  1884.                                      struct intel_mipmap_tree *mt)
  1885. {
  1886.    return intel_miptree_all_slices_resolve(brw, mt,
  1887.                                            GEN6_HIZ_OP_HIZ_RESOLVE);
  1888. }
  1889.  
  1890. bool
  1891. intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
  1892.                                        struct intel_mipmap_tree *mt)
  1893. {
  1894.    return intel_miptree_all_slices_resolve(brw, mt,
  1895.                                            GEN6_HIZ_OP_DEPTH_RESOLVE);
  1896. }
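
/* Usage sketch (editor's illustration): the resolve bookkeeping above is
 * consumed at state-setup time.  A depth resolve copies outstanding HiZ
 * data into the main depth surface before it is sampled; a HiZ resolve
 * regenerates the HiZ buffer after the depth surface was written without
 * HiZ, before HiZ-enabled rendering resumes:
 *
 *    // about to texture from depth_mt
 *    intel_miptree_all_slices_resolve_depth(brw, depth_mt);
 *
 *    // about to do HiZ-enabled depth rendering to depth_mt
 *    intel_miptree_all_slices_resolve_hiz(brw, depth_mt);
 */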
  1897.  
  1898.  
  1899. void
  1900. intel_miptree_resolve_color(struct brw_context *brw,
  1901.                             struct intel_mipmap_tree *mt)
  1902. {
  1903.    switch (mt->fast_clear_state) {
  1904.    case INTEL_FAST_CLEAR_STATE_NO_MCS:
  1905.    case INTEL_FAST_CLEAR_STATE_RESOLVED:
  1906.       /* No resolve needed */
  1907.       break;
  1908.    case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
  1909.    case INTEL_FAST_CLEAR_STATE_CLEAR:
  1910.       /* Fast color clear resolves only make sense for non-MSAA buffers. */
  1911.       if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE)
  1912.          brw_meta_resolve_color(brw, mt);
  1913.       break;
  1914.    }
  1915. }
  1916.  
  1917.  
  1918. /**
  1919.  * Make it possible to share the BO backing the given miptree with another
  1920.  * process or another miptree.
  1921.  *
  1922.  * Fast color clears are unsafe with shared buffers, so we need to resolve and
  1923.  * then discard the MCS buffer, if present.  We also set the fast_clear_state
  1924.  * to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets
  1925.  * allocated in the future.
  1926.  */
  1927. void
  1928. intel_miptree_make_shareable(struct brw_context *brw,
  1929.                              struct intel_mipmap_tree *mt)
  1930. {
  1931.    /* MCS buffers are also used for multisample buffers, but we can't resolve
  1932.     * away a multisample MCS buffer because it's an integral part of how the
  1933.     * pixel data is stored.  Fortunately this code path should never be
  1934.     * reached for multisample buffers.
  1935.     */
  1936.    assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
  1937.  
  1938.    if (mt->mcs_mt) {
  1939.       intel_miptree_resolve_color(brw, mt);
  1940.       intel_miptree_release(&mt->mcs_mt);
  1941.       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
  1942.    }
  1943. }
  1944.  
  1945.  
  1946. /**
  1947.  * \brief Get pointer offset into stencil buffer.
  1948.  *
  1949.  * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
  1950.  * must decode the tile's layout in software.
  1951.  *
  1952.  * See
  1953.  *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
  1954.  *     Format.
  1955.  *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
  1956.  *
  1957.  * Even though the returned offset is always positive, the return type is
  1958.  * signed due to
  1959.  *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
  1960.  *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
  1961.  */
  1962. static intptr_t
  1963. intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
  1964. {
  1965.    uint32_t tile_size = 4096;
  1966.    uint32_t tile_width = 64;
  1967.    uint32_t tile_height = 64;
  1968.    uint32_t row_size = 64 * stride;
  1969.  
  1970.    uint32_t tile_x = x / tile_width;
  1971.    uint32_t tile_y = y / tile_height;
  1972.  
   /* The byte's address relative to the tile's base address. */
  1974.    uint32_t byte_x = x % tile_width;
  1975.    uint32_t byte_y = y % tile_height;
  1976.  
  1977.    uintptr_t u = tile_y * row_size
  1978.                + tile_x * tile_size
  1979.                + 512 * (byte_x / 8)
  1980.                +  64 * (byte_y / 8)
  1981.                +  32 * ((byte_y / 4) % 2)
  1982.                +  16 * ((byte_x / 4) % 2)
  1983.                +   8 * ((byte_y / 2) % 2)
  1984.                +   4 * ((byte_x / 2) % 2)
  1985.                +   2 * (byte_y % 2)
  1986.                +   1 * (byte_x % 2);
  1987.  
  1988.    if (swizzled) {
  1989.       /* adjust for bit6 swizzling */
  1990.       if (((byte_x / 8) % 2) == 1) {
  1991.          if (((byte_y / 8) % 2) == 0) {
  1992.             u += 64;
  1993.          } else {
  1994.             u -= 64;
  1995.          }
  1996.       }
  1997.    }
  1998.  
  1999.    return u;
  2000. }
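
/* Worked example (editor's note): for stride = 512, x = 70, y = 3 and no
 * swizzling: tile_x = 1, tile_y = 0, byte_x = 6, byte_y = 3, so
 *
 *    u = 1 * 4096          // tile_x * tile_size
 *      + 16 * 1            // (byte_x / 4) % 2
 *      +  8 * 1            // (byte_y / 2) % 2
 *      +  4 * 1            // (byte_x / 2) % 2
 *      +  2 * 1            // byte_y % 2
 *      = 4126
 */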
  2001.  
  2002. void
  2003. intel_miptree_updownsample(struct brw_context *brw,
  2004.                            struct intel_mipmap_tree *src,
  2005.                            struct intel_mipmap_tree *dst)
  2006. {
  2007.    if (brw->gen < 8) {
  2008.       brw_blorp_blit_miptrees(brw,
  2009.                               src, 0 /* level */, 0 /* layer */, src->format,
  2010.                               dst, 0 /* level */, 0 /* layer */, dst->format,
  2011.                               0, 0,
  2012.                               src->logical_width0, src->logical_height0,
  2013.                               0, 0,
  2014.                               dst->logical_width0, dst->logical_height0,
  2015.                               GL_NEAREST, false, false /*mirror x, y*/);
  2016.    } else if (src->format == MESA_FORMAT_S_UINT8) {
  2017.       brw_meta_stencil_updownsample(brw, src, dst);
  2018.    } else {
  2019.       brw_meta_updownsample(brw, src, dst);
  2020.    }
  2021.  
  2022.    if (src->stencil_mt) {
  2023.       if (brw->gen >= 8) {
  2024.          brw_meta_stencil_updownsample(brw, src->stencil_mt, dst);
  2025.          return;
  2026.       }
  2027.  
  2028.       brw_blorp_blit_miptrees(brw,
  2029.                               src->stencil_mt, 0 /* level */, 0 /* layer */,
  2030.                               src->stencil_mt->format,
  2031.                               dst->stencil_mt, 0 /* level */, 0 /* layer */,
  2032.                               dst->stencil_mt->format,
  2033.                               0, 0,
  2034.                               src->logical_width0, src->logical_height0,
  2035.                               0, 0,
  2036.                               dst->logical_width0, dst->logical_height0,
  2037.                               GL_NEAREST, false, false /*mirror x, y*/);
  2038.    }
  2039. }
  2040.  
  2041. void *
  2042. intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
  2043. {
  2044.    /* CPU accesses to color buffers don't understand fast color clears, so
  2045.     * resolve any pending fast color clears before we map.
  2046.     */
  2047.    intel_miptree_resolve_color(brw, mt);
  2048.  
  2049.    drm_intel_bo *bo = mt->bo;
  2050.  
  2051.    if (drm_intel_bo_references(brw->batch.bo, bo))
  2052.       intel_batchbuffer_flush(brw);
  2053.  
  2054.    if (mt->tiling != I915_TILING_NONE)
  2055.       brw_bo_map_gtt(brw, bo, "miptree");
  2056.    else
  2057.       brw_bo_map(brw, bo, true, "miptree");
  2058.  
  2059.    return bo->virtual;
  2060. }
  2061.  
  2062. void
  2063. intel_miptree_unmap_raw(struct brw_context *brw,
  2064.                         struct intel_mipmap_tree *mt)
  2065. {
  2066.    drm_intel_bo_unmap(mt->bo);
  2067. }
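
/* Usage sketch (editor's illustration): raw maps expose the BO in its native
 * tiled layout, so callers either do their own tiling math (see
 * intel_offset_S8() above) or touch the whole surface, as the MCS clear in
 * intel_miptree_alloc_mcs() does:
 *
 *    void *ptr = intel_miptree_map_raw(brw, mt);
 *    if (ptr) {
 *       memset(ptr, 0, mt->total_height * mt->pitch);
 *       intel_miptree_unmap_raw(brw, mt);
 *    }
 */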
  2068.  
  2069. static void
  2070. intel_miptree_map_gtt(struct brw_context *brw,
  2071.                       struct intel_mipmap_tree *mt,
  2072.                       struct intel_miptree_map *map,
  2073.                       unsigned int level, unsigned int slice)
  2074. {
  2075.    unsigned int bw, bh;
  2076.    void *base;
  2077.    unsigned int image_x, image_y;
  2078.    intptr_t x = map->x;
  2079.    intptr_t y = map->y;
  2080.  
  2081.    /* For compressed formats, the stride is the number of bytes per
  2082.     * row of blocks.  intel_miptree_get_image_offset() already does
  2083.     * the divide.
  2084.     */
  2085.    _mesa_get_format_block_size(mt->format, &bw, &bh);
   assert(y % bh == 0);
   assert(x % bw == 0);
   y /= bh;
   /* x is likewise converted to blocks: mt->cpp below is the size of a
    * whole block for compressed formats.
    */
   x /= bw;
  2088.  
   base = intel_miptree_map_raw(brw, mt);

   if (base == NULL)
      map->ptr = NULL;
   else {
      /* Apply mt->offset only after the NULL check; adding it directly to
       * the result of intel_miptree_map_raw() would turn a failed (NULL)
       * map into a bogus non-NULL pointer.
       */
      base += mt->offset;
  2094.       /* Note that in the case of cube maps, the caller must have passed the
  2095.        * slice number referencing the face.
       */
  2097.       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
  2098.       x += image_x;
  2099.       y += image_y;
  2100.  
  2101.       map->stride = mt->pitch;
  2102.       map->ptr = base + y * map->stride + x * mt->cpp;
  2103.    }
  2104.  
  2105.    DBG("%s: %d,%d %dx%d from mt %p (%s) "
  2106.        "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__,
  2107.        map->x, map->y, map->w, map->h,
  2108.        mt, _mesa_get_format_name(mt->format),
  2109.        x, y, map->ptr, map->stride);
  2110. }
  2111.  
  2112. static void
  2113. intel_miptree_unmap_gtt(struct brw_context *brw,
  2114.                         struct intel_mipmap_tree *mt,
  2115.                         struct intel_miptree_map *map,
  2116.                         unsigned int level,
  2117.                         unsigned int slice)
  2118. {
  2119.    intel_miptree_unmap_raw(brw, mt);
  2120. }
  2121.  
  2122. static void
  2123. intel_miptree_map_blit(struct brw_context *brw,
  2124.                        struct intel_mipmap_tree *mt,
  2125.                        struct intel_miptree_map *map,
  2126.                        unsigned int level, unsigned int slice)
  2127. {
  2128.    map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
  2129.                                   0, 0,
  2130.                                   map->w, map->h, 1,
  2131.                                   false, 0,
  2132.                                   INTEL_MIPTREE_TILING_NONE,
  2133.                                   false);
  2134.    if (!map->mt) {
  2135.       fprintf(stderr, "Failed to allocate blit temporary\n");
  2136.       goto fail;
  2137.    }
  2138.    map->stride = map->mt->pitch;
  2139.  
  2140.    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
  2141.     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
  2142.     * invalidate is set, since we'll be writing the whole rectangle from our
  2143.     * temporary buffer back out.
  2144.     */
  2145.    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
  2146.       if (!intel_miptree_blit(brw,
  2147.                               mt, level, slice,
  2148.                               map->x, map->y, false,
  2149.                               map->mt, 0, 0,
  2150.                               0, 0, false,
  2151.                               map->w, map->h, GL_COPY)) {
  2152.          fprintf(stderr, "Failed to blit\n");
  2153.          goto fail;
  2154.       }
  2155.    }
  2156.  
  2157.    map->ptr = intel_miptree_map_raw(brw, map->mt);
  2158.  
  2159.    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
  2160.        map->x, map->y, map->w, map->h,
  2161.        mt, _mesa_get_format_name(mt->format),
  2162.        level, slice, map->ptr, map->stride);
  2163.  
  2164.    return;
  2165.  
  2166. fail:
  2167.    intel_miptree_release(&map->mt);
  2168.    map->ptr = NULL;
  2169.    map->stride = 0;
  2170. }
  2171.  
  2172. static void
  2173. intel_miptree_unmap_blit(struct brw_context *brw,
  2174.                          struct intel_mipmap_tree *mt,
  2175.                          struct intel_miptree_map *map,
  2176.                          unsigned int level,
  2177.                          unsigned int slice)
  2178. {
  2179.    struct gl_context *ctx = &brw->ctx;
  2180.  
  2181.    intel_miptree_unmap_raw(brw, map->mt);
  2182.  
  2183.    if (map->mode & GL_MAP_WRITE_BIT) {
  2184.       bool ok = intel_miptree_blit(brw,
  2185.                                    map->mt, 0, 0,
  2186.                                    0, 0, false,
  2187.                                    mt, level, slice,
  2188.                                    map->x, map->y, false,
  2189.                                    map->w, map->h, GL_COPY);
  2190.       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
  2191.    }
  2192.  
  2193.    intel_miptree_release(&map->mt);
  2194. }
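
/* Editor's note: the blit map above is a staging pattern: a linear temporary
 * is blitted out of the tiled surface, the CPU works on the temporary, and
 * on unmap the rectangle is blitted back only when the map was for writing
 * (GL_MAP_WRITE_BIT).
 */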
  2195.  
  2196. /**
  2197.  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
  2198.  */
  2199. #if defined(USE_SSE41)
  2200. static void
  2201. intel_miptree_map_movntdqa(struct brw_context *brw,
  2202.                            struct intel_mipmap_tree *mt,
  2203.                            struct intel_miptree_map *map,
  2204.                            unsigned int level, unsigned int slice)
  2205. {
  2206.    assert(map->mode & GL_MAP_READ_BIT);
  2207.    assert(!(map->mode & GL_MAP_WRITE_BIT));
  2208.  
  2209.    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
  2210.        map->x, map->y, map->w, map->h,
  2211.        mt, _mesa_get_format_name(mt->format),
  2212.        level, slice, map->ptr, map->stride);
  2213.  
  2214.    /* Map the original image */
  2215.    uint32_t image_x;
  2216.    uint32_t image_y;
  2217.    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
  2218.    image_x += map->x;
  2219.    image_y += map->y;
  2220.  
  2221.    void *src = intel_miptree_map_raw(brw, mt);
  2222.    if (!src)
  2223.       return;
  2224.    src += image_y * mt->pitch;
  2225.    src += image_x * mt->cpp;
  2226.  
  2227.    /* Due to the pixel offsets for the particular image being mapped, our
  2228.     * src pointer may not be 16-byte aligned.  However, if the pitch is
  2229.     * divisible by 16, then the amount by which it's misaligned will remain
  2230.     * consistent from row to row.
  2231.     */
  2232.    assert((mt->pitch % 16) == 0);
  2233.    const int misalignment = ((uintptr_t) src) & 15;
  2234.  
  2235.    /* Create an untiled temporary buffer for the mapping. */
  2236.    const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w);
  2237.  
  2238.    map->stride = ALIGN(misalignment + width_bytes, 16);
  2239.  
   map->buffer = _mesa_align_malloc(map->stride * map->h, 16);
   if (!map->buffer) {
      intel_miptree_unmap_raw(brw, mt);
      return;
   }
  2241.    /* Offset the destination so it has the same misalignment as src. */
  2242.    map->ptr = map->buffer + misalignment;
  2243.  
  2244.    assert((((uintptr_t) map->ptr) & 15) == misalignment);
  2245.  
  2246.    for (uint32_t y = 0; y < map->h; y++) {
  2247.       void *dst_ptr = map->ptr + y * map->stride;
  2248.       void *src_ptr = src + y * mt->pitch;
  2249.  
  2250.       _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes);
  2251.    }
  2252.  
  2253.    intel_miptree_unmap_raw(brw, mt);
  2254. }
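
/* Editor's note on the alignment trick above: MOVNTDQA loads require
 * 16-byte-aligned sources, so if a row starts at, say, an address ending in
 * 0x8, misalignment is 8 and the destination pointer is offset by 8 as well.
 * Source and destination rows then share the same alignment phase, which
 * lets _mesa_streaming_load_memcpy() use streaming loads for the bulk of
 * every row.
 */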
  2255.  
  2256. static void
  2257. intel_miptree_unmap_movntdqa(struct brw_context *brw,
  2258.                              struct intel_mipmap_tree *mt,
  2259.                              struct intel_miptree_map *map,
  2260.                              unsigned int level,
  2261.                              unsigned int slice)
  2262. {
  2263.    _mesa_align_free(map->buffer);
  2264.    map->buffer = NULL;
  2265.    map->ptr = NULL;
  2266. }
  2267. #endif
  2268.  
  2269. static void
  2270. intel_miptree_map_s8(struct brw_context *brw,
  2271.                      struct intel_mipmap_tree *mt,
  2272.                      struct intel_miptree_map *map,
  2273.                      unsigned int level, unsigned int slice)
  2274. {
  2275.    map->stride = map->w;
  2276.    map->buffer = map->ptr = malloc(map->stride * map->h);
  2277.    if (!map->buffer)
  2278.       return;
  2279.  
  2280.    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
  2281.     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
  2282.     * invalidate is set, since we'll be writing the whole rectangle from our
  2283.     * temporary buffer back out.
  2284.     */
  2285.    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
  2286.       uint8_t *untiled_s8_map = map->ptr;
  2287.       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
  2288.       unsigned int image_x, image_y;
  2289.  
  2290.       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
  2291.  
  2292.       for (uint32_t y = 0; y < map->h; y++) {
  2293.          for (uint32_t x = 0; x < map->w; x++) {
  2294.             ptrdiff_t offset = intel_offset_S8(mt->pitch,
  2295.                                                x + image_x + map->x,
  2296.                                                y + image_y + map->y,
  2297.                                                brw->has_swizzling);
  2298.             untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
  2299.          }
  2300.       }
  2301.  
  2302.       intel_miptree_unmap_raw(brw, mt);
  2303.  
  2304.       DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__,
  2305.           map->x, map->y, map->w, map->h,
  2306.           mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
  2307.    } else {
  2308.       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
  2309.           map->x, map->y, map->w, map->h,
  2310.           mt, map->ptr, map->stride);
  2311.    }
  2312. }
  2313.  
  2314. static void
  2315. intel_miptree_unmap_s8(struct brw_context *brw,
  2316.                        struct intel_mipmap_tree *mt,
  2317.                        struct intel_miptree_map *map,
  2318.                        unsigned int level,
  2319.                        unsigned int slice)
  2320. {
  2321.    if (map->mode & GL_MAP_WRITE_BIT) {
  2322.       unsigned int image_x, image_y;
  2323.       uint8_t *untiled_s8_map = map->ptr;
  2324.       uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
  2325.  
  2326.       intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
  2327.  
  2328.       for (uint32_t y = 0; y < map->h; y++) {
  2329.          for (uint32_t x = 0; x < map->w; x++) {
  2330.             ptrdiff_t offset = intel_offset_S8(mt->pitch,
  2331.                                                x + map->x,
  2332.                                                y + map->y,
  2333.                                                brw->has_swizzling);
  2334.             tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
  2335.          }
  2336.       }
  2337.  
  2338.       intel_miptree_unmap_raw(brw, mt);
  2339.    }
  2340.  
  2341.    free(map->buffer);
  2342. }
  2343.  
  2344. static void
  2345. intel_miptree_map_etc(struct brw_context *brw,
  2346.                       struct intel_mipmap_tree *mt,
  2347.                       struct intel_miptree_map *map,
  2348.                       unsigned int level,
  2349.                       unsigned int slice)
  2350. {
  2351.    assert(mt->etc_format != MESA_FORMAT_NONE);
  2352.    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
  2353.       assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
  2354.    }
  2355.  
  2356.    assert(map->mode & GL_MAP_WRITE_BIT);
  2357.    assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
  2358.  
  2359.    map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
  2360.    map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
  2361.                                                 map->w, map->h, 1));
  2362.    map->ptr = map->buffer;
  2363. }
  2364.  
  2365. static void
  2366. intel_miptree_unmap_etc(struct brw_context *brw,
  2367.                         struct intel_mipmap_tree *mt,
  2368.                         struct intel_miptree_map *map,
  2369.                         unsigned int level,
  2370.                         unsigned int slice)
  2371. {
  2372.    uint32_t image_x;
  2373.    uint32_t image_y;
  2374.    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
  2375.  
  2376.    image_x += map->x;
  2377.    image_y += map->y;
  2378.  
  2379.    uint8_t *dst = intel_miptree_map_raw(brw, mt)
  2380.                 + image_y * mt->pitch
  2381.                 + image_x * mt->cpp;
  2382.  
  2383.    if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
  2384.       _mesa_etc1_unpack_rgba8888(dst, mt->pitch,
  2385.                                  map->ptr, map->stride,
  2386.                                  map->w, map->h);
  2387.    else
  2388.       _mesa_unpack_etc2_format(dst, mt->pitch,
  2389.                                map->ptr, map->stride,
  2390.                                map->w, map->h, mt->etc_format);
  2391.  
  2392.    intel_miptree_unmap_raw(brw, mt);
  2393.    free(map->buffer);
  2394. }
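
/* Editor's note: this pair implements transparent ETC support for hardware
 * that cannot sample ETC formats directly: the app writes compressed ETC
 * data into the malloc'd temporary, and unmap immediately decompresses it
 * into the RGBA/RGBX miptree the hardware actually samples from.
 */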
  2395.  
  2396. /**
  2397.  * Mapping function for packed depth/stencil miptrees backed by real separate
  2398.  * miptrees for depth and stencil.
  2399.  *
  2400.  * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
  2401.  * separate from the depth buffer.  Yet at the GL API level, we have to expose
  2402.  * packed depth/stencil textures and FBO attachments, and Mesa core expects to
  2403.  * be able to map that memory for texture storage and glReadPixels-type
  2404.  * operations.  We give Mesa core that access by mallocing a temporary and
  2405.  * copying the data between the actual backing store and the temporary.
  2406.  */
  2407. static void
  2408. intel_miptree_map_depthstencil(struct brw_context *brw,
  2409.                                struct intel_mipmap_tree *mt,
  2410.                                struct intel_miptree_map *map,
  2411.                                unsigned int level, unsigned int slice)
  2412. {
  2413.    struct intel_mipmap_tree *z_mt = mt;
  2414.    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
  2415.    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
  2416.    int packed_bpp = map_z32f_x24s8 ? 8 : 4;
  2417.  
  2418.    map->stride = map->w * packed_bpp;
  2419.    map->buffer = map->ptr = malloc(map->stride * map->h);
  2420.    if (!map->buffer)
  2421.       return;
  2422.  
  2423.    /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
  2424.     * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
  2425.     * invalidate is set, since we'll be writing the whole rectangle from our
  2426.     * temporary buffer back out.
  2427.     */
  2428.    if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
  2429.       uint32_t *packed_map = map->ptr;
  2430.       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
  2431.       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
  2432.       unsigned int s_image_x, s_image_y;
  2433.       unsigned int z_image_x, z_image_y;
  2434.  
  2435.       intel_miptree_get_image_offset(s_mt, level, slice,
  2436.                                      &s_image_x, &s_image_y);
  2437.       intel_miptree_get_image_offset(z_mt, level, slice,
  2438.                                      &z_image_x, &z_image_y);
  2439.  
  2440.       for (uint32_t y = 0; y < map->h; y++) {
  2441.          for (uint32_t x = 0; x < map->w; x++) {
  2442.             int map_x = map->x + x, map_y = map->y + y;
  2443.             ptrdiff_t s_offset = intel_offset_S8(s_mt->pitch,
  2444.                                                  map_x + s_image_x,
  2445.                                                  map_y + s_image_y,
  2446.                                                  brw->has_swizzling);
  2447.             ptrdiff_t z_offset = ((map_y + z_image_y) *
  2448.                                   (z_mt->pitch / 4) +
  2449.                                   (map_x + z_image_x));
  2450.             uint8_t s = s_map[s_offset];
  2451.             uint32_t z = z_map[z_offset];
  2452.  
  2453.             if (map_z32f_x24s8) {
  2454.                packed_map[(y * map->w + x) * 2 + 0] = z;
  2455.                packed_map[(y * map->w + x) * 2 + 1] = s;
  2456.             } else {
  2457.                packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
  2458.             }
  2459.          }
  2460.       }
  2461.  
  2462.       intel_miptree_unmap_raw(brw, s_mt);
  2463.       intel_miptree_unmap_raw(brw, z_mt);
  2464.  
  2465.       DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
  2466.           __func__,
  2467.           map->x, map->y, map->w, map->h,
  2468.           z_mt, map->x + z_image_x, map->y + z_image_y,
  2469.           s_mt, map->x + s_image_x, map->y + s_image_y,
  2470.           map->ptr, map->stride);
  2471.    } else {
  2472.       DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__,
  2473.           map->x, map->y, map->w, map->h,
  2474.           mt, map->ptr, map->stride);
  2475.    }
  2476. }
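
/* Worked example (editor's note): in the Z24S8 path above, stencil 0xff and
 * depth 0x123456 pack to (0xff << 24) | 0x123456 = 0xff123456, i.e. stencil
 * in the top byte and the 24 depth bits below it, matching the bit layout of
 * MESA_FORMAT_Z24_UNORM_S8_UINT.
 */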
  2477.  
  2478. static void
  2479. intel_miptree_unmap_depthstencil(struct brw_context *brw,
  2480.                                  struct intel_mipmap_tree *mt,
  2481.                                  struct intel_miptree_map *map,
  2482.                                  unsigned int level,
  2483.                                  unsigned int slice)
  2484. {
  2485.    struct intel_mipmap_tree *z_mt = mt;
  2486.    struct intel_mipmap_tree *s_mt = mt->stencil_mt;
  2487.    bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z_FLOAT32;
  2488.  
  2489.    if (map->mode & GL_MAP_WRITE_BIT) {
  2490.       uint32_t *packed_map = map->ptr;
  2491.       uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
  2492.       uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
  2493.       unsigned int s_image_x, s_image_y;
  2494.       unsigned int z_image_x, z_image_y;
  2495.  
  2496.       intel_miptree_get_image_offset(s_mt, level, slice,
  2497.                                      &s_image_x, &s_image_y);
  2498.       intel_miptree_get_image_offset(z_mt, level, slice,
  2499.                                      &z_image_x, &z_image_y);
  2500.  
  2501.       for (uint32_t y = 0; y < map->h; y++) {
  2502.          for (uint32_t x = 0; x < map->w; x++) {
  2503.             ptrdiff_t s_offset = intel_offset_S8(s_mt->pitch,
  2504.                                                  x + s_image_x + map->x,
  2505.                                                  y + s_image_y + map->y,
  2506.                                                  brw->has_swizzling);
  2507.             ptrdiff_t z_offset = ((y + z_image_y + map->y) *
  2508.                                   (z_mt->pitch / 4) +
  2509.                                   (x + z_image_x + map->x));
  2510.  
  2511.             if (map_z32f_x24s8) {
  2512.                z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
  2513.                s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
  2514.             } else {
  2515.                uint32_t packed = packed_map[y * map->w + x];
  2516.                s_map[s_offset] = packed >> 24;
  2517.                z_map[z_offset] = packed;
  2518.             }
  2519.          }
  2520.       }
  2521.  
  2522.       intel_miptree_unmap_raw(brw, s_mt);
  2523.       intel_miptree_unmap_raw(brw, z_mt);
  2524.  
  2525.       DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
  2526.           __func__,
  2527.           map->x, map->y, map->w, map->h,
  2528.           z_mt, _mesa_get_format_name(z_mt->format),
  2529.           map->x + z_image_x, map->y + z_image_y,
  2530.           s_mt, map->x + s_image_x, map->y + s_image_y,
  2531.           map->ptr, map->stride);
  2532.    }
  2533.  
  2534.    free(map->buffer);
  2535. }
  2536.  
  2537. /**
  2538.  * Create and attach a map to the miptree at (level, slice). Return the
  2539.  * attached map.
  2540.  */
  2541. static struct intel_miptree_map*
  2542. intel_miptree_attach_map(struct intel_mipmap_tree *mt,
  2543.                          unsigned int level,
  2544.                          unsigned int slice,
  2545.                          unsigned int x,
  2546.                          unsigned int y,
  2547.                          unsigned int w,
  2548.                          unsigned int h,
  2549.                          GLbitfield mode)
  2550. {
  2551.    struct intel_miptree_map *map = calloc(1, sizeof(*map));
  2552.  
  2553.    if (!map)
  2554.       return NULL;
  2555.  
  2556.    assert(mt->level[level].slice[slice].map == NULL);
  2557.    mt->level[level].slice[slice].map = map;
  2558.  
  2559.    map->mode = mode;
  2560.    map->x = x;
  2561.    map->y = y;
  2562.    map->w = w;
  2563.    map->h = h;
  2564.  
  2565.    return map;
  2566. }
  2567.  
  2568. /**
  2569.  * Release the map at (level, slice).
  2570.  */
  2571. static void
  2572. intel_miptree_release_map(struct intel_mipmap_tree *mt,
  2573.                          unsigned int level,
  2574.                          unsigned int slice)
  2575. {
  2576.    struct intel_miptree_map **map;
  2577.  
  2578.    map = &mt->level[level].slice[slice].map;
  2579.    free(*map);
  2580.    *map = NULL;
  2581. }
  2582.  
  2583. static bool
  2584. can_blit_slice(struct intel_mipmap_tree *mt,
  2585.                unsigned int level, unsigned int slice)
  2586. {
  2587.    uint32_t image_x;
  2588.    uint32_t image_y;
  2589.    intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
  2590.    if (image_x >= 32768 || image_y >= 32768)
  2591.       return false;
  2592.  
  2593.    /* See intel_miptree_blit() for details on the 32k pitch limit. */
  2594.    if (mt->pitch >= 32768)
  2595.       return false;
  2596.  
  2597.    return true;
  2598. }
  2599.  
  2600. static bool
use_intel_miptree_map_blit(struct brw_context *brw,
  2602.                           struct intel_mipmap_tree *mt,
  2603.                           GLbitfield mode,
  2604.                           unsigned int level,
  2605.                           unsigned int slice)
  2606. {
  2607.    if (brw->has_llc &&
  2608.       /* It's probably not worth swapping to the blit ring because of
  2609.        * all the overhead involved.
  2610.        */
  2611.        !(mode & GL_MAP_WRITE_BIT) &&
  2612.        !mt->compressed &&
  2613.        (mt->tiling == I915_TILING_X ||
  2614.         /* Prior to Sandybridge, the blitter can't handle Y tiling */
  2615.         (brw->gen >= 6 && mt->tiling == I915_TILING_Y)) &&
  2616.        can_blit_slice(mt, level, slice))
  2617.       return true;
  2618.  
  2619.    if (mt->tiling != I915_TILING_NONE &&
  2620.        mt->bo->size >= brw->max_gtt_map_object_size) {
  2621.       assert(can_blit_slice(mt, level, slice));
  2622.       return true;
  2623.    }
  2624.  
  2625.    return false;
  2626. }
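
/* Editor's note: the net effect is that read-only maps of tiled,
 * uncompressed miptrees on LLC systems take the blit path (a linear
 * temporary is cheap and detiling through the GTT is slow), as do tiled BOs
 * too large to map through the GTT aperture; everything else falls through
 * to the MOVNTDQA or GTT paths in intel_miptree_map() below.
 */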
  2627.  
  2628. /**
  2629.  * Parameter \a out_stride has type ptrdiff_t not because the buffer stride may
 * exceed 32 bits, but to diminish the likelihood of subtle bugs caused by
 * pointer arithmetic overflow.
  2632.  *
  2633.  * If you call this function and use \a out_stride, then you're doing pointer
  2634.  * arithmetic on \a out_ptr. The type of \a out_stride doesn't prevent all
  2635.  * bugs.  The caller must still take care to avoid 32-bit overflow errors in
  2636.  * all arithmetic expressions that contain buffer offsets and pixel sizes,
  2637.  * which usually have type uint32_t or GLuint.
  2638.  */
  2639. void
  2640. intel_miptree_map(struct brw_context *brw,
  2641.                   struct intel_mipmap_tree *mt,
  2642.                   unsigned int level,
  2643.                   unsigned int slice,
  2644.                   unsigned int x,
  2645.                   unsigned int y,
  2646.                   unsigned int w,
  2647.                   unsigned int h,
  2648.                   GLbitfield mode,
  2649.                   void **out_ptr,
  2650.                   ptrdiff_t *out_stride)
  2651. {
  2652.    struct intel_miptree_map *map;
  2653.  
  2654.    assert(mt->num_samples <= 1);
  2655.  
  2656.    map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
   if (!map) {
  2658.       *out_ptr = NULL;
  2659.       *out_stride = 0;
  2660.       return;
  2661.    }
  2662.  
  2663.    intel_miptree_slice_resolve_depth(brw, mt, level, slice);
  2664.    if (map->mode & GL_MAP_WRITE_BIT) {
  2665.       intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
  2666.    }
  2667.  
  2668.    if (mt->format == MESA_FORMAT_S_UINT8) {
  2669.       intel_miptree_map_s8(brw, mt, map, level, slice);
  2670.    } else if (mt->etc_format != MESA_FORMAT_NONE &&
  2671.               !(mode & BRW_MAP_DIRECT_BIT)) {
  2672.       intel_miptree_map_etc(brw, mt, map, level, slice);
  2673.    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
  2674.       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
   } else if (use_intel_miptree_map_blit(brw, mt, mode, level, slice)) {
  2676.       intel_miptree_map_blit(brw, mt, map, level, slice);
  2677. #if defined(USE_SSE41)
  2678.    } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1) {
  2679.       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
  2680. #endif
  2681.    } else {
  2682.       intel_miptree_map_gtt(brw, mt, map, level, slice);
  2683.    }
  2684.  
  2685.    *out_ptr = map->ptr;
  2686.    *out_stride = map->stride;
  2687.  
  2688.    if (map->ptr == NULL)
  2689.       intel_miptree_release_map(mt, level, slice);
  2690. }
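
/* Usage sketch (editor's illustration; dst and dst_stride are hypothetical):
 * reading back one level through the generic map interface:
 *
 *    void *ptr;
 *    ptrdiff_t stride;
 *    intel_miptree_map(brw, mt, level, 0, 0, 0, w, h,
 *                      GL_MAP_READ_BIT, &ptr, &stride);
 *    if (ptr) {
 *       for (unsigned row = 0; row < h; row++)
 *          memcpy((char *)dst + row * dst_stride,
 *                 (const char *)ptr + row * stride, w * mt->cpp);
 *       intel_miptree_unmap(brw, mt, level, 0);
 *    }
 */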
  2691.  
  2692. void
  2693. intel_miptree_unmap(struct brw_context *brw,
  2694.                     struct intel_mipmap_tree *mt,
  2695.                     unsigned int level,
  2696.                     unsigned int slice)
  2697. {
  2698.    struct intel_miptree_map *map = mt->level[level].slice[slice].map;
  2699.  
  2700.    assert(mt->num_samples <= 1);
  2701.  
  2702.    if (!map)
  2703.       return;
  2704.  
  2705.    DBG("%s: mt %p (%s) level %d slice %d\n", __func__,
  2706.        mt, _mesa_get_format_name(mt->format), level, slice);
  2707.  
  2708.    if (mt->format == MESA_FORMAT_S_UINT8) {
  2709.       intel_miptree_unmap_s8(brw, mt, map, level, slice);
  2710.    } else if (mt->etc_format != MESA_FORMAT_NONE &&
  2711.               !(map->mode & BRW_MAP_DIRECT_BIT)) {
  2712.       intel_miptree_unmap_etc(brw, mt, map, level, slice);
  2713.    } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
  2714.       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
  2715.    } else if (map->mt) {
  2716.       intel_miptree_unmap_blit(brw, mt, map, level, slice);
  2717. #if defined(USE_SSE41)
  2718.    } else if (map->buffer && cpu_has_sse4_1) {
  2719.       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
  2720. #endif
  2721.    } else {
  2722.       intel_miptree_unmap_gtt(brw, mt, map, level, slice);
  2723.    }
  2724.  
  2725.    intel_miptree_release_map(mt, level, slice);
  2726. }
  2727.