Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2014 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "ilo_debug.h"
  29. #include "ilo_image.h"
  30.  
  31. enum {
  32.    IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
  33.    IMAGE_TILING_X    = 1 << GEN6_TILING_X,
  34.    IMAGE_TILING_Y    = 1 << GEN6_TILING_Y,
  35.    IMAGE_TILING_W    = 1 << GEN8_TILING_W,
  36.  
  37.    IMAGE_TILING_ALL  = (IMAGE_TILING_NONE |
  38.                         IMAGE_TILING_X |
  39.                         IMAGE_TILING_Y |
  40.                         IMAGE_TILING_W)
  41. };
  42.  
  /*
   * Scratch state shared by the img_*() helpers in this file while an image
   * layout is being computed.
   */
  struct ilo_image_params {
     /* device the layout is computed for; queried with ilo_dev_gen() */
     const struct ilo_dev *dev;
     /* resource template describing the image */
     const struct pipe_resource *templ;
     /* bitmask of IMAGE_TILING_x flags that are still allowed */
     unsigned valid_tilings;

     /* result of util_format_is_compressed() on the image format */
     bool compressed;

     /* slice heights of LOD 0 and LOD 1; set for ILO_IMAGE_WALK_LAYER */
     unsigned h0;
     unsigned h1;

     /* running maxima of the horizontal and vertical extents of the LODs */
     unsigned max_x;
     unsigned max_y;
  };
  53.  
  54. static void
  55. img_get_slice_size(const struct ilo_image *img,
  56.                    const struct ilo_image_params *params,
  57.                    unsigned level, unsigned *width, unsigned *height)
  58. {
  59.    const struct pipe_resource *templ = params->templ;
  60.    unsigned w, h;
  61.  
  62.    w = u_minify(img->width0, level);
  63.    h = u_minify(img->height0, level);
  64.  
  65.    /*
  66.     * From the Sandy Bridge PRM, volume 1 part 1, page 114:
  67.     *
  68.     *     "The dimensions of the mip maps are first determined by applying the
  69.     *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
  70.     *      if necessary, they are padded out to compression block boundaries."
  71.     */
  72.    w = align(w, img->block_width);
  73.    h = align(h, img->block_height);
  74.  
  75.    /*
  76.     * From the Sandy Bridge PRM, volume 1 part 1, page 111:
  77.     *
  78.     *     "If the surface is multisampled (4x), these values must be adjusted
  79.     *      as follows before proceeding:
  80.     *
  81.     *        W_L = ceiling(W_L / 2) * 4
  82.     *        H_L = ceiling(H_L / 2) * 4"
  83.     *
  84.     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
  85.     *
  86.     *     "If the surface is multisampled and it is a depth or stencil surface
  87.     *      or Multisampled Surface StorageFormat in SURFACE_STATE is
  88.     *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
  89.     *      proceeding:
  90.     *
  91.     *        #samples  W_L =                    H_L =
  92.     *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
  93.     *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
  94.     *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
  95.     *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
  96.     *
  97.     * For interleaved samples (4x), where pixels
  98.     *
  99.     *   (x, y  ) (x+1, y  )
  100.     *   (x, y+1) (x+1, y+1)
  101.     *
  102.     * would be is occupied by
  103.     *
  104.     *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
  105.     *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
  106.     *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
  107.     *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
  108.     *
  109.     * Thus the need to
  110.     *
  111.     *   w = align(w, 2) * 2;
  112.     *   y = align(y, 2) * 2;
  113.     */
  114.    if (img->interleaved_samples) {
  115.       switch (templ->nr_samples) {
  116.       case 0:
  117.       case 1:
  118.          break;
  119.       case 2:
  120.          w = align(w, 2) * 2;
  121.          break;
  122.       case 4:
  123.          w = align(w, 2) * 2;
  124.          h = align(h, 2) * 2;
  125.          break;
  126.       case 8:
  127.          w = align(w, 2) * 4;
  128.          h = align(h, 2) * 2;
  129.          break;
  130.       case 16:
  131.          w = align(w, 2) * 4;
  132.          h = align(h, 2) * 4;
  133.          break;
  134.       default:
  135.          assert(!"unsupported sample count");
  136.          break;
  137.       }
  138.    }
  139.  
  140.    /*
  141.     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
  142.     *
  143.     *     "For separate stencil buffer, the width must be mutiplied by 2 and
  144.     *      height divided by 2..."
  145.     *
  146.     * To make things easier (for transfer), we will just double the stencil
  147.     * stride in 3DSTATE_STENCIL_BUFFER.
  148.     */
  149.    w = align(w, img->align_i);
  150.    h = align(h, img->align_j);
  151.  
  152.    *width = w;
  153.    *height = h;
  154. }
  155.  
  156. static unsigned
  157. img_get_num_layers(const struct ilo_image *img,
  158.                    const struct ilo_image_params *params)
  159. {
  160.    const struct pipe_resource *templ = params->templ;
  161.    unsigned num_layers = templ->array_size;
  162.  
  163.    /* samples of the same index are stored in a layer */
  164.    if (templ->nr_samples > 1 && !img->interleaved_samples)
  165.       num_layers *= templ->nr_samples;
  166.  
  167.    return num_layers;
  168. }
  169.  
  170. static void
  171. img_init_layer_height(struct ilo_image *img,
  172.                       struct ilo_image_params *params)
  173. {
  174.    const struct pipe_resource *templ = params->templ;
  175.    unsigned num_layers;
  176.  
  177.    if (img->walk != ILO_IMAGE_WALK_LAYER)
  178.       return;
  179.  
  180.    num_layers = img_get_num_layers(img, params);
  181.    if (num_layers <= 1)
  182.       return;
  183.  
  184.    /*
  185.     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
  186.     *
  187.     *     "The following equation is used for surface formats other than
  188.     *      compressed textures:
  189.     *
  190.     *        QPitch = (h0 + h1 + 11j)"
  191.     *
  192.     *     "The equation for compressed textures (BC* and FXT1 surface formats)
  193.     *      follows:
  194.     *
  195.     *        QPitch = (h0 + h1 + 11j) / 4"
  196.     *
  197.     *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
  198.     *      value calculated in the equation above, for every other odd Surface
  199.     *      Height starting from 1 i.e. 1,5,9,13"
  200.     *
  201.     * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
  202.     *
  203.     *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
  204.     *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
  205.     *
  206.     *        QPitch = (h0 + h1 + 12j)
  207.     *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
  208.     *
  209.     *      (There are many typos or missing words here...)"
  210.     *
  211.     * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
  212.     * the base address.  The PRM divides QPitch by 4 for compressed formats
  213.     * because the block height for those formats are 4, and it wants QPitch to
  214.     * mean the number of memory rows, as opposed to texel rows, between
  215.     * slices.  Since we use texel rows everywhere, we do not need to divide
  216.     * QPitch by 4.
  217.     */
  218.    img->walk_layer_height = params->h0 + params->h1 +
  219.       ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;
  220.  
  221.    if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 &&
  222.        img->height0 % 4 == 1)
  223.       img->walk_layer_height += 4;
  224.  
  225.    params->max_y += img->walk_layer_height * (num_layers - 1);
  226. }
  227.  
  228. static void
  229. img_init_lods(struct ilo_image *img,
  230.               struct ilo_image_params *params)
  231. {
  232.    const struct pipe_resource *templ = params->templ;
  233.    unsigned cur_x, cur_y;
  234.    unsigned lv;
  235.  
  236.    cur_x = 0;
  237.    cur_y = 0;
  238.    for (lv = 0; lv <= templ->last_level; lv++) {
  239.       unsigned lod_w, lod_h;
  240.  
  241.       img_get_slice_size(img, params, lv, &lod_w, &lod_h);
  242.  
  243.       img->lods[lv].x = cur_x;
  244.       img->lods[lv].y = cur_y;
  245.       img->lods[lv].slice_width = lod_w;
  246.       img->lods[lv].slice_height = lod_h;
  247.  
  248.       switch (img->walk) {
  249.       case ILO_IMAGE_WALK_LAYER:
  250.          /* MIPLAYOUT_BELOW */
  251.          if (lv == 1)
  252.             cur_x += lod_w;
  253.          else
  254.             cur_y += lod_h;
  255.          break;
  256.       case ILO_IMAGE_WALK_LOD:
  257.          lod_h *= img_get_num_layers(img, params);
  258.          if (lv == 1)
  259.             cur_x += lod_w;
  260.          else
  261.             cur_y += lod_h;
  262.  
  263.          /* every LOD begins at tile boundaries */
  264.          if (templ->last_level > 0) {
  265.             assert(img->format == PIPE_FORMAT_S8_UINT);
  266.             cur_x = align(cur_x, 64);
  267.             cur_y = align(cur_y, 64);
  268.          }
  269.          break;
  270.       case ILO_IMAGE_WALK_3D:
  271.          {
  272.             const unsigned num_slices = u_minify(templ->depth0, lv);
  273.             const unsigned num_slices_per_row = 1 << lv;
  274.             const unsigned num_rows =
  275.                (num_slices + num_slices_per_row - 1) / num_slices_per_row;
  276.  
  277.             lod_w *= num_slices_per_row;
  278.             lod_h *= num_rows;
  279.  
  280.             cur_y += lod_h;
  281.          }
  282.          break;
  283.       }
  284.  
  285.       if (params->max_x < img->lods[lv].x + lod_w)
  286.          params->max_x = img->lods[lv].x + lod_w;
  287.       if (params->max_y < img->lods[lv].y + lod_h)
  288.          params->max_y = img->lods[lv].y + lod_h;
  289.    }
  290.  
  291.    if (img->walk == ILO_IMAGE_WALK_LAYER) {
  292.       params->h0 = img->lods[0].slice_height;
  293.  
  294.       if (templ->last_level > 0)
  295.          params->h1 = img->lods[1].slice_height;
  296.       else
  297.          img_get_slice_size(img, params, 1, &cur_x, &params->h1);
  298.    }
  299. }
  300.  
  301. static void
  302. img_init_alignments(struct ilo_image *img,
  303.                     const struct ilo_image_params *params)
  304. {
  305.    const struct pipe_resource *templ = params->templ;
  306.  
  307.    /*
  308.     * From the Sandy Bridge PRM, volume 1 part 1, page 113:
  309.     *
  310.     *     "surface format           align_i     align_j
  311.     *      YUV 4:2:2 formats        4           *see below
  312.     *      BC1-5                    4           4
  313.     *      FXT1                     8           4
  314.     *      all other formats        4           *see below"
  315.     *
  316.     *     "- align_j = 4 for any depth buffer
  317.     *      - align_j = 2 for separate stencil buffer
  318.     *      - align_j = 4 for any render target surface is multisampled (4x)
  319.     *      - align_j = 4 for any render target surface with Surface Vertical
  320.     *        Alignment = VALIGN_4
  321.     *      - align_j = 2 for any render target surface with Surface Vertical
  322.     *        Alignment = VALIGN_2
  323.     *      - align_j = 2 for all other render target surface
  324.     *      - align_j = 2 for any sampling engine surface with Surface Vertical
  325.     *        Alignment = VALIGN_2
  326.     *      - align_j = 4 for any sampling engine surface with Surface Vertical
  327.     *        Alignment = VALIGN_4"
  328.     *
  329.     * From the Sandy Bridge PRM, volume 4 part 1, page 86:
  330.     *
  331.     *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
  332.     *      the Surface Format is 96 bits per element (BPE)."
  333.     *
  334.     * They can be rephrased as
  335.     *
  336.     *                                  align_i        align_j
  337.     *   compressed formats             block width    block height
  338.     *   PIPE_FORMAT_S8_UINT            4              2
  339.     *   other depth/stencil formats    4              4
  340.     *   4x multisampled                4              4
  341.     *   bpp 96                         4              2
  342.     *   others                         4              2 or 4
  343.     */
  344.  
  345.    /*
  346.     * From the Ivy Bridge PRM, volume 1 part 1, page 110:
  347.     *
  348.     *     "surface defined by      surface format     align_i     align_j
  349.     *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
  350.     *                              not D16_UNORM      4           4
  351.     *      3DSTATE_STENCIL_BUFFER  N/A                8           8
  352.     *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
  353.     *                              FXT1               8           4
  354.     *                              all others         (set by SURFACE_STATE)"
  355.     *
  356.     * From the Ivy Bridge PRM, volume 4 part 1, page 63:
  357.     *
  358.     *     "- This field (Surface Vertical Aligment) is intended to be set to
  359.     *        VALIGN_4 if the surface was rendered as a depth buffer, for a
  360.     *        multisampled (4x) render target, or for a multisampled (8x)
  361.     *        render target, since these surfaces support only alignment of 4.
  362.     *      - Use of VALIGN_4 for other surfaces is supported, but uses more
  363.     *        memory.
  364.     *      - This field must be set to VALIGN_4 for all tiled Y Render Target
  365.     *        surfaces.
  366.     *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
  367.     *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
  368.     *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
  369.     *        must be set to VALIGN_4."
  370.     *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
  371.     *
  372.     *     "- This field (Surface Horizontal Aligment) is intended to be set to
  373.     *        HALIGN_8 only if the surface was rendered as a depth buffer with
  374.     *        Z16 format or a stencil buffer, since these surfaces support only
  375.     *        alignment of 8.
  376.     *      - Use of HALIGN_8 for other surfaces is supported, but uses more
  377.     *        memory.
  378.     *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
  379.     *      - This field must be set to HALIGN_8 if the Surface Format is
  380.     *        FXT1."
  381.     *
  382.     * They can be rephrased as
  383.     *
  384.     *                                  align_i        align_j
  385.     *  compressed formats              block width    block height
  386.     *  PIPE_FORMAT_Z16_UNORM           8              4
  387.     *  PIPE_FORMAT_S8_UINT             8              8
  388.     *  other depth/stencil formats     4              4
  389.     *  2x or 4x multisampled           4 or 8         4
  390.     *  tiled Y                         4 or 8         4 (if rt)
  391.     *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
  392.     *  others                          4 or 8         2 or 4
  393.     */
  394.  
  395.    if (params->compressed) {
  396.       /* this happens to be the case */
  397.       img->align_i = img->block_width;
  398.       img->align_j = img->block_height;
  399.    } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
  400.       if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
  401.          switch (img->format) {
  402.          case PIPE_FORMAT_Z16_UNORM:
  403.             img->align_i = 8;
  404.             img->align_j = 4;
  405.             break;
  406.          case PIPE_FORMAT_S8_UINT:
  407.             img->align_i = 8;
  408.             img->align_j = 8;
  409.             break;
  410.          default:
  411.             img->align_i = 4;
  412.             img->align_j = 4;
  413.             break;
  414.          }
  415.       } else {
  416.          switch (img->format) {
  417.          case PIPE_FORMAT_S8_UINT:
  418.             img->align_i = 4;
  419.             img->align_j = 2;
  420.             break;
  421.          default:
  422.             img->align_i = 4;
  423.             img->align_j = 4;
  424.             break;
  425.          }
  426.       }
  427.    } else {
  428.       const bool valign_4 =
  429.          (templ->nr_samples > 1) ||
  430.          (ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
  431.          (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
  432.           img->tiling == GEN6_TILING_Y &&
  433.           (templ->bind & PIPE_BIND_RENDER_TARGET));
  434.  
  435.       if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
  436.           ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
  437.          assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);
  438.  
  439.       img->align_i = 4;
  440.       img->align_j = (valign_4) ? 4 : 2;
  441.    }
  442.  
  443.    /*
  444.     * the fact that align i and j are multiples of block width and height
  445.     * respectively is what makes the size of the bo a multiple of the block
  446.     * size, slices start at block boundaries, and many of the computations
  447.     * work.
  448.     */
  449.    assert(img->align_i % img->block_width == 0);
  450.    assert(img->align_j % img->block_height == 0);
  451.  
  452.    /* make sure align() works */
  453.    assert(util_is_power_of_two(img->align_i) &&
  454.           util_is_power_of_two(img->align_j));
  455.    assert(util_is_power_of_two(img->block_width) &&
  456.           util_is_power_of_two(img->block_height));
  457. }
  458.  
  459. static void
  460. img_init_tiling(struct ilo_image *img,
  461.                 const struct ilo_image_params *params)
  462. {
  463.    const struct pipe_resource *templ = params->templ;
  464.    unsigned preferred_tilings = params->valid_tilings;
  465.  
  466.    /* no fencing nor BLT support */
  467.    if (preferred_tilings & ~IMAGE_TILING_W)
  468.       preferred_tilings &= ~IMAGE_TILING_W;
  469.  
  470.    if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
  471.       /*
  472.        * heuristically set a minimum width/height for enabling tiling
  473.        */
  474.       if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X))
  475.          preferred_tilings &= ~IMAGE_TILING_X;
  476.  
  477.       if ((img->width0 < 32 || img->height0 < 16) &&
  478.           (img->width0 < 16 || img->height0 < 32) &&
  479.           (preferred_tilings & ~IMAGE_TILING_Y))
  480.          preferred_tilings &= ~IMAGE_TILING_Y;
  481.    } else {
  482.       /* force linear if we are not sure where the texture is bound to */
  483.       if (preferred_tilings & IMAGE_TILING_NONE)
  484.          preferred_tilings &= IMAGE_TILING_NONE;
  485.    }
  486.  
  487.    /* prefer tiled over linear */
  488.    if (preferred_tilings & IMAGE_TILING_Y)
  489.       img->tiling = GEN6_TILING_Y;
  490.    else if (preferred_tilings & IMAGE_TILING_X)
  491.       img->tiling = GEN6_TILING_X;
  492.    else if (preferred_tilings & IMAGE_TILING_W)
  493.       img->tiling = GEN8_TILING_W;
  494.    else
  495.       img->tiling = GEN6_TILING_NONE;
  496. }
  497.  
  498. static void
  499. img_init_walk_gen7(struct ilo_image *img,
  500.                    const struct ilo_image_params *params)
  501. {
  502.    const struct pipe_resource *templ = params->templ;
  503.  
  504.    /*
  505.     * It is not explicitly states, but render targets are expected to be
  506.     * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
  507.     * to be IMS (samples interleaved).
  508.     *
  509.     * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
  510.     */
  511.    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
  512.       /*
  513.        * From the Ivy Bridge PRM, volume 1 part 1, page 111:
  514.        *
  515.        *     "note that the depth buffer and stencil buffer have an implied
  516.        *      value of ARYSPC_FULL"
  517.        */
  518.       img->walk = (templ->target == PIPE_TEXTURE_3D) ?
  519.          ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
  520.  
  521.       img->interleaved_samples = true;
  522.    } else {
  523.       /*
  524.        * From the Ivy Bridge PRM, volume 4 part 1, page 66:
  525.        *
  526.        *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number
  527.        *      of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
  528.        *      Array Spacing) must be set to ARYSPC_LOD0."
  529.        *
  530.        * As multisampled resources are not mipmapped, we never use
  531.        * ARYSPC_FULL for them.
  532.        */
  533.       if (templ->nr_samples > 1)
  534.          assert(templ->last_level == 0);
  535.  
  536.       img->walk =
  537.          (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
  538.          (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER :
  539.          ILO_IMAGE_WALK_LOD;
  540.  
  541.       img->interleaved_samples = false;
  542.    }
  543. }
  544.  
  545. static void
  546. img_init_walk_gen6(struct ilo_image *img,
  547.                    const struct ilo_image_params *params)
  548. {
  549.    /*
  550.     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
  551.     *
  552.     *     "The separate stencil buffer does not support mip mapping, thus the
  553.     *      storage for LODs other than LOD 0 is not needed. The following
  554.     *      QPitch equation applies only to the separate stencil buffer:
  555.     *
  556.     *        QPitch = h_0"
  557.     *
  558.     * GEN6 does not support compact spacing otherwise.
  559.     */
  560.    img->walk =
  561.       (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
  562.       (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
  563.       ILO_IMAGE_WALK_LAYER;
  564.  
  565.    /* GEN6 supports only interleaved samples */
  566.    img->interleaved_samples = true;
  567. }
  568.  
  569. static void
  570. img_init_walk(struct ilo_image *img,
  571.               const struct ilo_image_params *params)
  572. {
  573.    if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
  574.       img_init_walk_gen7(img, params);
  575.    else
  576.       img_init_walk_gen6(img, params);
  577. }
  578.  
  579. static unsigned
  580. img_get_valid_tilings(const struct ilo_image *img,
  581.                       const struct ilo_image_params *params)
  582. {
  583.    const struct pipe_resource *templ = params->templ;
  584.    const enum pipe_format format = img->format;
  585.    unsigned valid_tilings = params->valid_tilings;
  586.  
  587.    /*
  588.     * From the Sandy Bridge PRM, volume 1 part 2, page 32:
  589.     *
  590.     *     "Display/Overlay   Y-Major not supported.
  591.     *                        X-Major required for Async Flips"
  592.     */
  593.    if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
  594.       valid_tilings &= IMAGE_TILING_X;
  595.  
  596.    /*
  597.     * From the Sandy Bridge PRM, volume 3 part 2, page 158:
  598.     *
  599.     *     "The cursor surface address must be 4K byte aligned. The cursor must
  600.     *      be in linear memory, it cannot be tiled."
  601.     */
  602.    if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
  603.       valid_tilings &= IMAGE_TILING_NONE;
  604.  
  605.    /*
  606.     * From the Sandy Bridge PRM, volume 2 part 1, page 318:
  607.     *
  608.     *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
  609.     *      Depth Buffer is not supported."
  610.     *
  611.     *     "The Depth Buffer, if tiled, must use Y-Major tiling."
  612.     *
  613.     * From the Sandy Bridge PRM, volume 1 part 2, page 22:
  614.     *
  615.     *     "W-Major Tile Format is used for separate stencil."
  616.     */
  617.    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
  618.       switch (format) {
  619.       case PIPE_FORMAT_S8_UINT:
  620.          valid_tilings &= IMAGE_TILING_W;
  621.          break;
  622.       default:
  623.          valid_tilings &= IMAGE_TILING_Y;
  624.          break;
  625.       }
  626.    }
  627.  
  628.    if (templ->bind & PIPE_BIND_RENDER_TARGET) {
  629.       /*
  630.        * From the Sandy Bridge PRM, volume 1 part 2, page 32:
  631.        *
  632.        *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
  633.        *      either TileX or Linear."
  634.        *
  635.        * From the Haswell PRM, volume 5, page 32:
  636.        *
  637.        *     "NOTE: 128 BPP format color buffer (render target) supports
  638.        *      Linear, TiledX and TiledY."
  639.        */
  640.       if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16)
  641.          valid_tilings &= ~IMAGE_TILING_Y;
  642.  
  643.       /*
  644.        * From the Ivy Bridge PRM, volume 4 part 1, page 63:
  645.        *
  646.        *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
  647.        *      for all tiled Y Render Target surfaces."
  648.        *
  649.        *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
  650.        */
  651.       if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
  652.           ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
  653.           img->format == PIPE_FORMAT_R32G32B32_FLOAT)
  654.          valid_tilings &= ~IMAGE_TILING_Y;
  655.  
  656.       valid_tilings &= ~IMAGE_TILING_W;
  657.    }
  658.  
  659.    if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
  660.       if (ilo_dev_gen(params->dev) < ILO_GEN(8))
  661.          valid_tilings &= ~IMAGE_TILING_W;
  662.    }
  663.  
  664.    /* no conflicting binding flags */
  665.    assert(valid_tilings);
  666.  
  667.    return valid_tilings;
  668. }
  669.  
  670. static void
  671. img_init_size_and_format(struct ilo_image *img,
  672.                          struct ilo_image_params *params)
  673. {
  674.    const struct pipe_resource *templ = params->templ;
  675.    enum pipe_format format = templ->format;
  676.    bool require_separate_stencil = false;
  677.  
  678.    img->width0 = templ->width0;
  679.    img->height0 = templ->height0;
  680.    img->depth0 = templ->depth0;
  681.    img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
  682.  
  683.    /*
  684.     * From the Sandy Bridge PRM, volume 2 part 1, page 317:
  685.     *
  686.     *     "This field (Separate Stencil Buffer Enable) must be set to the same
  687.     *      value (enabled or disabled) as Hierarchical Depth Buffer Enable."
  688.     *
  689.     * GEN7+ requires separate stencil buffers.
  690.     */
  691.    if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
  692.       if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
  693.          require_separate_stencil = true;
  694.       else
  695.          require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
  696.    }
  697.  
  698.    switch (format) {
  699.    case PIPE_FORMAT_ETC1_RGB8:
  700.       format = PIPE_FORMAT_R8G8B8X8_UNORM;
  701.       break;
  702.    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
  703.       if (require_separate_stencil) {
  704.          format = PIPE_FORMAT_Z24X8_UNORM;
  705.          img->separate_stencil = true;
  706.       }
  707.       break;
  708.    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
  709.       if (require_separate_stencil) {
  710.          format = PIPE_FORMAT_Z32_FLOAT;
  711.          img->separate_stencil = true;
  712.       }
  713.       break;
  714.    default:
  715.       break;
  716.    }
  717.  
  718.    img->format = format;
  719.    img->block_width = util_format_get_blockwidth(format);
  720.    img->block_height = util_format_get_blockheight(format);
  721.    img->block_size = util_format_get_blocksize(format);
  722.  
  723.    params->valid_tilings = img_get_valid_tilings(img, params);
  724.    params->compressed = util_format_is_compressed(img->format);
  725. }
  726.  
  727. static bool
  728. img_want_mcs(const struct ilo_image *img,
  729.              const struct ilo_image_params *params)
  730. {
  731.    const struct pipe_resource *templ = params->templ;
  732.    bool want_mcs = false;
  733.  
  734.    /* MCS is for RT on GEN7+ */
  735.    if (ilo_dev_gen(params->dev) < ILO_GEN(7))
  736.       return false;
  737.  
  738.    if (templ->target != PIPE_TEXTURE_2D ||
  739.        !(templ->bind & PIPE_BIND_RENDER_TARGET))
  740.       return false;
  741.  
  742.    /*
  743.     * From the Ivy Bridge PRM, volume 4 part 1, page 77:
  744.     *
  745.     *     "For Render Target and Sampling Engine Surfaces:If the surface is
  746.     *      multisampled (Number of Multisamples any value other than
  747.     *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
  748.     *
  749.     *     "This field must be set to 0 for all SINT MSRTs when all RT channels
  750.     *      are not written"
  751.     */
  752.    if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) {
  753.       want_mcs = true;
  754.    } else if (templ->nr_samples <= 1) {
  755.       /*
  756.        * From the Ivy Bridge PRM, volume 2 part 1, page 326:
  757.        *
  758.        *     "When MCS is buffer is used for color clear of non-multisampler
  759.        *      render target, the following restrictions apply.
  760.        *      - Support is limited to tiled render targets.
  761.        *      - Support is for non-mip-mapped and non-array surface types
  762.        *        only.
  763.        *      - Clear is supported only on the full RT; i.e., no partial clear
  764.        *        or overlapping clears.
  765.        *      - MCS buffer for non-MSRT is supported only for RT formats
  766.        *        32bpp, 64bpp and 128bpp.
  767.        *      ..."
  768.        */
  769.       if (img->tiling != GEN6_TILING_NONE &&
  770.           templ->last_level == 0 && templ->array_size == 1) {
  771.          switch (img->block_size) {
  772.          case 4:
  773.          case 8:
  774.          case 16:
  775.             want_mcs = true;
  776.             break;
  777.          default:
  778.             break;
  779.          }
  780.       }
  781.    }
  782.  
  783.    return want_mcs;
  784. }
  785.  
  786. static bool
  787. img_want_hiz(const struct ilo_image *img,
  788.              const struct ilo_image_params *params)
  789. {
  790.    const struct pipe_resource *templ = params->templ;
  791.    const struct util_format_description *desc =
  792.       util_format_description(templ->format);
  793.  
  794.    if (ilo_debug & ILO_DEBUG_NOHIZ)
  795.       return false;
  796.  
  797.    if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
  798.       return false;
  799.  
  800.    if (!util_format_has_depth(desc))
  801.       return false;
  802.  
  803.    /* no point in having HiZ */
  804.    if (templ->usage == PIPE_USAGE_STAGING)
  805.       return false;
  806.  
  807.    /*
  808.     * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
  809.     * for every level.  This is generally fine except on GEN6, where HiZ and
  810.     * separate stencil are enabled and disabled at the same time.  When the
  811.     * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
  812.     * can result in incompatible formats.
  813.     */
  814.    if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
  815.        templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
  816.        templ->last_level)
  817.       return false;
  818.  
  819.    return true;
  820. }
  821.  
  822. static void
  823. img_init_aux(struct ilo_image *img,
  824.              const struct ilo_image_params *params)
  825. {
  826.    if (img_want_hiz(img, params))
  827.       img->aux.type = ILO_IMAGE_AUX_HIZ;
  828.    else if (img_want_mcs(img, params))
  829.       img->aux.type = ILO_IMAGE_AUX_MCS;
  830. }
  831.  
  832. static void
  833. img_align(struct ilo_image *img, struct ilo_image_params *params)
  834. {
  835.    const struct pipe_resource *templ = params->templ;
  836.    int align_w = 1, align_h = 1, pad_h = 0;
  837.  
  838.    /*
  839.     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
  840.     *
  841.     *     "To determine the necessary padding on the bottom and right side of
  842.     *      the surface, refer to the table in Section 7.18.3.4 for the i and j
  843.     *      parameters for the surface format in use. The surface must then be
  844.     *      extended to the next multiple of the alignment unit size in each
  845.     *      dimension, and all texels contained in this extended surface must
  846.     *      have valid GTT entries."
  847.     *
  848.     *     "For cube surfaces, an additional two rows of padding are required
  849.     *      at the bottom of the surface. This must be ensured regardless of
  850.     *      whether the surface is stored tiled or linear.  This is due to the
  851.     *      potential rotation of cache line orientation from memory to cache."
  852.     *
  853.     *     "For compressed textures (BC* and FXT1 surface formats), padding at
  854.     *      the bottom of the surface is to an even compressed row, which is
  855.     *      equal to a multiple of 8 uncompressed texel rows. Thus, for padding
  856.     *      purposes, these surfaces behave as if j = 8 only for surface
  857.     *      padding purposes. The value of 4 for j still applies for mip level
  858.     *      alignment and QPitch calculation."
  859.     */
  860.    if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
  861.       align_w = MAX2(align_w, img->align_i);
  862.       align_h = MAX2(align_h, img->align_j);
  863.  
  864.       if (templ->target == PIPE_TEXTURE_CUBE)
  865.          pad_h += 2;
  866.  
  867.       if (params->compressed)
  868.          align_h = MAX2(align_h, img->align_j * 2);
  869.    }
  870.  
  871.    /*
  872.     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
  873.     *
  874.     *     "If the surface contains an odd number of rows of data, a final row
  875.     *      below the surface must be allocated."
  876.     */
  877.    if (templ->bind & PIPE_BIND_RENDER_TARGET)
  878.       align_h = MAX2(align_h, 2);
  879.  
  880.    /*
  881.     * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  Pad to allow HiZ
  882.     * for unaligned non-mipmapped and non-array images.
  883.     */
  884.    if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
  885.        templ->last_level == 0 &&
  886.        templ->array_size == 1 &&
  887.        templ->depth0 == 1) {
  888.       align_w = MAX2(align_w, 8);
  889.       align_h = MAX2(align_h, 4);
  890.    }
  891.  
  892.    params->max_x = align(params->max_x, align_w);
  893.    params->max_y = align(params->max_y + pad_h, align_h);
  894. }
  895.  
  896. /* note that this may force the texture to be linear */
  897. static void
  898. img_calculate_bo_size(struct ilo_image *img,
  899.                       const struct ilo_image_params *params)
  900. {
  901.    assert(params->max_x % img->block_width == 0);
  902.    assert(params->max_y % img->block_height == 0);
  903.    assert(img->walk_layer_height % img->block_height == 0);
  904.  
  905.    img->bo_stride =
  906.       (params->max_x / img->block_width) * img->block_size;
  907.    img->bo_height = params->max_y / img->block_height;
  908.  
  909.    while (true) {
  910.       unsigned w = img->bo_stride, h = img->bo_height;
  911.       unsigned align_w, align_h;
  912.  
  913.       /*
  914.        * From the Haswell PRM, volume 5, page 163:
  915.        *
  916.        *     "For linear surfaces, additional padding of 64 bytes is required
  917.        *      at the bottom of the surface. This is in addition to the padding
  918.        *      required above."
  919.        */
  920.       if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
  921.           (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
  922.           img->tiling == GEN6_TILING_NONE)
  923.          h += (64 + img->bo_stride - 1) / img->bo_stride;
  924.  
  925.       /*
  926.        * From the Sandy Bridge PRM, volume 4 part 1, page 81:
  927.        *
  928.        *     "- For linear render target surfaces, the pitch must be a
  929.        *        multiple of the element size for non-YUV surface formats.
  930.        *        Pitch must be a multiple of 2 * element size for YUV surface
  931.        *        formats.
  932.        *      - For other linear surfaces, the pitch can be any multiple of
  933.        *        bytes.
  934.        *      - For tiled surfaces, the pitch must be a multiple of the tile
  935.        *        width."
  936.        *
  937.        * Different requirements may exist when the bo is used in different
  938.        * places, but our alignments here should be good enough that we do not
  939.        * need to check params->templ->bind.
  940.        */
  941.       switch (img->tiling) {
  942.       case GEN6_TILING_X:
  943.          align_w = 512;
  944.          align_h = 8;
  945.          break;
  946.       case GEN6_TILING_Y:
  947.          align_w = 128;
  948.          align_h = 32;
  949.          break;
  950.       case GEN8_TILING_W:
  951.          /*
  952.           * From the Sandy Bridge PRM, volume 1 part 2, page 22:
  953.           *
  954.           *     "A 4KB tile is subdivided into 8-high by 8-wide array of
  955.           *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
  956.           *      bytes."
  957.           */
  958.          align_w = 64;
  959.          align_h = 64;
  960.          break;
  961.       default:
  962.          assert(img->tiling == GEN6_TILING_NONE);
  963.          /* some good enough values */
  964.          align_w = 64;
  965.          align_h = 2;
  966.          break;
  967.       }
  968.  
  969.       w = align(w, align_w);
  970.       h = align(h, align_h);
  971.  
  972.       /* make sure the bo is mappable */
  973.       if (img->tiling != GEN6_TILING_NONE) {
  974.          /*
  975.           * Usually only the first 256MB of the GTT is mappable.
  976.           *
  977.           * See also how intel_context::max_gtt_map_object_size is calculated.
  978.           */
  979.          const size_t mappable_gtt_size = 256 * 1024 * 1024;
  980.  
  981.          /*
  982.           * Be conservative.  We may be able to switch from VALIGN_4 to
  983.           * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
  984.           */
  985.          if (mappable_gtt_size / w / 4 < h) {
  986.             if (params->valid_tilings & IMAGE_TILING_NONE) {
  987.                img->tiling = GEN6_TILING_NONE;
  988.                /* MCS support for non-MSRTs is limited to tiled RTs */
  989.                if (img->aux.type == ILO_IMAGE_AUX_MCS &&
  990.                    params->templ->nr_samples <= 1)
  991.                   img->aux.type = ILO_IMAGE_AUX_NONE;
  992.  
  993.                continue;
  994.             } else {
  995.                ilo_warn("cannot force texture to be linear\n");
  996.             }
  997.          }
  998.       }
  999.  
  1000.       img->bo_stride = w;
  1001.       img->bo_height = h;
  1002.       break;
  1003.    }
  1004. }
  1005.  
  1006. static void
  1007. img_calculate_hiz_size(struct ilo_image *img,
  1008.                        const struct ilo_image_params *params)
  1009. {
  1010.    const struct pipe_resource *templ = params->templ;
  1011.    const unsigned hz_align_j = 8;
  1012.    enum ilo_image_walk_type hz_walk;
  1013.    unsigned hz_width, hz_height, lv;
  1014.    unsigned hz_clear_w, hz_clear_h;
  1015.  
  1016.    assert(img->aux.type == ILO_IMAGE_AUX_HIZ);
  1017.  
  1018.    assert(img->walk == ILO_IMAGE_WALK_LAYER ||
  1019.           img->walk == ILO_IMAGE_WALK_3D);
  1020.  
  1021.    /*
  1022.     * From the Sandy Bridge PRM, volume 2 part 1, page 312:
  1023.     *
  1024.     *     "The hierarchical depth buffer does not support the LOD field, it is
  1025.     *      assumed by hardware to be zero. A separate hierarachical depth
  1026.     *      buffer is required for each LOD used, and the corresponding
  1027.     *      buffer's state delivered to hardware each time a new depth buffer
  1028.     *      state with modified LOD is delivered."
  1029.     *
  1030.     * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
  1031.     */
  1032.    if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
  1033.       hz_walk = img->walk;
  1034.    else
  1035.       hz_walk = ILO_IMAGE_WALK_LOD;
  1036.  
  1037.    /*
  1038.     * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
  1039.     * PRM, volume 2 part 1, page 312-313.
  1040.     *
  1041.     * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
  1042.     * memory row.
  1043.     */
  1044.    switch (hz_walk) {
  1045.    case ILO_IMAGE_WALK_LAYER:
  1046.       {
  1047.          const unsigned h0 = align(params->h0, hz_align_j);
  1048.          const unsigned h1 = align(params->h1, hz_align_j);
  1049.          const unsigned htail =
  1050.             ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
  1051.          const unsigned hz_qpitch = h0 + h1 + htail;
  1052.  
  1053.          hz_width = align(img->lods[0].slice_width, 16);
  1054.  
  1055.          hz_height = hz_qpitch * templ->array_size / 2;
  1056.          if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
  1057.             hz_height = align(hz_height, 8);
  1058.  
  1059.          img->aux.walk_layer_height = hz_qpitch;
  1060.       }
  1061.       break;
  1062.    case ILO_IMAGE_WALK_LOD:
  1063.       {
  1064.          unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
  1065.          unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
  1066.          unsigned cur_tx, cur_ty;
  1067.  
  1068.          /* figure out the tile offsets of LODs */
  1069.          hz_width = 0;
  1070.          hz_height = 0;
  1071.          cur_tx = 0;
  1072.          cur_ty = 0;
  1073.          for (lv = 0; lv <= templ->last_level; lv++) {
  1074.             unsigned tw, th;
  1075.  
  1076.             lod_tx[lv] = cur_tx;
  1077.             lod_ty[lv] = cur_ty;
  1078.  
  1079.             tw = align(img->lods[lv].slice_width, 16);
  1080.             th = align(img->lods[lv].slice_height, hz_align_j) *
  1081.                templ->array_size / 2;
  1082.             /* convert to Y-tiles */
  1083.             tw = align(tw, 128) / 128;
  1084.             th = align(th, 32) / 32;
  1085.  
  1086.             if (hz_width < cur_tx + tw)
  1087.                hz_width = cur_tx + tw;
  1088.             if (hz_height < cur_ty + th)
  1089.                hz_height = cur_ty + th;
  1090.  
  1091.             if (lv == 1)
  1092.                cur_tx += tw;
  1093.             else
  1094.                cur_ty += th;
  1095.          }
  1096.  
  1097.          /* convert tile offsets to memory offsets */
  1098.          for (lv = 0; lv <= templ->last_level; lv++) {
  1099.             img->aux.walk_lod_offsets[lv] =
  1100.                (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
  1101.          }
  1102.          hz_width *= 128;
  1103.          hz_height *= 32;
  1104.       }
  1105.       break;
  1106.    case ILO_IMAGE_WALK_3D:
  1107.       hz_width = align(img->lods[0].slice_width, 16);
  1108.  
  1109.       hz_height = 0;
  1110.       for (lv = 0; lv <= templ->last_level; lv++) {
  1111.          const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
  1112.          /* according to the formula, slices are packed together vertically */
  1113.          hz_height += h * u_minify(templ->depth0, lv);
  1114.       }
  1115.       hz_height /= 2;
  1116.       break;
  1117.    default:
  1118.       assert(!"unknown HiZ walk");
  1119.       hz_width = 0;
  1120.       hz_height = 0;
  1121.       break;
  1122.    }
  1123.  
  1124.    /*
  1125.     * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
  1126.     * Experiments on Haswell show that aligning the RECTLIST primitive and
  1127.     * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
  1128.     * aligned.
  1129.     */
  1130.    hz_clear_w = 8;
  1131.    hz_clear_h = 4;
  1132.    switch (templ->nr_samples) {
  1133.    case 0:
  1134.    case 1:
  1135.    default:
  1136.       break;
  1137.    case 2:
  1138.       hz_clear_w /= 2;
  1139.       break;
  1140.    case 4:
  1141.       hz_clear_w /= 2;
  1142.       hz_clear_h /= 2;
  1143.       break;
  1144.    case 8:
  1145.       hz_clear_w /= 4;
  1146.       hz_clear_h /= 2;
  1147.       break;
  1148.    case 16:
  1149.       hz_clear_w /= 4;
  1150.       hz_clear_h /= 4;
  1151.       break;
  1152.    }
  1153.  
  1154.    for (lv = 0; lv <= templ->last_level; lv++) {
  1155.       if (u_minify(img->width0, lv) % hz_clear_w ||
  1156.           u_minify(img->height0, lv) % hz_clear_h)
  1157.          break;
  1158.       img->aux.enables |= 1 << lv;
  1159.    }
  1160.  
  1161.    /* we padded to allow this in img_align() */
  1162.    if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
  1163.       img->aux.enables |= 0x1;
  1164.  
  1165.    /* align to Y-tile */
  1166.    img->aux.bo_stride = align(hz_width, 128);
  1167.    img->aux.bo_height = align(hz_height, 32);
  1168. }
  1169.  
  1170. static void
  1171. img_calculate_mcs_size(struct ilo_image *img,
  1172.                        const struct ilo_image_params *params)
  1173. {
  1174.    const struct pipe_resource *templ = params->templ;
  1175.    int mcs_width, mcs_height, mcs_cpp;
  1176.    int downscale_x, downscale_y;
  1177.  
  1178.    assert(img->aux.type == ILO_IMAGE_AUX_MCS);
  1179.  
  1180.    if (templ->nr_samples > 1) {
  1181.       /*
  1182.        * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
  1183.        * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
  1184.        * need of scale down could be that the clear rectangle is used to clear
  1185.        * the MCS instead of the RT.
  1186.        *
  1187.        * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT.  The
  1188.        * 2x2 factor could come from that the hardware writes 128 bits (an
  1189.        * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
  1190.        * the RT.  For 4X MSAA, we need 8 bits in MCS for every pixel in the
  1191.        * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
  1192.        * pixel block in the RT.
  1193.        */
  1194.       switch (templ->nr_samples) {
  1195.       case 2:
  1196.       case 4:
  1197.          downscale_x = 8;
  1198.          downscale_y = 2;
  1199.          mcs_cpp = 1;
  1200.          break;
  1201.       case 8:
  1202.          downscale_x = 2;
  1203.          downscale_y = 2;
  1204.          mcs_cpp = 4;
  1205.          break;
  1206.       case 16:
  1207.          downscale_x = 2;
  1208.          downscale_y = 1;
  1209.          mcs_cpp = 8;
  1210.          break;
  1211.       default:
  1212.          assert(!"unsupported sample count");
  1213.          return;
  1214.          break;
  1215.       }
  1216.  
  1217.       /*
  1218.        * It also appears that the 2x2 subspans generated by the scaled-down
  1219.        * clear rectangle cannot be masked.  The scale-down clear rectangle
  1220.        * thus must be aligned to 2x2, and we need to pad.
  1221.        */
  1222.       mcs_width = align(img->width0, downscale_x * 2);
  1223.       mcs_height = align(img->height0, downscale_y * 2);
  1224.    } else {
  1225.       /*
  1226.        * From the Ivy Bridge PRM, volume 2 part 1, page 327:
  1227.        *
  1228.        *     "              Pixels  Lines
  1229.        *      TiledY RT CL
  1230.        *          bpp
  1231.        *          32          8        4
  1232.        *          64          4        4
  1233.        *          128         2        4
  1234.        *
  1235.        *      TiledX RT CL
  1236.        *          bpp
  1237.        *          32          16       2
  1238.        *          64          8        2
  1239.        *          128         4        2"
  1240.        *
  1241.        * This table and the two following tables define the RT alignments, the
  1242.        * clear rectangle alignments, and the clear rectangle scale factors.
  1243.        * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
  1244.        * that the clear rectangle alignments are 16x32 blocks, and the clear
  1245.        * rectangle scale factors are 8x16 blocks.
  1246.        *
  1247.        * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
  1248.        * RT.  Similar to the MSAA cases, we can argue that an OWord maps to
  1249.        * 8x16 blocks.
  1250.        *
  1251.        * One problem with this reasoning is that a Y-tile in MCS has 8x32
  1252.        * OWords and maps to 64x512 128-byte blocks.  This differs from i965,
  1253.        * which says that a Y-tile maps to 128x256 blocks (\see
  1254.        * intel_get_non_msrt_mcs_alignment).  It does not really change
  1255.        * anything except for the size of the allocated MCS.  Let's see if we
  1256.        * hit out-of-bound access.
  1257.        */
  1258.       switch (img->tiling) {
  1259.       case GEN6_TILING_X:
  1260.          downscale_x = 64 / img->block_size;
  1261.          downscale_y = 2;
  1262.          break;
  1263.       case GEN6_TILING_Y:
  1264.          downscale_x = 32 / img->block_size;
  1265.          downscale_y = 4;
  1266.          break;
  1267.       default:
  1268.          assert(!"unsupported tiling mode");
  1269.          return;
  1270.          break;
  1271.       }
  1272.  
  1273.       downscale_x *= 8;
  1274.       downscale_y *= 16;
  1275.  
  1276.       /*
  1277.        * From the Haswell PRM, volume 7, page 652:
  1278.        *
  1279.        *     "Clear rectangle must be aligned to two times the number of
  1280.        *      pixels in the table shown below due to 16X16 hashing across the
  1281.        *      slice."
  1282.        *
  1283.        * The scaled-down clear rectangle must be aligned to 4x4 instead of
  1284.        * 2x2, and we need to pad.
  1285.        */
  1286.       mcs_width = align(img->width0, downscale_x * 4) / downscale_x;
  1287.       mcs_height = align(img->height0, downscale_y * 4) / downscale_y;
  1288.       mcs_cpp = 16; /* an OWord */
  1289.    }
  1290.  
  1291.    img->aux.enables = (1 << (templ->last_level + 1)) - 1;
  1292.    /* align to Y-tile */
  1293.    img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
  1294.    img->aux.bo_height = align(mcs_height, 32);
  1295. }
  1296.  
  1297. static void
  1298. img_init(struct ilo_image *img,
  1299.          struct ilo_image_params *params)
  1300. {
  1301.    /* there are hard dependencies between every function here */
  1302.  
  1303.    img_init_aux(img, params);
  1304.    img_init_size_and_format(img, params);
  1305.    img_init_walk(img, params);
  1306.    img_init_tiling(img, params);
  1307.    img_init_alignments(img, params);
  1308.    img_init_lods(img, params);
  1309.    img_init_layer_height(img, params);
  1310.  
  1311.    img_align(img, params);
  1312.    img_calculate_bo_size(img, params);
  1313.  
  1314.    img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT);
  1315.  
  1316.    switch (img->aux.type) {
  1317.    case ILO_IMAGE_AUX_HIZ:
  1318.       img_calculate_hiz_size(img, params);
  1319.       break;
  1320.    case ILO_IMAGE_AUX_MCS:
  1321.       img_calculate_mcs_size(img, params);
  1322.       break;
  1323.    default:
  1324.       break;
  1325.    }
  1326. }
  1327.  
  1328. /**
  1329.  * The texutre is for transfer only.  We can define our own layout to save
  1330.  * space.
  1331.  */
  1332. static void
  1333. img_init_for_transfer(struct ilo_image *img,
  1334.                       const struct ilo_dev *dev,
  1335.                       const struct pipe_resource *templ)
  1336. {
  1337.    const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
  1338.       templ->depth0 : templ->array_size;
  1339.    unsigned layer_width, layer_height;
  1340.  
  1341.    assert(templ->last_level == 0);
  1342.    assert(templ->nr_samples <= 1);
  1343.  
  1344.    img->aux.type = ILO_IMAGE_AUX_NONE;
  1345.  
  1346.    img->width0 = templ->width0;
  1347.    img->height0 = templ->height0;
  1348.    img->depth0 = templ->depth0;
  1349.    img->sample_count = 1;
  1350.  
  1351.    img->format = templ->format;
  1352.    img->block_width = util_format_get_blockwidth(templ->format);
  1353.    img->block_height = util_format_get_blockheight(templ->format);
  1354.    img->block_size = util_format_get_blocksize(templ->format);
  1355.  
  1356.    img->walk = ILO_IMAGE_WALK_LOD;
  1357.  
  1358.    img->tiling = GEN6_TILING_NONE;
  1359.  
  1360.    img->align_i = img->block_width;
  1361.    img->align_j = img->block_height;
  1362.  
  1363.    assert(util_is_power_of_two(img->block_width) &&
  1364.           util_is_power_of_two(img->block_height));
  1365.  
  1366.    /* use packed layout */
  1367.    layer_width = align(templ->width0, img->align_i);
  1368.    layer_height = align(templ->height0, img->align_j);
  1369.  
  1370.    img->lods[0].slice_width = layer_width;
  1371.    img->lods[0].slice_height = layer_height;
  1372.  
  1373.    img->bo_stride = (layer_width / img->block_width) * img->block_size;
  1374.    img->bo_stride = align(img->bo_stride, 64);
  1375.  
  1376.    img->bo_height = (layer_height / img->block_height) * num_layers;
  1377. }
  1378.  
  1379. /**
  1380.  * Initialize the image.  Callers should zero-initialize \p img first.
  1381.  */
  1382. void ilo_image_init(struct ilo_image *img,
  1383.                     const struct ilo_dev *dev,
  1384.                     const struct pipe_resource *templ)
  1385. {
  1386.    struct ilo_image_params params;
  1387.    bool transfer_only;
  1388.  
  1389.    /* use transfer layout when the texture is never bound to GPU */
  1390.    transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
  1391.                                      PIPE_BIND_TRANSFER_READ));
  1392.    if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
  1393.       img_init_for_transfer(img, dev, templ);
  1394.       return;
  1395.    }
  1396.  
  1397.    memset(&params, 0, sizeof(params));
  1398.    params.dev = dev;
  1399.    params.templ = templ;
  1400.    params.valid_tilings = IMAGE_TILING_ALL;
  1401.  
  1402.    img_init(img, &params);
  1403. }
  1404.  
  1405. bool
  1406. ilo_image_init_for_imported(struct ilo_image *img,
  1407.                             const struct ilo_dev *dev,
  1408.                             const struct pipe_resource *templ,
  1409.                             enum gen_surface_tiling tiling,
  1410.                             unsigned bo_stride)
  1411. {
  1412.    struct ilo_image_params params;
  1413.  
  1414.    if ((tiling == GEN6_TILING_X && bo_stride % 512) ||
  1415.        (tiling == GEN6_TILING_Y && bo_stride % 128) ||
  1416.        (tiling == GEN8_TILING_W && bo_stride % 64))
  1417.       return false;
  1418.  
  1419.    memset(&params, 0, sizeof(params));
  1420.    params.dev = dev;
  1421.    params.templ = templ;
  1422.    params.valid_tilings = 1 << tiling;
  1423.  
  1424.    img_init(img, &params);
  1425.  
  1426.    assert(img->tiling == tiling);
  1427.    if (img->bo_stride > bo_stride)
  1428.       return false;
  1429.  
  1430.    img->bo_stride = bo_stride;
  1431.  
  1432.    /* assume imported RTs are also scanouts */
  1433.    if (!img->scanout)
  1434.       img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
  1435.  
  1436.    return true;
  1437. }
  1438.