Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Broadcom
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. /** @file vc4_tiling.c
  25.  *
  26.  * Handles information about the VC4 tiling formats, and loading and storing
  27.  * from them.
  28.  *
  29.  * Texture mipmap levels on VC4 are (with the exception of 32-bit RGBA raster
  30.  * textures for scanout) stored as groups of microtiles.  If the texture is at
  31.  * least 4x4 microtiles (utiles), then those microtiles are arranged in a sort
  32.  * of Hilbert-fractal-ish layout (T), otherwise the microtiles are in raster
  33.  * order (LT).
  34.  *
  35.  * Specifically, the T format has:
  36.  *
  37.  * - 64b utiles of pixels in a raster-order grid according to cpp.  It's 4x4
  38.  *   pixels at 32 bit depth.
  39.  *
  40.  * - 1k subtiles made of a 4x4 raster-order grid of 64b utiles (so usually
  41.  *   16x16 pixels).
  42.  *
  43.  * - 4k tiles made of a 2x2 grid of 1k subtiles (so usually 32x32 pixels).  On
  44.  *   even 4k tile rows, they're arranged as (BL, TL, TR, BR), and on odd rows
  45.  *   they're (TR, BR, BL, TL), where bottom left is start of memory.
  46.  *
  47.  * - an image made of 4k tiles in rows either left-to-right (even rows of 4k
  48.  *   tiles) or right-to-left (odd rows of 4k tiles).
  49.  */
  50.  
  51. #include "vc4_screen.h"
  52. #include "vc4_context.h"
  53. #include "vc4_tiling.h"
  54.  
  55. /** Return the width in pixels of a 64-byte microtile. */
  56. uint32_t
  57. vc4_utile_width(int cpp)
  58. {
  59.         switch (cpp) {
  60.         case 1:
  61.         case 2:
  62.                 return 8;
  63.         case 4:
  64.                 return 4;
  65.         case 8:
  66.                 return 2;
  67.         default:
  68.                 fprintf(stderr, "unknown cpp: %d\n", cpp);
  69.                 abort();
  70.         }
  71. }
  72.  
  73. /** Return the height in pixels of a 64-byte microtile. */
  74. uint32_t
  75. vc4_utile_height(int cpp)
  76. {
  77.         switch (cpp) {
  78.         case 1:
  79.                 return 8;
  80.         case 2:
  81.         case 4:
  82.         case 8:
  83.                 return 4;
  84.         default:
  85.                 fprintf(stderr, "unknown cpp: %d\n", cpp);
  86.                 abort();
  87.         }
  88. }
  89.  
  90. /**
  91.  * The texture unit decides what tiling format a particular miplevel is using
  92.  * this function, so we lay out our miptrees accordingly.
  93.  */
  94. bool
  95. vc4_size_is_lt(uint32_t width, uint32_t height, int cpp)
  96. {
  97.         return (width <= 4 * vc4_utile_width(cpp) ||
  98.                 height <= 4 * vc4_utile_height(cpp));
  99. }
  100.  
  101. void
  102. vc4_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp)
  103. {
  104.         uint32_t utile_h = vc4_utile_height(cpp);
  105.         uint32_t row_size = 64 / utile_h;
  106.  
  107.         for (int y = 0; y < utile_h; y++) {
  108.                 memcpy(dst, src, row_size);
  109.                 dst += dst_stride;
  110.                 src += row_size;
  111.         }
  112. }
  113.  
  114. void
  115. vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp)
  116. {
  117.         uint32_t utile_h = vc4_utile_height(cpp);
  118.         uint32_t row_size = 64 / utile_h;
  119.  
  120.         for (int y = 0; y < utile_h; y++) {
  121.                 memcpy(dst, src, row_size);
  122.                 dst += row_size;
  123.                 src += src_stride;
  124.         }
  125. }
  126.  
  127. static void
  128. check_box_utile_alignment(const struct pipe_box *box, int cpp)
  129. {
  130.         uint32_t utile_w = vc4_utile_width(cpp);
  131.         uint32_t utile_h = vc4_utile_height(cpp);
  132.  
  133.         assert(!(box->x & (utile_w - 1)));
  134.         assert(!(box->y & (utile_h - 1)));
  135.         assert(!(box->width & (utile_w - 1)));
  136.         assert(!(box->height & (utile_h - 1)));
  137. }
  138.  
  139. static void
  140. vc4_load_lt_image(void *dst, uint32_t dst_stride,
  141.                   void *src, uint32_t src_stride,
  142.                   int cpp, const struct pipe_box *box)
  143. {
  144.         uint32_t utile_w = vc4_utile_width(cpp);
  145.         uint32_t utile_h = vc4_utile_height(cpp);
  146.         uint32_t xstart = box->x / utile_w;
  147.         uint32_t ystart = box->y / utile_h;
  148.  
  149.         for (uint32_t y = 0; y < box->height; y += utile_h) {
  150.                 for (int x = 0; x < box->width; x += utile_w) {
  151.                         vc4_load_utile(dst + (dst_stride * y +
  152.                                               x * cpp),
  153.                                        src + ((ystart + y) * src_stride +
  154.                                               (xstart + x) * 64 / utile_w),
  155.                                        dst_stride, cpp);
  156.                 }
  157.         }
  158. }
  159.  
  160. static void
  161. vc4_store_lt_image(void *dst, uint32_t dst_stride,
  162.                    void *src, uint32_t src_stride,
  163.                    int cpp, const struct pipe_box *box)
  164. {
  165.         uint32_t utile_w = vc4_utile_width(cpp);
  166.         uint32_t utile_h = vc4_utile_height(cpp);
  167.         uint32_t xstart = box->x / utile_w;
  168.         uint32_t ystart = box->y / utile_h;
  169.  
  170.         for (uint32_t y = 0; y < box->height; y += utile_h) {
  171.                 for (int x = 0; x < box->width; x += utile_w) {
  172.                         vc4_store_utile(dst + ((ystart + y) * dst_stride +
  173.                                                (xstart + x) * 64 / utile_w),
  174.                                         src + (src_stride * y +
  175.                                                x * cpp),
  176.                                         src_stride, cpp);
  177.                 }
  178.         }
  179. }
  180.  
  181. /**
  182.  * Takes a utile x and y (and the number of utiles of width of the image) and
  183.  * returns the offset to the utile within a VC4_TILING_FORMAT_TF image.
  184.  */
  185. static uint32_t
  186. t_utile_address(uint32_t utile_x, uint32_t utile_y,
  187.                 uint32_t utile_stride)
  188. {
  189.         /* T images have to be aligned to 8 utiles (4x4 subtiles, which are
  190.          * 2x2 in a 4k tile).
  191.          */
  192.         assert(!(utile_stride & 7));
  193.         uint32_t tile_stride = utile_stride >> 3;
  194.         /* 4k tile offsets. */
  195.         uint32_t tile_x = utile_x >> 3;
  196.         uint32_t tile_y = utile_y >> 3;
  197.         bool odd_tile_y = tile_y & 1;
  198.  
  199.         /* Odd lines of 4k tiles go right-to-left. */
  200.         if (odd_tile_y)
  201.                 tile_x = tile_stride - tile_x - 1;
  202.  
  203.         uint32_t tile_offset = 4096 * (tile_y * tile_stride + tile_x);
  204.  
  205.         uint32_t stile_x = (utile_x >> 2) & 1;
  206.         uint32_t stile_y = (utile_y >> 2) & 1;
  207.         uint32_t stile_index = (stile_y << 1) + stile_x;
  208.         static const uint32_t odd_stile_map[4] = {2, 1, 3, 0};
  209.         static const uint32_t even_stile_map[4] = {0, 3, 1, 2};
  210.  
  211.         uint32_t stile_offset = 1024 * (odd_tile_y ?
  212.                                         odd_stile_map[stile_index] :
  213.                                         even_stile_map[stile_index]);
  214.  
  215.         uint32_t utile_offset = 64 * ((utile_y & 3) * 4 + (utile_x & 3));
  216.  
  217. #if 0
  218.         fprintf(stderr, "utile %d,%d -> %d + %d + %d (stride %d,%d) = %d\n",
  219.                 utile_x, utile_y,
  220.                 tile_offset, stile_offset, utile_offset,
  221.                 utile_stride, tile_stride,
  222.                 tile_offset + stile_offset + utile_offset);
  223. #endif
  224.  
  225.         return tile_offset + stile_offset + utile_offset;
  226. }
  227.  
  228. static void
  229. vc4_load_t_image(void *dst, uint32_t dst_stride,
  230.                  void *src, uint32_t src_stride,
  231.                  int cpp, const struct pipe_box *box)
  232. {
  233.         uint32_t utile_w = vc4_utile_width(cpp);
  234.         uint32_t utile_h = vc4_utile_height(cpp);
  235.         uint32_t utile_stride = src_stride / cpp / utile_w;
  236.         uint32_t xstart = box->x / utile_w;
  237.         uint32_t ystart = box->y / utile_h;
  238.  
  239.         for (uint32_t y = 0; y < box->height / utile_h; y++) {
  240.                 for (int x = 0; x < box->width / utile_w; x++) {
  241.                         vc4_load_utile(dst + (y * utile_h * dst_stride +
  242.                                               x * utile_w * cpp),
  243.                                        src + t_utile_address(xstart + x,
  244.                                                              ystart + y,
  245.                                                              utile_stride),
  246.                                        dst_stride, cpp);
  247.                 }
  248.         }
  249. }
  250.  
  251. static void
  252. vc4_store_t_image(void *dst, uint32_t dst_stride,
  253.                   void *src, uint32_t src_stride,
  254.                   int cpp, const struct pipe_box *box)
  255. {
  256.         uint32_t utile_w = vc4_utile_width(cpp);
  257.         uint32_t utile_h = vc4_utile_height(cpp);
  258.         uint32_t utile_stride = dst_stride / cpp / utile_w;
  259.         uint32_t xstart = box->x / utile_w;
  260.         uint32_t ystart = box->y / utile_h;
  261.  
  262.         for (uint32_t y = 0; y < box->height / utile_h; y++) {
  263.                 for (int x = 0; x < box->width / utile_w; x++) {
  264.                         vc4_store_utile(dst + t_utile_address(xstart + x,
  265.                                                               ystart + y,
  266.                                                               utile_stride),
  267.                                         src + (y * utile_h * src_stride +
  268.                                                x * utile_w * cpp),
  269.                                         src_stride, cpp);
  270.                 }
  271.         }
  272. }
  273.  
  274. /**
  275.  * Loads pixel data from the start (microtile-aligned) box in @src to the
  276.  * start of @dst according to the given tiling format.
  277.  */
  278. void
  279. vc4_load_tiled_image(void *dst, uint32_t dst_stride,
  280.                      void *src, uint32_t src_stride,
  281.                      uint8_t tiling_format, int cpp,
  282.                      const struct pipe_box *box)
  283. {
  284.         check_box_utile_alignment(box, cpp);
  285.  
  286.         if (tiling_format == VC4_TILING_FORMAT_LT) {
  287.                 vc4_load_lt_image(dst, dst_stride,
  288.                                   src, src_stride,
  289.                                   cpp, box);
  290.         } else {
  291.                 assert(tiling_format == VC4_TILING_FORMAT_T);
  292.                 vc4_load_t_image(dst, dst_stride,
  293.                                  src, src_stride,
  294.                                  cpp, box);
  295.         }
  296. }
  297.  
  298. /**
  299.  * Stores pixel data from the start of @src into a (microtile-aligned) box in
  300.  * @dst according to the given tiling format.
  301.  */
  302. void
  303. vc4_store_tiled_image(void *dst, uint32_t dst_stride,
  304.                       void *src, uint32_t src_stride,
  305.                       uint8_t tiling_format, int cpp,
  306.                       const struct pipe_box *box)
  307. {
  308.         check_box_utile_alignment(box, cpp);
  309.  
  310.         if (tiling_format == VC4_TILING_FORMAT_LT) {
  311.                 vc4_store_lt_image(dst, dst_stride,
  312.                                    src, src_stride,
  313.                                    cpp, box);
  314.         } else {
  315.                 assert(tiling_format == VC4_TILING_FORMAT_T);
  316.                 vc4_store_t_image(dst, dst_stride,
  317.                                   src, src_stride,
  318.                                   cpp, box);
  319.         }
  320. }
  321.  
  322.