/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "util/u_surface.h"
#include "util/u_transfer.h"
#include "util/u_format_etc.h"

#include "ilo_cp.h"
#include "ilo_context.h"
#include "ilo_resource.h"
#include "ilo_state.h"
#include "ilo_transfer.h"

static bool
is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_flush)
{
   const bool referenced = intel_bo_references(ilo->cp->bo, bo);

   if (need_flush)
      *need_flush = referenced;

   if (referenced)
      return true;

   return intel_bo_is_busy(bo);
}

static bool
map_bo_for_transfer(struct ilo_context *ilo, struct intel_bo *bo,
                    const struct ilo_transfer *xfer)
{
   int err;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
      err = intel_bo_map(bo, (xfer->base.usage & PIPE_TRANSFER_WRITE));
      break;
   case ILO_TRANSFER_MAP_GTT:
      err = intel_bo_map_gtt(bo);
      break;
   case ILO_TRANSFER_MAP_UNSYNC:
      err = intel_bo_map_unsynchronized(bo);
      break;
   default:
      assert(!"unknown mapping method");
      err = -1;
      break;
   }

   return !err;
}

/**
 * Choose the best mapping method, depending on the transfer usage and whether
 * the bo is busy.
 */
static bool
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct pipe_resource *res = xfer->base.resource;
   const unsigned usage = xfer->base.usage;
   /* prefer map() when there is a last-level cache */
   const bool prefer_cpu =
      (ilo->dev->has_llc || (usage & PIPE_TRANSFER_READ));
   struct ilo_texture *tex;
   struct ilo_buffer *buf;
   struct intel_bo *bo;
   bool tiled, need_flush;

   if (res->target == PIPE_BUFFER) {
      tex = NULL;

      buf = ilo_buffer(res);
      bo = buf->bo;
      tiled = false;
   }
   else {
      buf = NULL;

      tex = ilo_texture(res);
      bo = tex->bo;
      tiled = (tex->tiling != INTEL_TILING_NONE);
   }

   /* choose between mapping through CPU or GTT */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      /* we do not want fencing */
      if (tiled || prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }
   else {
      if (!tiled && prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         /* nothing we can do */
      }
      else if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         /* unsynchronized gtt mapping does not stall */
         xfer->method = ILO_TRANSFER_MAP_UNSYNC;
         will_stall = false;
      }
      else if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* discard old bo and allocate a new one for mapping */
         if ((tex && ilo_texture_alloc_bo(tex)) ||
             (buf && ilo_buffer_alloc_bo(buf))) {
            ilo_mark_states_with_resource_dirty(ilo, res);
            will_stall = false;
         }
      }
      else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
         /*
          * We could allocate and return a system buffer here.  When a region of
          * the buffer is explicitly flushed, we pwrite() the region to a
          * temporary bo and emit a pipelined copy blit.
          *
          * For now, do nothing.
          */
      }
      else if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /*
          * We could allocate a temporary bo for mapping and emit a pipelined
          * copy blit upon unmapping.
          *
          * For now, do nothing.
          */
      }

      if (will_stall) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* flush to make the bo busy, so that map() stalls as it should */
         if (need_flush)
            ilo_cp_flush(ilo->cp);
      }
   }

   if (tex && !(usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
      if (tex->separate_s8 || tex->bo_format == PIPE_FORMAT_S8_UINT)
         xfer->method = ILO_TRANSFER_MAP_SW_ZS;
      /* need to convert on-the-fly */
      else if (tex->bo_format != tex->base.format)
         xfer->method = ILO_TRANSFER_MAP_SW_CONVERT;
   }

   return true;
}

static void
tex_get_box_origin(const struct ilo_texture *tex,
                   unsigned level, unsigned slice,
                   const struct pipe_box *box,
                   unsigned *mem_x, unsigned *mem_y)
{
   unsigned x, y;

   x = tex->slice_offsets[level][slice + box->z].x + box->x;
   y = tex->slice_offsets[level][slice + box->z].y + box->y;

   assert(x % tex->block_width == 0 && y % tex->block_height == 0);

   *mem_x = x / tex->block_width * tex->bo_cpp;
   *mem_y = y / tex->block_height;
}

static unsigned
tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
                   const struct pipe_box *box)
{
   unsigned mem_x, mem_y;

   tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y);

   return mem_y * tex->bo_stride + mem_x;
}

static unsigned
tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
{
   unsigned qpitch;

   /* there is no 3D array texture */
   assert(tex->base.array_size == 1 || tex->base.depth0 == 1);

   if (tex->base.array_size == 1) {
      /* non-array, non-3D */
      if (tex->base.depth0 == 1)
         return 0;

      /* only the first level has a fixed slice stride */
      if (level > 0) {
         assert(!"no slice stride for 3D texture with level > 0");
         return 0;
      }
   }

   qpitch = tex->slice_offsets[level][1].y - tex->slice_offsets[level][0].y;
   assert(qpitch % tex->block_height == 0);

   return (qpitch / tex->block_height) * tex->bo_stride;
}
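
/*
 * For example (values assumed purely for illustration): if consecutive
 * slices of a level start 64 texel rows apart, block_height is 1, and
 * bo_stride is 4096 bytes, tex_get_slice_stride() returns
 * (64 / 1) * 4096 = 262144 bytes between slices.
 */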

static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *     "As shown in the tiling algorithm, the new address bit[6] should be:
    *
    *        Address bit[6] <= TiledAddr bit[6] XOR
    *                          TiledAddr bit[9] XOR
    *                          TiledAddr bit[10]"
    */
   return addr ^ (((addr >> 3) ^ (addr >> 4)) & 0x40);
}

static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *     "As shown in the tiling algorithm, The new address bit[6] becomes:
    *
    *        Address bit[6] <= TiledAddr bit[6] XOR
    *                          TiledAddr bit[9]"
    */
   return addr ^ ((addr >> 3) & 0x40);
}
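
/*
 * A quick worked example of the swizzles above: the shifts bring bit 9
 * (addr >> 3) and bit 10 (addr >> 4) down to bit position 6, the & 0x40
 * isolates that bit, and the final XOR folds it into address bit 6.  For
 * the X-major case, an address of 0x0200 (only bit 9 set) becomes
 *
 *    0x0200 ^ (((0x0200 >> 3) ^ (0x0200 >> 4)) & 0x40)
 *  = 0x0200 ^ ((0x0040 ^ 0x0020) & 0x40)
 *  = 0x0200 ^ 0x0040
 *  = 0x0240
 *
 * i.e. bit 6 is flipped because bit[6] XOR bit[9] XOR bit[10] = 0^1^0 = 1.
 */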

static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that an
    * X-major tile has 8 rows and 32 OWord columns (512 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
    *
    * OWords in that tile are also numbered in row-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *           = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
    */
   unsigned tile, offset;

   tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);

   return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
}
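
/*
 * As an illustration of the X-tiling math above (numbers picked arbitrarily):
 * with bo_stride = 4096 (tiles_per_row = 8), mem_x = 1000 and mem_y = 10,
 *
 *    tile   = (10 / 8) * 8 + (1000 / 512)        = 9
 *    offset = 9 * 4096 + (10 % 8) * 512 + (1000 % 512)
 *           = 36864 + 1024 + 488                 = 38376
 *
 * which is exactly what the shift/mask form computes before the optional
 * swizzle is applied.
 */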

static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
    * Y-major tile has 32 rows and 8 OWord columns (128 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
    *
    * OWords in that tile are numbered in column-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    */
   unsigned tile, oword, offset;

   tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
   offset = tile << 12 | oword << 4 | (mem_x & 0xf);

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}

static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
    * W-major tile has 8 8x8-block rows and 8 8x8-block columns.  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
    *
    * 8x8-blocks in that tile are numbered in column-major order, starting
    * from zero.  The 8x8-block number can thus be calculated as follows:
    *
    *    blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
    *
    * Each 8x8-block is divided into 4 4x4-blocks, in row-major order.  Each
    * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
    * We have
    *
    *    blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
    *    blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
    *    blk1 = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
    */
   unsigned tile, blk8, blk4, blk2, blk1, offset;

   tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
   blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
   blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
   blk1 = ((mem_y     ) & 0x1) << 1 | ((mem_x     ) & 0x1);
   offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
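
/*
 * To make the W-tiling interleave above concrete (values chosen only for
 * illustration): for mem_x = 5 and mem_y = 3 within a single tile,
 *
 *    blk8 = (5 / 8) * 8 + (3 / 8)               = 0
 *    blk4 = ((3 / 4) & 1) * 2 + ((5 / 4) & 1)   = 1
 *    blk2 = ((3 / 2) & 1) * 2 + ((5 / 2) & 1)   = 2
 *    blk1 = (3 & 1) * 2 + (5 & 1)               = 3
 *
 * so the tiled offset is 0 * 64 + 1 * 16 + 2 * 4 + 3 = 27 bytes into the
 * tile, matching the bit-interleaved form computed in the code.
 */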

static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   return mem_y * tiles_per_row + mem_x;
}

typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
                                         unsigned tiles_per_row,
                                         bool swizzle);

static tex_tile_offset_func
tex_tile_choose_offset_func(const struct ilo_texture *tex,
                            unsigned *tiles_per_row)
{
   switch (tex->tiling) {
   case INTEL_TILING_X:
      *tiles_per_row = tex->bo_stride / 512;
      return tex_tile_x_offset;
   case INTEL_TILING_Y:
      *tiles_per_row = tex->bo_stride / 128;
      return tex_tile_y_offset;
   case INTEL_TILING_NONE:
   default:
      /* W-tiling */
      if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
         *tiles_per_row = tex->bo_stride / 64;
         return tex_tile_w_offset;
      }
      else {
         *tiles_per_row = tex->bo_stride;
         return tex_tile_none_offset;
      }
   }
}

static void
tex_staging_sys_zs_read(struct ilo_context *ilo,
                        struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   const uint8_t *src = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         dst_cpp = 4;
         dst_s8_pos = 3;
         src_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         dst_cpp = 8;
         dst_s8_pos = 4;
         src_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               d += dst_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            dst += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               *d = src[offset];

               d++;
               x++;
            }

            dst += xfer->base.stride;
            mem_y++;
         }
      }
   }
}

static void
tex_staging_sys_zs_write(struct ilo_context *ilo,
                         struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   uint8_t *dst = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      uint8_t *s8_dst = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         src_cpp = 4;
         src_s8_pos = 3;
         dst_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         src_cpp = 8;
         src_s8_pos = 4;
         dst_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               s += src_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            src += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               dst[offset] = *s;

               s++;
               x++;
            }

            src += xfer->base.stride;
            mem_y++;
         }
      }
   }
}

static void
tex_staging_sys_convert_write(struct ilo_context *ilo,
                              struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;
   void *dst;
   int slice;

   dst = intel_bo_get_virtual(tex->bo);
   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   if (box->depth > 1)
      dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      dst_slice_stride = 0;

   if (unlikely(tex->bo_format == tex->base.format)) {
      util_copy_box(dst, tex->bo_format, tex->bo_stride, dst_slice_stride,
            0, 0, 0, box->width, box->height, box->depth,
            xfer->staging_sys, xfer->base.stride, xfer->base.layer_stride,
            0, 0, 0);
      return;
   }

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      assert(tex->bo_format == PIPE_FORMAT_R8G8B8X8_UNORM);

      for (slice = 0; slice < box->depth; slice++) {
         const void *src =
            xfer->staging_sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;
      }
      break;
   default:
      assert(!"unable to convert the staging data");
      break;
   }
}

static bool
tex_staging_sys_map_bo(const struct ilo_context *ilo,
                       const struct ilo_texture *tex,
                       bool for_read_back, bool linear_view)
{
   const bool prefer_cpu = (ilo->dev->has_llc || for_read_back);
   int err;

   if (prefer_cpu && (tex->tiling == INTEL_TILING_NONE || !linear_view))
      err = intel_bo_map(tex->bo, !for_read_back);
   else
      err = intel_bo_map_gtt(tex->bo);

   if (!tex->separate_s8)
      return !err;

   err = intel_bo_map(tex->separate_s8->bo, !for_read_back);
   if (err)
      intel_bo_unmap(tex->bo);

   return !err;
}

static void
tex_staging_sys_unmap_bo(const struct ilo_context *ilo,
                         const struct ilo_texture *tex)
{
   if (tex->separate_s8)
      intel_bo_unmap(tex->separate_s8->bo);

   intel_bo_unmap(tex->bo);
}

static void
tex_staging_sys_unmap(struct ilo_context *ilo,
                      struct ilo_texture *tex,
                      struct ilo_transfer *xfer)
{
   bool success;

   if (!(xfer->base.usage & PIPE_TRANSFER_WRITE)) {
      FREE(xfer->staging_sys);
      return;
   }

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      success = tex_staging_sys_map_bo(ilo, tex, false, true);
      if (success) {
         tex_staging_sys_convert_write(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map_bo(ilo, tex, false, false);
      if (success) {
         tex_staging_sys_zs_write(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   if (!success)
      ilo_err("failed to map resource for moving staging data\n");

   FREE(xfer->staging_sys);
}

static bool
tex_staging_sys_map(struct ilo_context *ilo,
                    struct ilo_texture *tex,
                    struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   const size_t stride = util_format_get_stride(tex->base.format, box->width);
   const size_t size =
      util_format_get_2d_size(tex->base.format, stride, box->height);
   bool read_back = false, success;

   xfer->staging_sys = MALLOC(size * box->depth);
   if (!xfer->staging_sys)
      return false;

   xfer->base.stride = stride;
   xfer->base.layer_stride = size;
   xfer->ptr = xfer->staging_sys;

   /* see if we need to read the resource back */
   if (xfer->base.usage & PIPE_TRANSFER_READ) {
      read_back = true;
   }
   else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
      const unsigned discard_flags =
         (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);

      if (!(xfer->base.usage & discard_flags))
         read_back = true;
   }

   if (!read_back)
      return true;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      assert(!"no on-the-fly format conversion for mapping");
      success = false;
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map_bo(ilo, tex, true, false);
      if (success) {
         tex_staging_sys_zs_read(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   return success;
}

static void
tex_direct_unmap(struct ilo_context *ilo,
                 struct ilo_texture *tex,
                 struct ilo_transfer *xfer)
{
   intel_bo_unmap(tex->bo);
}

static bool
tex_direct_map(struct ilo_context *ilo,
               struct ilo_texture *tex,
               struct ilo_transfer *xfer)
{
   if (!map_bo_for_transfer(ilo, tex->bo, xfer))
      return false;

   /* note that stride is for a block row, not a texel row */
   xfer->base.stride = tex->bo_stride;

   /* slice stride is not always available */
   if (xfer->base.box.depth > 1)
      xfer->base.layer_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      xfer->base.layer_stride = 0;

   xfer->ptr = intel_bo_get_virtual(tex->bo);
   xfer->ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);

   return true;
}

static bool
tex_map(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);
   bool success;

   if (!choose_transfer_method(ilo, xfer))
      return false;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_UNSYNC:
      success = tex_direct_map(ilo, tex, xfer);
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map(ilo, tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   return success;
}

static void
tex_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_UNSYNC:
      tex_direct_unmap(ilo, tex, xfer);
      break;
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      tex_staging_sys_unmap(ilo, tex, xfer);
      break;
   default:
      assert(!"unknown mapping method");
      break;
   }
}

static bool
buf_map(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_buffer *buf = ilo_buffer(xfer->base.resource);

   if (!choose_transfer_method(ilo, xfer))
      return false;

   if (!map_bo_for_transfer(ilo, buf->bo, xfer))
      return false;

   assert(xfer->base.level == 0);
   assert(xfer->base.box.y == 0);
   assert(xfer->base.box.z == 0);
   assert(xfer->base.box.height == 1);
   assert(xfer->base.box.depth == 1);

   xfer->base.stride = 0;
   xfer->base.layer_stride = 0;

   xfer->ptr = intel_bo_get_virtual(buf->bo);
   xfer->ptr += xfer->base.box.x;

   return true;
}

static void
buf_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_buffer *buf = ilo_buffer(xfer->base.resource);

   intel_bo_unmap(buf->bo);
}

static void
buf_pwrite(struct ilo_context *ilo, struct ilo_buffer *buf,
           unsigned usage, int offset, int size, const void *data)
{
   bool need_flush;

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, buf->bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* old data not needed so discard the old bo to avoid stalling */
         if (ilo_buffer_alloc_bo(buf)) {
            ilo_mark_states_with_resource_dirty(ilo, &buf->base);
            will_stall = false;
         }
      }
      else {
         /*
          * We could allocate a temporary bo to hold the data and emit a
          * pipelined copy blit to move the data to buf->bo.  But for now, do
          * nothing.
          */
      }

      /* flush to make the bo busy, so that pwrite() stalls as it should */
      if (will_stall && need_flush)
         ilo_cp_flush(ilo->cp);
   }

   intel_bo_pwrite(buf->bo, offset, size, data);
}

static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}

static void
ilo_transfer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);

   if (xfer->base.resource->target == PIPE_BUFFER)
      buf_unmap(ilo, xfer);
   else
      tex_unmap(ilo, xfer);

   pipe_resource_reference(&xfer->base.resource, NULL);

   util_slab_free(&ilo->transfer_mempool, xfer);
}

static void *
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *res,
                 unsigned level,
                 unsigned usage,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer;
   bool success;

   xfer = util_slab_alloc(&ilo->transfer_mempool);
   if (!xfer) {
      *transfer = NULL;
      return NULL;
   }

   xfer->base.resource = NULL;
   pipe_resource_reference(&xfer->base.resource, res);
   xfer->base.level = level;
   xfer->base.usage = usage;
   xfer->base.box = *box;

   if (res->target == PIPE_BUFFER)
      success = buf_map(ilo, xfer);
   else
      success = tex_map(ilo, xfer);

   if (!success) {
      pipe_resource_reference(&xfer->base.resource, NULL);
      FREE(xfer);
      *transfer = NULL;
      return NULL;
   }

   *transfer = &xfer->base;

   return xfer->ptr;
}

static void
ilo_transfer_inline_write(struct pipe_context *pipe,
                          struct pipe_resource *res,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          const void *data,
                          unsigned stride,
                          unsigned layer_stride)
{
   if (likely(res->target == PIPE_BUFFER) &&
       !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* they should specify just an offset and a size */
      assert(level == 0);
      assert(box->y == 0);
      assert(box->z == 0);
      assert(box->height == 1);
      assert(box->depth == 1);

      buf_pwrite(ilo_context(pipe), ilo_buffer(res),
            usage, box->x, box->width, data);
   }
   else {
      u_default_transfer_inline_write(pipe, res,
            level, usage, box, data, stride, layer_stride);
   }
}

/**
 * Initialize transfer-related functions.
 */
void
ilo_init_transfer_functions(struct ilo_context *ilo)
{
   ilo->base.transfer_map = ilo_transfer_map;
   ilo->base.transfer_flush_region = ilo_transfer_flush_region;
   ilo->base.transfer_unmap = ilo_transfer_unmap;
   ilo->base.transfer_inline_write = ilo_transfer_inline_write;
}
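
/*
 * A minimal usage sketch, not part of the driver: names such as ctx, res,
 * and the 4x4 upload box are assumed here purely for illustration.  Once
 * ilo_init_transfer_functions() has run, a state tracker updates a resource
 * through the pipe_context hooks installed above, e.g.
 *
 *    struct pipe_box box;
 *    struct pipe_transfer *xfer;
 *    void *ptr;
 *
 *    u_box_2d(0, 0, 4, 4, &box);
 *    ptr = ctx->transfer_map(ctx, res, 0, PIPE_TRANSFER_WRITE, &box, &xfer);
 *    if (ptr) {
 *       // copy 4 rows of pixel data into ptr, one xfer->stride apart
 *       ctx->transfer_unmap(ctx, xfer);
 *    }
 *
 * transfer_map() picks a mapping method via choose_transfer_method() and
 * returns a pointer into either the bo or a system staging buffer;
 * transfer_unmap() writes any staged data back and releases the mapping.
 */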