/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Marek Olšák
 */

#include "r600_cs.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include <inttypes.h>
#include <stdio.h>

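/* Return TRUE if the buffer is referenced by the GFX command stream, or by
 * a DMA command stream that has pending commands. */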
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage)
{
        if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
                return TRUE;
        }
        if (ctx->rings.dma.cs && ctx->rings.dma.cs->cdw &&
            ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
                return TRUE;
        }
        return FALSE;
}

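/* Map a buffer for CPU access, flushing and waiting on any ring that still
 * references it (unless PIPE_TRANSFER_UNSYNCHRONIZED is set). Returns NULL
 * if PIPE_TRANSFER_DONTBLOCK is set and the map would have to stall. */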
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
                                      struct r600_resource *resource,
                                      unsigned usage)
{
        enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
        bool busy = false;

        if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
                return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
        }

        if (!(usage & PIPE_TRANSFER_WRITE)) {
                /* have to wait for the last write */
                rusage = RADEON_USAGE_WRITE;
        }

        if (ctx->rings.gfx.cs->cdw != ctx->initial_gfx_cs_size &&
            ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
                                             resource->cs_buf, rusage)) {
                if (usage & PIPE_TRANSFER_DONTBLOCK) {
                        ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
                        return NULL;
                } else {
                        ctx->rings.gfx.flush(ctx, 0, NULL);
                        busy = true;
                }
        }
        if (ctx->rings.dma.cs &&
            ctx->rings.dma.cs->cdw &&
            ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
                                             resource->cs_buf, rusage)) {
                if (usage & PIPE_TRANSFER_DONTBLOCK) {
                        ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
                        return NULL;
                } else {
                        ctx->rings.dma.flush(ctx, 0, NULL);
                        busy = true;
                }
        }

        if (busy || ctx->ws->buffer_is_busy(resource->buf, rusage)) {
                if (usage & PIPE_TRANSFER_DONTBLOCK) {
                        return NULL;
                } else {
                        /* We will be waiting for the GPU. Wait for any offloaded
                         * CS flush to complete to avoid busy-waiting in the winsys. */
                        ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
                        if (ctx->rings.dma.cs)
                                ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
                }
        }

        /* Passing NULL as the CS prevents the winsys from repeating the
         * reference checks we have already done above. */
        return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
}

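/* Allocate (or reallocate) the winsys buffer backing a resource, choosing
 * the memory domain and flags from the pipe usage, resource flags, and
 * kernel version. Returns false on allocation failure. */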
bool r600_init_resource(struct r600_common_screen *rscreen,
                        struct r600_resource *res,
                        unsigned size, unsigned alignment,
                        bool use_reusable_pool)
{
        struct r600_texture *rtex = (struct r600_texture*)res;
        struct pb_buffer *old_buf, *new_buf;
        enum radeon_bo_flag flags = 0;

        switch (res->b.b.usage) {
        case PIPE_USAGE_STREAM:
                flags = RADEON_FLAG_GTT_WC;
                /* fall through */
        case PIPE_USAGE_STAGING:
                /* Transfers are likely to occur more often with these resources. */
                res->domains = RADEON_DOMAIN_GTT;
                break;
        case PIPE_USAGE_DYNAMIC:
                /* Older kernels didn't always flush the HDP cache before
                 * CS execution.
                 */
                if (rscreen->info.drm_minor < 40) {
                        res->domains = RADEON_DOMAIN_GTT;
                        flags |= RADEON_FLAG_GTT_WC;
                        break;
                }
                flags |= RADEON_FLAG_CPU_ACCESS;
                /* fall through */
        case PIPE_USAGE_DEFAULT:
        case PIPE_USAGE_IMMUTABLE:
        default:
                /* Not listing GTT here improves performance in some apps. */
                res->domains = RADEON_DOMAIN_VRAM;
                flags |= RADEON_FLAG_GTT_WC;
                break;
        }

        if (res->b.b.target == PIPE_BUFFER &&
            res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                              PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
                /* Use GTT for all persistent mappings with older kernels,
                 * because they didn't always flush the HDP cache before CS
                 * execution.
                 *
                 * Write-combined CPU mappings are fine, the kernel ensures all CPU
                 * writes finish before the GPU executes a command stream.
                 */
                if (rscreen->info.drm_minor < 40)
                        res->domains = RADEON_DOMAIN_GTT;
                else if (res->domains & RADEON_DOMAIN_VRAM)
                        flags |= RADEON_FLAG_CPU_ACCESS;
        }

        /* Tiled textures are unmappable. Always put them in VRAM. */
        if (res->b.b.target != PIPE_BUFFER &&
            rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
                res->domains = RADEON_DOMAIN_VRAM;
                flags &= ~RADEON_FLAG_CPU_ACCESS;
                flags |= RADEON_FLAG_NO_CPU_ACCESS;
        }

        /* Allocate a new resource. */
        new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
                                             use_reusable_pool,
                                             res->domains, flags);
        if (!new_buf) {
                return false;
        }

        /* Replace the pointer such that res->buf is never observed as NULL
         * if it wasn't NULL before. This should prevent crashes with
         * multiple contexts using the same buffer where one of the contexts
         * invalidates it while the others are using it. */
        old_buf = res->buf;
        res->cs_buf = rscreen->ws->buffer_get_cs_handle(new_buf); /* should be atomic */
        res->buf = new_buf; /* should be atomic */

        if (rscreen->info.r600_virtual_address)
                res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->cs_buf);
        else
                res->gpu_address = 0;

        pb_reference(&old_buf, NULL);

        util_range_set_empty(&res->valid_buffer_range);
        res->TC_L2_dirty = false;

        if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
                fprintf(stderr, "VM start=0x%"PRIX64"  end=0x%"PRIX64" | Buffer %u bytes\n",
                        res->gpu_address, res->gpu_address + res->buf->size,
                        res->buf->size);
        }
        return true;
}

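/* pipe_screen::resource_destroy for buffers: drop the winsys buffer
 * reference and free the CPU-side bookkeeping. */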
static void r600_buffer_destroy(struct pipe_screen *screen,
                                struct pipe_resource *buf)
{
        struct r600_resource *rbuffer = r600_resource(buf);

        util_range_destroy(&rbuffer->valid_buffer_range);
        pb_reference(&rbuffer->buf, NULL);
        FREE(rbuffer);
}

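/* Allocate a transfer object from the per-context slab pool, fill it in,
 * and return the CPU pointer that will be handed back to the caller. */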
static void *r600_buffer_get_transfer(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer,
                                      void *data, struct r600_resource *staging,
                                      unsigned offset)
{
        struct r600_common_context *rctx = (struct r600_common_context*)ctx;
        struct r600_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);

        transfer->transfer.resource = resource;
        transfer->transfer.level = level;
        transfer->transfer.usage = usage;
        transfer->transfer.box = *box;
        transfer->transfer.stride = 0;
        transfer->transfer.layer_stride = 0;
        transfer->offset = offset;
        transfer->staging = staging;
        *ptransfer = &transfer->transfer;
        return data;
}

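/* Return true if a buffer-to-buffer copy with this placement can be done
 * on the GPU: via CP DMA, or (for dword-aligned copies) via the async DMA
 * ring or the streamout path. */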
static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
                                     unsigned dstx, unsigned srcx, unsigned size)
{
        bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);

        return rctx->screen->has_cp_dma ||
               (dword_aligned && (rctx->rings.dma.cs ||
                                  rctx->screen->has_streamout));
}

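/* pipe_context::transfer_map for buffers. Tries several strategies to
 * avoid stalling on the GPU: unsynchronized maps of never-written ranges,
 * reallocating the buffer on whole-resource discard, wait-free write-only
 * uploads through a temporary buffer, and GTT staging copies for VRAM
 * reads. Falls back to a synchronized map otherwise. */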
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                      struct pipe_resource *resource,
                                      unsigned level,
                                      unsigned usage,
                                      const struct pipe_box *box,
                                      struct pipe_transfer **ptransfer)
{
        struct r600_common_context *rctx = (struct r600_common_context*)ctx;
        struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
        struct r600_resource *rbuffer = r600_resource(resource);
        uint8_t *data;

        assert(box->x + box->width <= resource->width0);

        /* See if the buffer range being mapped has never been initialized,
         * in which case it can be mapped unsynchronized. */
        if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
            usage & PIPE_TRANSFER_WRITE &&
            !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
                usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
        }

        /* If discarding the entire range, discard the whole resource instead. */
        if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
            box->x == 0 && box->width == resource->width0) {
                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
        }

        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
            !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                assert(usage & PIPE_TRANSFER_WRITE);

                /* Check if mapping this buffer would cause waiting for the GPU. */
                if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
                    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
                        rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
                }
                /* At this point, the buffer is always idle. */
                usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
        }
        else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
                 !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
                 !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
                 r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
                assert(usage & PIPE_TRANSFER_WRITE);

                /* Check if mapping this buffer would cause waiting for the GPU. */
                if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
                    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
                        /* Do a wait-free write-only transfer using a temporary buffer. */
                        unsigned offset;
                        struct r600_resource *staging = NULL;

                        u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
                                       &offset, (struct pipe_resource**)&staging, (void**)&data);

                        if (staging) {
                                data += box->x % R600_MAP_BUFFER_ALIGNMENT;
                                return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                                                ptransfer, data, staging, offset);
                        } else {
                                return NULL; /* error, shouldn't occur though */
                        }
                }
                /* At this point, the buffer is always idle (we checked it above). */
                usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
        }
        /* Using a staging buffer in GTT for larger reads is much faster. */
        else if ((usage & PIPE_TRANSFER_READ) &&
                 !(usage & PIPE_TRANSFER_WRITE) &&
                 rbuffer->domains == RADEON_DOMAIN_VRAM &&
                 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
                struct r600_resource *staging;

                staging = (struct r600_resource*) pipe_buffer_create(
                                ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING,
                                box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
                if (staging) {
                        /* Copy the VRAM buffer to the staging buffer. */
                        rctx->dma_copy(ctx, &staging->b.b, 0,
                                       box->x % R600_MAP_BUFFER_ALIGNMENT,
                                       0, 0, resource, level, box);

                        data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
                        data += box->x % R600_MAP_BUFFER_ALIGNMENT;

                        return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                                        ptransfer, data, staging, 0);
                }
        }

        data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
        if (!data) {
                return NULL;
        }
        data += box->x;

        return r600_buffer_get_transfer(ctx, resource, level, usage, box,
                                        ptransfer, data, NULL, 0);
}

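/* pipe_context::transfer_unmap for buffers: copy any staging buffer back
 * into the real resource, record the newly valid range on writes, and
 * release the transfer object. */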
static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
                                       struct pipe_transfer *transfer)
{
        struct r600_common_context *rctx = (struct r600_common_context*)ctx;
        struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
        struct r600_resource *rbuffer = r600_resource(transfer->resource);

        if (rtransfer->staging) {
                if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) {
                        struct pipe_resource *dst, *src;
                        unsigned soffset, doffset, size;
                        struct pipe_box box;

                        dst = transfer->resource;
                        src = &rtransfer->staging->b.b;
                        size = transfer->box.width;
                        doffset = transfer->box.x;
                        soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;

                        u_box_1d(soffset, size, &box);

                        /* Copy the staging buffer into the original one. */
                        rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
                }
                pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
        }

        if (transfer->usage & PIPE_TRANSFER_WRITE) {
                util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
                               transfer->box.x + transfer->box.width);
        }
        util_slab_free(&rctx->pool_transfers, transfer);
}

static const struct u_resource_vtbl r600_buffer_vtbl =
{
        NULL,                           /* get_handle */
        r600_buffer_destroy,            /* resource_destroy */
        r600_buffer_transfer_map,       /* transfer_map */
        NULL,                           /* transfer_flush_region */
        r600_buffer_transfer_unmap,     /* transfer_unmap */
        NULL                            /* transfer_inline_write */
};

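/* Allocate and initialize the CPU-side r600_resource struct for a buffer;
 * the winsys buffer itself is created by the callers. */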
static struct r600_resource *
r600_alloc_buffer_struct(struct pipe_screen *screen,
                         const struct pipe_resource *templ)
{
        struct r600_resource *rbuffer;

        rbuffer = MALLOC_STRUCT(r600_resource);

        rbuffer->b.b = *templ;
        pipe_reference_init(&rbuffer->b.b.reference, 1);
        rbuffer->b.b.screen = screen;
        rbuffer->b.vtbl = &r600_buffer_vtbl;
        rbuffer->buf = NULL;
        rbuffer->TC_L2_dirty = false;
        util_range_init(&rbuffer->valid_buffer_range);
        return rbuffer;
}

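/* Create a buffer resource backed by a newly allocated winsys buffer. */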
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
                                         const struct pipe_resource *templ,
                                         unsigned alignment)
{
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
        struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);

        if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE)) {
                FREE(rbuffer);
                return NULL;
        }
        return &rbuffer->b.b;
}

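/* Create a buffer resource that wraps application-provided memory. The
 * buffer lives in GTT and its whole range is considered valid. */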
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
                             const struct pipe_resource *templ,
                             void *user_memory)
{
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
        struct radeon_winsys *ws = rscreen->ws;
        struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);

        rbuffer->domains = RADEON_DOMAIN_GTT;
        util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);

        /* Convert a user pointer to a buffer. */
        rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
        if (!rbuffer->buf) {
                FREE(rbuffer);
                return NULL;
        }

        rbuffer->cs_buf = ws->buffer_get_cs_handle(rbuffer->buf);

        if (rscreen->info.r600_virtual_address)
                rbuffer->gpu_address =
                        ws->buffer_get_virtual_address(rbuffer->cs_buf);
        else
                rbuffer->gpu_address = 0;

        return &rbuffer->b.b;
}