Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * on the rights to use, copy, modify, merge, publish, distribute, sub
  8.  * license, and/or sell copies of the Software, and to permit persons to whom
  9.  * the Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18.  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *      Jerome Glisse
  25.  */
  26. #include "r600_pipe.h"
  27. #include "evergreend.h"
  28. #include "util/u_memory.h"
  29. #include "util/u_math.h"
  30.  
  31. void evergreen_dma_copy_buffer(struct r600_context *rctx,
  32.                                struct pipe_resource *dst,
  33.                                struct pipe_resource *src,
  34.                                uint64_t dst_offset,
  35.                                uint64_t src_offset,
  36.                                uint64_t size)
  37. {
  38.         struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
  39.         unsigned i, ncopy, csize, sub_cmd, shift;
  40.         struct r600_resource *rdst = (struct r600_resource*)dst;
  41.         struct r600_resource *rsrc = (struct r600_resource*)src;
  42.  
  43.         /* Mark the buffer range of destination as valid (initialized),
  44.          * so that transfer_map knows it should wait for the GPU when mapping
  45.          * that range. */
  46.         util_range_add(&rdst->valid_buffer_range, dst_offset,
  47.                        dst_offset + size);
  48.  
  49.         dst_offset += rdst->gpu_address;
  50.         src_offset += rsrc->gpu_address;
  51.  
  52.         /* see if we use dword or byte copy */
  53.         if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
  54.                 size >>= 2;
  55.                 sub_cmd = EG_DMA_COPY_DWORD_ALIGNED;
  56.                 shift = 2;
  57.         } else {
  58.                 sub_cmd = EG_DMA_COPY_BYTE_ALIGNED;
  59.                 shift = 0;
  60.         }
  61.         ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE);
  62.  
  63.         r600_need_dma_space(&rctx->b, ncopy * 5);
  64.         for (i = 0; i < ncopy; i++) {
  65.                 csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
  66.                 /* emit reloc before writing cs so that cs is always in consistent state */
  67.                 r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
  68.                                       RADEON_PRIO_MIN);
  69.                 r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
  70.                                       RADEON_PRIO_MIN);
  71.                 cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
  72.                 cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
  73.                 cs->buf[cs->cdw++] = src_offset & 0xffffffff;
  74.                 cs->buf[cs->cdw++] = (dst_offset >> 32UL) & 0xff;
  75.                 cs->buf[cs->cdw++] = (src_offset >> 32UL) & 0xff;
  76.                 dst_offset += csize << shift;
  77.                 src_offset += csize << shift;
  78.                 size -= csize;
  79.         }
  80. }
  81.  
/* The max number of bytes to copy per packet.
 * NOTE(review): the BYTE_COUNT field is 21 bits wide per the comment on
 * the emit below; presumably the -8 keeps the count a multiple of 8 /
 * away from the field limit — confirm against the CP DMA hardware spec. */
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)

/* Fill "size" bytes of "dst" with the 32-bit pattern "clear_value"
 * using CP DMA packets on the gfx ring.
 *
 * "offset" is a byte offset relative to the resource; it is converted
 * to a GPU virtual address below.  The fill is split into packets of at
 * most CP_DMA_MAX_BYTE_COUNT bytes.  Write caches are flushed before
 * the first packet, CP_SYNC is set only on the last packet, and read
 * caches are invalidated afterwards.
 *
 * NOTE(review): no alignment of offset/size is checked here; presumably
 * callers guarantee whatever CP DMA requires — confirm against callers.
 */
void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
                                   struct pipe_resource *dst, uint64_t offset,
                                   unsigned size, uint32_t clear_value)
{
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;

        assert(size);
        assert(rctx->screen->b.has_cp_dma);

        /* Mark the buffer range of destination as valid (initialized),
         * so that transfer_map knows it should wait for the GPU when mapping
         * that range. */
        util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
                       offset + size);

        /* From here on, "offset" is a GPU virtual address. */
        offset += r600_resource(dst)->gpu_address;

        /* Flush the cache where the resource is bound. */
        rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
                         R600_CONTEXT_INV_VERTEX_CACHE |
                         R600_CONTEXT_INV_TEX_CACHE |
                         R600_CONTEXT_FLUSH_AND_INV |
                         R600_CONTEXT_FLUSH_AND_INV_CB |
                         R600_CONTEXT_FLUSH_AND_INV_DB |
                         R600_CONTEXT_FLUSH_AND_INV_CB_META |
                         R600_CONTEXT_FLUSH_AND_INV_DB_META |
                         R600_CONTEXT_STREAMOUT_FLUSH |
                         R600_CONTEXT_WAIT_3D_IDLE;

        while (size) {
                unsigned sync = 0;
                unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
                unsigned reloc;

                /* Reserve room for one CP_DMA packet (plus NOP/reloc) and,
                 * if flush flags are still pending, the flush dwords too. */
                r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0), FALSE);

                /* Flush the caches for the first copy only.
                 * (r600_flush_emit consumes rctx->b.flags, so later
                 * iterations see them cleared — presumably; confirm.) */
                if (rctx->b.flags) {
                        r600_flush_emit(rctx);
                }

                /* Do the synchronization after the last copy, so that all data is written to memory. */
                if (size == byte_count) {
                        sync = PKT3_CP_DMA_CP_SYNC;
                }

                /* This must be done after r600_need_cs_space. */
                reloc = r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
                                              (struct r600_resource*)dst, RADEON_USAGE_WRITE,
                                              RADEON_PRIO_MIN);

                /* SRC_SEL(2) selects "clear_value" itself as the fill data. */
                radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
                radeon_emit(cs, clear_value);   /* DATA [31:0] */
                radeon_emit(cs, sync | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
                radeon_emit(cs, offset);        /* DST_ADDR_LO [31:0] */
                radeon_emit(cs, (offset >> 32) & 0xff);         /* DST_ADDR_HI [7:0] */
                radeon_emit(cs, byte_count);    /* COMMAND [29:22] | BYTE_COUNT [20:0] */

                /* NOP packet carrying the reloc index for the kernel CS parser. */
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, reloc);

                size -= byte_count;
                offset += byte_count;
        }

        /* Invalidate the read caches. */
        rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
                         R600_CONTEXT_INV_VERTEX_CACHE |
                         R600_CONTEXT_INV_TEX_CACHE;
}
  155.  
  156.