Subversion Repositories — Kolibri OS (Rev 5139)

/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "r600d.h"

u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 */

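/*
 * For orientation, every command emitted to the DMA ring in this file is
 * built with the DMA_PACKET() macro from r600d.h.  A minimal sketch of the
 * r6xx/r7xx header layout (given here for illustration; r600d.h is the
 * authoritative definition):
 *
 *   DMA_PACKET(cmd, t, s, n)
 *     bits 31:28  cmd  - opcode (WRITE, COPY, FENCE, TRAP, SEMAPHORE, NOP, ...)
 *     bit  23     t    - tiling select for copies
 *     bit  22     s    - semaphore signal/wait select
 *     bits 15:0   n    - count in dwords for WRITE/COPY, otherwise 0
 *
 * e.g. DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1) starts a one-dword write and is
 * followed on the ring by the destination address (low dword, then high
 * bits) and the data itself, as in r600_dma_ring_test() below.
 */
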
/**
 * r600_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
                           struct radeon_ring *ring)
{
        u32 rptr;

        if (rdev->wb.enabled)
                rptr = rdev->wb.wb[ring->rptr_offs/4];
        else
                rptr = RREG32(DMA_RB_RPTR);

        return (rptr & 0x3fffc) >> 2;
}

/**
 * r600_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
                           struct radeon_ring *ring)
{
        return (RREG32(DMA_RB_WPTR) & 0x3fffc) >> 2;
}

/**
 * r600_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (r6xx+).
 */
void r600_dma_set_wptr(struct radeon_device *rdev,
                       struct radeon_ring *ring)
{
        WREG32(DMA_RB_WPTR, (ring->wptr << 2) & 0x3fffc);
}
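
/*
 * A note on units for the three accessors above: the DMA_RB_RPTR/WPTR
 * registers hold byte offsets into the ring, while ring->wptr and the
 * value returned by r600_dma_get_rptr() are dword indices.  Hence the
 * << 2 / >> 2 conversions and the 0x3fffc mask, which drops the two low
 * bits (dword alignment) and limits the offset to what the register
 * field can address.  For example, a wptr of 16 dwords is committed to
 * the register as 16 << 2 = 0x40 bytes.
 */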

/**
 * r600_dma_stop - stop the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine (r6xx-evergreen).
 */
void r600_dma_stop(struct radeon_device *rdev)
{
        u32 rb_cntl = RREG32(DMA_RB_CNTL);

        if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX)
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL, rb_cntl);

        rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
}

/**
 * r600_dma_resume - setup and start the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
 * Returns 0 for success, error for failure.
 */
int r600_dma_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
        u32 rb_cntl, dma_cntl, ib_cntl;
        u32 rb_bufsz;
        int r;

        /* Reset dma */
        if (rdev->family >= CHIP_RV770)
                WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
        else
                WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
        RREG32(SRBM_SOFT_RESET);
        udelay(50);
        WREG32(SRBM_SOFT_RESET, 0);

        WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
        WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);

        /* Set ring buffer size in dwords */
        rb_bufsz = order_base_2(ring->ring_size / 4);
        rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
        rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
        WREG32(DMA_RB_CNTL, rb_cntl);

        /* Initialize the ring buffer's read and write pointers */
        WREG32(DMA_RB_RPTR, 0);
        WREG32(DMA_RB_WPTR, 0);

        /* set the wb address whether it's enabled or not */
        WREG32(DMA_RB_RPTR_ADDR_HI,
               upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
        WREG32(DMA_RB_RPTR_ADDR_LO,
               ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));

        if (rdev->wb.enabled)
                rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

        WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);

        /* enable DMA IBs */
        ib_cntl = DMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
        ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
        WREG32(DMA_IB_CNTL, ib_cntl);

        dma_cntl = RREG32(DMA_CNTL);
        dma_cntl &= ~CTXEMPTY_INT_ENABLE;
        WREG32(DMA_CNTL, dma_cntl);

        if (rdev->family >= CHIP_RV770)
                WREG32(DMA_MODE, 1);

        ring->wptr = 0;
        WREG32(DMA_RB_WPTR, ring->wptr << 2);

        WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);

        ring->ready = true;

        r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
        if (r) {
                ring->ready = false;
                return r;
        }

        if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX)
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}
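
/*
 * Note: the write-back location programmed above through
 * DMA_RB_RPTR_ADDR_HI/LO (R600_WB_DMA_RPTR_OFFSET into rdev->wb.gpu_addr)
 * is expected to be the same slot that r600_dma_get_rptr() reads back
 * through rdev->wb.wb[] when write-back is enabled, so the driver can
 * poll the read pointer without an MMIO register read.
 */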

/**
 * r600_dma_fini - tear down the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine and free the ring (r6xx-evergreen).
 */
void r600_dma_fini(struct radeon_device *rdev)
{
        r600_dma_stop(rdev);
        radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
}

/**
 * r600_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
        u32 reset_mask = r600_gpu_check_soft_reset(rdev);

        if (!(reset_mask & RADEON_RESET_DMA)) {
                radeon_ring_lockup_update(rdev, ring);
                return false;
        }
        return radeon_ring_test_lockup(rdev, ring);
}


/**
 * r600_dma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value
 * to memory (r6xx-SI).
 * Returns 0 for success, error for failure.
 */
int r600_dma_ring_test(struct radeon_device *rdev,
                       struct radeon_ring *ring)
{
        unsigned i;
        int r;
        void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
        u32 tmp;

        if (!ptr) {
                DRM_ERROR("invalid vram scratch pointer\n");
                return -EINVAL;
        }

        tmp = 0xCAFEDEAD;
        writel(tmp, ptr);

        r = radeon_ring_lock(rdev, ring, 4);
        if (r) {
                DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
                return r;
        }
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
        radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
        radeon_ring_write(ring, 0xDEADBEEF);
        radeon_ring_unlock_commit(rdev, ring, false);

        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = readl(ptr);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }

        if (i < rdev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
        } else {
                DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
                          ring->idx, tmp);
                r = -EINVAL;
        }
        return r;
}
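
/*
 * For reference, this is the test that radeon_ring_test() in
 * r600_dma_resume() above ends up invoking for the DMA ring (dispatched
 * through the per-ring asic callbacks): the scratch location is seeded
 * with 0xCAFEDEAD by the CPU, a single-dword DMA WRITE packet overwrites
 * it with 0xDEADBEEF, and the CPU polls until the new value lands or the
 * timeout expires.
 */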

/**
 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number, and a DMA trap packet to generate
 * an interrupt if needed (r6xx-r7xx).
 */
void r600_dma_fence_ring_emit(struct radeon_device *rdev,
                              struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* write the fence */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
        radeon_ring_write(ring, lower_32_bits(fence->seq));
        /* generate an interrupt */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
}
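
/*
 * The TRAP packet above raises the DMA engine interrupt once the fence
 * value has been written; the r6xx interrupt handler is expected to turn
 * that into fence processing for the DMA ring (see r600_irq_process() in
 * r600.c), which is how waiters on this fence get woken without polling.
 */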

/**
 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (r6xx-SI).
 */
bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
                                  struct radeon_ring *ring,
                                  struct radeon_semaphore *semaphore,
                                  bool emit_wait)
{
        u64 addr = semaphore->gpu_addr;
        u32 s = emit_wait ? 0 : 1; /* packet 's' field: 0 = wait, 1 = signal */

        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
        radeon_ring_write(ring, addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(addr) & 0xff);

        return true;
}

/**
 * r600_dma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (r6xx-SI).
 * Returns 0 on success, error on failure.
 */
int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
        struct radeon_ib ib;
        unsigned i;
        int r;
        void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
        u32 tmp = 0;

        if (!ptr) {
                DRM_ERROR("invalid vram scratch pointer\n");
                return -EINVAL;
        }

        tmp = 0xCAFEDEAD;
        writel(tmp, ptr);

        r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
        if (r) {
                DRM_ERROR("radeon: failed to get ib (%d).\n", r);
                return r;
        }

        ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
        ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
        ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
        ib.ptr[3] = 0xDEADBEEF;
        ib.length_dw = 4;

        r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                radeon_ib_free(rdev, &ib);
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
                return r;
        }
        r = radeon_fence_wait(ib.fence, false);
        if (r) {
                DRM_ERROR("radeon: fence wait failed (%d).\n", r);
                /* don't leak the IB on this error path */
                radeon_ib_free(rdev, &ib);
                return r;
        }
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = readl(ptr);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < rdev->usec_timeout) {
                DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
        } else {
                DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
                r = -EINVAL;
        }
        radeon_ib_free(rdev, &ib);
        return r;
}

/**
 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (r6xx-r7xx).
 */
void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];

        if (rdev->wb.enabled) {
                /* Predict where the read pointer will be once the 4-dword
                 * write packet below, the NOP padding and the 3-dword
                 * INDIRECT_BUFFER packet have been consumed, and have the
                 * DMA engine write that value to the next_rptr slot.
                 */
                u32 next_rptr = ring->wptr + 4;
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
                radeon_ring_write(ring, next_rptr);
        }

        /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
         * The INDIRECT_BUFFER packet below is 3 dwords, so pad with NOPs until
         * the wptr sits at offset 5 (mod 8); the packet then finishes exactly
         * on the boundary.
         */
        while ((ring->wptr & 7) != 5)
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * r600_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU pages using the DMA engine (r6xx).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int r600_copy_dma(struct radeon_device *rdev,
                  uint64_t src_offset, uint64_t dst_offset,
                  unsigned num_gpu_pages,
                  struct radeon_fence **fence)
{
        struct radeon_semaphore *sem = NULL;
        int ring_index = rdev->asic->copy.dma_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_dw, cur_size_in_dw;
        int i, num_loops;
        int r = 0;

        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                return r;
        }

        size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
        /* each COPY packet moves at most 0xFFFE dwords and occupies 4 ring
         * dwords; reserve 8 extra dwords of headroom for the semaphore sync
         * and fence/trap packets
         */
        num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
        r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
                return r;
        }

        radeon_semaphore_sync_to(sem, *fence);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);

        for (i = 0; i < num_loops; i++) {
                cur_size_in_dw = size_in_dw;
                if (cur_size_in_dw > 0xFFFE)
                        cur_size_in_dw = 0xFFFE;
                size_in_dw -= cur_size_in_dw;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
                radeon_ring_write(ring, dst_offset & 0xfffffffc);
                radeon_ring_write(ring, src_offset & 0xfffffffc);
                radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
                                         (upper_32_bits(src_offset) & 0xff)));
                src_offset += cur_size_in_dw * 4;
                dst_offset += cur_size_in_dw * 4;
        }

        r = radeon_fence_emit(rdev, fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
                return r;
        }

        radeon_ring_unlock_commit(rdev, ring, false);
        radeon_semaphore_free(rdev, &sem, *fence);

        return r;
}
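
/*
 * For context: these functions are not called directly by the core driver
 * but are plugged into the per-ring asic callback table.  A minimal sketch
 * of how the DMA ring entry might look in radeon_asic.c (field names taken
 * from struct radeon_asic_ring; treat this as an illustration, not the
 * exact upstream table):
 *
 *   static struct radeon_asic_ring r600_dma_ring = {
 *           .ib_execute = &r600_dma_ring_ib_execute,
 *           .emit_fence = &r600_dma_fence_ring_emit,
 *           .emit_semaphore = &r600_dma_semaphore_ring_emit,
 *           .ring_test = &r600_dma_ring_test,
 *           .ib_test = &r600_dma_ib_test,
 *           .is_lockup = &r600_dma_is_lockup,
 *           .get_rptr = &r600_dma_get_rptr,
 *           .get_wptr = &r600_dma_get_wptr,
 *           .set_wptr = &r600_dma_set_wptr,
 *   };
 */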