
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 rptr, reg;

        if (rdev->wb.enabled) {
                rptr = rdev->wb.wb[ring->rptr_offs/4];
        } else {
                if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                        reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
                else
                        reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

                rptr = RREG32(reg);
        }

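        /* the hardware rptr is a byte offset; convert it to a ring index in dwords */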
        return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 reg;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
        else
                reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

        return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
                         struct radeon_ring *ring)
{
        u32 reg;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
        else
                reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

        WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
                                struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];

        if (rdev->wb.enabled) {
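                /* emit a WRITE packet that updates the next_rptr copy in the
                 * writeback buffer, so it tracks where the rptr will point
                 * once the IB packet queued below has been fetched.
                 */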
                u32 next_rptr = ring->wptr + 4;
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
                radeon_ring_write(ring, next_rptr);
        }

        /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
         * Pad as necessary with NOPs.
         */
        while ((ring->wptr & 7) != 5)
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
        u32 rb_cntl;

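        /* if a DMA ring is serving as the copy engine, limit the active VRAM
         * size to the CPU-visible range while the engines are down, since
         * buffer moves then fall back to CPU copies.
         */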
        if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
            (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

        /* dma0 */
        rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

        /* dma1 */
        rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

        rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
        rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them. (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 rb_cntl, dma_cntl, ib_cntl;
        u32 rb_bufsz;
        u32 reg_offset, wb_offset;
        int i, r;

        /* Reset dma */
        WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
        RREG32(SRBM_SOFT_RESET);
        udelay(50);
        WREG32(SRBM_SOFT_RESET, 0);

        for (i = 0; i < 2; i++) {
                if (i == 0) {
                        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
                        reg_offset = DMA0_REGISTER_OFFSET;
                        wb_offset = R600_WB_DMA_RPTR_OFFSET;
                } else {
                        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
                        reg_offset = DMA1_REGISTER_OFFSET;
                        wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
                }

                WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
                WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

                /* Set ring buffer size in dwords */
                rb_bufsz = order_base_2(ring->ring_size / 4);
                rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
                rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

                /* Initialize the ring buffer's read and write pointers */
                WREG32(DMA_RB_RPTR + reg_offset, 0);
                WREG32(DMA_RB_WPTR + reg_offset, 0);

                /* set the wb address whether it's enabled or not */
                WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
                       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
                WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
                       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

                if (rdev->wb.enabled)
                        rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

                WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

                /* enable DMA IBs */
                ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
                ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
                WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

                dma_cntl = RREG32(DMA_CNTL + reg_offset);
                dma_cntl &= ~CTXEMPTY_INT_ENABLE;
                WREG32(DMA_CNTL + reg_offset, dma_cntl);

                ring->wptr = 0;
                WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

                ring->ready = true;

                r = radeon_ring_test(rdev, ring->idx, ring);
                if (r) {
                        ring->ready = false;
                        return r;
                }
        }

        if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
            (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
        cayman_dma_stop(rdev);
        radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
        radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
        u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
        u32 mask;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                mask = RADEON_RESET_DMA;
        else
                mask = RADEON_RESET_DMA1;

        if (!(reset_mask & mask)) {
                radeon_ring_lockup_update(rdev, ring);
                return false;
        }
        return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
                              struct radeon_ib *ib,
                              uint64_t pe, uint64_t src,
                              unsigned count)
{
        unsigned ndw;

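        /* split the update into chunks; a single COPY packet carries at most
         * 0xFFFFE dwords, i.e. two dwords per page table entry.
         */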
        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
                                                      0, 0, ndw);
                ib->ptr[ib->length_dw++] = lower_32_bits(pe);
                ib->ptr[ib->length_dw++] = lower_32_bits(src);
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

                pe += ndw * 4;
                src += ndw * 4;
                count -= ndw / 2;
        }
}

/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
                               struct radeon_ib *ib,
                               uint64_t pe,
                               uint64_t addr, unsigned count,
                               uint32_t incr, uint32_t flags)
{
        uint64_t value;
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                /* for non-physically contiguous pages (system) */
                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
                                                      0, 0, ndw);
                ib->ptr[ib->length_dw++] = pe;
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                        if (flags & R600_PTE_SYSTEM) {
                                value = radeon_vm_map_gart(rdev, addr);
                                value &= 0xFFFFFFFFFFFFF000ULL;
                        } else if (flags & R600_PTE_VALID) {
                                value = addr;
                        } else {
                                value = 0;
                        }
                        addr += incr;
                        value |= flags;
                        ib->ptr[ib->length_dw++] = value;
                        ib->ptr[ib->length_dw++] = upper_32_bits(value);
                }
        }
}

/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
                             struct radeon_ib *ib,
                             uint64_t pe,
                             uint64_t addr, unsigned count,
                             uint32_t incr, uint32_t flags)
{
        uint64_t value;
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                if (flags & R600_PTE_VALID)
                        value = addr;
                else
                        value = 0;

                /* for physically contiguous pages (vram) */
                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
                ib->ptr[ib->length_dw++] = pe; /* dst addr */
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                ib->ptr[ib->length_dw++] = flags; /* mask */
                ib->ptr[ib->length_dw++] = 0;
                ib->ptr[ib->length_dw++] = value; /* value */
                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                ib->ptr[ib->length_dw++] = incr; /* increment size */
                ib->ptr[ib->length_dw++] = 0;

                pe += ndw * 4;
                addr += (ndw / 2) * incr;
                count -= ndw / 2;
        }
}

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs so its size is a multiple of 8 dwords (cayman/TN).
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
        while (ib->length_dw & 0x7)
                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}

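/**
 * cayman_dma_vm_flush - flush the VM TLB via the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
 *
 * Update the page table base address for the given VM and flush the
 * VM TLB using the DMA ring (cayman/TN).
 */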
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

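        /* update the page directory base address for this VM context */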
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);

        /* bits 0-7 are the VM contexts0-7 */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 1 << vm->id);
}