Subversion Repositories Kolibri OS

Rev 5078

/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */
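
/*
 * Illustrative note (a sketch derived from how the code below uses the
 * packets, not an authoritative register spec): the DMA packet headers are
 * built with the DMA_PACKET()/DMA_IB_PACKET() macros (see nid.h), which
 * pack an opcode into the top bits of a single dword together with a dword
 * count, e.g.
 *
 *   DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)   - one NOP dword
 *   DMA_PACKET(DMA_PACKET_WRITE, 0, 0, n) - header, then the destination
 *                                           address (low dword + high bits),
 *                                           then n data dwords
 *
 * Payload dwords are emitted directly with radeon_ring_write() or by
 * filling ib->ptr[], rather than going through the PM4 packet helpers.
 */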

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 rptr, reg;

        if (rdev->wb.enabled) {
                rptr = rdev->wb.wb[ring->rptr_offs/4];
        } else {
                if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                        reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
                else
                        reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

                rptr = RREG32(reg);
        }

        /* rptr is a byte offset into the ring; convert it to a dword index */
        return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 reg;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
        else
                reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

        return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
                         struct radeon_ring *ring)
{
        u32 reg;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
        else
                reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

        WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
                                struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];

        if (rdev->wb.enabled) {
                u32 next_rptr = ring->wptr + 4;
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
                radeon_ring_write(ring, next_rptr);
        }

        /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
         * Pad with NOPs until (wptr & 7) == 5, so that the 3 DW indirect
         * buffer packet emitted below ends exactly on an 8 DW boundary.
         */
        while ((ring->wptr & 7) != 5)
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
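
/*
 * Illustrative dword layout of the indirect buffer packet emitted by
 * cayman_dma_ring_ib_execute() above (a sketch derived from the code, not
 * an authoritative spec):
 *
 *   dw0: DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)
 *   dw1: low 32 bits of the IB GPU address (32-byte aligned, low 5 bits zero)
 *   dw2: IB length in dwords in bits 31:12, IB address bits 39:32 in bits 7:0
 */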

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
        u32 rb_cntl;

        if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
            (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

        /* dma0 */
        rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

        /* dma1 */
        rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

        rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
        rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 rb_cntl, dma_cntl, ib_cntl;
        u32 rb_bufsz;
        u32 reg_offset, wb_offset;
        int i, r;

        for (i = 0; i < 2; i++) {
                if (i == 0) {
                        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
                        reg_offset = DMA0_REGISTER_OFFSET;
                        wb_offset = R600_WB_DMA_RPTR_OFFSET;
                } else {
                        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
                        reg_offset = DMA1_REGISTER_OFFSET;
                        wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
                }

                WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
                WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

                /* Set ring buffer size in dwords */
                rb_bufsz = order_base_2(ring->ring_size / 4);
                rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
                rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

                /* Initialize the ring buffer's read and write pointers */
                WREG32(DMA_RB_RPTR + reg_offset, 0);
                WREG32(DMA_RB_WPTR + reg_offset, 0);

                /* set the wb address whether it's enabled or not */
                WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
                       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
                WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
                       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

                if (rdev->wb.enabled)
                        rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

                WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

                /* enable DMA IBs */
                ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
                ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
                WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

                dma_cntl = RREG32(DMA_CNTL + reg_offset);
                dma_cntl &= ~CTXEMPTY_INT_ENABLE;
                WREG32(DMA_CNTL + reg_offset, dma_cntl);

                ring->wptr = 0;
                WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

                ring->ready = true;

                r = radeon_ring_test(rdev, ring->idx, ring);
                if (r) {
                        ring->ready = false;
                        return r;
                }
        }

        if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
            (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}
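
/*
 * Note (a summary of the code above, not additional hardware documentation):
 * both engines are programmed identically, just at different register
 * offsets (DMA0_REGISTER_OFFSET / DMA1_REGISTER_OFFSET).  The bring-up
 * order is: disable the semaphore timeouts, program the ring buffer size,
 * reset rptr/wptr, set the rptr write-back address, program the ring base,
 * enable IBs, then enable the ring buffer and run a ring test on each engine.
 */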

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
        cayman_dma_stop(rdev);
        radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
        radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
        u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
        u32 mask;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                mask = RADEON_RESET_DMA;
        else
                mask = RADEON_RESET_DMA1;

        if (!(reset_mask & mask)) {
                radeon_ring_lockup_update(rdev, ring);
                return false;
        }
        return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
                              struct radeon_ib *ib,
                              uint64_t pe, uint64_t src,
                              unsigned count)
{
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
                                                      0, 0, ndw);
                ib->ptr[ib->length_dw++] = lower_32_bits(pe);
                ib->ptr[ib->length_dw++] = lower_32_bits(src);
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

                pe += ndw * 4;
                src += ndw * 4;
                count -= ndw / 2;
        }
}

/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
                               struct radeon_ib *ib,
                               uint64_t pe,
                               uint64_t addr, unsigned count,
                               uint32_t incr, uint32_t flags)
{
        uint64_t value;
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                /* for non-physically contiguous pages (system) */
                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
                                                      0, 0, ndw);
                ib->ptr[ib->length_dw++] = pe;
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                        if (flags & R600_PTE_SYSTEM) {
                                value = radeon_vm_map_gart(rdev, addr);
                                value &= 0xFFFFFFFFFFFFF000ULL;
                        } else if (flags & R600_PTE_VALID) {
                                value = addr;
                        } else {
                                value = 0;
                        }
                        addr += incr;
                        value |= flags;
                        ib->ptr[ib->length_dw++] = value;
                        ib->ptr[ib->length_dw++] = upper_32_bits(value);
                }
        }
}

/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
                             struct radeon_ib *ib,
                             uint64_t pe,
                             uint64_t addr, unsigned count,
                             uint32_t incr, uint32_t flags)
{
        uint64_t value;
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                if (flags & R600_PTE_VALID)
                        value = addr;
                else
                        value = 0;

                /* for physically contiguous pages (vram) */
                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
                ib->ptr[ib->length_dw++] = pe; /* dst addr */
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                ib->ptr[ib->length_dw++] = flags; /* mask */
                ib->ptr[ib->length_dw++] = 0;
                ib->ptr[ib->length_dw++] = value; /* value */
                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                ib->ptr[ib->length_dw++] = incr; /* increment size */
                ib->ptr[ib->length_dw++] = 0;

                pe += ndw * 4;
                addr += (ndw / 2) * incr;
                count -= ndw / 2;
        }
}
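
/*
 * Note (a summary of the three PTE update paths above, derived from the
 * code): copy_pages copies ready-made PTEs from the GART table with a COPY
 * packet, write_pages emits every PTE value explicitly inside a WRITE
 * packet (used for non-physically contiguous system pages), and set_pages
 * uses a PTE_PDE packet that lets the engine generate the entries itself
 * from a base value plus a per-entry increment (used for physically
 * contiguous VRAM pages).
 */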

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOP packets so that its length is a multiple of 8 dwords.
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
        while (ib->length_dw & 0x7)
                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}

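/**
 * cayman_dma_vm_flush - flush the VM page tables using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit the flush on
 * @vm: radeon_vm pointer
 *
 * Update the page table base address for the given VM, flush the HDP
 * cache and invalidate the VM's TLB via SRBM writes on the DMA ring
 * (cayman/TN).
 */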
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
        struct radeon_ring *ring = &rdev->ring[ridx];

        if (vm == NULL)
                return;

        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
        radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);

        /* bits 0-7 are the VM contexts0-7 */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 1 << vm->id);
}