Subversion Repositories: Kolibri OS

Rev 5271

/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"

u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */
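
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * file): shows how a single "write embedded data" packet is laid out
 * with the DMA_PACKET() helper used throughout this file, mirroring
 * the next_rptr write in cayman_dma_ring_ib_execute() below.  Kept
 * under #if 0 so it is never built.
 */
#if 0
static void example_cayman_dma_write_dword(struct radeon_ring *ring,
                                           u64 dst_gpu_addr, u32 value)
{
        /* header: opcode WRITE, no tiling, no semaphore, 1 dword of payload */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
        /* destination address: low bits (dword aligned) and high 8 bits */
        radeon_ring_write(ring, lower_32_bits(dst_gpu_addr) & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(dst_gpu_addr) & 0xff);
        /* the embedded data itself */
        radeon_ring_write(ring, value);
}
#endif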

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 rptr, reg;

        if (rdev->wb.enabled) {
                rptr = rdev->wb.wb[ring->rptr_offs/4];
        } else {
                if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                        reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
                else
                        reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;

                rptr = RREG32(reg);
        }

        return (rptr & 0x3fffc) >> 2;
}

/**
 * cayman_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (cayman+).
 */
uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 reg;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
        else
                reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

        return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cayman_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (cayman+).
 */
void cayman_dma_set_wptr(struct radeon_device *rdev,
                         struct radeon_ring *ring)
{
        u32 reg;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
        else
                reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;

        WREG32(reg, (ring->wptr << 2) & 0x3fffc);
}

/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
                                struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;

        if (rdev->wb.enabled) {
                u32 next_rptr = ring->wptr + 4;
                while ((next_rptr & 7) != 5)
                        next_rptr++;
                next_rptr += 3;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
                radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
                radeon_ring_write(ring, next_rptr);
        }

        /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
         * Pad as necessary with NOPs.
         */
        while ((ring->wptr & 7) != 5)
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
        radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
        radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
        radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * cayman_dma_stop - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines (cayman-SI).
 */
void cayman_dma_stop(struct radeon_device *rdev)
{
        u32 rb_cntl;

        if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
            (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

        /* dma0 */
        rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);

        /* dma1 */
        rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
        rb_cntl &= ~DMA_RB_ENABLE;
        WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);

        rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
        rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}

/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them (cayman-SI).
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
        struct radeon_ring *ring;
        u32 rb_cntl, dma_cntl, ib_cntl;
        u32 rb_bufsz;
        u32 reg_offset, wb_offset;
        int i, r;

        for (i = 0; i < 2; i++) {
                if (i == 0) {
                        ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
                        reg_offset = DMA0_REGISTER_OFFSET;
                        wb_offset = R600_WB_DMA_RPTR_OFFSET;
                } else {
                        ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
                        reg_offset = DMA1_REGISTER_OFFSET;
                        wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
                }

                WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
                WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

                /* Set ring buffer size in dwords */
                rb_bufsz = order_base_2(ring->ring_size / 4);
                rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
                rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

                /* Initialize the ring buffer's read and write pointers */
                WREG32(DMA_RB_RPTR + reg_offset, 0);
                WREG32(DMA_RB_WPTR + reg_offset, 0);

                /* set the wb address whether it's enabled or not */
                WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
                       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
                WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
                       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

                if (rdev->wb.enabled)
                        rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

                WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

                /* enable DMA IBs */
                ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
                ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
                WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

                dma_cntl = RREG32(DMA_CNTL + reg_offset);
                dma_cntl &= ~CTXEMPTY_INT_ENABLE;
                WREG32(DMA_CNTL + reg_offset, dma_cntl);

                ring->wptr = 0;
                WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

                WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

                ring->ready = true;

                r = radeon_ring_test(rdev, ring->idx, ring);
                if (r) {
                        ring->ready = false;
                        return r;
                }
        }

        if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
            (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
                radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

        return 0;
}

/**
 * cayman_dma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (cayman-SI).
 */
void cayman_dma_fini(struct radeon_device *rdev)
{
        cayman_dma_stop(rdev);
        radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
        radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}

/**
 * cayman_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
        u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
        u32 mask;

        if (ring->idx == R600_RING_TYPE_DMA_INDEX)
                mask = RADEON_RESET_DMA;
        else
                mask = RADEON_RESET_DMA1;

        if (!(reset_mask & mask)) {
                radeon_ring_lockup_update(rdev, ring);
                return false;
        }
        return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (cayman/TN).
 */
void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
                              struct radeon_ib *ib,
                              uint64_t pe, uint64_t src,
                              unsigned count)
{
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
                                                      0, 0, ndw);
                ib->ptr[ib->length_dw++] = lower_32_bits(pe);
                ib->ptr[ib->length_dw++] = lower_32_bits(src);
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

                pe += ndw * 4;
                src += ndw * 4;
                count -= ndw / 2;
        }
}

/**
 * cayman_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update PTEs by writing them manually using the DMA (cayman/TN).
 */
void cayman_dma_vm_write_pages(struct radeon_device *rdev,
                               struct radeon_ib *ib,
                               uint64_t pe,
                               uint64_t addr, unsigned count,
                               uint32_t incr, uint32_t flags)
{
        uint64_t value;
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                /* for non-physically contiguous pages (system) */
                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
                                                      0, 0, ndw);
                ib->ptr[ib->length_dw++] = pe;
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                for (; ndw > 0; ndw -= 2, --count, pe += 8) {
                        if (flags & R600_PTE_SYSTEM) {
                                value = radeon_vm_map_gart(rdev, addr);
                        } else if (flags & R600_PTE_VALID) {
                                value = addr;
                        } else {
                                value = 0;
                        }
                        addr += incr;
                        value |= flags;
                        ib->ptr[ib->length_dw++] = value;
                        ib->ptr[ib->length_dw++] = upper_32_bits(value);
                }
        }
}

/**
 * cayman_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Update the page tables using the DMA (cayman/TN).
 */
void cayman_dma_vm_set_pages(struct radeon_device *rdev,
                             struct radeon_ib *ib,
                             uint64_t pe,
                             uint64_t addr, unsigned count,
                             uint32_t incr, uint32_t flags)
{
        uint64_t value;
        unsigned ndw;

        while (count) {
                ndw = count * 2;
                if (ndw > 0xFFFFE)
                        ndw = 0xFFFFE;

                if (flags & R600_PTE_VALID)
                        value = addr;
                else
                        value = 0;

                /* for physically contiguous pages (vram) */
                ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
                ib->ptr[ib->length_dw++] = pe; /* dst addr */
                ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                ib->ptr[ib->length_dw++] = flags; /* mask */
                ib->ptr[ib->length_dw++] = 0;
                ib->ptr[ib->length_dw++] = value; /* value */
                ib->ptr[ib->length_dw++] = upper_32_bits(value);
                ib->ptr[ib->length_dw++] = incr; /* increment size */
                ib->ptr[ib->length_dw++] = 0;

                pe += ndw * 4;
                addr += (ndw / 2) * incr;
                count -= ndw / 2;
        }
}

/**
 * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs so that its size is a multiple of 8 DWs.
 */
void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
{
        while (ib->length_dw & 0x7)
                ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}

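/**
 * cayman_dma_vm_flush - flush the TLB via the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VM context to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base address for the requested VM, flush the
 * HDP cache and invalidate the VM TLB using SRBM writes on the DMA
 * ring (cayman/TN).
 */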
void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
                         unsigned vm_id, uint64_t pd_addr)
{
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
        radeon_ring_write(ring, pd_addr >> 12);

        /* flush hdp cache */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
        radeon_ring_write(ring, 1);

        /* bits 0-7 are the VM contexts 0-7 */
        radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
        radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 1 << vm_id);

        /* wait for invalidate to complete */
        radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
        radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
        radeon_ring_write(ring, 0); /* mask */
        radeon_ring_write(ring, 0); /* value */
}