  1. /*
  2.  * Copyright 2009 Jerome Glisse.
  3.  * All Rights Reserved.
  4.  *
  5.  * Permission is hereby granted, free of charge, to any person obtaining a
  6.  * copy of this software and associated documentation files (the
  7.  * "Software"), to deal in the Software without restriction, including
  8.  * without limitation the rights to use, copy, modify, merge, publish,
  9.  * distribute, sub license, and/or sell copies of the Software, and to
  10.  * permit persons to whom the Software is furnished to do so, subject to
  11.  * the following conditions:
  12.  *
  13.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15.  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16.  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17.  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18.  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19.  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20.  *
  21.  * The above copyright notice and this permission notice (including the
  22.  * next paragraph) shall be included in all copies or substantial portions
  23.  * of the Software.
  24.  *
  25.  */
  26. /*
  27.  * Authors:
  28.  *    Jerome Glisse <glisse@freedesktop.org>
  29.  *    Dave Airlie
  30.  */
  31. #include <linux/seq_file.h>
  32. #include <linux/atomic.h>
  33. #include <linux/wait.h>
  34. #include <linux/kref.h>
  35. #include <linux/slab.h>
  36. #include <linux/firmware.h>
  37. #include <drm/drmP.h>
  38. #include "radeon_reg.h"
  39. #include "radeon.h"
  40. #include "radeon_trace.h"
  41.  
  42. /*
  43.  * Fences
  44.  * Fences mark an event in the GPU's pipeline and are used
  45.  * for GPU/CPU synchronization.  When the fence is written,
  46.  * it is expected that all buffers associated with that fence
  47.  * are no longer in use by the associated ring on the GPU and
  48.  * that the relevant GPU caches have been flushed.  Whether
  49.  * we use a scratch register or memory location depends on the asic
  50.  * and whether writeback is enabled.
  51.  */
  52.  
  53. /**
  54.  * radeon_fence_write - write a fence value
  55.  *
  56.  * @rdev: radeon_device pointer
  57.  * @seq: sequence number to write
  58.  * @ring: ring index the fence is associated with
  59.  *
  60.  * Writes a fence value to memory or a scratch register (all asics).
  61.  */
  62. static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  63. {
  64.         struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  65.         if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  66.                 if (drv->cpu_addr) {
  67.                         *drv->cpu_addr = cpu_to_le32(seq);
  68.                 }
  69.         } else {
  70.                 WREG32(drv->scratch_reg, seq);
  71.         }
  72. }
  73.  
  74. /**
  75.  * radeon_fence_read - read a fence value
  76.  *
  77.  * @rdev: radeon_device pointer
  78.  * @ring: ring index the fence is associated with
  79.  *
  80.  * Reads a fence value from memory or a scratch register (all asics).
  81.  * Returns the value of the fence read from memory or register.
  82.  */
  83. static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  84. {
  85.         struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  86.         u32 seq = 0;
  87.  
  88.         if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  89.                 if (drv->cpu_addr) {
  90.                         seq = le32_to_cpu(*drv->cpu_addr);
  91.                 } else {
  92.                         seq = lower_32_bits(atomic64_read(&drv->last_seq));
  93.                 }
  94.         } else {
  95.                 seq = RREG32(drv->scratch_reg);
  96.         }
  97.         return seq;
  98. }
  99.  
  100. /**
  101.  * radeon_fence_emit - emit a fence on the requested ring
  102.  *
  103.  * @rdev: radeon_device pointer
  104.  * @fence: radeon fence object
  105.  * @ring: ring index the fence is associated with
  106.  *
  107.  * Emits a fence command on the requested ring (all asics).
  108.  * Returns 0 on success, -ENOMEM on failure.
  109.  */
  110. int radeon_fence_emit(struct radeon_device *rdev,
  111.                       struct radeon_fence **fence,
  112.                       int ring)
  113. {
  114.         u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
  115.  
  116.         /* we are protected by the ring emission mutex */
  117.         *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
  118.         if ((*fence) == NULL) {
  119.                 return -ENOMEM;
  120.         }
  121.         (*fence)->rdev = rdev;
  122.         (*fence)->seq = seq;
  123.         (*fence)->ring = ring;
  124.         (*fence)->is_vm_update = false;
  125.         fence_init(&(*fence)->base, &radeon_fence_ops,
  126.                    &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
  127.         radeon_fence_ring_emit(rdev, ring, *fence);
  128.         trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
  129.         return 0;
  130. }
  131.  
  132. /**
  133.  * radeon_fence_check_signaled - callback from fence_queue
  134.  *
  135.  * This function is called with the fence_queue lock held, which is also
  136.  * used for the fence locking itself, so unlocked variants are used for
  137.  * fence_signal and remove_wait_queue.
  138.  */
  139. static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
  140. {
  141.         struct radeon_fence *fence;
  142.         u64 seq;
  143.  
  144.         fence = container_of(wait, struct radeon_fence, fence_wake);
  145.  
  146.         /*
  147.          * We cannot use radeon_fence_process here because we're already
  148.          * in the waitqueue, in a call from wake_up_all.
  149.          */
  150.         seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
  151.         if (seq >= fence->seq) {
  152.                 int ret = fence_signal_locked(&fence->base);
  153.  
  154.                 if (!ret)
  155.                         FENCE_TRACE(&fence->base, "signaled from irq context\n");
  156.                 else
  157.                         FENCE_TRACE(&fence->base, "was already signaled\n");
  158.  
  159.                 radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
  160. //       __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
  161.                 fence_put(&fence->base);
  162.         } else
  163.                 FENCE_TRACE(&fence->base, "pending\n");
  164.         return 0;
  165. }
  166.  
  167. /**
  168.  * radeon_fence_activity - check for fence activity
  169.  *
  170.  * @rdev: radeon_device pointer
  171.  * @ring: ring index the fence is associated with
  172.  *
  173.  * Checks the current fence value and calculates the last
  174.  * signaled fence value. Returns true if activity occurred
  175.  * on the ring and the fence_queue should be woken up.
  176.  */
  177. static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
  178. {
  179.         uint64_t seq, last_seq, last_emitted;
  180.         unsigned count_loop = 0;
  181.         bool wake = false;
  182.  
  183.         /* Note there is a scenario here for an infinite loop but it's
  184.          * very unlikely to happen. For it to happen, the current polling
  185.          * process needs to be interrupted by another process, and that
  186.          * other process needs to update last_seq between the atomic read
  187.          * and the xchg of the current process.
  188.          *
  189.          * Moreover, for this to become an infinite loop, new fences need
  190.          * to be signaled continuously, i.e. radeon_fence_read needs to
  191.          * return a different value each time for both the currently
  192.          * polling process and the other process that updates last_seq
  193.          * between the atomic read and xchg of the current process. And
  194.          * the value the other process sets as last_seq must be higher
  195.          * than the seq value we just read, which means the current
  196.          * process has to be interrupted after radeon_fence_read and
  197.          * before the atomic xchg.
  198.          *
  199.          * To be even safer we count the number of times we loop and we
  200.          * bail out after 10 iterations, accepting that we might have
  201.          * temporarily set last_seq not to the true last seq but to an
  202.          * older one.
  203.          */
  204.         last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  205.         do {
  206.                 last_emitted = rdev->fence_drv[ring].sync_seq[ring];
  207.                 seq = radeon_fence_read(rdev, ring);
  208.                 seq |= last_seq & 0xffffffff00000000LL;
  209.                 if (seq < last_seq) {
  210.                         seq &= 0xffffffff;
  211.                         seq |= last_emitted & 0xffffffff00000000LL;
  212.                 }
  213.  
  214.                 if (seq <= last_seq || seq > last_emitted) {
  215.                         break;
  216.                 }
  217.                 /* If we looped over, we don't want to return without
  218.                  * checking if a fence is signaled, as it means that the
  219.                  * seq we just read is different from the previous one.
  220.                  */
  221.                 wake = true;
  222.                 last_seq = seq;
  223.                 if ((count_loop++) > 10) {
  224.                         /* We looped too many times; leave, accepting
  225.                          * that we might have set an older fence
  226.                          * seq than the current real last seq signaled
  227.                          * by the hw.
  228.                          */
  229.                         break;
  230.                 }
  231.         } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
  232.  
  233. //      if (seq < last_emitted)
  234. //              radeon_fence_schedule_check(rdev, ring);
  235.  
  236.         return wake;
  237. }
  238.  
  239. /**
  240.  * radeon_fence_check_lockup - check for hardware lockup
  241.  *
  242.  * @work: delayed work item
  243.  *
  244.  * Checks for fence activity and, if there is none, probes
  245.  * the hardware to see if a lockup occurred.
  246.  */
  247. static void radeon_fence_check_lockup(struct work_struct *work)
  248. {
  249.         struct radeon_fence_driver *fence_drv;
  250.         struct radeon_device *rdev;
  251.         int ring;
  252.  
  253.         fence_drv = container_of(work, struct radeon_fence_driver,
  254.                                  lockup_work.work);
  255.         rdev = fence_drv->rdev;
  256.         ring = fence_drv - &rdev->fence_drv[0];
  257.  
  258. //      if (!down_read_trylock(&rdev->exclusive_lock)) {
  259. //              /* just reschedule the check if a reset is going on */
  260. //              radeon_fence_schedule_check(rdev, ring);
  261. //              return;
  262. //      }
  263.  
  264.         if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
  265.                 unsigned long irqflags;
  266.  
  267.                 fence_drv->delayed_irq = false;
  268.                 spin_lock_irqsave(&rdev->irq.lock, irqflags);
  269.                 radeon_irq_set(rdev);
  270.                 spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
  271.         }
  272.  
  273.         if (radeon_fence_activity(rdev, ring))
  274.                 wake_up_all(&rdev->fence_queue);
  275.  
  276.         else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  277.  
  278.                 /* good news we believe it's a lockup */
  279.                 dev_warn(rdev->dev, "GPU lockup (current fence id "
  280.                          "0x%016llx last fence id 0x%016llx on ring %d)\n",
  281.                          (uint64_t)atomic64_read(&fence_drv->last_seq),
  282.                          fence_drv->sync_seq[ring], ring);
  283.  
  284.         /* remember that we need a reset */
  285.                 rdev->needs_reset = true;
  286.                 wake_up_all(&rdev->fence_queue);
  287.         }
  288. //      up_read(&rdev->exclusive_lock);
  289. }
  290.  
  291. /**
  292.  * radeon_fence_process - process a fence
  293.  *
  294.  * @rdev: radeon_device pointer
  295.  * @ring: ring index the fence is associated with
  296.  *
  297.  * Checks the current fence value and wakes the fence queue
  298.  * if the sequence number has increased (all asics).
  299.  */
  300. void radeon_fence_process(struct radeon_device *rdev, int ring)
  301. {
  302.         if (radeon_fence_activity(rdev, ring))
  303.                 wake_up_all(&rdev->fence_queue);
  304. }
  305.  
  306. /**
  307.  * radeon_fence_seq_signaled - check if a fence sequence number has signaled
  308.  *
  309.  * @rdev: radeon device pointer
  310.  * @seq: sequence number
  311.  * @ring: ring index the fence is associated with
  312.  *
  313.  * Check if the last signaled fence sequence number is >= the requested
  314.  * sequence number (all asics).
  315.  * Returns true if the fence has signaled (current fence value
  316.  * is >= requested value) or false if it has not (current fence
  317.  * value is < the requested value).  Helper function for
  318.  * radeon_fence_signaled().
  319.  */
  320. static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
  321.                                       u64 seq, unsigned ring)
  322. {
  323.         if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  324.                 return true;
  325.         }
  326.         /* poll new last sequence at least once */
  327.         radeon_fence_process(rdev, ring);
  328.         if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  329.                 return true;
  330.         }
  331.         return false;
  332. }
  333.  
  334. static bool radeon_fence_is_signaled(struct fence *f)
  335. {
  336.         struct radeon_fence *fence = to_radeon_fence(f);
  337.         struct radeon_device *rdev = fence->rdev;
  338.         unsigned ring = fence->ring;
  339.         u64 seq = fence->seq;
  340.  
  341.         if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  342.                 return true;
  343.         }
  344.  
  345.         if (down_read_trylock(&rdev->exclusive_lock)) {
  346.                 radeon_fence_process(rdev, ring);
  347.                 up_read(&rdev->exclusive_lock);
  348.  
  349.                 if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  350.                         return true;
  351.                 }
  352.         }
  353.         return false;
  354. }
  355.  
  356. /**
  357.  * radeon_fence_enable_signaling - enable signalling on fence
  358.  * @fence: fence
  359.  *
  360.  * This function is called with fence_queue lock held, and adds a callback
  361.  * to fence_queue that checks if this fence is signaled, and if so it
  362.  * signals the fence and removes itself.
  363.  */
  364. static bool radeon_fence_enable_signaling(struct fence *f)
  365. {
  366.         struct radeon_fence *fence = to_radeon_fence(f);
  367.         struct radeon_device *rdev = fence->rdev;
  368.  
  369.         if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
  370.                 return false;
  371.  
  372. //   if (down_read_trylock(&rdev->exclusive_lock))
  373.          {
  374.                 radeon_irq_kms_sw_irq_get(rdev, fence->ring);
  375.  
  376. //       if (radeon_fence_activity(rdev, fence->ring))
  377. //           wake_up_all_locked(&rdev->fence_queue);
  378.  
  379.                 /* did fence get signaled after we enabled the sw irq? */
  380.                 if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
  381.                         radeon_irq_kms_sw_irq_put(rdev, fence->ring);
  382. //           up_read(&rdev->exclusive_lock);
  383.                         return false;
  384.                 }
  385.  
  386. //       up_read(&rdev->exclusive_lock);
  387. //   } else {
  388.                 /* we're probably in a lockup, let's not fiddle too much */
  389. //       if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
  390. //           rdev->fence_drv[fence->ring].delayed_irq = true;
  391. //       radeon_fence_schedule_check(rdev, fence->ring);
  392.         }
  393.  
  394. //      fence->fence_wake.flags = 0;
  395. //      fence->fence_wake.private = NULL;
  396.         fence->fence_wake.func = radeon_fence_check_signaled;
  397.         __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
  398.         fence_get(f);
  399.  
  400.         FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
  401.         return true;
  402. }
  403.  
  404. /**
  405.  * radeon_fence_signaled - check if a fence has signaled
  406.  *
  407.  * @fence: radeon fence object
  408.  *
  409.  * Check if the requested fence has signaled (all asics).
  410.  * Returns true if the fence has signaled or false if it has not.
  411.  */
  412. bool radeon_fence_signaled(struct radeon_fence *fence)
  413. {
  414.         if (!fence)
  415.                 return true;
  416.  
  417.         if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
  418.                 int ret;
  419.  
  420.                 ret = fence_signal(&fence->base);
  421.                 if (!ret)
  422.                         FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
  423.                 return true;
  424.         }
  425.         return false;
  426. }
  427.  
  428. /**
  429.  * radeon_fence_any_seq_signaled - check if any sequence number is signaled
  430.  *
  431.  * @rdev: radeon device pointer
  432.  * @seq: sequence numbers
  433.  *
  434.  * Check if the last signaled fence sequence number is >= the requested
  435.  * sequence number (all asics).
  436.  * Returns true if any has signaled (current value is >= requested value)
  437.  * or false if it has not. Helper function for radeon_fence_wait_seq.
  438.  */
  439. static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
  440. {
  441.         unsigned i;
  442.  
  443.         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  444.                 if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
  445.                         return true;
  446.         }
  447.         return false;
  448. }
  449.  
  450. /**
  451.  * radeon_fence_wait_seq_timeout - wait for specific sequence numbers
  452.  *
  453.  * @rdev: radeon device pointer
  454.  * @target_seq: sequence number(s) we want to wait for
  455.  * @intr: use interruptible sleep
  456.  * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
  457.  *
  458.  * Wait for the requested sequence number(s) to be written by any ring
  459.  * (all asics).  Sequence number array is indexed by ring id.
  460.  * @intr selects whether to use interruptible (true) or non-interruptible
  461.  * (false) sleep when waiting for the sequence number.  Helper function
  462.  * for radeon_fence_wait_*().
  463.  * Returns remaining time if the sequence number has passed, 0 when
  464.  * the wait timed out, or an error for all other cases.
  465.  * -EDEADLK is returned when a GPU lockup has been detected.
  466.  */
  467. static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
  468.                                           u64 *target_seq, bool intr,
  469.                                           long timeout)
  470. {
  471.         long r;
  472.         int i;
  473.  
  474.         if (radeon_fence_any_seq_signaled(rdev, target_seq))
  475.                 return timeout;
  476.  
  477.         /* enable IRQs and tracing */
  478.         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  479.                 if (!target_seq[i])
  480.                         continue;
  481.  
  482.                 trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
  483.                 radeon_irq_kms_sw_irq_get(rdev, i);
  484.         }
  485.  
  486.         if (intr) {
  487.                 r = wait_event_interruptible_timeout(rdev->fence_queue, (
  488.                         radeon_fence_any_seq_signaled(rdev, target_seq)
  489.                          || rdev->needs_reset), timeout);
  490.         } else {
  491.                 r = wait_event_timeout(rdev->fence_queue, (
  492.                         radeon_fence_any_seq_signaled(rdev, target_seq)
  493.                          || rdev->needs_reset), timeout);
  494.         }
  495.  
  496.         if (rdev->needs_reset)
  497.                 r = -EDEADLK;
  498.  
  499.         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  500.                 if (!target_seq[i])
  501.                         continue;
  502.  
  503.                 radeon_irq_kms_sw_irq_put(rdev, i);
  504.                 trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
  505.         }
  506.  
  507.         return r;
  508. }
  509.  
  510. /**
  511.  * radeon_fence_wait - wait for a fence to signal
  512.  *
  513.  * @fence: radeon fence object
  514.  * @intr: use interruptible sleep
  515.  *
  516.  * Wait for the requested fence to signal (all asics).
  517.  * @intr selects whether to use interruptible (true) or non-interruptible
  518.  * (false) sleep when waiting for the fence.
  519.  * Returns 0 if the fence has passed, error for all other cases.
  520.  */
  521. int radeon_fence_wait(struct radeon_fence *fence, bool intr)
  522. {
  523.         uint64_t seq[RADEON_NUM_RINGS] = {};
  524.         long r;
  525.  
  526.         /*
  527.          * This function should not be called on !radeon fences.
  528.          * If this is the case, it would mean this function can
  529.          * also be called on radeon fences belonging to another card.
  530.          * exclusive_lock is not held in that case.
  531.          */
  532.         if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
  533.                 return fence_wait(&fence->base, intr);
  534.  
  535.         seq[fence->ring] = fence->seq;
  536.         r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
  537.         if (r < 0) {
  538.                 return r;
  539.         }
  540.  
  541.         r = fence_signal(&fence->base);
  542.         if (!r)
  543.                 FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
  544.         return 0;
  545. }
  546.  
  547. /**
  548.  * radeon_fence_wait_any - wait for a fence to signal on any ring
  549.  *
  550.  * @rdev: radeon device pointer
  551.  * @fences: radeon fence object(s)
  552.  * @intr: use interruptible sleep
  553.  *
  554.  * Wait for any requested fence to signal (all asics).  Fence
  555.  * array is indexed by ring id.  @intr selects whether to use
  556.  * interruptible (true) or non-interruptible (false) sleep when
  557.  * waiting for the fences. Used by the suballocator.
  558.  * Returns 0 if any fence has passed, error for all other cases.
  559.  */
  560. int radeon_fence_wait_any(struct radeon_device *rdev,
  561.                           struct radeon_fence **fences,
  562.                           bool intr)
  563. {
  564.         uint64_t seq[RADEON_NUM_RINGS];
  565.         unsigned i, num_rings = 0;
  566.         long r;
  567.  
  568.         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  569.                 seq[i] = 0;
  570.  
  571.                 if (!fences[i]) {
  572.                         continue;
  573.                 }
  574.  
  575.                 seq[i] = fences[i]->seq;
  576.                 ++num_rings;
  577.         }
  578.  
  579.         /* nothing to wait for ? */
  580.         if (num_rings == 0)
  581.                 return -ENOENT;
  582.  
  583.         r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
  584.         if (r < 0) {
  585.                 return r;
  586.         }
  587.         return 0;
  588. }
  589.  
  590. /**
  591.  * radeon_fence_wait_next - wait for the next fence to signal
  592.  *
  593.  * @rdev: radeon device pointer
  594.  * @ring: ring index the fence is associated with
  595.  *
  596.  * Wait for the next fence on the requested ring to signal (all asics).
  597.  * Returns 0 if the next fence has passed, error for all other cases.
  598.  * Caller must hold ring lock.
  599.  */
  600. int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
  601. {
  602.         uint64_t seq[RADEON_NUM_RINGS] = {};
  603.         long r;
  604.  
  605.         seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
  606.         if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
  607.                 /* nothing to wait for, last_seq is
  608.                    already the last emitted fence */
  609.                 return -ENOENT;
  610.         }
  611.         r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
  612.         if (r < 0)
  613.                 return r;
  614.         return 0;
  615. }
  616.  
  617. /**
  618.  * radeon_fence_wait_empty - wait for all fences to signal
  619.  *
  620.  * @rdev: radeon device pointer
  621.  * @ring: ring index the fence is associated with
  622.  *
  623.  * Wait for all fences on the requested ring to signal (all asics).
  624.  * Returns 0 if the fences have passed, error for all other cases.
  625.  * Caller must hold ring lock.
  626.  */
  627. int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
  628. {
  629.         uint64_t seq[RADEON_NUM_RINGS] = {};
  630.         long r;
  631.  
  632.         seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
  633.         if (!seq[ring])
  634.                 return 0;
  635.  
  636.         r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
  637.         if (r < 0) {
  638.                 if (r == -EDEADLK)
  639.                         return -EDEADLK;
  640.  
  641.                 dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
  642.                         ring, r);
  643.         }
  644.         return 0;
  645. }
  646.  
  647. /**
  648.  * radeon_fence_ref - take a ref on a fence
  649.  *
  650.  * @fence: radeon fence object
  651.  *
  652.  * Take a reference on a fence (all asics).
  653.  * Returns the fence.
  654.  */
  655. struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
  656. {
  657.         fence_get(&fence->base);
  658.         return fence;
  659. }
  660.  
  661. /**
  662.  * radeon_fence_unref - remove a ref on a fence
  663.  *
  664.  * @fence: radeon fence object
  665.  *
  666.  * Remove a reference on a fence (all asics).
  667.  */
  668. void radeon_fence_unref(struct radeon_fence **fence)
  669. {
  670.         struct radeon_fence *tmp = *fence;
  671.  
  672.         *fence = NULL;
  673.         if (tmp) {
  674.                 fence_put(&tmp->base);
  675.         }
  676. }
  677.  
  678. /**
  679.  * radeon_fence_count_emitted - get the count of emitted fences
  680.  *
  681.  * @rdev: radeon device pointer
  682.  * @ring: ring index the fence is associated with
  683.  *
  684.  * Get the number of fences emitted on the requested ring (all asics).
  685.  * Returns the number of emitted fences on the ring.  Used by the
  686.  * dynpm code to track ring activity.
  687.  */
  688. unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
  689. {
  690.         uint64_t emitted;
  691.  
  692.         /* We are not protected by ring lock when reading the last sequence
  693.          * but it's ok to report slightly wrong fence count here.
  694.          */
  695.         radeon_fence_process(rdev, ring);
  696.         emitted = rdev->fence_drv[ring].sync_seq[ring]
  697.                 - atomic64_read(&rdev->fence_drv[ring].last_seq);
  698.         /* to avoid 32-bit wrap-around */
  699.         if (emitted > 0x10000000) {
  700.                 emitted = 0x10000000;
  701.         }
  702.         return (unsigned)emitted;
  703. }
  704.  
  705. /**
  706.  * radeon_fence_need_sync - do we need a semaphore
  707.  *
  708.  * @fence: radeon fence object
  709.  * @dst_ring: which ring to check against
  710.  *
  711.  * Check if the fence needs to be synced against another ring
  712.  * (all asics).  If so, we need to emit a semaphore.
  713.  * Returns true if we need to sync with another ring, false if
  714.  * not.
  715.  */
  716. bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
  717. {
  718.         struct radeon_fence_driver *fdrv;
  719.  
  720.         if (!fence) {
  721.                 return false;
  722.         }
  723.  
  724.         if (fence->ring == dst_ring) {
  725.                 return false;
  726.         }
  727.  
  728.         /* we are protected by the ring mutex */
  729.         fdrv = &fence->rdev->fence_drv[dst_ring];
  730.         if (fence->seq <= fdrv->sync_seq[fence->ring]) {
  731.                 return false;
  732.         }
  733.  
  734.         return true;
  735. }
  736.  
  737. /**
  738.  * radeon_fence_note_sync - record the sync point
  739.  *
  740.  * @fence: radeon fence object
  741.  * @dst_ring: which ring to check against
  742.  *
  743.  * Note the sequence number at which point the fence will
  744.  * be synced with the requested ring (all asics).
  745.  */
  746. void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
  747. {
  748.         struct radeon_fence_driver *dst, *src;
  749.         unsigned i;
  750.  
  751.         if (!fence) {
  752.                 return;
  753.         }
  754.  
  755.         if (fence->ring == dst_ring) {
  756.                 return;
  757.         }
  758.  
  759.         /* we are protected by the ring mutex */
  760.         src = &fence->rdev->fence_drv[fence->ring];
  761.         dst = &fence->rdev->fence_drv[dst_ring];
  762.         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  763.                 if (i == dst_ring) {
  764.                         continue;
  765.                 }
  766.                 dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
  767.         }
  768. }
  769.  
  770. /**
  771.  * radeon_fence_driver_start_ring - make the fence driver
  772.  * ready for use on the requested ring.
  773.  *
  774.  * @rdev: radeon device pointer
  775.  * @ring: ring index to start the fence driver on
  776.  *
  777.  * Make the fence driver ready for processing (all asics).
  778.  * Not all asics have all rings, so each asic will only
  779.  * start the fence driver on the rings it has.
  780.  * Returns 0 for success, errors for failure.
  781.  */
  782. int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
  783. {
  784.         uint64_t index;
  785.         int r;
  786.  
  787.         radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  788.         if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
  789.                 rdev->fence_drv[ring].scratch_reg = 0;
  790.                 if (ring != R600_RING_TYPE_UVD_INDEX) {
  791.                         index = R600_WB_EVENT_OFFSET + ring * 4;
  792.                         rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  793.                         rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
  794.                                                          index;
  795.  
  796.                 } else {
  797.                         /* put fence directly behind firmware */
  798.                         index = ALIGN(rdev->uvd_fw->size, 8);
  799.                         rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
  800.                         rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
  801.                 }
  802.  
  803.         } else {
  804.                 r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
  805.                 if (r) {
  806.                         dev_err(rdev->dev, "fence failed to get scratch register\n");
  807.                         return r;
  808.                 }
  809.                 index = RADEON_WB_SCRATCH_OFFSET +
  810.                         rdev->fence_drv[ring].scratch_reg -
  811.                         rdev->scratch.reg_base;
  812.                 rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  813.                 rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
  814.         }
  815.         radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
  816.         rdev->fence_drv[ring].initialized = true;
  817.         dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
  818.                  ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
  819.         return 0;
  820. }
  821.  
  822. /**
  823.  * radeon_fence_driver_init_ring - init the fence driver
  824.  * for the requested ring.
  825.  *
  826.  * @rdev: radeon device pointer
  827.  * @ring: ring index to start the fence driver on
  828.  *
  829.  * Init the fence driver for the requested ring (all asics).
  830.  * Helper function for radeon_fence_driver_init().
  831.  */
  832. static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
  833. {
  834.         int i;
  835.  
  836.         rdev->fence_drv[ring].scratch_reg = -1;
  837.         rdev->fence_drv[ring].cpu_addr = NULL;
  838.         rdev->fence_drv[ring].gpu_addr = 0;
  839.         for (i = 0; i < RADEON_NUM_RINGS; ++i)
  840.                 rdev->fence_drv[ring].sync_seq[i] = 0;
  841.         atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
  842.         rdev->fence_drv[ring].initialized = false;
  843.         INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
  844.                           radeon_fence_check_lockup);
  845.         rdev->fence_drv[ring].rdev = rdev;
  846. }
  847.  
  848. /**
  849.  * radeon_fence_driver_init - init the fence driver
  850.  * for all possible rings.
  851.  *
  852.  * @rdev: radeon device pointer
  853.  *
  854.  * Init the fence driver for all possible rings (all asics).
  855.  * Not all asics have all rings, so each asic will only
  856.  * start the fence driver on the rings it has using
  857.  * radeon_fence_driver_start_ring().
  858.  * Returns 0 for success.
  859.  */
  860. int radeon_fence_driver_init(struct radeon_device *rdev)
  861. {
  862.         int ring;
  863.  
  864.         init_waitqueue_head(&rdev->fence_queue);
  865.         for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  866.                 radeon_fence_driver_init_ring(rdev, ring);
  867.         }
  868.         if (radeon_debugfs_fence_init(rdev)) {
  869.                 dev_err(rdev->dev, "fence debugfs file creation failed\n");
  870.         }
  871.         return 0;
  872. }
  873.  
  874. /**
  875.  * radeon_fence_driver_fini - tear down the fence driver
  876.  * for all possible rings.
  877.  *
  878.  * @rdev: radeon device pointer
  879.  *
  880.  * Tear down the fence driver for all possible rings (all asics).
  881.  */
  882. void radeon_fence_driver_fini(struct radeon_device *rdev)
  883. {
  884.         int ring, r;
  885.  
  886.         mutex_lock(&rdev->ring_lock);
  887.         for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  888.                 if (!rdev->fence_drv[ring].initialized)
  889.                         continue;
  890.                 r = radeon_fence_wait_empty(rdev, ring);
  891.                 if (r) {
  892.                         /* no need to trigger GPU reset as we are unloading */
  893.                         radeon_fence_driver_force_completion(rdev, ring);
  894.                 }
  895.                 wake_up_all(&rdev->fence_queue);
  896.                 radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  897.                 rdev->fence_drv[ring].initialized = false;
  898.         }
  899.         mutex_unlock(&rdev->ring_lock);
  900. }
  901.  
  902. /**
  903.  * radeon_fence_driver_force_completion - force all fence waiters to complete
  904.  *
  905.  * @rdev: radeon device pointer
  906.  * @ring: the ring to complete
  907.  *
  908.  * In case of GPU reset failure make sure no process keeps waiting on a fence
  909.  * that will never complete.
  910.  */
  911. void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
  912. {
  913.         if (rdev->fence_drv[ring].initialized) {
  914.                 radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
  915.         }
  916. }
  917.  
  918.  
  919. /*
  920.  * Fence debugfs
  921.  */
  922. #if defined(CONFIG_DEBUG_FS)
  923. static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
  924. {
  925.         struct drm_info_node *node = (struct drm_info_node *)m->private;
  926.         struct drm_device *dev = node->minor->dev;
  927.         struct radeon_device *rdev = dev->dev_private;
  928.         int i, j;
  929.  
  930.         for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  931.                 if (!rdev->fence_drv[i].initialized)
  932.                         continue;
  933.  
  934.                 radeon_fence_process(rdev, i);
  935.  
  936.                 seq_printf(m, "--- ring %d ---\n", i);
  937.                 seq_printf(m, "Last signaled fence 0x%016llx\n",
  938.                            (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
  939.                 seq_printf(m, "Last emitted        0x%016llx\n",
  940.                            rdev->fence_drv[i].sync_seq[i]);
  941.  
  942.                 for (j = 0; j < RADEON_NUM_RINGS; ++j) {
  943.                         if (i != j && rdev->fence_drv[j].initialized)
  944.                                 seq_printf(m, "Last sync to ring %d 0x%016llx\n",
  945.                                            j, rdev->fence_drv[i].sync_seq[j]);
  946.                 }
  947.         }
  948.         return 0;
  949. }
  950.  
  951. /**
  952.  * radeon_debugfs_gpu_reset - manually trigger a gpu reset
  953.  *
  954.  * Manually trigger a gpu reset at the next fence wait.
  955.  */
  956. static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
  957. {
  958.         struct drm_info_node *node = (struct drm_info_node *) m->private;
  959.         struct drm_device *dev = node->minor->dev;
  960.         struct radeon_device *rdev = dev->dev_private;
  961.  
  962.         down_read(&rdev->exclusive_lock);
  963.         seq_printf(m, "%d\n", rdev->needs_reset);
  964.         rdev->needs_reset = true;
  965.         wake_up_all(&rdev->fence_queue);
  966.         up_read(&rdev->exclusive_lock);
  967.  
  968.         return 0;
  969. }
  970.  
  971. static struct drm_info_list radeon_debugfs_fence_list[] = {
  972.         {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
  973.         {"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
  974. };
  975. #endif
  976.  
  977. int radeon_debugfs_fence_init(struct radeon_device *rdev)
  978. {
  979. #if defined(CONFIG_DEBUG_FS)
  980.         return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
  981. #else
  982.         return 0;
  983. #endif
  984. }
  985.  
  986. static const char *radeon_fence_get_driver_name(struct fence *fence)
  987. {
  988.         return "radeon";
  989. }
  990.  
  991. static const char *radeon_fence_get_timeline_name(struct fence *f)
  992. {
  993.         struct radeon_fence *fence = to_radeon_fence(f);
  994.         switch (fence->ring) {
  995.         case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
  996.         case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
  997.         case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
  998.         case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
  999.         case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
  1000.         case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
  1001.         case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
  1002.         case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
  1003.         default: WARN_ON_ONCE(1); return "radeon.unk";
  1004.         }
  1005. }
  1006.  
  1007. static inline bool radeon_test_signaled(struct radeon_fence *fence)
  1008. {
  1009.         return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
  1010. }
  1011.  
  1012. static signed long radeon_fence_default_wait(struct fence *f, bool intr,
  1013.                                              signed long t)
  1014. {
  1015.         struct radeon_fence *fence = to_radeon_fence(f);
  1016.         struct radeon_device *rdev = fence->rdev;
  1017.         bool signaled;
  1018.  
  1019.         fence_enable_sw_signaling(&fence->base);
  1020.  
  1021.         /*
  1022.          * This function has to return -EDEADLK, but cannot hold
  1023.          * exclusive_lock during the wait because some callers
  1024.          * may already hold it. This means checking needs_reset without
  1025.          * lock, and not fiddling with any gpu internals.
  1026.          *
  1027.          * The callback installed with fence_enable_sw_signaling will
  1028.          * run before our wait_event_*timeout call, so we will see
  1029.          * both the signaled fence and the changes to needs_reset.
  1030.          */
  1031.  
  1032.         if (intr)
  1033.                 t = wait_event_interruptible_timeout(rdev->fence_queue,
  1034.                         ((signaled = radeon_test_signaled(fence)) ||
  1035.                          rdev->needs_reset), t);
  1036.         else
  1037.                 t = wait_event_timeout(rdev->fence_queue,
  1038.                         ((signaled = radeon_test_signaled(fence)) ||
  1039.                          rdev->needs_reset), t);
  1040.  
  1041.         if (t > 0 && !signaled)
  1042.                 return -EDEADLK;
  1043.         return t;
  1044. }
  1045.  
  1046. const struct fence_ops radeon_fence_ops = {
  1047.         .get_driver_name = radeon_fence_get_driver_name,
  1048.         .get_timeline_name = radeon_fence_get_timeline_name,
  1049.         .enable_signaling = radeon_fence_enable_signaling,
  1050.         .signaled = radeon_fence_is_signaled,
  1051.         .wait = radeon_fence_default_wait,
  1052.         .release = NULL,
  1053. };
  1054.