29,9 → 29,8 |
* Dave Airlie |
*/ |
#include <linux/seq_file.h> |
#include <asm/atomic.h> |
#include <linux/atomic.h> |
#include <linux/wait.h> |
#include <linux/list.h> |
#include <linux/kref.h> |
#include <linux/slab.h> |
#include <drm/drmP.h> |
111,15 → 110,19 |
struct radeon_fence **fence, |
int ring) |
{ |
u64 seq = ++rdev->fence_drv[ring].sync_seq[ring]; |
|
/* we are protected by the ring emission mutex */ |
*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); |
if ((*fence) == NULL) { |
return -ENOMEM; |
} |
kref_init(&((*fence)->kref)); |
(*fence)->rdev = rdev; |
(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; |
(*fence)->seq = seq; |
(*fence)->ring = ring; |
(*fence)->is_vm_update = false; |
fence_init(&(*fence)->base, &radeon_fence_ops, |
&rdev->fence_queue.lock, rdev->fence_context + ring, seq); |
radeon_fence_ring_emit(rdev, ring, *fence); |
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); |
return 0; |
126,15 → 129,51 |
} |
|
/** |
 * radeon_fence_check_signaled - callback from fence_queue
* |
* this function is called with fence_queue lock held, which is also used |
* for the fence locking itself, so unlocked variants are used for |
* fence_signal, and remove_wait_queue. |
*/ |
static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
{
	struct radeon_fence *fence;
	u64 seq;

	fence = container_of(wait, struct radeon_fence, fence_wake);

	/*
	 * We cannot use radeon_fence_process here because we're already
	 * in the waitqueue, in a call from wake_up_all.
	 */
	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
	if (seq >= fence->seq) {
		int ret = fence_signal_locked(&fence->base);

		if (!ret)
			FENCE_TRACE(&fence->base, "signaled from irq context\n");
		else
			FENCE_TRACE(&fence->base, "was already signaled\n");

		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
		/* Detach the entry before dropping the reference taken in
		 * radeon_fence_enable_signaling; leaving the stale entry
		 * queued would let a later wake_up_all walk into a freed
		 * fence. We hold the fence_queue lock, hence the unlocked
		 * variant. */
		__remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
		fence_put(&fence->base);
	} else
		FENCE_TRACE(&fence->base, "pending\n");
	return 0;
}
|
/** |
* radeon_fence_activity - check for fence activity |
* |
* @rdev: radeon_device pointer |
* @ring: ring index the fence is associated with |
* |
 * Checks the current fence value and calculates the last
 * signaled fence value. Returns true if activity occurred
 * on the ring, and the fence_queue should be woken up.
*/ |
void radeon_fence_process(struct radeon_device *rdev, int ring) |
static bool radeon_fence_activity(struct radeon_device *rdev, int ring) |
{ |
uint64_t seq, last_seq, last_emitted; |
unsigned count_loop = 0; |
190,26 → 229,80 |
} |
} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq); |
|
if (wake) |
wake_up_all(&rdev->fence_queue); |
// if (seq < last_emitted) |
// radeon_fence_schedule_check(rdev, ring); |
|
return wake; |
} |
|
/** |
 * radeon_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and if there is none probe
 * the hardware if a lockup occurred.
*/ |
static void radeon_fence_destroy(struct kref *kref) |
static void radeon_fence_check_lockup(struct work_struct *work) |
{ |
struct radeon_fence *fence; |
struct radeon_fence_driver *fence_drv; |
struct radeon_device *rdev; |
int ring; |
|
fence = container_of(kref, struct radeon_fence, kref); |
kfree(fence); |
fence_drv = container_of(work, struct radeon_fence_driver, |
lockup_work.work); |
rdev = fence_drv->rdev; |
ring = fence_drv - &rdev->fence_drv[0]; |
|
// if (!down_read_trylock(&rdev->exclusive_lock)) { |
// /* just reschedule the check if a reset is going on */ |
// radeon_fence_schedule_check(rdev, ring); |
// return; |
// } |
|
if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) { |
unsigned long irqflags; |
|
fence_drv->delayed_irq = false; |
spin_lock_irqsave(&rdev->irq.lock, irqflags); |
radeon_irq_set(rdev); |
spin_unlock_irqrestore(&rdev->irq.lock, irqflags); |
} |
|
if (radeon_fence_activity(rdev, ring)) |
wake_up_all(&rdev->fence_queue); |
|
else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) { |
|
/* good news we believe it's a lockup */ |
dev_warn(rdev->dev, "GPU lockup (current fence id " |
"0x%016llx last fence id 0x%016llx on ring %d)\n", |
(uint64_t)atomic64_read(&fence_drv->last_seq), |
fence_drv->sync_seq[ring], ring); |
|
/* remember that we need an reset */ |
rdev->needs_reset = true; |
wake_up_all(&rdev->fence_queue); |
} |
// up_read(&rdev->exclusive_lock); |
} |
|
/** |
* radeon_fence_process - process a fence |
* |
* @rdev: radeon_device pointer |
* @ring: ring index the fence is associated with |
* |
* Checks the current fence value and wakes the fence queue |
* if the sequence number has increased (all asics). |
*/ |
void radeon_fence_process(struct radeon_device *rdev, int ring) |
{ |
if (radeon_fence_activity(rdev, ring)) |
wake_up_all(&rdev->fence_queue); |
} |
|
/** |
* radeon_fence_seq_signaled - check if a fence sequence number has signaled |
* |
* @rdev: radeon device pointer |
237,7 → 330,78 |
return false; |
} |
|
/* fence_ops.signaled callback: report whether this fence has passed,
 * polling the hardware state once if the cached value says it hasn't. */
static bool radeon_fence_is_signaled(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	unsigned ring = fence->ring;
	u64 seq = fence->seq;

	/* fast path: last processed sequence already covers this fence */
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}

	/* NOTE(review): upstream only polls when it can take
	 * exclusive_lock; this port polls unconditionally — confirm */
//	if (down_read_trylock(&rdev->exclusive_lock))
	{
		radeon_fence_process(rdev, ring);
//		up_read(&rdev->exclusive_lock);

		/* re-check after processing updated last_seq */
		if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
			return true;
		}
	}
	return false;
}
|
/** |
* radeon_fence_enable_signaling - enable signalling on fence |
* @fence: fence |
* |
* This function is called with fence_queue lock held, and adds a callback |
* to fence_queue that checks if this fence is signaled, and if so it |
* signals the fence and removes itself. |
*/ |
static bool radeon_fence_enable_signaling(struct fence *f)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;

	/* already signaled: tell the fence core no callback is needed */
	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
		return false;

	/* NOTE(review): the exclusive_lock trylock and the lockup
	 * fallback branch below are disabled in this port — confirm */
//	if (down_read_trylock(&rdev->exclusive_lock))
	{
		/* reference the sw irq so fence IRQs fire for this ring;
		 * dropped again either below or in check_signaled */
		radeon_irq_kms_sw_irq_get(rdev, fence->ring);

//		if (radeon_fence_activity(rdev, fence->ring))
//			wake_up_all_locked(&rdev->fence_queue);

		/* did fence get signaled after we enabled the sw irq? */
		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
//			up_read(&rdev->exclusive_lock);
			return false;
		}

//		up_read(&rdev->exclusive_lock);
//	} else {
		/* we're probably in a lockup, lets not fiddle too much */
//		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
//			rdev->fence_drv[fence->ring].delayed_irq = true;
//		radeon_fence_schedule_check(rdev, fence->ring);
	}

//	fence->fence_wake.flags = 0;
//	fence->fence_wake.private = NULL;
	/* arm a custom waitqueue entry; radeon_fence_check_signaled runs
	 * on every fence_queue wake-up (fence_queue lock held) */
	fence->fence_wake.func = radeon_fence_check_signaled;
	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
	/* reference is dropped by the callback once the fence signals */
	fence_get(f);

	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
	return true;
}
|
/** |
* radeon_fence_signaled - check if a fence has signaled |
* |
* @fence: radeon fence object |
247,14 → 411,15 |
*/ |
bool radeon_fence_signaled(struct radeon_fence *fence) |
{ |
if (!fence) { |
if (!fence) |
return true; |
} |
if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { |
return true; |
} |
|
if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) { |
fence->seq = RADEON_FENCE_SIGNALED_SEQ; |
int ret; |
|
ret = fence_signal(&fence->base); |
if (!ret) |
FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n"); |
return true; |
} |
return false; |
283,11 → 448,12 |
} |
|
/** |
 * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
* |
* @rdev: radeon device pointer |
* @target_seq: sequence number(s) we want to wait for |
* @intr: use interruptable sleep |
* @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait |
* |
* Wait for the requested sequence number(s) to be written by any ring |
* (all asics). Sequnce number array is indexed by ring id. |
294,24 → 460,25 |
* @intr selects whether to use interruptable (true) or non-interruptable |
* (false) sleep when waiting for the sequence number. Helper function |
* for radeon_fence_wait_*(). |
 * Returns remaining time if the sequence number has passed, 0 when
 * the wait timed out, or an error for all other cases.
* -EDEADLK is returned when a GPU lockup has been detected. |
*/ |
static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, |
bool intr) |
static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, |
u64 *target_seq, bool intr, |
long timeout) |
{ |
uint64_t last_seq[RADEON_NUM_RINGS]; |
bool signaled; |
int i, r; |
long r; |
int i; |
|
while (!radeon_fence_any_seq_signaled(rdev, target_seq)) { |
if (radeon_fence_any_seq_signaled(rdev, target_seq)) |
return timeout; |
|
/* Save current sequence values, used to check for GPU lockups */ |
/* enable IRQs and tracing */ |
for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
if (!target_seq[i]) |
continue; |
|
last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq); |
trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); |
radeon_irq_kms_sw_irq_get(rdev, i); |
} |
318,14 → 485,17 |
|
if (intr) { |
r = wait_event_interruptible_timeout(rdev->fence_queue, ( |
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)) |
|| rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT); |
radeon_fence_any_seq_signaled(rdev, target_seq) |
|| rdev->needs_reset), timeout); |
} else { |
r = wait_event_timeout(rdev->fence_queue, ( |
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)) |
|| rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT); |
radeon_fence_any_seq_signaled(rdev, target_seq) |
|| rdev->needs_reset), timeout); |
} |
|
if (rdev->needs_reset) |
r = -EDEADLK; |
|
for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
if (!target_seq[i]) |
continue; |
334,59 → 504,14 |
trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); |
} |
|
if (unlikely(r < 0)) |
return r; |
|
if (unlikely(!signaled)) { |
if (rdev->needs_reset) |
return -EDEADLK; |
|
/* we were interrupted for some reason and fence |
* isn't signaled yet, resume waiting */ |
if (r) |
continue; |
|
for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
if (!target_seq[i]) |
continue; |
|
if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq)) |
break; |
} |
|
if (i != RADEON_NUM_RINGS) |
continue; |
|
for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
if (!target_seq[i]) |
continue; |
|
if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i])) |
break; |
} |
|
if (i < RADEON_NUM_RINGS) { |
/* good news we believe it's a lockup */ |
dev_warn(rdev->dev, "GPU lockup (waiting for " |
"0x%016llx last fence id 0x%016llx on" |
" ring %d)\n", |
target_seq[i], last_seq[i], i); |
|
/* remember that we need an reset */ |
rdev->needs_reset = true; |
wake_up_all(&rdev->fence_queue); |
return -EDEADLK; |
} |
} |
} |
return 0; |
} |
|
/** |
* radeon_fence_wait - wait for a fence to signal |
* |
* @fence: radeon fence object |
 * @intr: use interruptible sleep
* |
* Wait for the requested fence to signal (all asics). |
* @intr selects whether to use interruptable (true) or non-interruptable |
396,22 → 521,26 |
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	/*
	 * This function should not be called on !radeon fences.
	 * If this is the case, it would mean this function can
	 * also be called on radeon fences belonging to another card.
	 * exclusive_lock is not held in that case.
	 */
	if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
		return fence_wait(&fence->base, intr);

	seq[fence->ring] = fence->seq;
	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}

	/* mark the embedded fence signaled so later waiters return early */
	r = fence_signal(&fence->base);
	if (!r)
		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
	return 0;
}
|
434,7 → 563,7 |
{ |
uint64_t seq[RADEON_NUM_RINGS]; |
unsigned i, num_rings = 0; |
int r; |
long r; |
|
for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
seq[i] = 0; |
445,10 → 574,6 |
|
seq[i] = fences[i]->seq; |
++num_rings; |
|
/* test if something was allready signaled */ |
if (seq[i] == RADEON_FENCE_SIGNALED_SEQ) |
return 0; |
} |
|
/* nothing to wait for ? */ |
455,8 → 580,8 |
if (num_rings == 0) |
return -ENOENT; |
|
r = radeon_fence_wait_seq(rdev, seq, intr); |
if (r) { |
r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); |
if (r < 0) { |
return r; |
} |
return 0; |
475,6 → 600,7 |
int radeon_fence_wait_next(struct radeon_device *rdev, int ring) |
{ |
uint64_t seq[RADEON_NUM_RINGS] = {}; |
long r; |
|
seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; |
if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) { |
482,7 → 608,10 |
already the last emited fence */ |
return -ENOENT; |
} |
return radeon_fence_wait_seq(rdev, seq, false); |
r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT); |
if (r < 0) |
return r; |
return 0; |
} |
|
/** |
498,18 → 627,18 |
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) |
{ |
uint64_t seq[RADEON_NUM_RINGS] = {}; |
int r; |
long r; |
|
seq[ring] = rdev->fence_drv[ring].sync_seq[ring]; |
if (!seq[ring]) |
return 0; |
|
r = radeon_fence_wait_seq(rdev, seq, false); |
if (r) { |
r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT); |
if (r < 0) { |
if (r == -EDEADLK) |
return -EDEADLK; |
|
dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n", |
dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n", |
ring, r); |
} |
return 0; |
525,7 → 654,7 |
*/ |
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence) |
{ |
kref_get(&fence->kref); |
fence_get(&fence->base); |
return fence; |
} |
|
542,7 → 671,7 |
|
*fence = NULL; |
if (tmp) { |
kref_put(&tmp->kref, radeon_fence_destroy); |
fence_put(&tmp->base); |
} |
} |
|
711,6 → 840,9 |
rdev->fence_drv[ring].sync_seq[i] = 0; |
atomic64_set(&rdev->fence_drv[ring].last_seq, 0); |
rdev->fence_drv[ring].initialized = false; |
INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work, |
radeon_fence_check_lockup); |
rdev->fence_drv[ring].rdev = rdev; |
} |
|
/** |
758,7 → 890,7 |
r = radeon_fence_wait_empty(rdev, ring); |
if (r) { |
/* no need to trigger GPU reset as we are unloading */ |
radeon_fence_driver_force_completion(rdev); |
radeon_fence_driver_force_completion(rdev, ring); |
} |
wake_up_all(&rdev->fence_queue); |
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); |
771,17 → 903,14 |
* radeon_fence_driver_force_completion - force all fence waiter to complete |
* |
* @rdev: radeon device pointer |
* @ring: the ring to complete |
* |
* In case of GPU reset failure make sure no process keep waiting on fence |
* that will never complete. |
*/ |
void radeon_fence_driver_force_completion(struct radeon_device *rdev) |
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring) |
{ |
int ring; |
|
for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { |
if (!rdev->fence_drv[ring].initialized) |
continue; |
if (rdev->fence_drv[ring].initialized) { |
radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); |
} |
} |
833,6 → 962,7 |
down_read(&rdev->exclusive_lock); |
seq_printf(m, "%d\n", rdev->needs_reset); |
rdev->needs_reset = true; |
wake_up_all(&rdev->fence_queue); |
up_read(&rdev->exclusive_lock); |
|
return 0; |
852,3 → 982,72 |
return 0; |
#endif |
} |
|
/* fence_ops.get_driver_name: constant driver identifier, the argument
 * is intentionally unused */
static const char *radeon_fence_get_driver_name(struct fence *f)
{
	return "radeon";
}
|
static const char *radeon_fence_get_timeline_name(struct fence *f) |
{ |
struct radeon_fence *fence = to_radeon_fence(f); |
switch (fence->ring) { |
case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx"; |
case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1"; |
case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2"; |
case R600_RING_TYPE_DMA_INDEX: return "radeon.dma"; |
case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1"; |
case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd"; |
case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1"; |
case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2"; |
default: WARN_ON_ONCE(1); return "radeon.unk"; |
} |
} |
|
static inline bool radeon_test_signaled(struct radeon_fence *fence) |
{ |
return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); |
} |
|
/* fence_ops.wait callback: sleep on fence_queue until the fence signals,
 * the timeout expires, or a GPU lockup is flagged. Returns remaining
 * jiffies, 0 on timeout, -ERESTARTSYS on signal, -EDEADLK on lockup. */
static signed long radeon_fence_default_wait(struct fence *f, bool intr,
					     signed long t)
{
	struct radeon_fence *fence = to_radeon_fence(f);
	struct radeon_device *rdev = fence->rdev;
	bool signaled;

	/* arm the signaling path so the waitqueue actually gets woken */
	fence_enable_sw_signaling(&fence->base);

	/*
	 * This function has to return -EDEADLK, but cannot hold
	 * exclusive_lock during the wait because some callers
	 * may already hold it. This means checking needs_reset without
	 * lock, and not fiddling with any gpu internals.
	 *
	 * The callback installed with fence_enable_sw_signaling will
	 * run before our wait_event_*timeout call, so we will see
	 * both the signaled fence and the changes to needs_reset.
	 */

	if (intr)
		t = wait_event_interruptible_timeout(rdev->fence_queue,
			((signaled = radeon_test_signaled(fence)) ||
			 rdev->needs_reset), t);
	else
		t = wait_event_timeout(rdev->fence_queue,
			((signaled = radeon_test_signaled(fence)) ||
			 rdev->needs_reset), t);

	/* woken by needs_reset rather than the fence: report deadlock */
	if (t > 0 && !signaled)
		return -EDEADLK;
	return t;
}
|
/* Hooks radeon fences into the common fence framework.
 * .release is left NULL so the fence core's default release is used
 * (NOTE(review): this assumes freeing &fence->base frees the whole
 * radeon_fence — verify struct layout / embedding). */
const struct fence_ops radeon_fence_ops = {
	.get_driver_name = radeon_fence_get_driver_name,
	.get_timeline_name = radeon_fence_get_timeline_name,
	.enable_signaling = radeon_fence_enable_signaling,
	.signaled = radeon_fence_is_signaled,
	.wait = radeon_fence_default_wait,
	.release = NULL,
};