/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse
 *    Dave Airlie
 */
#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon_reg.h"
#include "radeon.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */

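/*
 * Illustrative sketch (not part of the driver): the typical fence
 * lifecycle a caller goes through.  Emission happens while the caller
 * already holds the ring emission lock (see radeon_fence_emit() below);
 * the surrounding command submission is omitted.
 *
 *	struct radeon_fence *fence = NULL;
 *	int r;
 *
 *	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 *	if (r)
 *		return r;
 *	r = radeon_fence_wait(fence, false);	(block until the GPU reaches the fence)
 *	radeon_fence_unref(&fence);		(drop the reference taken at emit time)
 */
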
/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			*drv->cpu_addr = cpu_to_le32(seq);
		}
	} else {
		WREG32(drv->scratch_reg, seq);
	}
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	u32 seq = 0;

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			seq = le32_to_cpu(*drv->cpu_addr);
		} else {
			seq = lower_32_bits(atomic64_read(&drv->last_seq));
		}
	} else {
		seq = RREG32(drv->scratch_reg);
	}
	return seq;
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
		      struct radeon_fence **fence,
		      int ring)
{
	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	kref_init(&((*fence)->kref));
	(*fence)->rdev = rdev;
	(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
	(*fence)->ring = ring;
	radeon_fence_ring_emit(rdev, ring, *fence);
//	trace_radeon_fence_emit(rdev->ddev, (*fence)->seq);
	return 0;
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen. For it to happen, the current polling
	 * process needs to be interrupted by another process and another
	 * process needs to update the last_seq between the atomic read and
	 * xchg of the current process.
	 *
	 * Moreover, for this to go into an infinite loop there need to be
	 * continuously new fences signaled, i.e. radeon_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that xchgs the last_seq
	 * between the atomic read and xchg of the current process. And the
	 * value the other process set as last seq must be higher than
	 * the seq value we just read. Which means that the current process
	 * needs to be interrupted after radeon_fence_read and before the
	 * atomic xchg.
	 *
	 * To be even more safe we count the number of times we loop and
	 * we bail after 10 loops, just accepting the fact that we might
	 * have temporarily set the last_seq not to the true real last
	 * seq but to an older one.
	 */
	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled, as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped over too many times, leave with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq as signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (wake) {
		rdev->fence_drv[ring].last_activity = GetTimerTicks();
		wake_up_all(&rdev->fence_queue);
	}
}

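/*
 * Worked example (illustrative only) of the sequence extension done in
 * radeon_fence_process() above.  The hardware fence location only holds
 * the low 32 bits of the sequence, so the upper 32 bits are borrowed
 * from the last 64-bit value known to software and bumped on wrap-around:
 *
 *	last_seq   = 0x00000001fffffffe	(last 64-bit value seen by the CPU)
 *	fence read = 0x00000003		(32-bit value the GPU just wrote)
 *	seq        = 0x0000000100000003	(low 32 bits replaced)
 *	seq < last_seq, so the 32-bit counter wrapped: take the upper bits
 *	from last_emitted instead; with last_emitted = 0x0000000200000005
 *	the result is seq = 0x0000000200000003, which is accepted because
 *	it lies in (last_seq, last_emitted].
 */
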
/**
 * radeon_fence_destroy - destroy a fence
 *
 * @kref: fence kref
 *
 * Frees the fence object (all asics).
 */
static void radeon_fence_destroy(struct kref *kref)
{
	struct radeon_fence *fence;

	fence = container_of(kref, struct radeon_fence, kref);
	kfree(fence);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process(rdev, ring);
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
	if (!fence) {
		return true;
	}
	if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
		return true;
	}
	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
		fence->seq = RADEON_FENCE_SIGNALED_SEQ;
		return true;
	}
	return false;
}

/**
 * radeon_fence_wait_seq - wait for a specific sequence number
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number we want to wait for
 * @ring: ring index the fence is associated with
 * @intr: use interruptible sleep
 * @lock_ring: whether the ring should be locked or not
 *
 * Wait for the requested sequence number to be written (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait(), et al.
 * Returns 0 if the sequence number has passed, error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected and the ring is
 * marked as not ready so no further jobs get scheduled until a successful
 * reset.
 */
static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
				 unsigned ring, bool intr, bool lock_ring)
{
	unsigned long timeout, last_activity;
	uint64_t seq;
	unsigned i;
	bool signaled;
	int r;

	while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
		if (!rdev->ring[ring].ready) {
			return -EBUSY;
		}

		timeout = GetTimerTicks() - RADEON_FENCE_JIFFIES_TIMEOUT;
		if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
			/* the normal case, timeout is somewhere before last_activity */
			timeout = rdev->fence_drv[ring].last_activity - timeout;
		} else {
			/* either jiffies wrapped around, or no fence was signaled in the last 500ms;
			 * anyway we will just wait for the minimum amount and then check for a lockup
			 */
			timeout = 1;
		}
		seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
		/* Save current last activity value, used to check for GPU lockups */
		last_activity = rdev->fence_drv[ring].last_activity;

//		trace_radeon_fence_wait_begin(rdev->ddev, seq);
		radeon_irq_kms_sw_irq_get(rdev, ring);
		if (intr) {
			r = wait_event_interruptible_timeout(rdev->fence_queue,
				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
				timeout);
		} else {
			r = wait_event_timeout(rdev->fence_queue,
				(signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
				timeout);
		}
		radeon_irq_kms_sw_irq_put(rdev, ring);
		if (unlikely(r < 0)) {
			return r;
		}
//		trace_radeon_fence_wait_end(rdev->ddev, seq);

		if (unlikely(!signaled)) {
			/* we were interrupted for some reason and the fence
			 * isn't signaled yet, resume waiting */
			if (r) {
				continue;
			}

			/* check if sequence value has changed since last_activity */
			if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
				continue;
			}

			if (lock_ring) {
				mutex_lock(&rdev->ring_lock);
			}

			/* test if somebody else has already decided that this is a lockup */
			if (last_activity != rdev->fence_drv[ring].last_activity) {
				if (lock_ring) {
					mutex_unlock(&rdev->ring_lock);
				}
				continue;
			}

			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
				/* good news, we believe it's a lockup */
				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
					 target_seq, seq);

				/* change last activity so nobody else thinks there is a lockup */
				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
					rdev->fence_drv[i].last_activity = GetTimerTicks();
				}

				/* mark the ring as not ready any more */
				rdev->ring[ring].ready = false;
				if (lock_ring) {
					mutex_unlock(&rdev->ring_lock);
				}
				return -EDEADLK;
			}

			if (lock_ring) {
				mutex_unlock(&rdev->ring_lock);
			}
		}
	}
	return 0;
}

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	int r;

	if (fence == NULL) {
		WARN(1, "Querying an invalid fence : %p !\n", fence);
		return -EINVAL;
	}

	r = radeon_fence_wait_seq(fence->rdev, fence->seq,
				  fence->ring, intr, true);
	if (r) {
		return r;
	}
	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
	return 0;
}

static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
			return true;
		}
	}
	return false;
}

/**
 * radeon_fence_wait_any_seq - wait for a sequence number on any ring
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  Sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_any(), et al.
 * Returns 0 if the sequence number has passed, error for all other cases.
 */
static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
				     u64 *target_seq, bool intr)
{
	unsigned long timeout, last_activity, tmp;
	unsigned i, ring = RADEON_NUM_RINGS;
	bool signaled;
	int r;

	for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i]) {
			continue;
		}

		/* use the most recent one as indicator */
		if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
			last_activity = rdev->fence_drv[i].last_activity;
		}

		/* For lockup detection just pick the lowest ring we are
		 * actively waiting for
		 */
		if (i < ring) {
			ring = i;
		}
	}

	/* nothing to wait for ? */
	if (ring == RADEON_NUM_RINGS) {
		return -ENOENT;
	}

	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
		timeout = GetTimerTicks() - RADEON_FENCE_JIFFIES_TIMEOUT;
		if (time_after(last_activity, timeout)) {
			/* the normal case, timeout is somewhere before last_activity */
			timeout = last_activity - timeout;
		} else {
			/* either jiffies wrapped around, or no fence was signaled in the last 500ms;
			 * anyway we will just wait for the minimum amount and then check for a lockup
			 */
			timeout = 1;
		}

//		trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]);
		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
			if (target_seq[i]) {
				radeon_irq_kms_sw_irq_get(rdev, i);
			}
		}
		if (intr) {
			r = wait_event_interruptible_timeout(rdev->fence_queue,
				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
				timeout);
		} else {
			r = wait_event_timeout(rdev->fence_queue,
				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
				timeout);
		}
		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
			if (target_seq[i]) {
				radeon_irq_kms_sw_irq_put(rdev, i);
			}
		}
		if (unlikely(r < 0)) {
			return r;
		}
//		trace_radeon_fence_wait_end(rdev->ddev, seq);

		if (unlikely(!signaled)) {
			/* we were interrupted for some reason and the fence
			 * isn't signaled yet, resume waiting */
			if (r) {
				continue;
			}

			mutex_lock(&rdev->ring_lock);
			for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
				if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
					tmp = rdev->fence_drv[i].last_activity;
				}
			}
			/* test if somebody else has already decided that this is a lockup */
			if (last_activity != tmp) {
				last_activity = tmp;
				mutex_unlock(&rdev->ring_lock);
				continue;
			}

			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
				/* good news, we believe it's a lockup */
				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n",
					 target_seq[ring]);

				/* change last activity so nobody else thinks there is a lockup */
				for (i = 0; i < RADEON_NUM_RINGS; ++i) {
					rdev->fence_drv[i].last_activity = GetTimerTicks();
				}

				/* mark the ring as not ready any more */
				rdev->ring[ring].ready = false;
				mutex_unlock(&rdev->ring_lock);
				return -EDEADLK;
			}
			mutex_unlock(&rdev->ring_lock);
		}
	}
	return 0;
}

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  Fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences. Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
			  struct radeon_fence **fences,
			  bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS];
	unsigned i;
	int r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
			/* something was already signaled */
			return 0;
		}

		seq[i] = fences[i]->seq;
	}

	r = radeon_fence_wait_any_seq(rdev, seq, intr);
	if (r) {
		return r;
	}
	return 0;
}

/**
 * radeon_fence_wait_next_locked - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
{
	uint64_t seq;

	seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is
		   already the last emitted fence */
		return -ENOENT;
	}
	return radeon_fence_wait_seq(rdev, seq, ring, false, false);
}

/**
 * radeon_fence_wait_empty_locked - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
{
	uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
	int r;

	r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
	if (r) {
		if (r == -EDEADLK) {
			return -EDEADLK;
		}
		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
			ring, r);
	}
	return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	kref_get(&fence->kref);
	return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
	struct radeon_fence *tmp = *fence;

	*fence = NULL;
	if (tmp) {
		kref_put(&tmp->kref, radeon_fence_destroy);
	}
}

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by ring lock when reading the last sequence
	 * but it's ok to report slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring]
		- atomic64_read(&rdev->fence_drv[ring].last_seq);
	/* to avoid 32-bit wrap around */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *fdrv;

	if (!fence) {
		return false;
	}

	if (fence->ring == dst_ring) {
		return false;
	}

	/* we are protected by the ring mutex */
	fdrv = &fence->rdev->fence_drv[dst_ring];
	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
		return false;
	}

	return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *dst, *src;
	unsigned i;

	if (!fence) {
		return;
	}

	if (fence->ring == dst_ring) {
		return;
	}

	/* we are protected by the ring mutex */
	src = &fence->rdev->fence_drv[fence->ring];
	dst = &fence->rdev->fence_drv[dst_ring];
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (i == dst_ring) {
			continue;
		}
		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}

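/*
 * Illustrative sketch (not taken verbatim from the driver): how a caller
 * is expected to pair the two helpers above when work submitted on
 * dst_ring depends on a fence from another ring.  The semaphore emission
 * in the middle is only indicated, not spelled out here.
 *
 *	if (radeon_fence_need_sync(fence, dst_ring)) {
 *		(emit a semaphore wait on dst_ring against fence->ring)
 *		radeon_fence_note_sync(fence, dst_ring);
 *	}
 */
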
/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
		rdev->fence_drv[ring].scratch_reg = 0;
		if (ring != R600_RING_TYPE_UVD_INDEX) {
			index = R600_WB_EVENT_OFFSET + ring * 4;
			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
							 index;

		} else {
			/* put fence directly behind firmware */
			index = ALIGN(rdev->uvd_fw->size, 8);
			rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
		}

	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	}
	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
	int i;

	rdev->fence_drv[ring].scratch_reg = -1;
	rdev->fence_drv[ring].cpu_addr = NULL;
	rdev->fence_drv[ring].gpu_addr = 0;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		rdev->fence_drv[ring].sync_seq[i] = 0;
	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
	rdev->fence_drv[ring].last_activity = GetTimerTicks();
	rdev->fence_drv[ring].initialized = false;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
	int ring;

	init_waitqueue_head(&rdev->fence_queue);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		radeon_fence_driver_init_ring(rdev, ring);
	}
	if (radeon_debugfs_fence_init(rdev)) {
		dev_err(rdev->dev, "fence debugfs file creation failed\n");
	}
	return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
	int ring, r;

	mutex_lock(&rdev->ring_lock);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		r = radeon_fence_wait_empty_locked(rdev, ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			radeon_fence_driver_force_completion(rdev);
		}
		wake_up_all(&rdev->fence_queue);
		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
		rdev->fence_drv[ring].initialized = false;
	}
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 *
 * In case of GPU reset failure make sure no process keeps waiting on a fence
 * that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev)
{
	int ring;

	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
	}
}

/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted        0x%016llx\n",
			   rdev->fence_drv[i].sync_seq[i]);

		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
};
#endif

int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
#else
	return 0;
#endif
}