/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* Stand-in for the kernel's copy_from_user(): both buffers are directly
 * addressable in this port, so a plain memcpy suffices.  Keep the original
 * contract of returning the number of bytes that could not be copied,
 * i.e. 0 after a successful memcpy, so the error checks below work.
 */
static inline unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
    memcpy(to, from, n);
    return 0;
}

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}
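
/* Example of the resulting ordering (an illustration, not exercised by this
 * file): adding items A, B, C with priorities 3, 1, 3 puts A and C in
 * bucket[3] and B in bucket[1]; because get_list() splices bucket[i] at the
 * head of the output on each iteration, the final list reads A, C, B --
 * higher priorities first, ties kept in submission order (stable).
 */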

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;
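
		/* For example (assuming the usual UAPI where
		 * RADEON_RELOC_PRIO_MASK covers the low four flag bits):
		 * a reloc submitted with priority 15 and a write domain
		 * ends up with priority 15 * 2 + 1 = 31, the highest
		 * userspace value; RADEON_CS_MAX_PRIORITY (32) stays
		 * reserved for the kernel, see the UVD case below.
		 */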

		/* The first reloc of an UVD job is the msg and that must be in
		   VRAM; also put everything into VRAM on AGP cards and older
		   IGP chips to avoid image corruption. */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}
/*
		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}
*/
		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	return r;
}
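
/* Map the userspace ring id and priority (taken from the FLAGS chunk, see
 * radeon_cs_parser_init() below) onto a hardware ring index.  Families that
 * lack a dedicated ring for the request either fall back to the GFX ring
 * (compute before SI) or reject the submission (DMA before RV770).
 */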
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}
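
/* Collect the fences attached to every validated BO's reservation object
 * into the IB's sync object, so the target ring waits for other rings that
 * are still using those buffers before this command stream runs.
 */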
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct reservation_object *resv;

		resv = reloc->robj->tbo.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.shared);
		if (r)
			return r;
	}
	return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}
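
/* For reference, the ioctl payload parsed above is laid out roughly like
 * this on the userspace side (a sketch based only on the fields read here,
 * not copied from libdrm):
 *
 *   - struct drm_radeon_cs carries num_chunks and a pointer to an array of
 *     u64 chunk pointers;
 *   - each chunk pointer refers to a struct drm_radeon_cs_chunk
 *     { chunk_id, length_dw, chunk_data };
 *   - the FLAGS chunk holds up to three dwords:
 *     kdata[0] = cs_flags (e.g. RADEON_CS_USE_VM),
 *     kdata[1] = ring     (e.g. RADEON_CS_RING_GFX),
 *     kdata[2] = priority (signed).
 */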

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to backoff the reservation
 *
 * If error is set, unvalidate the buffers; otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;
			if (bo == NULL)
				continue;

			drm_gem_object_unreference_unlocked(&bo->gem_base);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}
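
/* Parse and submit an IB that does not use a VM: run the per-ASIC checker
 * (radeon_cs_parse) over the command stream, wait for other rings via
 * radeon_cs_sync_rings(), note UVD/VCE usage for power management and then
 * schedule the IB on the selected ring.
 */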
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}
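
/* Bring the VM page tables up to date before a VM submission: update the
 * page directory, flush freed mappings, refresh the mapping of the
 * temporary ring BO and of every relocated BO, and make the IB wait on the
 * corresponding page-table update fences.
 */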
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}
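
/* Parse and submit an IB that runs inside the process VM: validate the
 * (optional) const IB and the main IB with the ring's ib_parse hook, update
 * the page tables under the VM mutex, sync with other rings and schedule
 * the IB (with the const IB attached on SI and later).
 */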
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}
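
/* A -EDEADLK return is treated as a detected GPU lockup while waiting for
 * fences: try a GPU reset and, if it succeeds, return -EAGAIN so userspace
 * resubmits the command stream.
 */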
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}
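
/* Allocate the parser's IB (and the const IB for SI+ VM submissions) and
 * copy the command stream in from the user chunks, either from the kdata
 * copy made in radeon_cs_parser_init() or directly from the user pointer.
 */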
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					   vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					       ib_chunk->user_ptr,
					       ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			   vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}
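
/* Top-level command submission entry point.  Rough flow: initialise the
 * parser from the ioctl chunks, copy in the IB(s), look up and validate the
 * relocated BOs, then hand the IB to either the non-VM path
 * (radeon_cs_ib_chunk) or the VM path (radeon_cs_ib_vm_chunk), and finally
 * fence/clean up the buffers and translate a lockup into -EAGAIN.
 */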
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the packet header dword in the ib
 *
 * Assume that p->chunk_ib is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size or if the packet type
 * is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}
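
/* For reference (deduced from the GET_* macros used above, not restated
 * from hardware documentation): a CP packet header carries the packet type
 * in bits 31:30, the payload dword count minus one in bits 29:16 and, for
 * type-3 packets, the opcode in bits 15:8.  A header of 0xC0001000, for
 * instance, is a type-3 NOP with a single payload dword.
 */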

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a relocation packet3 NOP.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to return the relocation entry
 * @nomm:	legacy no-memory-management path; take the GPU offset
 *		straight from the relocation chunk instead of a looked-up BO
 *
 * Check that the next packet is a relocation packet3 and return the
 * corresponding entry from the relocation table.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}