/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

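/* Local stand-in for the Linux copy_from_user(): in this port the command
 * stream data is apparently reachable with a plain memcpy(), so no real
 * user/kernel copy is needed.  Linux semantics are kept: return the number
 * of bytes that could NOT be copied, i.e. 0 on success, so that the callers'
 * "if (copy_from_user(...))" error checks keep working.
 */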
static inline unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
    memcpy(to, from, n);
    return 0;
}

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

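/* Walk the relocation chunk: look up every GEM handle, pick preferred and
 * allowed domains, compute a per-buffer priority, bucket-sort everything
 * into p->validated and finally validate the whole list for this ring.
 */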
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

		/* the first reloc of an UVD job is the msg and that must be in
		   VRAM, also put everything into VRAM on AGP cards and older
		   IGP chips to avoid image corruptions */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}
/*
		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}
*/
		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
//	if (need_mmap_lock)
//		down_read(&current->mm->mmap_sem);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

//	if (need_mmap_lock)
//		up_read(&current->mm->mmap_sem);

	return r;
}

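/* Map the userspace RADEON_CS_RING_* id (plus the requested priority) onto a
 * hardware ring index understood by the rest of the driver; returns -EINVAL
 * for ring ids the ASIC does not have.
 */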
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

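/* Make the IB wait for all fences attached to the reservation objects of the
 * validated buffers, so the submission is ordered after earlier work that
 * touches those BOs.
 */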
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct reservation_object *resv;

		resv = reloc->robj->tbo.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.shared);
		if (r)
			return r;
	}
	return 0;
}

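/* Copy the chunk array and the individual chunks in from userspace, classify
 * them (IB, relocs, const IB, flags), pull the flags/ring/priority words out
 * of the flags chunk and sanity-check the VM and ring configuration.
 */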
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

393
/**
394
 * cs_parser_fini() - clean parser states
395
 * @parser:	parser structure holding parsing context.
396
 * @error:	error number
397
 *
398
 * If error is set than unvalidate buffer, otherwise just free memory
399
 * used by parsing context.
400
 **/
401
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
402
{
403
	unsigned i;
404
 
405
	if (!error) {
406
		/* Sort the buffer list from the smallest to largest buffer,
407
		 * which affects the order of buffers in the LRU list.
408
		 * This assures that the smallest buffers are added first
409
		 * to the LRU list, so they are likely to be later evicted
410
		 * first, instead of large buffers whose eviction is more
411
		 * expensive.
412
		 *
413
		 * This slightly lowers the number of bytes moved by TTM
414
		 * per frame under memory pressure.
415
		 */
416
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
417
 
418
		ttm_eu_fence_buffer_objects(&parser->ticket,
419
					    &parser->validated,
5271 serge 420
					    &parser->ib.fence->base);
5078 serge 421
	} else if (backoff) {
422
		ttm_eu_backoff_reservation(&parser->ticket,
423
					   &parser->validated);
424
	}
425
 
426
	if (parser->relocs != NULL) {
427
		for (i = 0; i < parser->nrelocs; i++) {
5271 serge 428
			struct radeon_bo *bo = parser->relocs[i].robj;
429
			if (bo == NULL)
430
				continue;
431
 
432
			drm_gem_object_unreference_unlocked(&bo->gem_base);
5078 serge 433
		}
434
	}
435
	kfree(parser->track);
436
	kfree(parser->relocs);
5271 serge 437
	drm_free_large(parser->vm_bos);
5078 serge 438
	for (i = 0; i < parser->nchunks; i++)
439
		drm_free_large(parser->chunks[i].kdata);
440
	kfree(parser->chunks);
441
	kfree(parser->chunks_array);
442
	radeon_ib_free(parser->rdev, &parser->ib);
443
	radeon_ib_free(parser->rdev, &parser->const_ib);
444
}
445
 
446
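/* Submission path for non-VM command streams: run the ring-specific CS
 * checker, sync with the validated buffers and schedule the IB.
 */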
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

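/* Submission path for VM command streams: parse the (const) IBs, update the
 * page tables under the VM mutex, sync with the validated buffers and
 * schedule the IB (together with the const IB on SI and newer).
 */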
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

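/* -EDEADLK from the CS path is used to signal a GPU lockup: reset the GPU
 * and ask userspace to resubmit by converting the error to -EAGAIN.
 */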
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

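/* Allocate the IB (and the const IB for VM submissions on SI and newer) and
 * copy its contents from the user chunk, rejecting VM IBs larger than
 * RADEON_IB_VM_MAX_SIZE.
 */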
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					   vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					       ib_chunk->user_ptr,
					       ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
			   vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

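/* Top-level CS ioctl: initialize the parser, fill the IBs, resolve the
 * relocations and hand the stream to the non-VM or VM submission path,
 * converting lockups into -EAGAIN for userspace.
 */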
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

//   down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
//       up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
//       up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
//       up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
//   up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser:	parser structure holding parsing context.
 * @pkt:	where to store packet information
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	reloc information
 * @nomm:	no memory management for debugging
 *
 * Check if next packet is relocation packet3, do bo validation and compute
 * GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}