/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

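/* Local stand-in for the kernel's copy_from_user(): in this port the data
 * handed to the ioctl is assumed to live in directly addressable memory, so
 * a plain memcpy is enough. Like the real helper, it returns the number of
 * bytes left uncopied, i.e. 0 on success (the callers below rely on that).
 */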
static inline unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
    memcpy(to, from, n);
    return 0;	/* memcpy copies everything, so nothing is left uncopied */
}

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

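/**
 * radeon_cs_parser_relocs() - build and validate the list of buffers used by a CS
 * @p:	parser structure holding parsing context.
 *
 * Walks the relocation chunk, looks up every referenced GEM object, derives
 * its allowed/preferred domains and its priority, bucket-sorts the resulting
 * entries into p->validated and finally validates the whole list.
 */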
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

		/* The first reloc of an UVD job is the msg and that must be in
		   VRAM; also put everything into VRAM on AGP cards and older
		   IGP chips to avoid image corruption */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}
/*
		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}
*/
		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	return r;
}

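/**
 * radeon_cs_get_ring() - map a userspace ring id to a hardware ring index
 * @p:		parser structure holding parsing context.
 * @ring:	ring id from the flags chunk (RADEON_CS_RING_*)
 * @priority:	requested priority, used to choose between paired rings
 *
 * Stores the selected ring index in p->ring; returns -EINVAL for ring ids
 * the ASIC does not support.
 */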
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

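/* Collect the fences of every validated buffer into the IB's sync object,
 * so the command stream waits for previously submitted work that still
 * touches those buffers.
 */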
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct reservation_object *resv;

		resv = reloc->robj->tbo.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.shared);
		if (r)
			return r;
	}
	return 0;
}

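/**
 * radeon_cs_parser_init() - copy in and validate the CS chunks
 * @p:		parser structure holding parsing context.
 * @data:	ioctl argument (struct drm_radeon_cs)
 *
 * Copies the chunk array and chunk headers from userspace, records the IB,
 * relocation, flags and const IB chunks, and resolves the requested ring
 * and priority.
 */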
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	INIT_LIST_HEAD(&p->validated);

	if (!cs->num_chunks) {
		return 0;
	}

	/* get chunks */
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser state
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicate whether the buffer reservation should be backed off
 *
 * If error is set, unvalidate the buffers; otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This ensures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;
			if (bo == NULL)
				continue;

			drm_gem_object_unreference_unlocked(&bo->gem_base);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

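/* Submission path for command streams that do not use a VM: run the ASIC
 * command-stream checker on the kernel copy of the IB, sync with the
 * validated buffers and schedule the IB on the selected ring.
 */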
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

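/* Bring the VM page tables up to date for this submission: update the page
 * directory, clear freed mappings, re-map the temporary ring BO and every
 * buffer referenced by the CS, and add the page table update fences to the
 * IB's sync object.
 */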
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

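/* Submission path for VM command streams: run the ASIC IB checker on the
 * (const) IB(s), update the page tables under the VM mutex, sync with the
 * validated buffers and schedule the IB(s) on the selected ring.
 */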
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

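/* An -EDEADLK from the submission path is treated as a GPU lockup: reset
 * the GPU and convert the error to -EAGAIN so userspace resubmits the CS.
 */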
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

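/* Allocate the IB (and, on SI+, the optional const IB), enforce
 * RADEON_IB_VM_MAX_SIZE for VM submissions and copy the command words in
 * from the user chunks (or from the kernel copy if one was made).
 */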
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					   vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					       ib_chunk->user_ptr,
					       ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
			   vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

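/**
 * radeon_cs_ioctl() - top level entry point for command submission
 * @dev:	drm device
 * @data:	ioctl argument (struct drm_radeon_cs)
 * @filp:	drm file of the submitting client
 *
 * Initializes the parser, copies in the chunks, validates the referenced
 * buffers and schedules the IB(s), converting GPU lockups into -EAGAIN on
 * the way out.
 */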
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the packet header within the IB chunk
 *
 * Assumes that the IB chunk is properly set. Returns -EINVAL if the packet
 * is bigger than the remaining IB size or if the packet type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;
	int ret = 0, i;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		ret = -EINVAL;
		goto dump_ib;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		ret = -EINVAL;
		goto dump_ib;
	}
	return 0;

dump_ib:
	for (i = 0; i < ib_chunk->length_dw; i++) {
		if (i == idx)
			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
		else
			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
	}
	return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the resolved relocation entry
 * @nomm:	legacy (no memory manager) path; take the GPU offset
 *		directly from the relocation chunk
 *
 * Check that the next packet is a relocation packet3 and point *cs_reloc
 * at the matching entry of the parser's relocation table.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}