/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse
 */
27
#include 
27
#include 
28
#include 
28
#include 
29
#include 
29
#include 
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* Local stub for this port: the buffers handed to the CS ioctl are assumed to
 * be directly addressable here, so the copy is a plain memcpy().  Per the
 * kernel convention the function returns the number of bytes that could NOT
 * be copied, so report 0 on success; callers treat a non-zero return as
 * -EFAULT.
 */
static inline unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
    memcpy(to, from, n);
    return 0;
}

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}
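
/* Worked example (illustration only): relocations with priorities {3, 1, 3}
 * are appended to bucket[3], bucket[1] and bucket[3] in encounter order.
 * Splicing bucket[i] onto the head of out_list for i = 0..32 then yields the
 * two priority-3 entries first (still in their original relative order),
 * followed by the priority-1 entry, i.e. a stable sort by descending
 * priority.
 */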

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;
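		/* For example, a userspace priority of 7 on a read-only buffer
		 * maps to 7 * 2 + 0 = 14, while the same priority on a buffer
		 * that is written maps to 7 * 2 + 1 = 15; only the kernel
		 * itself uses RADEON_CS_MAX_PRIORITY (32), see below.
		 */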

		/* the first reloc of an UVD job is the msg and that must be in
		   VRAM, also put everything into VRAM on AGP cards and older
		   IGP chips to avoid image corruption */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}
/*
		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;
			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}
*/
		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	return r;
}

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct reservation_object *resv;

		resv = reloc->robj->tbo.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.shared);
		if (r)
			return r;
	}
	return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
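/* The CS ioctl payload is a struct drm_radeon_cs whose "chunks" field points
 * to an array of num_chunks 64-bit user addresses; each of those addresses in
 * turn points to a struct drm_radeon_cs_chunk carrying chunk_id, length_dw
 * and a chunk_data pointer.  The loop below copies that indirection chain in
 * and classifies the chunks (relocs, IB, const IB, flags).
 */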
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	INIT_LIST_HEAD(&p->validated);

	if (!cs->num_chunks) {
		return 0;
	}

	/* get chunks */
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	back off the reservation ticket if set
 *
 * If error is set, then unvalidate the buffers, otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;
			if (bo == NULL)
				continue;

			drm_gem_object_unreference_unlocked(&bo->gem_base);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					   vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					       ib_chunk->user_ptr,
					       ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
			   vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the first dword of the packet in the ib chunk
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet is unknown.
 **/
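/* A command-packet header encodes the packet type in its top two bits and a
 * count field in bits 29:16; count + 1 payload dwords follow the header (see
 * the RADEON_CP_PACKET_GET_* macros).  Type-0 packets additionally carry the
 * starting register offset in their low bits, type-3 packets an opcode, and
 * type-2 packets are single-dword padding.
 */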
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;
	int ret = 0, i;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		ret = -EINVAL;
		goto dump_ib;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		ret = -EINVAL;
		goto dump_ib;
	}
	return 0;

dump_ib:
	for (i = 0; i < ib_chunk->length_dw; i++) {
		if (i == idx)
			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
		else
			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
	}
	return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the resolved relocation entry
 * @nomm:	no memory manager (legacy UMS) path if non-zero
 *
 * Check if next packet is relocation packet3, do bo validation and compute
 * GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}