/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/kernel.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "r600d.h"
#include "r600_reg_safe.h"

static int r600_nomm;
extern void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size);


struct r600_cs_track {
	/* configuration we mirror so that we use the same code between kms/ums */
	u32			group_size;
	u32			nbanks;
	u32			npipes;
	/* value we track */
	u32			sq_config;
	u32			log_nsamples;
	u32			nsamples;
	u32			cb_color_base_last[8];
	struct radeon_bo	*cb_color_bo[8];
	u64			cb_color_bo_mc[8];
	u64			cb_color_bo_offset[8];
	struct radeon_bo	*cb_color_frag_bo[8];
	u64			cb_color_frag_offset[8];
	struct radeon_bo	*cb_color_tile_bo[8];
	u64			cb_color_tile_offset[8];
	u32			cb_color_mask[8];
	u32			cb_color_info[8];
	u32			cb_color_view[8];
	u32			cb_color_size_idx[8]; /* unused */
	u32			cb_target_mask;
	u32			cb_shader_mask;  /* unused */
	bool			is_resolve;
	u32			cb_color_size[8];
	u32			vgt_strmout_en;
	u32			vgt_strmout_buffer_en;
	struct radeon_bo	*vgt_strmout_bo[4];
	u64			vgt_strmout_bo_mc[4]; /* unused */
	u32			vgt_strmout_bo_offset[4];
	u32			vgt_strmout_size[4];
	u32			db_depth_control;
	u32			db_depth_info;
	u32			db_depth_size_idx;
	u32			db_depth_view;
	u32			db_depth_size;
	u32			db_offset;
	struct radeon_bo	*db_bo;
	u64			db_bo_mc;
	bool			sx_misc_kill_all_prims;
	bool			cb_dirty;
	bool			db_dirty;
	bool			streamout_dirty;
	struct radeon_bo	*htile_bo;
	u64			htile_offset;
	u32			htile_surface;
};

#define FMT_8_BIT(fmt, vc)   [fmt] = { 1, 1, 1, vc, CHIP_R600 }
#define FMT_16_BIT(fmt, vc)  [fmt] = { 1, 1, 2, vc, CHIP_R600 }
#define FMT_24_BIT(fmt)      [fmt] = { 1, 1, 4,  0, CHIP_R600 }
#define FMT_32_BIT(fmt, vc)  [fmt] = { 1, 1, 4, vc, CHIP_R600 }
#define FMT_48_BIT(fmt)      [fmt] = { 1, 1, 8,  0, CHIP_R600 }
#define FMT_64_BIT(fmt, vc)  [fmt] = { 1, 1, 8, vc, CHIP_R600 }
#define FMT_96_BIT(fmt)      [fmt] = { 1, 1, 12, 0, CHIP_R600 }
#define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16, vc, CHIP_R600 }

struct gpu_formats {
	unsigned blockwidth;
	unsigned blockheight;
	unsigned blocksize;
	unsigned valid_color;
	enum radeon_family min_family;
};
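
/*
 * Illustration (comment added for clarity): each FMT_* macro above is a
 * designated initializer for color_formats_table below, filling the
 * gpu_formats fields in order: blockwidth, blockheight, blocksize (bytes),
 * valid_color, min_family. For example,
 *   FMT_16_BIT(V_038004_COLOR_5_6_5, 1)
 * expands to
 *   [V_038004_COLOR_5_6_5] = { 1, 1, 2, 1, CHIP_R600 },
 * i.e. a 1x1-pixel block of 2 bytes that is valid as a color target on R600
 * and newer. Entries written without a macro leave min_family
 * zero-initialized, so the family check never rejects them.
 */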

static const struct gpu_formats color_formats_table[] = {
	/* 8-bit */
	FMT_8_BIT(V_038004_COLOR_8, 1),
	FMT_8_BIT(V_038004_COLOR_4_4, 1),
	FMT_8_BIT(V_038004_COLOR_3_3_2, 1),
	FMT_8_BIT(V_038004_FMT_1, 0),

	/* 16-bit */
	FMT_16_BIT(V_038004_COLOR_16, 1),
	FMT_16_BIT(V_038004_COLOR_16_FLOAT, 1),
	FMT_16_BIT(V_038004_COLOR_8_8, 1),
	FMT_16_BIT(V_038004_COLOR_5_6_5, 1),
	FMT_16_BIT(V_038004_COLOR_6_5_5, 1),
	FMT_16_BIT(V_038004_COLOR_1_5_5_5, 1),
	FMT_16_BIT(V_038004_COLOR_4_4_4_4, 1),
	FMT_16_BIT(V_038004_COLOR_5_5_5_1, 1),

	/* 24-bit */
	FMT_24_BIT(V_038004_FMT_8_8_8),

	/* 32-bit */
	FMT_32_BIT(V_038004_COLOR_32, 1),
	FMT_32_BIT(V_038004_COLOR_32_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_16_16, 1),
	FMT_32_BIT(V_038004_COLOR_16_16_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_8_24, 1),
	FMT_32_BIT(V_038004_COLOR_8_24_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_24_8, 1),
	FMT_32_BIT(V_038004_COLOR_24_8_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_10_11_11, 1),
	FMT_32_BIT(V_038004_COLOR_10_11_11_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_11_11_10, 1),
	FMT_32_BIT(V_038004_COLOR_11_11_10_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_2_10_10_10, 1),
	FMT_32_BIT(V_038004_COLOR_8_8_8_8, 1),
	FMT_32_BIT(V_038004_COLOR_10_10_10_2, 1),
	FMT_32_BIT(V_038004_FMT_5_9_9_9_SHAREDEXP, 0),
	FMT_32_BIT(V_038004_FMT_32_AS_8, 0),
	FMT_32_BIT(V_038004_FMT_32_AS_8_8, 0),

	/* 48-bit */
	FMT_48_BIT(V_038004_FMT_16_16_16),
	FMT_48_BIT(V_038004_FMT_16_16_16_FLOAT),

	/* 64-bit */
	FMT_64_BIT(V_038004_COLOR_X24_8_32_FLOAT, 1),
	FMT_64_BIT(V_038004_COLOR_32_32, 1),
	FMT_64_BIT(V_038004_COLOR_32_32_FLOAT, 1),
	FMT_64_BIT(V_038004_COLOR_16_16_16_16, 1),
	FMT_64_BIT(V_038004_COLOR_16_16_16_16_FLOAT, 1),

	FMT_96_BIT(V_038004_FMT_32_32_32),
	FMT_96_BIT(V_038004_FMT_32_32_32_FLOAT),

	/* 128-bit */
	FMT_128_BIT(V_038004_COLOR_32_32_32_32, 1),
	FMT_128_BIT(V_038004_COLOR_32_32_32_32_FLOAT, 1),

	[V_038004_FMT_GB_GR] = { 2, 1, 4, 0 },
	[V_038004_FMT_BG_RG] = { 2, 1, 4, 0 },

	/* block compressed formats */
	[V_038004_FMT_BC1] = { 4, 4, 8, 0 },
	[V_038004_FMT_BC2] = { 4, 4, 16, 0 },
	[V_038004_FMT_BC3] = { 4, 4, 16, 0 },
	[V_038004_FMT_BC4] = { 4, 4, 8, 0 },
	[V_038004_FMT_BC5] = { 4, 4, 16, 0 },
	[V_038004_FMT_BC6] = { 4, 4, 16, 0, CHIP_CEDAR }, /* Evergreen-only */
	[V_038004_FMT_BC7] = { 4, 4, 16, 0, CHIP_CEDAR }, /* Evergreen-only */

	/* The other Evergreen formats */
	[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR },
};

bool r600_fmt_is_valid_color(u32 format)
{
	if (format >= ARRAY_SIZE(color_formats_table))
		return false;

	if (color_formats_table[format].valid_color)
		return true;

	return false;
}

bool r600_fmt_is_valid_texture(u32 format, enum radeon_family family)
{
	if (format >= ARRAY_SIZE(color_formats_table))
		return false;

	if (family < color_formats_table[format].min_family)
		return false;

	if (color_formats_table[format].blockwidth > 0)
		return true;

	return false;
}

int r600_fmt_get_blocksize(u32 format)
{
	if (format >= ARRAY_SIZE(color_formats_table))
		return 0;

	return color_formats_table[format].blocksize;
}

int r600_fmt_get_nblocksx(u32 format, u32 w)
{
	unsigned bw;

	if (format >= ARRAY_SIZE(color_formats_table))
		return 0;

	bw = color_formats_table[format].blockwidth;
	if (bw == 0)
		return 0;

	return (w + bw - 1) / bw;
}

int r600_fmt_get_nblocksy(u32 format, u32 h)
{
	unsigned bh;

	if (format >= ARRAY_SIZE(color_formats_table))
		return 0;

	bh = color_formats_table[format].blockheight;
	if (bh == 0)
		return 0;

	return (h + bh - 1) / bh;
}
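
/*
 * Worked example (illustrative numbers): together these helpers give the
 * byte size of a linear image; (w + bw - 1) / bw is integer ceil division.
 * For a 4x4 block-compressed format such as V_038004_FMT_BC1 (blocksize 8
 * in the table above), a 70x34 texel image needs
 * ceil(70/4) * ceil(34/4) = 18 * 9 = 162 blocks, so
 *   r600_fmt_get_nblocksx(fmt, 70) * r600_fmt_get_nblocksy(fmt, 34)
 *     * r600_fmt_get_blocksize(fmt) = 18 * 9 * 8 = 1296 bytes.
 * This is the same product r600_cs_track_validate_cb() below computes
 * (times nsamples) when checking a color buffer against its BO size.
 */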

struct array_mode_checker {
	int array_mode;
	u32 group_size;
	u32 nbanks;
	u32 npipes;
	u32 nsamples;
	u32 blocksize;
};

/* returns alignment in pixels for pitch/height/depth and bytes for base */
static int r600_get_array_mode_alignment(struct array_mode_checker *values,
						u32 *pitch_align,
						u32 *height_align,
						u32 *depth_align,
						u64 *base_align)
{
	u32 tile_width = 8;
	u32 tile_height = 8;
	u32 macro_tile_width = values->nbanks;
	u32 macro_tile_height = values->npipes;
	u32 tile_bytes = tile_width * tile_height * values->blocksize * values->nsamples;
	u32 macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;

	switch (values->array_mode) {
	case ARRAY_LINEAR_GENERAL:
		/* technically tile_width/_height for pitch/height */
		*pitch_align = 1; /* tile_width */
		*height_align = 1; /* tile_height */
		*depth_align = 1;
		*base_align = 1;
		break;
	case ARRAY_LINEAR_ALIGNED:
		*pitch_align = max((u32)64, (u32)(values->group_size / values->blocksize));
		*height_align = 1;
		*depth_align = 1;
		*base_align = values->group_size;
		break;
	case ARRAY_1D_TILED_THIN1:
		*pitch_align = max((u32)tile_width,
				   (u32)(values->group_size /
					 (tile_height * values->blocksize * values->nsamples)));
		*height_align = tile_height;
		*depth_align = 1;
		*base_align = values->group_size;
		break;
	case ARRAY_2D_TILED_THIN1:
		*pitch_align = max((u32)macro_tile_width * tile_width,
				(u32)((values->group_size * values->nbanks) /
				(values->blocksize * values->nsamples * tile_width)));
		*height_align = macro_tile_height * tile_height;
		*depth_align = 1;
		*base_align = max(macro_tile_bytes,
				  (*pitch_align) * values->blocksize * (*height_align) * values->nsamples);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
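
/*
 * Worked example (illustrative parameter values): for ARRAY_2D_TILED_THIN1
 * with group_size = 256, nbanks = 8, npipes = 2, blocksize = 4, nsamples = 1:
 *   tile_bytes       = 8 * 8 * 4 * 1 = 256
 *   macro_tile_bytes = 8 * 2 * 256   = 4096
 *   pitch_align      = max(8 * 8, (256 * 8) / (4 * 1 * 8)) = 64 pixels
 *   height_align     = 2 * 8 = 16 pixels
 *   base_align       = max(4096, 64 * 4 * 16 * 1) = 4096 bytes
 * i.e. such a surface needs a pitch that is a multiple of 64 pixels, a
 * height that is a multiple of 16 rows, and a 4096-byte-aligned base.
 */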

static void r600_cs_track_init(struct r600_cs_track *track)
{
	int i;

	/* assume DX9 mode */
	track->sq_config = DX9_CONSTS;
	for (i = 0; i < 8; i++) {
		track->cb_color_base_last[i] = 0;
		track->cb_color_size[i] = 0;
		track->cb_color_size_idx[i] = 0;
		track->cb_color_info[i] = 0;
		track->cb_color_view[i] = 0xFFFFFFFF;
		track->cb_color_bo[i] = NULL;
		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
		track->cb_color_bo_mc[i] = 0xFFFFFFFF;
		track->cb_color_frag_bo[i] = NULL;
		track->cb_color_frag_offset[i] = 0xFFFFFFFF;
		track->cb_color_tile_bo[i] = NULL;
		track->cb_color_tile_offset[i] = 0xFFFFFFFF;
		track->cb_color_mask[i] = 0xFFFFFFFF;
	}
	track->is_resolve = false;
	track->nsamples = 16;
	track->log_nsamples = 4;
	track->cb_target_mask = 0xFFFFFFFF;
	track->cb_shader_mask = 0xFFFFFFFF;
	track->cb_dirty = true;
	track->db_bo = NULL;
	track->db_bo_mc = 0xFFFFFFFF;
	/* assume the biggest format and that htile is enabled */
	track->db_depth_info = 7 | (1 << 25);
	track->db_depth_view = 0xFFFFC000;
	track->db_depth_size = 0xFFFFFFFF;
	track->db_depth_size_idx = 0;
	track->db_depth_control = 0xFFFFFFFF;
	track->db_dirty = true;
	track->htile_bo = NULL;
	track->htile_offset = 0xFFFFFFFF;
	track->htile_surface = 0;

	for (i = 0; i < 4; i++) {
		track->vgt_strmout_size[i] = 0;
		track->vgt_strmout_bo[i] = NULL;
		track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
		track->vgt_strmout_bo_mc[i] = 0xFFFFFFFF;
	}
	track->streamout_dirty = true;
	track->sx_misc_kill_all_prims = false;
}

static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
{
	struct r600_cs_track *track = p->track;
	u32 slice_tile_max, size, tmp;
	u32 height, height_align, pitch, pitch_align, depth_align;
	u64 base_offset, base_align;
	struct array_mode_checker array_check;
	volatile u32 *ib = p->ib.ptr;
	unsigned array_mode;
	u32 format;
	/* When resolve is used, the second colorbuffer always has 1 sample. */
	unsigned nsamples = track->is_resolve && i == 1 ? 1 : track->nsamples;

	size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i];
	format = G_0280A0_FORMAT(track->cb_color_info[i]);
	if (!r600_fmt_is_valid_color(format)) {
		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n",
			 __func__, __LINE__, format,
			i, track->cb_color_info[i]);
		return -EINVAL;
	}
	/* pitch in pixels */
	pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) * 8;
	slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1;
	slice_tile_max *= 64;
	height = slice_tile_max / pitch;
	if (height > 8192)
		height = 8192;
	array_mode = G_0280A0_ARRAY_MODE(track->cb_color_info[i]);

	base_offset = track->cb_color_bo_mc[i] + track->cb_color_bo_offset[i];
	array_check.array_mode = array_mode;
	array_check.group_size = track->group_size;
	array_check.nbanks = track->nbanks;
	array_check.npipes = track->npipes;
	array_check.nsamples = nsamples;
	array_check.blocksize = r600_fmt_get_blocksize(format);
	if (r600_get_array_mode_alignment(&array_check,
					  &pitch_align, &height_align, &depth_align, &base_align)) {
		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
			 G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i,
			 track->cb_color_info[i]);
		return -EINVAL;
	}
	switch (array_mode) {
	case V_0280A0_ARRAY_LINEAR_GENERAL:
		break;
	case V_0280A0_ARRAY_LINEAR_ALIGNED:
		break;
	case V_0280A0_ARRAY_1D_TILED_THIN1:
		/* avoid breaking userspace */
		if (height > 7)
			height &= ~0x7;
		break;
	case V_0280A0_ARRAY_2D_TILED_THIN1:
		break;
	default:
		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
			G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i,
			track->cb_color_info[i]);
		return -EINVAL;
	}

	if (!IS_ALIGNED(pitch, pitch_align)) {
		dev_warn(p->dev, "%s:%d cb pitch (%d, 0x%x, %d) invalid\n",
			 __func__, __LINE__, pitch, pitch_align, array_mode);
		return -EINVAL;
	}
	if (!IS_ALIGNED(height, height_align)) {
		dev_warn(p->dev, "%s:%d cb height (%d, 0x%x, %d) invalid\n",
			 __func__, __LINE__, height, height_align, array_mode);
		return -EINVAL;
	}
	if (!IS_ALIGNED(base_offset, base_align)) {
		dev_warn(p->dev, "%s offset[%d] 0x%llx 0x%llx, %d not aligned\n", __func__, i,
			 base_offset, base_align, array_mode);
		return -EINVAL;
	}

	/* check offset */
	tmp = r600_fmt_get_nblocksy(format, height) * r600_fmt_get_nblocksx(format, pitch) *
	      r600_fmt_get_blocksize(format) * nsamples;
	switch (array_mode) {
	default:
	case V_0280A0_ARRAY_LINEAR_GENERAL:
	case V_0280A0_ARRAY_LINEAR_ALIGNED:
		tmp += track->cb_color_view[i] & 0xFF;
		break;
	case V_0280A0_ARRAY_1D_TILED_THIN1:
	case V_0280A0_ARRAY_2D_TILED_THIN1:
		tmp += G_028080_SLICE_MAX(track->cb_color_view[i]) * tmp;
		break;
	}
	if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
		if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) {
			/* the initial DDX does bad things with the CB size occasionally */
			/* it rounds up height too far for slice tile max but the BO is smaller */
			/* r600c,g also seem to flush at bad times in some apps resulting in
			 * bogus values here. So for linear just allow anything to avoid breaking
			 * broken userspace.
			 */
		} else {
			dev_warn(p->dev, "%s offset[%d] %d %llu %d %lu too big (%d %d) (%d %d %d)\n",
				 __func__, i, array_mode,
				 track->cb_color_bo_offset[i], tmp,
				 radeon_bo_size(track->cb_color_bo[i]),
				 pitch, height, r600_fmt_get_nblocksx(format, pitch),
				 r600_fmt_get_nblocksy(format, height),
				 r600_fmt_get_blocksize(format));
			return -EINVAL;
		}
	}
	/* limit max tile */
	tmp = (height * pitch) >> 6;
	if (tmp < slice_tile_max)
		slice_tile_max = tmp;
	tmp = S_028060_PITCH_TILE_MAX((pitch / 8) - 1) |
		S_028060_SLICE_TILE_MAX(slice_tile_max - 1);
	ib[track->cb_color_size_idx[i]] = tmp;

	/* FMASK/CMASK */
	switch (G_0280A0_TILE_MODE(track->cb_color_info[i])) {
	case V_0280A0_TILE_DISABLE:
		break;
	case V_0280A0_FRAG_ENABLE:
		if (track->nsamples > 1) {
			uint32_t tile_max = G_028100_FMASK_TILE_MAX(track->cb_color_mask[i]);
			/* the tile size is 8x8, but the size is in units of bits;
			 * for bytes, just multiply by 8. */
			uint32_t bytes = track->nsamples * track->log_nsamples * 8 * (tile_max + 1);

			if (bytes + track->cb_color_frag_offset[i] >
			    radeon_bo_size(track->cb_color_frag_bo[i])) {
				dev_warn(p->dev, "%s FMASK_TILE_MAX too large "
					 "(tile_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n",
					 __func__, tile_max, bytes,
					 track->cb_color_frag_offset[i],
					 radeon_bo_size(track->cb_color_frag_bo[i]));
				return -EINVAL;
			}
		}
		/* fall through */
	case V_0280A0_CLEAR_ENABLE:
	{
		uint32_t block_max = G_028100_CMASK_BLOCK_MAX(track->cb_color_mask[i]);
		/* One block = 128x128 pixels, one 8x8 tile has 4 bits:
		 * (128*128) / (8*8) / 2 = 128 bytes per block. */
		uint32_t bytes = (block_max + 1) * 128;

		if (bytes + track->cb_color_tile_offset[i] >
		    radeon_bo_size(track->cb_color_tile_bo[i])) {
			dev_warn(p->dev, "%s CMASK_BLOCK_MAX too large "
				 "(block_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n",
				 __func__, block_max, bytes,
				 track->cb_color_tile_offset[i],
				 radeon_bo_size(track->cb_color_tile_bo[i]));
			return -EINVAL;
		}
		break;
	}
	default:
		dev_warn(p->dev, "%s invalid tile mode\n", __func__);
		return -EINVAL;
	}
	return 0;
}
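
/*
 * Decoding note with a worked example (illustrative, inferred from the code
 * above): CB_COLOR*_SIZE stores the pitch as a count of 8-pixel tiles minus
 * one and the slice size as a count of 8x8 (64-pixel) tiles minus one, hence
 *   pitch  = (PITCH_TILE_MAX + 1) * 8
 *   height = ((SLICE_TILE_MAX + 1) * 64) / pitch
 * For a 256x192 linear surface: PITCH_TILE_MAX = 256/8 - 1 = 31 and
 * SLICE_TILE_MAX = (256 * 192)/64 - 1 = 767; decoding gives back
 * pitch = 256 pixels and height = (768 * 64)/256 = 192 lines, matching the
 * S_028060_* re-encoding done before the IB rewrite.
 */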

static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
{
	struct r600_cs_track *track = p->track;
	u32 nviews, bpe, ntiles, size, slice_tile_max, tmp;
	u32 height_align, pitch_align, depth_align;
	u32 pitch = 8192;
	u32 height = 8192;
	u64 base_offset, base_align;
	struct array_mode_checker array_check;
	int array_mode;
	volatile u32 *ib = p->ib.ptr;


	if (track->db_bo == NULL) {
		dev_warn(p->dev, "z/stencil with no depth buffer\n");
		return -EINVAL;
	}
	switch (G_028010_FORMAT(track->db_depth_info)) {
	case V_028010_DEPTH_16:
		bpe = 2;
		break;
	case V_028010_DEPTH_X8_24:
	case V_028010_DEPTH_8_24:
	case V_028010_DEPTH_X8_24_FLOAT:
	case V_028010_DEPTH_8_24_FLOAT:
	case V_028010_DEPTH_32_FLOAT:
		bpe = 4;
		break;
	case V_028010_DEPTH_X24_8_32_FLOAT:
		bpe = 8;
		break;
	default:
		dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info));
		return -EINVAL;
	}
	if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
		if (!track->db_depth_size_idx) {
			dev_warn(p->dev, "z/stencil buffer size not set\n");
			return -EINVAL;
		}
		tmp = radeon_bo_size(track->db_bo) - track->db_offset;
		tmp = (tmp / bpe) >> 6;
		if (!tmp) {
			dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n",
					track->db_depth_size, bpe, track->db_offset,
					radeon_bo_size(track->db_bo));
			return -EINVAL;
		}
		ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
	} else {
		size = radeon_bo_size(track->db_bo);
		/* pitch in pixels */
		pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8;
		slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
		slice_tile_max *= 64;
		height = slice_tile_max / pitch;
		if (height > 8192)
			height = 8192;
		base_offset = track->db_bo_mc + track->db_offset;
		array_mode = G_028010_ARRAY_MODE(track->db_depth_info);
		array_check.array_mode = array_mode;
		array_check.group_size = track->group_size;
		array_check.nbanks = track->nbanks;
		array_check.npipes = track->npipes;
		array_check.nsamples = track->nsamples;
		array_check.blocksize = bpe;
		if (r600_get_array_mode_alignment(&array_check,
					&pitch_align, &height_align, &depth_align, &base_align)) {
			dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
					G_028010_ARRAY_MODE(track->db_depth_info),
					track->db_depth_info);
			return -EINVAL;
		}
		switch (array_mode) {
		case V_028010_ARRAY_1D_TILED_THIN1:
			/* don't break userspace */
			height &= ~0x7;
			break;
		case V_028010_ARRAY_2D_TILED_THIN1:
			break;
		default:
			dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
					G_028010_ARRAY_MODE(track->db_depth_info),
					track->db_depth_info);
			return -EINVAL;
		}

		if (!IS_ALIGNED(pitch, pitch_align)) {
			dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n",
					__func__, __LINE__, pitch, pitch_align, array_mode);
			return -EINVAL;
		}
		if (!IS_ALIGNED(height, height_align)) {
			dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n",
					__func__, __LINE__, height, height_align, array_mode);
			return -EINVAL;
		}
		if (!IS_ALIGNED(base_offset, base_align)) {
			dev_warn(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__,
					base_offset, base_align, array_mode);
			return -EINVAL;
		}

		ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
		nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
		tmp = ntiles * bpe * 64 * nviews * track->nsamples;
		if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) {
			dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n",
					array_mode,
					track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset,
					radeon_bo_size(track->db_bo));
			return -EINVAL;
		}
	}

	/* hyperz */
	if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) {
		unsigned long size;
		unsigned nbx, nby;

		if (track->htile_bo == NULL) {
			dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
				 __func__, __LINE__, track->db_depth_info);
			return -EINVAL;
		}
		if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
			dev_warn(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 0x%08x\n",
				 __func__, __LINE__, track->db_depth_size);
			return -EINVAL;
		}

		nbx = pitch;
		nby = height;
		if (G_028D24_LINEAR(track->htile_surface)) {
			/* nbx must be 16 htiles aligned == 16 * 8 pixel aligned */
			nbx = round_up(nbx, 16 * 8);
			/* nby is npipes htiles aligned == npipes * 8 pixel aligned */
			nby = round_up(nby, track->npipes * 8);
		} else {
			/* always assume 8x8 htile */
			/* align is htile align * 8; the htile alignment varies with
			 * the number of pipes, the tile width and nby
			 */
			switch (track->npipes) {
			case 8:
				/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
				nbx = round_up(nbx, 64 * 8);
				nby = round_up(nby, 64 * 8);
				break;
			case 4:
				/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
				nbx = round_up(nbx, 64 * 8);
				nby = round_up(nby, 32 * 8);
				break;
			case 2:
				/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
				nbx = round_up(nbx, 32 * 8);
				nby = round_up(nby, 32 * 8);
				break;
			case 1:
				/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
				nbx = round_up(nbx, 32 * 8);
				nby = round_up(nby, 16 * 8);
				break;
			default:
				dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
					 __func__, __LINE__, track->npipes);
				return -EINVAL;
			}
		}
		/* compute number of htiles */
		nbx = nbx >> 3;
		nby = nby >> 3;
		/* size must be aligned on npipes * 2K boundary */
		size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
		size += track->htile_offset;

		if (size > radeon_bo_size(track->htile_bo)) {
			dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
				 __func__, __LINE__, radeon_bo_size(track->htile_bo),
				 size, nbx, nby);
			return -EINVAL;
		}
	}

	track->db_dirty = false;
	return 0;
}

static int r600_cs_track_check(struct radeon_cs_parser *p)
{
	struct r600_cs_track *track = p->track;
	u32 tmp;
	int r, i;

	/* on legacy kernels we don't perform the advanced checks */
	if (p->rdev == NULL)
		return 0;

	/* check streamout */
	if (track->streamout_dirty && track->vgt_strmout_en) {
		for (i = 0; i < 4; i++) {
			if (track->vgt_strmout_buffer_en & (1 << i)) {
				if (track->vgt_strmout_bo[i]) {
					u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
						(u64)track->vgt_strmout_size[i];
					if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
						DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
							  i, offset,
							  radeon_bo_size(track->vgt_strmout_bo[i]));
						return -EINVAL;
					}
				} else {
					dev_warn(p->dev, "No buffer for streamout %d\n", i);
					return -EINVAL;
				}
			}
		}
		track->streamout_dirty = false;
	}

	if (track->sx_misc_kill_all_prims)
		return 0;

	/* check that we have a cb for each enabled target; we don't check
	 * shader_mask because it seems mesa isn't always setting it :(
	 */
	if (track->cb_dirty) {
		tmp = track->cb_target_mask;

		/* We must check both colorbuffers for RESOLVE. */
		if (track->is_resolve) {
			tmp |= 0xff;
		}

		for (i = 0; i < 8; i++) {
			u32 format = G_0280A0_FORMAT(track->cb_color_info[i]);

			if (format != V_0280A0_COLOR_INVALID &&
			    (tmp >> (i * 4)) & 0xF) {
				/* at least one component is enabled */
				if (track->cb_color_bo[i] == NULL) {
					dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
						__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
					return -EINVAL;
				}
				/* perform rewrite of CB_COLOR[0-7]_SIZE */
				r = r600_cs_track_validate_cb(p, i);
				if (r)
					return r;
			}
		}
		track->cb_dirty = false;
	}

	/* Check depth buffer */
	if (track->db_dirty &&
	    G_028010_FORMAT(track->db_depth_info) != V_028010_DEPTH_INVALID &&
	    (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
	     G_028800_Z_ENABLE(track->db_depth_control))) {
		r = r600_cs_track_validate_db(p);
		if (r)
			return r;
	}

	return 0;
}
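
/*
 * Note (an inferred reading, not an upstream comment): CB_TARGET_MASK packs
 * one 4-bit R/G/B/A write mask per render target, so the
 * (tmp >> (i * 4)) & 0xF test above fires when any channel of target i is
 * enabled; only then must a color buffer be bound and validated for it.
 */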

/**
 * r600_cs_packet_parse_vline() - parse userspace VLINE packet
 * @parser:		parser structure holding parsing context.
 *
 * This is an R600-specific function for parsing VLINE packets.
 * The real work is done by the r600_cs_common_vline_parse() function.
 * Here we just set up the ASIC-specific register table and call
 * the common implementation function.
 */
static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	static uint32_t vline_start_end[2] = {AVIVO_D1MODE_VLINE_START_END,
					      AVIVO_D2MODE_VLINE_START_END};
	static uint32_t vline_status[2] = {AVIVO_D1MODE_VLINE_STATUS,
					   AVIVO_D2MODE_VLINE_STATUS};

	return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
}

/**
 * r600_cs_common_vline_parse() - common vline parser
 * @parser:		parser structure holding parsing context.
 * @vline_start_end:    table of vline_start_end registers
 * @vline_status:       table of vline_status registers
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET3 - WAIT_REG_MEM poll vline status reg
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT_REG_MEM packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case. This function is common for all ASICs that
 * are R600 and newer. The parsing algorithm is the same, and only
 * differs in which registers are used.
 *
 * Caller is the ASIC-specific function which passes the parser
 * context and ASIC-specific register table.
 */
int r600_cs_common_vline_parse(struct radeon_cs_parser *p,
			       uint32_t *vline_start_end,
			       uint32_t *vline_status)
{
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, wait_reg_mem;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg, wait_reg_mem_info;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the WAIT_REG_MEM */
	r = radeon_cs_packet_parse(p, &wait_reg_mem, p->idx);
	if (r)
		return r;

	/* check it's a WAIT_REG_MEM */
	if (wait_reg_mem.type != RADEON_PACKET_TYPE3 ||
	    wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
		DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
		return -EINVAL;
	}

	wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
	/* bit 4 is reg (0) or mem (1) */
	if (wait_reg_mem_info & 0x10) {
		DRM_ERROR("vline WAIT_REG_MEM waiting on MEM instead of REG\n");
		return -EINVAL;
	}
	/* bit 8 is me (0) or pfp (1) */
	if (wait_reg_mem_info & 0x100) {
		DRM_ERROR("vline WAIT_REG_MEM waiting on PFP instead of ME\n");
		return -EINVAL;
	}
	/* waiting for value to be equal */
	if ((wait_reg_mem_info & 0x7) != 0x3) {
		DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
		return -EINVAL;
	}
	if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != vline_status[0]) {
		DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != RADEON_VLINE_STAT) {
		DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += wait_reg_mem.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
	reg = R600_CP_PACKET0_GET_REG(header);
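
	/*
	 * IB layout of the full sequence, derived from the offsets used
	 * above and below:
	 *   h_idx + 0:     PACKET0 header for the VLINE_START_END write
	 *   h_idx + 1:     start/end value
	 *   h_idx + 2..8:  WAIT_REG_MEM packet, 7 dwords (poll register
	 *                  at h_idx + 4, bit mask at h_idx + 7)
	 *   h_idx + 9:     NOP header; its reloc payload at h_idx + 10
	 *                  carries the crtc_id read above (h_idx + 2 + 7 + 1)
	 */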
	crtc = drm_crtc_find(p->rdev->ddev, crtc_id);
	if (!crtc) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -ENOENT;
	}
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
		ib[h_idx + 4] = PACKET2(0);
		ib[h_idx + 5] = PACKET2(0);
		ib[h_idx + 6] = PACKET2(0);
		ib[h_idx + 7] = PACKET2(0);
		ib[h_idx + 8] = PACKET2(0);
	} else if (reg == vline_start_end[0]) {
		header &= ~R600_CP_PACKET0_REG_MASK;
		header |= vline_start_end[crtc_id] >> 2;
		ib[h_idx] = header;
		ib[h_idx + 4] = vline_status[crtc_id] >> 2;
	} else {
		DRM_ERROR("unknown crtc reloc\n");
		return -EINVAL;
	}
	return 0;
}

static int r600_packet0_check(struct radeon_cs_parser *p,
				struct radeon_cs_packet *pkt,
				unsigned idx, unsigned reg)
{
	int r;

	switch (reg) {
	case AVIVO_D1MODE_VLINE_START_END:
		r = r600_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			return r;
		}
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
				struct radeon_cs_packet *pkt)
{
	unsigned reg, i;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
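	/*
	 * A PACKET0 writes pkt->count + 1 consecutive dwords starting at
	 * pkt->reg; registers are dword-sized, hence the reg += 4 stride.
	 */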
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		r = r600_packet0_check(p, pkt, idx, reg);
		if (r) {
			return r;
		}
	}
	return 0;
}

/**
 * r600_cs_check_reg() - check if register is authorized or not
 * @parser: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 *
 * This function will test against r600_reg_safe_bm and return 0
 * if the register is safe. If the register is not flagged as safe,
 * this function will test it against a list of registers needing
 * special handling.
 */
static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
	struct r600_cs_track *track = (struct r600_cs_track *)p->track;
	struct radeon_bo_list *reloc;
	u32 m, i, tmp, *ib;
	int r;
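
	/*
	 * r600_reg_safe_bm stores one bit per dword register: reg >> 7
	 * selects a 32-bit word (each word covers 128 register bytes)
	 * and (reg >> 2) & 31 the bit within it. A clear bit means the
	 * register is blanket-safe; a set bit means it needs the
	 * special handling in the switch below.
	 */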
	i = (reg >> 7);
	if (i >= ARRAY_SIZE(r600_reg_safe_bm)) {
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	m = 1 << ((reg >> 2) & 31);
	if (!(r600_reg_safe_bm[i] & m))
		return 0;
	ib = p->ib.ptr;
	switch (reg) {
	/* force the following regs to 0 in an attempt to disable the out
	 * buffer; we need to understand how it works better before we can
	 * perform a real security check on it (Jerome)
	 */
	case R_0288A8_SQ_ESGS_RING_ITEMSIZE:
	case R_008C44_SQ_ESGS_RING_SIZE:
	case R_0288B0_SQ_ESTMP_RING_ITEMSIZE:
	case R_008C54_SQ_ESTMP_RING_SIZE:
	case R_0288C0_SQ_FBUF_RING_ITEMSIZE:
	case R_008C74_SQ_FBUF_RING_SIZE:
	case R_0288B4_SQ_GSTMP_RING_ITEMSIZE:
	case R_008C5C_SQ_GSTMP_RING_SIZE:
	case R_0288AC_SQ_GSVS_RING_ITEMSIZE:
	case R_008C4C_SQ_GSVS_RING_SIZE:
	case R_0288BC_SQ_PSTMP_RING_ITEMSIZE:
	case R_008C6C_SQ_PSTMP_RING_SIZE:
	case R_0288C4_SQ_REDUC_RING_ITEMSIZE:
	case R_008C7C_SQ_REDUC_RING_SIZE:
	case R_0288B8_SQ_VSTMP_RING_ITEMSIZE:
	case R_008C64_SQ_VSTMP_RING_SIZE:
	case R_0288C8_SQ_GS_VERT_ITEMSIZE:
		/* get value to populate the IB don't remove */
		/*tmp =radeon_get_ib_value(p, idx);
		  ib[idx] = 0;*/
		break;
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case SQ_CONFIG:
		track->sq_config = radeon_get_ib_value(p, idx);
		break;
	case R_028800_DB_DEPTH_CONTROL:
		track->db_depth_control = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case R_028010_DB_DEPTH_INFO:
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
		    radeon_cs_packet_next_is_pkt3_nop(p)) {
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
					 "0x%04X\n", reg);
				return -EINVAL;
			}
			track->db_depth_info = radeon_get_ib_value(p, idx);
			ib[idx] &= C_028010_ARRAY_MODE;
			track->db_depth_info &= C_028010_ARRAY_MODE;
			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
				ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
				track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
			} else {
				ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
				track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
			}
		} else {
			track->db_depth_info = radeon_get_ib_value(p, idx);
		}
		track->db_dirty = true;
		break;
	case R_028004_DB_DEPTH_VIEW:
		track->db_depth_view = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case R_028000_DB_DEPTH_SIZE:
		track->db_depth_size = radeon_get_ib_value(p, idx);
		track->db_depth_size_idx = idx;
		track->db_dirty = true;
		break;
	case R_028AB0_VGT_STRMOUT_EN:
		track->vgt_strmout_en = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case R_028B20_VGT_STRMOUT_BUFFER_EN:
		track->vgt_strmout_buffer_en = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_BASE_0:
	case VGT_STRMOUT_BUFFER_BASE_1:
	case VGT_STRMOUT_BUFFER_BASE_2:
	case VGT_STRMOUT_BUFFER_BASE_3:
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
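		/*
		 * The four stream-out buffer base registers are spaced 16
		 * bytes apart, hence the /16 below to get the buffer index.
		 */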
		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->vgt_strmout_bo[tmp] = reloc->robj;
		track->vgt_strmout_bo_mc[tmp] = reloc->gpu_offset;
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_SIZE_0:
	case VGT_STRMOUT_BUFFER_SIZE_1:
	case VGT_STRMOUT_BUFFER_SIZE_2:
	case VGT_STRMOUT_BUFFER_SIZE_3:
		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
		/* size in register is DWs, convert to bytes */
		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
		track->streamout_dirty = true;
		break;
	case CP_COHER_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case R_028238_CB_TARGET_MASK:
		track->cb_target_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case R_02823C_CB_SHADER_MASK:
		track->cb_shader_mask = radeon_get_ib_value(p, idx);
		break;
	case R_028C04_PA_SC_AA_CONFIG:
		tmp = G_028C04_MSAA_NUM_SAMPLES(radeon_get_ib_value(p, idx));
		track->log_nsamples = tmp;
		track->nsamples = 1 << tmp;
		track->cb_dirty = true;
		break;
	case R_028808_CB_COLOR_CONTROL:
		tmp = G_028808_SPECIAL_OP(radeon_get_ib_value(p, idx));
		track->is_resolve = tmp == V_028808_SPECIAL_RESOLVE_BOX;
		track->cb_dirty = true;
		break;
	case R_0280A0_CB_COLOR0_INFO:
	case R_0280A4_CB_COLOR1_INFO:
	case R_0280A8_CB_COLOR2_INFO:
	case R_0280AC_CB_COLOR3_INFO:
	case R_0280B0_CB_COLOR4_INFO:
	case R_0280B4_CB_COLOR5_INFO:
	case R_0280B8_CB_COLOR6_INFO:
	case R_0280BC_CB_COLOR7_INFO:
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
		     radeon_cs_packet_next_is_pkt3_nop(p)) {
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
				return -EINVAL;
			}
			tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
			track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
				ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
				track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
			} else if (reloc->tiling_flags & RADEON_TILING_MICRO) {
				ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
				track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
			}
		} else {
			tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
			track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
		}
		track->cb_dirty = true;
		break;
	case R_028080_CB_COLOR0_VIEW:
	case R_028084_CB_COLOR1_VIEW:
	case R_028088_CB_COLOR2_VIEW:
	case R_02808C_CB_COLOR3_VIEW:
	case R_028090_CB_COLOR4_VIEW:
	case R_028094_CB_COLOR5_VIEW:
	case R_028098_CB_COLOR6_VIEW:
	case R_02809C_CB_COLOR7_VIEW:
		tmp = (reg - R_028080_CB_COLOR0_VIEW) / 4;
		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case R_028060_CB_COLOR0_SIZE:
	case R_028064_CB_COLOR1_SIZE:
	case R_028068_CB_COLOR2_SIZE:
	case R_02806C_CB_COLOR3_SIZE:
	case R_028070_CB_COLOR4_SIZE:
	case R_028074_CB_COLOR5_SIZE:
	case R_028078_CB_COLOR6_SIZE:
	case R_02807C_CB_COLOR7_SIZE:
		tmp = (reg - R_028060_CB_COLOR0_SIZE) / 4;
		track->cb_color_size[tmp] = radeon_get_ib_value(p, idx);
		track->cb_color_size_idx[tmp] = idx;
		track->cb_dirty = true;
		break;
		/* These registers were added late; there is userspace
		 * which does provide relocations for them but sets a
		 * 0 offset. In order to avoid breaking old userspace
		 * we detect this and set the address to point to the
		 * last CB_COLOR0_BASE. Note that if userspace doesn't
		 * set CB_COLOR0_BASE before these registers we will
		 * report an error. Old userspace always sets
		 * CB_COLOR0_BASE before any of this.
		 */
	case R_0280E0_CB_COLOR0_FRAG:
	case R_0280E4_CB_COLOR1_FRAG:
	case R_0280E8_CB_COLOR2_FRAG:
	case R_0280EC_CB_COLOR3_FRAG:
	case R_0280F0_CB_COLOR4_FRAG:
	case R_0280F4_CB_COLOR5_FRAG:
	case R_0280F8_CB_COLOR6_FRAG:
	case R_0280FC_CB_COLOR7_FRAG:
		tmp = (reg - R_0280E0_CB_COLOR0_FRAG) / 4;
		if (!radeon_cs_packet_next_is_pkt3_nop(p)) {
			if (!track->cb_color_base_last[tmp]) {
				dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
				return -EINVAL;
			}
			track->cb_color_frag_bo[tmp] = track->cb_color_bo[tmp];
			track->cb_color_frag_offset[tmp] = track->cb_color_bo_offset[tmp];
			ib[idx] = track->cb_color_base_last[tmp];
		} else {
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
				return -EINVAL;
			}
			track->cb_color_frag_bo[tmp] = reloc->robj;
			track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8;
			ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		}
		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
			track->cb_dirty = true;
		}
		break;
	case R_0280C0_CB_COLOR0_TILE:
	case R_0280C4_CB_COLOR1_TILE:
	case R_0280C8_CB_COLOR2_TILE:
	case R_0280CC_CB_COLOR3_TILE:
	case R_0280D0_CB_COLOR4_TILE:
	case R_0280D4_CB_COLOR5_TILE:
	case R_0280D8_CB_COLOR6_TILE:
	case R_0280DC_CB_COLOR7_TILE:
		tmp = (reg - R_0280C0_CB_COLOR0_TILE) / 4;
		if (!radeon_cs_packet_next_is_pkt3_nop(p)) {
			if (!track->cb_color_base_last[tmp]) {
				dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
				return -EINVAL;
			}
			track->cb_color_tile_bo[tmp] = track->cb_color_bo[tmp];
			track->cb_color_tile_offset[tmp] = track->cb_color_bo_offset[tmp];
			ib[idx] = track->cb_color_base_last[tmp];
		} else {
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
				return -EINVAL;
			}
			track->cb_color_tile_bo[tmp] = reloc->robj;
			track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8;
			ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		}
		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
			track->cb_dirty = true;
		}
		break;
	case R_028100_CB_COLOR0_MASK:
	case R_028104_CB_COLOR1_MASK:
	case R_028108_CB_COLOR2_MASK:
	case R_02810C_CB_COLOR3_MASK:
	case R_028110_CB_COLOR4_MASK:
	case R_028114_CB_COLOR5_MASK:
	case R_028118_CB_COLOR6_MASK:
	case R_02811C_CB_COLOR7_MASK:
		tmp = (reg - R_028100_CB_COLOR0_MASK) / 4;
		track->cb_color_mask[tmp] = radeon_get_ib_value(p, idx);
		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
			track->cb_dirty = true;
		}
		break;
	case CB_COLOR0_BASE:
	case CB_COLOR1_BASE:
	case CB_COLOR2_BASE:
	case CB_COLOR3_BASE:
	case CB_COLOR4_BASE:
	case CB_COLOR5_BASE:
	case CB_COLOR6_BASE:
	case CB_COLOR7_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - CB_COLOR0_BASE) / 4;
		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
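		/*
		 * Base registers hold a 256-byte-aligned address shifted
		 * right by 8; the reloc offset is added in that same
		 * encoding below, while the tracker keeps byte values
		 * (hence the << 8 above).
		 */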
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_base_last[tmp] = ib[idx];
		track->cb_color_bo[tmp] = reloc->robj;
		track->cb_color_bo_mc[tmp] = reloc->gpu_offset;
		track->cb_dirty = true;
		break;
	case DB_DEPTH_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_offset = radeon_get_ib_value(p, idx) << 8;
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_bo = reloc->robj;
		track->db_bo_mc = reloc->gpu_offset;
		track->db_dirty = true;
		break;
	case DB_HTILE_DATA_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->htile_offset = radeon_get_ib_value(p, idx) << 8;
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->htile_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_HTILE_SURFACE:
		track->htile_surface = radeon_get_ib_value(p, idx);
		/* force 8x8 htile width and height */
		ib[idx] |= 3;
		track->db_dirty = true;
		break;
	case SQ_PGM_START_FS:
	case SQ_PGM_START_ES:
	case SQ_PGM_START_VS:
	case SQ_PGM_START_GS:
	case SQ_PGM_START_PS:
	case SQ_ALU_CONST_CACHE_GS_0:
	case SQ_ALU_CONST_CACHE_GS_1:
	case SQ_ALU_CONST_CACHE_GS_2:
	case SQ_ALU_CONST_CACHE_GS_3:
	case SQ_ALU_CONST_CACHE_GS_4:
	case SQ_ALU_CONST_CACHE_GS_5:
	case SQ_ALU_CONST_CACHE_GS_6:
	case SQ_ALU_CONST_CACHE_GS_7:
	case SQ_ALU_CONST_CACHE_GS_8:
	case SQ_ALU_CONST_CACHE_GS_9:
	case SQ_ALU_CONST_CACHE_GS_10:
	case SQ_ALU_CONST_CACHE_GS_11:
	case SQ_ALU_CONST_CACHE_GS_12:
	case SQ_ALU_CONST_CACHE_GS_13:
	case SQ_ALU_CONST_CACHE_GS_14:
	case SQ_ALU_CONST_CACHE_GS_15:
	case SQ_ALU_CONST_CACHE_PS_0:
	case SQ_ALU_CONST_CACHE_PS_1:
	case SQ_ALU_CONST_CACHE_PS_2:
	case SQ_ALU_CONST_CACHE_PS_3:
	case SQ_ALU_CONST_CACHE_PS_4:
	case SQ_ALU_CONST_CACHE_PS_5:
	case SQ_ALU_CONST_CACHE_PS_6:
	case SQ_ALU_CONST_CACHE_PS_7:
	case SQ_ALU_CONST_CACHE_PS_8:
	case SQ_ALU_CONST_CACHE_PS_9:
	case SQ_ALU_CONST_CACHE_PS_10:
	case SQ_ALU_CONST_CACHE_PS_11:
	case SQ_ALU_CONST_CACHE_PS_12:
	case SQ_ALU_CONST_CACHE_PS_13:
	case SQ_ALU_CONST_CACHE_PS_14:
	case SQ_ALU_CONST_CACHE_PS_15:
	case SQ_ALU_CONST_CACHE_VS_0:
	case SQ_ALU_CONST_CACHE_VS_1:
	case SQ_ALU_CONST_CACHE_VS_2:
	case SQ_ALU_CONST_CACHE_VS_3:
	case SQ_ALU_CONST_CACHE_VS_4:
	case SQ_ALU_CONST_CACHE_VS_5:
	case SQ_ALU_CONST_CACHE_VS_6:
	case SQ_ALU_CONST_CACHE_VS_7:
	case SQ_ALU_CONST_CACHE_VS_8:
	case SQ_ALU_CONST_CACHE_VS_9:
	case SQ_ALU_CONST_CACHE_VS_10:
	case SQ_ALU_CONST_CACHE_VS_11:
	case SQ_ALU_CONST_CACHE_VS_12:
	case SQ_ALU_CONST_CACHE_VS_13:
	case SQ_ALU_CONST_CACHE_VS_14:
	case SQ_ALU_CONST_CACHE_VS_15:
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case SX_MEMORY_EXPORT_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			dev_warn(p->dev, "bad SET_CONFIG_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case SX_MISC:
		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
		break;
	default:
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	return 0;
}
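
/*
 * Example: r600_mip_minify(13, 1) = roundup_pow_of_two(13 >> 1) = 8 and
 * r600_mip_minify(13, 2) = 4; levels above the base are rounded up to a
 * power of two, while level 0 keeps its exact size.
 */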
unsigned r600_mip_minify(unsigned size, unsigned level)
{
	unsigned val;

	val = max(1U, size >> level);
	if (level > 0)
		val = roundup_pow_of_two(val);
	return val;
}

static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel,
			      unsigned w0, unsigned h0, unsigned d0, unsigned nsamples, unsigned format,
			      unsigned block_align, unsigned height_align, unsigned base_align,
			      unsigned *l0_size, unsigned *mipmap_size)
{
	unsigned offset, i, level;
	unsigned width, height, depth, size;
	unsigned blocksize;
	unsigned nbx, nby;
	unsigned nlevels = llevel - blevel + 1;

	*l0_size = -1;
	blocksize = r600_fmt_get_blocksize(format);

	w0 = r600_mip_minify(w0, 0);
	h0 = r600_mip_minify(h0, 0);
	d0 = r600_mip_minify(d0, 0);
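	/*
	 * Walk levels blevel..llevel, rounding each level's block counts
	 * up to the tiling alignments and accumulating the total mipmap
	 * footprint in offset; level 0's size is reported separately
	 * through *l0_size.
	 */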
	for (i = 0, offset = 0, level = blevel; i < nlevels; i++, level++) {
		width = r600_mip_minify(w0, i);
		nbx = r600_fmt_get_nblocksx(format, width);

		nbx = round_up(nbx, block_align);

		height = r600_mip_minify(h0, i);
		nby = r600_fmt_get_nblocksy(format, height);
		nby = round_up(nby, height_align);

		depth = r600_mip_minify(d0, i);

		size = nbx * nby * blocksize * nsamples;
		if (nfaces)
			size *= nfaces;
		else
			size *= depth;

		if (i == 0)
			*l0_size = size;

		if (i == 0 || i == 1)
			offset = round_up(offset, base_align);

		offset += size;
	}
	*mipmap_size = offset;
	if (llevel == 0)
		*mipmap_size = *l0_size;
	if (!blevel)
		*mipmap_size -= *l0_size;
}

/**
 * r600_check_texture_resource() - check if the texture resource is valid
 * @p: parser structure holding parsing context
 * @idx: index into the cs buffer
 * @texture: texture's bo structure
 * @mipmap: mipmap's bo structure
 *
 * This function will check that the resource has valid fields and that
 * the texture and mipmap bo objects are big enough to cover this resource.
 */
static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx,
					      struct radeon_bo *texture,
					      struct radeon_bo *mipmap,
					      u64 base_offset,
					      u64 mip_offset,
					      u32 tiling_flags)
{
	struct r600_cs_track *track = p->track;
	u32 dim, nfaces, llevel, blevel, w0, h0, d0;
	u32 word0, word1, l0_size, mipmap_size, word2, word3, word4, word5;
	u32 height_align, pitch, pitch_align, depth_align;
	u32 barray, larray;
	u64 base_align;
	struct array_mode_checker array_check;
	u32 format;
	bool is_array;

	/* on legacy kernel we don't perform advanced check */
	if (p->rdev == NULL)
		return 0;

	/* convert to bytes */
	base_offset <<= 8;
	mip_offset <<= 8;

	word0 = radeon_get_ib_value(p, idx + 0);
	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
		if (tiling_flags & RADEON_TILING_MACRO)
			word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
		else if (tiling_flags & RADEON_TILING_MICRO)
			word0 |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
	}
	word1 = radeon_get_ib_value(p, idx + 1);
	word2 = radeon_get_ib_value(p, idx + 2) << 8;
	word3 = radeon_get_ib_value(p, idx + 3) << 8;
	word4 = radeon_get_ib_value(p, idx + 4);
	word5 = radeon_get_ib_value(p, idx + 5);
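	/*
	 * word2 and word3 are the base and mip offsets from the resource
	 * words, already converted from the 256-byte-aligned encoding to
	 * bytes by the << 8 above; they are added to the computed sizes
	 * in the bo bounds checks below.
	 */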
	dim = G_038000_DIM(word0);
	w0 = G_038000_TEX_WIDTH(word0) + 1;
	pitch = (G_038000_PITCH(word0) + 1) * 8;
	h0 = G_038004_TEX_HEIGHT(word1) + 1;
	d0 = G_038004_TEX_DEPTH(word1);
	format = G_038004_DATA_FORMAT(word1);
	blevel = G_038010_BASE_LEVEL(word4);
	llevel = G_038014_LAST_LEVEL(word5);
	/* pitch in texels */
	array_check.array_mode = G_038000_TILE_MODE(word0);
	array_check.group_size = track->group_size;
	array_check.nbanks = track->nbanks;
	array_check.npipes = track->npipes;
	array_check.nsamples = 1;
	array_check.blocksize = r600_fmt_get_blocksize(format);
	nfaces = 1;
	is_array = false;
	switch (dim) {
	case V_038000_SQ_TEX_DIM_1D:
	case V_038000_SQ_TEX_DIM_2D:
	case V_038000_SQ_TEX_DIM_3D:
		break;
	case V_038000_SQ_TEX_DIM_CUBEMAP:
		if (p->family >= CHIP_RV770)
			nfaces = 8;
		else
			nfaces = 6;
		break;
	case V_038000_SQ_TEX_DIM_1D_ARRAY:
	case V_038000_SQ_TEX_DIM_2D_ARRAY:
		is_array = true;
		break;
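	/*
	 * For the MSAA dims the LAST_LEVEL field is consumed as a log2
	 * sample count here (nsamples = 1 << llevel) and the texture is
	 * then treated as single-level.
	 */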
1538
	case V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA:
1538
	case V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA:
1539
		is_array = true;
1539
		is_array = true;
1540
		/* fall through */
1540
		/* fall through */
1541
	case V_038000_SQ_TEX_DIM_2D_MSAA:
1541
	case V_038000_SQ_TEX_DIM_2D_MSAA:
1542
		array_check.nsamples = 1 << llevel;
1542
		array_check.nsamples = 1 << llevel;
1543
		llevel = 0;
1543
		llevel = 0;
1544
		break;
1544
		break;
1545
	default:
1545
	default:
1546
		dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0));
1546
		dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0));
1547
		return -EINVAL;
1547
		return -EINVAL;
1548
	}
1548
	}
1549
	if (!r600_fmt_is_valid_texture(format, p->family)) {
1549
	if (!r600_fmt_is_valid_texture(format, p->family)) {
1550
		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
1550
		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
1551
			 __func__, __LINE__, format);
1551
			 __func__, __LINE__, format);
1552
		return -EINVAL;
1552
		return -EINVAL;
1553
	}
1553
	}
1554
 
1554
 
1555
	if (r600_get_array_mode_alignment(&array_check,
1555
	if (r600_get_array_mode_alignment(&array_check,
1556
					  &pitch_align, &height_align, &depth_align, &base_align)) {
1556
					  &pitch_align, &height_align, &depth_align, &base_align)) {
1557
		dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n",
1557
		dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n",
1558
			 __func__, __LINE__, G_038000_TILE_MODE(word0));
1558
			 __func__, __LINE__, G_038000_TILE_MODE(word0));
		return -EINVAL;
	}

	/* XXX check height as well... */

	if (!IS_ALIGNED(pitch, pitch_align)) {
		dev_warn(p->dev, "%s:%d tex pitch (%d, 0x%x, %d) invalid\n",
			 __func__, __LINE__, pitch, pitch_align, G_038000_TILE_MODE(word0));
		return -EINVAL;
	}
	if (!IS_ALIGNED(base_offset, base_align)) {
		dev_warn(p->dev, "%s:%d tex base offset (0x%llx, 0x%llx, %d) invalid\n",
			 __func__, __LINE__, base_offset, base_align, G_038000_TILE_MODE(word0));
		return -EINVAL;
	}
	if (!IS_ALIGNED(mip_offset, base_align)) {
		dev_warn(p->dev, "%s:%d tex mip offset (0x%llx, 0x%llx, %d) invalid\n",
			 __func__, __LINE__, mip_offset, base_align, G_038000_TILE_MODE(word0));
		return -EINVAL;
	}

	if (blevel > llevel) {
		dev_warn(p->dev, "texture blevel %d > llevel %d\n",
			 blevel, llevel);
	}
	if (is_array) {
		barray = G_038014_BASE_ARRAY(word5);
		larray = G_038014_LAST_ARRAY(word5);

		nfaces = larray - barray + 1;
	}
	r600_texture_size(nfaces, blevel, llevel, w0, h0, d0, array_check.nsamples, format,
			  pitch_align, height_align, base_align,
			  &l0_size, &mipmap_size);
	/* using get ib will give us the offset into the texture bo */
	if ((l0_size + word2) > radeon_bo_size(texture)) {
		dev_warn(p->dev, "texture bo too small ((%d %d) (%d %d) %d %d %d -> %d have %ld)\n",
			 w0, h0, pitch_align, height_align,
			 array_check.array_mode, format, word2,
			 l0_size, radeon_bo_size(texture));
		dev_warn(p->dev, "alignments %d %d %d %lld\n", pitch, pitch_align, height_align, base_align);
		return -EINVAL;
	}
	/* using get ib will give us the offset into the mipmap bo */
	if ((mipmap_size + word3) > radeon_bo_size(mipmap)) {
		/*dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n",
		  w0, h0, format, blevel, nlevels, word3, mipmap_size, radeon_bo_size(texture));*/
	}
	return 0;
}
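
/* Note on the lookup below: r600_reg_safe_bm packs one bit per dword
 * register, so reg >> 7 selects a 32-bit bitmap word (32 registers, i.e.
 * 128 bytes of register space, per word) and (reg >> 2) & 31 selects the
 * bit within it. A clear bit means the register may be written directly
 * from userspace; a set bit makes this check fail.
 */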
static bool r600_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
	u32 m, i;

	i = (reg >> 7);
	if (i >= ARRAY_SIZE(r600_reg_safe_bm)) {
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return false;
	}
	m = 1 << ((reg >> 2) & 31);
	if (!(r600_reg_safe_bm[i] & m))
		return true;
	dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
	return false;
}
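
/* Validate a single PM4 type-3 packet: check the payload count for the
 * opcode, bounds-check any buffer offsets against the backing BO, and
 * patch relocated GPU addresses back into the IB before submission.
 */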
static int r600_packet3_check(struct radeon_cs_parser *p,
				struct radeon_cs_packet *pkt)
{
	struct radeon_bo_list *reloc;
	struct r600_cs_track *track;
	volatile u32 *ib;
	unsigned idx;
	unsigned i;
	unsigned start_reg, end_reg, reg;
	int r;
	u32 idx_value;

	track = (struct r600_cs_track *)p->track;
	ib = p->ib.ptr;
	idx = pkt->idx + 1;
	idx_value = radeon_get_ib_value(p, idx);

	switch (pkt->opcode) {
	case PACKET3_SET_PREDICATION:
	{
		int pred_op;
		int tmp;
		uint64_t offset;

		if (pkt->count != 1) {
			DRM_ERROR("bad SET PREDICATION\n");
			return -EINVAL;
		}

		tmp = radeon_get_ib_value(p, idx + 1);
		pred_op = (tmp >> 16) & 0x7;

		/* for the clear predicate operation */
		if (pred_op == 0)
			return 0;

		if (pred_op > 2) {
			DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
			return -EINVAL;
		}

		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			DRM_ERROR("bad SET PREDICATION\n");
			return -EINVAL;
		}

		offset = reloc->gpu_offset +
		         (idx_value & 0xfffffff0) +
		         ((u64)(tmp & 0xff) << 32);

		ib[idx + 0] = offset;
		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
	}
	break;

	case PACKET3_START_3D_CMDBUF:
		if (p->family >= CHIP_RV770 || pkt->count) {
			DRM_ERROR("bad START_3D\n");
			return -EINVAL;
		}
		break;
	case PACKET3_CONTEXT_CONTROL:
		if (pkt->count != 1) {
			DRM_ERROR("bad CONTEXT_CONTROL\n");
			return -EINVAL;
		}
		break;
	case PACKET3_INDEX_TYPE:
	case PACKET3_NUM_INSTANCES:
		if (pkt->count) {
			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n");
			return -EINVAL;
		}
		break;
	case PACKET3_DRAW_INDEX:
	{
		uint64_t offset;
		if (pkt->count != 3) {
			DRM_ERROR("bad DRAW_INDEX\n");
			return -EINVAL;
		}
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			DRM_ERROR("bad DRAW_INDEX\n");
			return -EINVAL;
		}

		offset = reloc->gpu_offset +
		         idx_value +
		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

		ib[idx+0] = offset;
		ib[idx+1] = upper_32_bits(offset) & 0xff;

		r = r600_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
	}
	case PACKET3_DRAW_INDEX_AUTO:
		if (pkt->count != 1) {
			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
			return -EINVAL;
		}
		r = r600_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
			return r;
		}
		break;
	case PACKET3_DRAW_INDEX_IMMD_BE:
	case PACKET3_DRAW_INDEX_IMMD:
		if (pkt->count < 2) {
			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
			return -EINVAL;
		}
		r = r600_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
	case PACKET3_WAIT_REG_MEM:
		if (pkt->count != 5) {
			DRM_ERROR("bad WAIT_REG_MEM\n");
			return -EINVAL;
		}
		/* bit 4 is reg (0) or mem (1) */
		if (idx_value & 0x10) {
			uint64_t offset;

			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad WAIT_REG_MEM\n");
				return -EINVAL;
			}

			offset = reloc->gpu_offset +
			         (radeon_get_ib_value(p, idx+1) & 0xfffffff0) +
			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffff0);
			ib[idx+2] = upper_32_bits(offset) & 0xff;
		} else if (idx_value & 0x100) {
			DRM_ERROR("cannot use PFP on REG wait\n");
			return -EINVAL;
		}
		break;
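	/* For CP_DMA, both the 40-bit source and destination addresses are
	 * rebased against their relocated BOs, and the transfer size from
	 * the command dword is bounds-checked against each BO.
	 */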
	case PACKET3_CP_DMA:
	{
		u32 command, size;
		u64 offset, tmp;
		if (pkt->count != 4) {
			DRM_ERROR("bad CP DMA\n");
			return -EINVAL;
		}
		command = radeon_get_ib_value(p, idx+4);
		size = command & 0x1fffff;
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			DRM_ERROR("CP DMA SAS not supported\n");
			return -EINVAL;
		} else {
			if (command & PACKET3_CP_DMA_CMD_SAIC) {
				DRM_ERROR("CP DMA SAIC only supported for registers\n");
				return -EINVAL;
			}
			/* src address space is memory */
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad CP DMA SRC\n");
				return -EINVAL;
			}

			tmp = radeon_get_ib_value(p, idx) +
				((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

			offset = reloc->gpu_offset + tmp;

			if ((tmp + size) > radeon_bo_size(reloc->robj)) {
				dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
					 tmp + size, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}

			ib[idx] = offset;
			ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			DRM_ERROR("CP DMA DAS not supported\n");
			return -EINVAL;
		} else {
			/* dst address space is memory */
			if (command & PACKET3_CP_DMA_CMD_DAIC) {
				DRM_ERROR("CP DMA DAIC only supported for registers\n");
				return -EINVAL;
			}
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad CP DMA DST\n");
				return -EINVAL;
			}

			tmp = radeon_get_ib_value(p, idx+2) +
				((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);

			offset = reloc->gpu_offset + tmp;

			if ((tmp + size) > radeon_bo_size(reloc->robj)) {
				dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
					 tmp + size, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}

			ib[idx+2] = offset;
			ib[idx+3] = upper_32_bits(offset) & 0xff;
		}
		break;
	}
	case PACKET3_SURFACE_SYNC:
		if (pkt->count != 3) {
			DRM_ERROR("bad SURFACE_SYNC\n");
			return -EINVAL;
		}
		/* 0xffffffff/0x0 is flush all cache flag */
		if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
		    radeon_get_ib_value(p, idx + 2) != 0) {
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad SURFACE_SYNC\n");
				return -EINVAL;
			}
			ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		}
		break;
	case PACKET3_EVENT_WRITE:
		if (pkt->count != 2 && pkt->count != 0) {
			DRM_ERROR("bad EVENT_WRITE\n");
			return -EINVAL;
		}
		if (pkt->count) {
			uint64_t offset;

			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad EVENT_WRITE\n");
				return -EINVAL;
			}
			offset = reloc->gpu_offset +
			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

			ib[idx+1] = offset & 0xfffffff8;
			ib[idx+2] = upper_32_bits(offset) & 0xff;
		}
		break;
	case PACKET3_EVENT_WRITE_EOP:
	{
		uint64_t offset;

		if (pkt->count != 4) {
			DRM_ERROR("bad EVENT_WRITE_EOP\n");
			return -EINVAL;
		}
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			DRM_ERROR("bad EVENT_WRITE\n");
			return -EINVAL;
		}

		offset = reloc->gpu_offset +
		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

		ib[idx+1] = offset & 0xfffffffc;
		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
		break;
	}
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_OFFSET;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			r = r600_cs_check_reg(p, reg, idx+1+i);
			if (r)
				return r;
		}
		break;
	case PACKET3_SET_CONTEXT_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_OFFSET;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) ||
		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			r = r600_cs_check_reg(p, reg, idx+1+i);
			if (r)
				return r;
		}
		break;
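	/* Each SET_RESOURCE descriptor is 7 dwords; texture resources
	 * consume two relocs (base and mip chain), vertex buffers one.
	 */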
	case PACKET3_SET_RESOURCE:
		if (pkt->count % 7) {
			DRM_ERROR("bad SET_RESOURCE\n");
			return -EINVAL;
		}
		start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_OFFSET;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) ||
		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
			DRM_ERROR("bad SET_RESOURCE\n");
			return -EINVAL;
		}
		for (i = 0; i < (pkt->count / 7); i++) {
			struct radeon_bo *texture, *mipmap;
			u32 size, offset, base_offset, mip_offset;

			switch (G__SQ_VTX_CONSTANT_TYPE(radeon_get_ib_value(p, idx+(i*7)+6+1))) {
			case SQ_TEX_VTX_VALID_TEXTURE:
				/* tex base */
				r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
				if (r) {
					DRM_ERROR("bad SET_RESOURCE\n");
					return -EINVAL;
				}
				base_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
					if (reloc->tiling_flags & RADEON_TILING_MACRO)
						ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
					else if (reloc->tiling_flags & RADEON_TILING_MICRO)
						ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
				}
				texture = reloc->robj;
				/* tex mip base */
				r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
				if (r) {
					DRM_ERROR("bad SET_RESOURCE\n");
					return -EINVAL;
				}
				mip_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
				mipmap = reloc->robj;
				r = r600_check_texture_resource(p,  idx+(i*7)+1,
								texture, mipmap,
								base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2),
								mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3),
								reloc->tiling_flags);
				if (r)
					return r;
				ib[idx+1+(i*7)+2] += base_offset;
				ib[idx+1+(i*7)+3] += mip_offset;
				break;
			case SQ_TEX_VTX_VALID_BUFFER:
			{
				uint64_t offset64;
				/* vtx base */
				r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
				if (r) {
					DRM_ERROR("bad SET_RESOURCE\n");
					return -EINVAL;
				}
				offset = radeon_get_ib_value(p, idx+1+(i*7)+0);
				size = radeon_get_ib_value(p, idx+1+(i*7)+1) + 1;
				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
					/* force size to size of the buffer */
					dev_warn(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n",
						 size + offset, radeon_bo_size(reloc->robj));
					ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset;
				}

				offset64 = reloc->gpu_offset + offset;
				ib[idx+1+(i*8)+0] = offset64;
				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
						    (upper_32_bits(offset64) & 0xff);
				break;
			}
			case SQ_TEX_VTX_INVALID_TEXTURE:
			case SQ_TEX_VTX_INVALID_BUFFER:
			default:
				DRM_ERROR("bad SET_RESOURCE\n");
				return -EINVAL;
			}
		}
		break;
	case PACKET3_SET_ALU_CONST:
		if (track->sq_config & DX9_CONSTS) {
			start_reg = (idx_value << 2) + PACKET3_SET_ALU_CONST_OFFSET;
			end_reg = 4 * pkt->count + start_reg - 4;
			if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) ||
			    (start_reg >= PACKET3_SET_ALU_CONST_END) ||
			    (end_reg >= PACKET3_SET_ALU_CONST_END)) {
				DRM_ERROR("bad SET_ALU_CONST\n");
				return -EINVAL;
			}
		}
		break;
	case PACKET3_SET_BOOL_CONST:
		start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_OFFSET;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) ||
		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
			DRM_ERROR("bad SET_BOOL_CONST\n");
			return -EINVAL;
		}
		break;
	case PACKET3_SET_LOOP_CONST:
		start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_OFFSET;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) ||
		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
			DRM_ERROR("bad SET_LOOP_CONST\n");
			return -EINVAL;
		}
		break;
	case PACKET3_SET_CTL_CONST:
		start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_OFFSET;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) ||
		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
			DRM_ERROR("bad SET_CTL_CONST\n");
			return -EINVAL;
		}
		break;
	case PACKET3_SET_SAMPLER:
		if (pkt->count % 3) {
			DRM_ERROR("bad SET_SAMPLER\n");
			return -EINVAL;
		}
		start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_OFFSET;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) ||
		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
			DRM_ERROR("bad SET_SAMPLER\n");
			return -EINVAL;
		}
		break;
	case PACKET3_STRMOUT_BASE_UPDATE:
		/* RS780 and RS880 also need this */
		if (p->family < CHIP_RS780) {
			DRM_ERROR("STRMOUT_BASE_UPDATE only supported on 7xx\n");
			return -EINVAL;
		}
		if (pkt->count != 1) {
			DRM_ERROR("bad STRMOUT_BASE_UPDATE packet count\n");
			return -EINVAL;
		}
		if (idx_value > 3) {
			DRM_ERROR("bad STRMOUT_BASE_UPDATE index\n");
			return -EINVAL;
		}
		{
			u64 offset;

			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad STRMOUT_BASE_UPDATE reloc\n");
				return -EINVAL;
			}

			if (reloc->robj != track->vgt_strmout_bo[idx_value]) {
				DRM_ERROR("bad STRMOUT_BASE_UPDATE, bo does not match\n");
				return -EINVAL;
			}

			offset = radeon_get_ib_value(p, idx+1) << 8;
			if (offset != track->vgt_strmout_bo_offset[idx_value]) {
				DRM_ERROR("bad STRMOUT_BASE_UPDATE, bo offset does not match: 0x%llx, 0x%x\n",
					  offset, track->vgt_strmout_bo_offset[idx_value]);
				return -EINVAL;
			}

			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad STRMOUT_BASE_UPDATE bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			ib[idx+1] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		}
		break;
	case PACKET3_SURFACE_BASE_UPDATE:
		if (p->family >= CHIP_RV770 || p->family == CHIP_R600) {
			DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
			return -EINVAL;
		}
		if (pkt->count) {
			DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
			return -EINVAL;
		}
		break;
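	/* For STRMOUT_BUFFER_UPDATE, bit 0 of the control dword selects a
	 * memory destination and bits 2:1 == 2 a memory source; only those
	 * operands carry relocs that need patching.
	 */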
	case PACKET3_STRMOUT_BUFFER_UPDATE:
		if (pkt->count != 4) {
			DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
			return -EINVAL;
		}
		/* Updating memory at DST_ADDRESS. */
		if (idx_value & 0x1) {
			u64 offset;
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
				return -EINVAL;
			}
			offset = radeon_get_ib_value(p, idx+1);
			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			offset += reloc->gpu_offset;
			ib[idx+1] = offset;
			ib[idx+2] = upper_32_bits(offset) & 0xff;
		}
		/* Reading data from SRC_ADDRESS. */
		if (((idx_value >> 1) & 0x3) == 2) {
			u64 offset;
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
				return -EINVAL;
			}
			offset = radeon_get_ib_value(p, idx+3);
			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			offset += reloc->gpu_offset;
			ib[idx+3] = offset;
			ib[idx+4] = upper_32_bits(offset) & 0xff;
		}
		break;
	case PACKET3_MEM_WRITE:
	{
		u64 offset;

		if (pkt->count != 3) {
			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
			return -EINVAL;
		}
		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
		if (r) {
			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
			return -EINVAL;
		}
		offset = radeon_get_ib_value(p, idx+0);
		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
		if (offset & 0x7) {
			DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
			return -EINVAL;
		}
		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
			DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
				  offset + 8, radeon_bo_size(reloc->robj));
			return -EINVAL;
		}
		offset += reloc->gpu_offset;
		ib[idx+0] = offset;
		ib[idx+1] = upper_32_bits(offset) & 0xff;
		break;
	}
	case PACKET3_COPY_DW:
		if (pkt->count != 4) {
			DRM_ERROR("bad COPY_DW (invalid count)\n");
			return -EINVAL;
		}
		if (idx_value & 0x1) {
			u64 offset;
			/* SRC is memory. */
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad COPY_DW (missing src reloc)\n");
				return -EINVAL;
			}
			offset = radeon_get_ib_value(p, idx+1);
			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			offset += reloc->gpu_offset;
			ib[idx+1] = offset;
			ib[idx+2] = upper_32_bits(offset) & 0xff;
		} else {
			/* SRC is a reg. */
			reg = radeon_get_ib_value(p, idx+1) << 2;
			if (!r600_is_safe_reg(p, reg, idx+1))
				return -EINVAL;
		}
		if (idx_value & 0x2) {
			u64 offset;
			/* DST is memory. */
			r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
			if (r) {
				DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
				return -EINVAL;
			}
			offset = radeon_get_ib_value(p, idx+3);
			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			offset += reloc->gpu_offset;
			ib[idx+3] = offset;
			ib[idx+4] = upper_32_bits(offset) & 0xff;
		} else {
			/* DST is a reg. */
			reg = radeon_get_ib_value(p, idx+3) << 2;
			if (!r600_is_safe_reg(p, reg, idx+3))
				return -EINVAL;
		}
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
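
/* Top-level entry point for 3D command streams: walks the IB one packet
 * at a time (type-0 register writes, type-2 padding, type-3 commands)
 * until the whole chunk has been checked.
 */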
int r600_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r600_cs_track *track;
	int r;

	if (p->track == NULL) {
		/* initialize tracker, we are in kms */
		track = kzalloc(sizeof(*track), GFP_KERNEL);
		if (track == NULL)
			return -ENOMEM;
		r600_cs_track_init(track);
		if (p->rdev->family < CHIP_RV770) {
			track->npipes = p->rdev->config.r600.tiling_npipes;
			track->nbanks = p->rdev->config.r600.tiling_nbanks;
			track->group_size = p->rdev->config.r600.tiling_group_size;
		} else if (p->rdev->family <= CHIP_RV740) {
			track->npipes = p->rdev->config.rv770.tiling_npipes;
			track->nbanks = p->rdev->config.rv770.tiling_nbanks;
			track->group_size = p->rdev->config.rv770.tiling_group_size;
		}
		p->track = track;
	}
	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			kfree(p->track);
			p->track = NULL;
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			r = r600_cs_parse_packet0(p, &pkt);
			break;
		case RADEON_PACKET_TYPE2:
			break;
		case RADEON_PACKET_TYPE3:
			r = r600_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
			kfree(p->track);
			p->track = NULL;
			return -EINVAL;
		}
		if (r) {
			kfree(p->track);
			p->track = NULL;
			return r;
		}
	} while (p->idx < p->chunk_ib->length_dw);
#if 0
	for (r = 0; r < p->ib.length_dw; r++) {
		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
		mdelay(1);
	}
#endif
	kfree(p->track);
	p->track = NULL;
	return 0;
}

#ifdef CONFIG_DRM_RADEON_UMS

/**
 * cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 *
 * If error is set then unvalidate buffer, otherwise just free memory
 * used by parsing context.
 **/
static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
{
	unsigned i;

	kfree(parser->relocs);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
}

static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
{
	if (p->chunk_relocs == NULL) {
		return 0;
	}
	p->relocs = kzalloc(sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}
	return 0;
}
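
/* Legacy (UMS) entry point: the IB is copied from userspace and run
 * through the same checker, but with parser.rdev == NULL and the tiling
 * configuration supplied by the caller via the drm_device.
 */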
int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
			unsigned family, u32 *ib, int *l)
{
	struct radeon_cs_parser parser;
	struct radeon_cs_chunk *ib_chunk;
	struct r600_cs_track *track;
	int r;

	/* initialize tracker */
	track = kzalloc(sizeof(*track), GFP_KERNEL);
	if (track == NULL)
		return -ENOMEM;
	r600_cs_track_init(track);
	r600_cs_legacy_get_tiling_conf(dev, &track->npipes, &track->nbanks, &track->group_size);
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.dev = &dev->pdev->dev;
	parser.rdev = NULL;
	parser.family = family;
	parser.track = track;
	parser.ib.ptr = ib;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		r600_cs_parser_fini(&parser, r);
		return r;
	}
	r = r600_cs_parser_relocs_legacy(&parser);
	if (r) {
		DRM_ERROR("Failed to parse relocation !\n");
		r600_cs_parser_fini(&parser, r);
		return r;
	}
	/* Copy the packet into the IB, the parser will read from the
	 * input memory (cached) and write to the IB (which can be
	 * uncached). */
	ib_chunk = parser.chunk_ib;
	parser.ib.length_dw = ib_chunk->length_dw;
	*l = parser.ib.length_dw;
	if (copy_from_user(ib, ib_chunk->user_ptr, ib_chunk->length_dw * 4)) {
		r = -EFAULT;
		r600_cs_parser_fini(&parser, r);
		return r;
	}
	r = r600_cs_parse(&parser);
	if (r) {
		DRM_ERROR("Invalid command stream !\n");
		r600_cs_parser_fini(&parser, r);
		return r;
	}
	r600_cs_parser_fini(&parser, r);
	return r;
}

void r600_cs_legacy_init(void)
{
	r600_nomm = 1;
}

#endif

/*
 *  DMA
 */
/**
 * r600_dma_cs_next_reloc() - parse next reloc
 * @p:		parser structure holding parsing context.
 * @cs_reloc:		reloc information
 *
 * Return the next reloc, do bo validation and compute
 * GPU offset using the provided start.
 **/
int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
			   struct radeon_bo_list **cs_reloc)
{
	struct radeon_cs_chunk *relocs_chunk;
	unsigned idx;

	*cs_reloc = NULL;
	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	relocs_chunk = p->chunk_relocs;
	idx = p->dma_reloc_idx;
	if (idx >= p->nrelocs) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, p->nrelocs);
		return -EINVAL;
	}
	*cs_reloc = &p->relocs[idx];
	p->dma_reloc_idx++;
	return 0;
}
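
/* A DMA IB has no PM4 framing; every packet starts with one header
 * dword, decoded by the macros below: bits 31:28 hold the command,
 * bit 23 the tiled flag and bits 15:0 the payload dword count.
 */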
#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)

/**
 * r600_dma_cs_parse() - parse the DMA IB
 * @p:		parser structure holding parsing context.
 *
 * Parses the DMA IB from the CS ioctl and updates
 * the GPU addresses based on the reloc information and
 * checks for errors. (R6xx-R7xx)
 * Returns 0 for success and an error on failure.
 **/
int r600_dma_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_bo_list *src_reloc, *dst_reloc;
	u32 header, cmd, count, tiled;
	volatile u32 *ib = p->ib.ptr;
	u32 idx, idx_value;
	u64 src_offset, dst_offset;
	int r;

	do {
		if (p->idx >= ib_chunk->length_dw) {
			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
				  p->idx, ib_chunk->length_dw);
			return -EINVAL;
		}
		idx = p->idx;
		header = radeon_get_ib_value(p, idx);
		cmd = GET_DMA_CMD(header);
		count = GET_DMA_COUNT(header);
		tiled = GET_DMA_T(header);

		switch (cmd) {
		case DMA_PACKET_WRITE:
			r = r600_dma_cs_next_reloc(p, &dst_reloc);
			if (r) {
				DRM_ERROR("bad DMA_PACKET_WRITE\n");
				return -EINVAL;
			}
			if (tiled) {
				dst_offset = radeon_get_ib_value(p, idx+1);
				dst_offset <<= 8;

				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
				p->idx += count + 5;
			} else {
				dst_offset = radeon_get_ib_value(p, idx+1);
				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;

				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
				ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
				p->idx += count + 3;
			}
			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
				return -EINVAL;
			}
			break;
2522
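		/*
		 * Layout of DMA_PACKET_WRITE as this parser consumes it
		 * (a summary of the code above, not of any hardware doc):
		 *
		 *   linear: header, addr_lo, addr_hi (bits 7:0), then
		 *           'count' dwords of inline data -> count + 3 dwords
		 *   tiled:  header, addr in 256-byte units (hence <<= 8),
		 *           three further dwords not inspected here, then
		 *           the data                      -> count + 5 dwords
		 *
		 * In both forms the reloc's gpu_offset is folded into the
		 * address dwords in place, and the write of count * 4 bytes
		 * must fit inside the bound buffer object.
		 */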
		case DMA_PACKET_COPY:
			r = r600_dma_cs_next_reloc(p, &src_reloc);
			if (r) {
				DRM_ERROR("bad DMA_PACKET_COPY\n");
				return -EINVAL;
			}
			r = r600_dma_cs_next_reloc(p, &dst_reloc);
			if (r) {
				DRM_ERROR("bad DMA_PACKET_COPY\n");
				return -EINVAL;
			}
			if (tiled) {
				idx_value = radeon_get_ib_value(p, idx + 2);
				/* detile bit */
				if (idx_value & (1 << 31)) {
					/* tiled src, linear dst */
					src_offset = radeon_get_ib_value(p, idx+1);
					src_offset <<= 8;
					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);

					dst_offset = radeon_get_ib_value(p, idx+5);
					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
					ib[idx+5] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
					ib[idx+6] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
				} else {
					/* linear src, tiled dst */
					src_offset = radeon_get_ib_value(p, idx+5);
					src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
					ib[idx+5] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
					ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;

					dst_offset = radeon_get_ib_value(p, idx+1);
					dst_offset <<= 8;
					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
				}
				p->idx += 7;
			} else {
				if (p->family >= CHIP_RV770) {
					src_offset = radeon_get_ib_value(p, idx+2);
					src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
					dst_offset = radeon_get_ib_value(p, idx+1);
					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;

					ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
					ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
					ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
					ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
					p->idx += 5;
				} else {
					src_offset = radeon_get_ib_value(p, idx+2);
					src_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
					dst_offset = radeon_get_ib_value(p, idx+1);
					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16;

					ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
					ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
					ib[idx+3] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
					ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) & 0xff) << 16;
					p->idx += 4;
				}
			}
			if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
				dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
					 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
				return -EINVAL;
			}
			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
				dev_warn(p->dev, "DMA copy dst buffer too small (%llu %lu)\n",
					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
				return -EINVAL;
			}
			break;
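		/*
		 * The three DMA_PACKET_COPY forms handled above, as this
		 * parser consumes them (dword indices relative to the header):
		 *
		 *   tiled (7 dwords): the detile bit (bit 31 of dword 2)
		 *     selects which side is tiled; the tiled address sits in
		 *     dword 1 in 256-byte units, the linear address in
		 *     dwords 5/6 (lo/hi).
		 *   linear, RV770+ (5 dwords): dst_lo, src_lo, dst_hi,
		 *     src_hi in dwords 1-4.
		 *   linear, R600 (4 dwords): dst_lo, src_lo in dwords 1-2;
		 *     dword 3 packs src_hi in bits 7:0 and dst_hi in
		 *     bits 23:16.
		 *
		 * Both offsets are then bounds-checked against their BOs.
		 */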
		case DMA_PACKET_CONSTANT_FILL:
			if (p->family < CHIP_RV770) {
				DRM_ERROR("Constant Fill is 7xx only !\n");
				return -EINVAL;
			}
			r = r600_dma_cs_next_reloc(p, &dst_reloc);
			if (r) {
				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
				return -EINVAL;
			}
			dst_offset = radeon_get_ib_value(p, idx+1);
			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
				return -EINVAL;
			}
			ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
			ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
			p->idx += 4;
			break;
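		/*
		 * DMA_PACKET_CONSTANT_FILL packs the upper 8 address bits
		 * into bits 23:16 of dword 3 (rather than bits 7:0 as the
		 * other packets do), which is why the fixup above shifts
		 * upper_32_bits() left by 16 before masking with 0x00ff0000.
		 */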
		case DMA_PACKET_NOP:
			p->idx += 1;
			break;
		default:
			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
			return -EINVAL;
		}
	} while (p->idx < p->chunk_ib->length_dw);
#if 0
	/* Debug dump of the parsed IB; p->ib is an embedded struct, so use
	 * '.' rather than '->' as the rest of this function does. */
	for (r = 0; r < p->ib.length_dw; r++) {
		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
		mdelay(1);
	}
#endif
	return 0;
}
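
/*
 * A minimal sketch of a packet this parser accepts: a linear
 * DMA_PACKET_WRITE of two dwords, as userspace might place it in an IB
 * chunk before relocation (the address dwords are illustrative reloc
 * placeholders, not real GPU addresses):
 *
 *   ib[0] = (DMA_PACKET_WRITE << 28) | 2;  // linear, count = 2
 *   ib[1] = 0x00000000;                    // dst_lo, patched via reloc
 *   ib[2] = 0x00000000;                    // dst_hi bits 7:0, patched
 *   ib[3] = 0xdeadbeef;                    // data dword 0
 *   ib[4] = 0xcafebabe;                    // data dword 1
 *
 * r600_dma_cs_parse() folds dst_reloc->gpu_offset into ib[1]/ib[2],
 * advances p->idx by count + 3 = 5, and rejects the packet if the
 * 2 * 4 byte write would overrun the destination buffer object.
 */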