Subversion Repositories Kolibri OS

Rev

Rev 5179 | Rev 6104 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5078 serge 1
/*
2
 * Copyright 2012 Advanced Micro Devices, Inc.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
 * OTHER DEALINGS IN THE SOFTWARE.
21
 *
22
 * Authors: Alex Deucher
23
 */
24
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"
5078 serge 36
 
37
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
38
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
39
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
40
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
41
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
42
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
43
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
44
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
45
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
46
 
47
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
48
MODULE_FIRMWARE("radeon/bonaire_me.bin");
49
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
50
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
51
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
52
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
53
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
54
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
55
 
56
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
57
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
58
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
59
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
60
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
61
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
62
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
63
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
64
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
65
 
66
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
67
MODULE_FIRMWARE("radeon/hawaii_me.bin");
68
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
69
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
70
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
71
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
72
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
73
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
74
 
75
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
76
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
77
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
78
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
79
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
80
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
81
 
82
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
83
MODULE_FIRMWARE("radeon/kaveri_me.bin");
84
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
85
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
86
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
87
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
88
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
89
 
90
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
91
MODULE_FIRMWARE("radeon/KABINI_me.bin");
92
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
93
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
94
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
95
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
96
 
97
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
98
MODULE_FIRMWARE("radeon/kabini_me.bin");
99
MODULE_FIRMWARE("radeon/kabini_ce.bin");
100
MODULE_FIRMWARE("radeon/kabini_mec.bin");
101
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
102
MODULE_FIRMWARE("radeon/kabini_sdma.bin");
103
 
104
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
105
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
106
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
107
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
108
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
109
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
110
 
111
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
112
MODULE_FIRMWARE("radeon/mullins_me.bin");
113
MODULE_FIRMWARE("radeon/mullins_ce.bin");
114
MODULE_FIRMWARE("radeon/mullins_mec.bin");
115
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
116
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
117
 
118
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
119
extern void r600_ih_ring_fini(struct radeon_device *rdev);
120
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
121
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
122
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
123
extern void sumo_rlc_fini(struct radeon_device *rdev);
124
extern int sumo_rlc_init(struct radeon_device *rdev);
125
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
126
extern void si_rlc_reset(struct radeon_device *rdev);
127
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
128
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
129
extern int cik_sdma_resume(struct radeon_device *rdev);
130
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
131
extern void cik_sdma_fini(struct radeon_device *rdev);
132
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
133
static void cik_rlc_stop(struct radeon_device *rdev);
134
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135
static void cik_program_aspm(struct radeon_device *rdev);
136
static void cik_init_pg(struct radeon_device *rdev);
137
static void cik_init_cg(struct radeon_device *rdev);
138
static void cik_fini_pg(struct radeon_device *rdev);
139
static void cik_fini_cg(struct radeon_device *rdev);
140
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141
					  bool enable);
142
 
143
/* get temperature in millidegrees */
144
int ci_get_temp(struct radeon_device *rdev)
145
{
146
	u32 temp;
147
	int actual_temp = 0;
148
 
149
	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
150
		CTF_TEMP_SHIFT;
151
 
152
	if (temp & 0x200)
153
		actual_temp = 255;
154
	else
155
		actual_temp = temp & 0x1ff;
156
 
157
	actual_temp = actual_temp * 1000;
158
 
159
	return actual_temp;
160
}
161
 
162
/* get temperature in millidegrees */
163
int kv_get_temp(struct radeon_device *rdev)
164
{
165
	u32 temp;
166
	int actual_temp = 0;
167
 
168
	temp = RREG32_SMC(0xC0300E0C);
169
 
170
	if (temp)
171
		actual_temp = (temp / 8) - 49;
172
	else
173
		actual_temp = 0;
174
 
175
	actual_temp = actual_temp * 1000;
176
 
177
	return actual_temp;
178
}
179
 
180
/*
181
 * Indirect registers accessor
182
 */
183
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
184
{
185
	unsigned long flags;
186
	u32 r;
187
 
188
	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
189
	WREG32(PCIE_INDEX, reg);
190
	(void)RREG32(PCIE_INDEX);
191
	r = RREG32(PCIE_DATA);
192
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
193
	return r;
194
}
195
 
196
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: indirect register offset
 * @v: value to write
 *
 * Counterpart of cik_pciep_rreg(); holds pciep_idx_lock across the
 * index/data pair and reads back both registers to post the writes.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
207
 
208
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).
 * Entries come in pairs: (instance-select << 16 | dword register offset)
 * followed by a value slot; the bare 0x3 / 0x5 words separate list
 * sections.  NOTE(review): layout inferred from the table shape — it is
 * consumed by the RLC setup code elsewhere in this file; confirm there.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
654
 
655
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins GFX).
 * Same (instance-select << 16 | dword register offset) / value-slot
 * pairing and 0x3 / 0x5 section markers as the Spectre table, but with
 * fewer instances (a smaller GPU).  NOTE(review): layout inferred from
 * the table shape — confirm against the RLC setup code that consumes it.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
979
 
980
/*
 * Bonaire "golden" SPM settings: (offset, mask, value) triples applied
 * at init — NOTE(review): triple layout inferred from the matching
 * golden-register tables below; confirm against the apply helper.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
984
 
985
/*
 * Bonaire common "golden" register settings, as (offset, mask, value)
 * triples — NOTE(review): triple layout inferred from table shape;
 * confirm against the apply helper.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
992
 
993
/*
 * Bonaire "golden" register settings, as (offset, mask, value) triples
 * — NOTE(review): triple layout inferred from table shape; confirm
 * against the apply helper.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1037
 
1038
/* Bonaire MGCG/CGCG (clock-gating) init sequence ({offset, mask, value}
 * triples applied by radeon_program_register_sequence() from
 * cik_init_golden_registers()). */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1123
 
1124
/* Spectre (Kaveri) SPM golden settings ({offset, mask, value} triples applied
 * by radeon_program_register_sequence() from cik_init_golden_registers()). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1128
 
1129
/* Spectre (Kaveri) common golden settings ({offset, mask, value} triples
 * applied by radeon_program_register_sequence()). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1136
 
1137
/* Spectre (Kaveri) per-ASIC golden register settings ({offset, mask, value}
 * triples applied by radeon_program_register_sequence()). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1165
 
1166
/* Spectre (Kaveri) MGCG/CGCG (clock-gating) init sequence ({offset, mask,
 * value} triples applied by radeon_program_register_sequence()). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1256
 
1257
/* Kalindi (Kabini) SPM golden settings ({offset, mask, value} triples applied
 * by radeon_program_register_sequence()). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1261
 
1262
/* Kalindi (Kabini) common golden settings; also reused for Mullins — see
 * the CHIP_MULLINS case in cik_init_golden_registers(). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1269
 
1270
/* Kalindi (Kabini) per-ASIC golden register settings ({offset, mask, value}
 * triples applied by radeon_program_register_sequence()). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1303
 
1304
/* Kalindi (Kabini) MGCG/CGCG init sequence; also reused for Mullins — see
 * the CHIP_MULLINS case in cik_init_golden_registers(). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1362
 
1363
/* Hawaii SPM golden settings ({offset, mask, value} triples applied by
 * radeon_program_register_sequence()). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1367
 
1368
/* Hawaii common golden settings ({offset, mask, value} triples applied by
 * radeon_program_register_sequence()). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1376
 
1377
/* Hawaii per-ASIC golden register settings ({offset, mask, value} triples
 * applied by radeon_program_register_sequence()). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1416
 
1417
/* Hawaii MGCG/CGCG (clock-gating) init sequence ({offset, mask, value}
 * triples applied by radeon_program_register_sequence()). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1527
 
1528
/* Godavari (Mullins) per-ASIC golden register settings ({offset, mask, value}
 * triples applied by radeon_program_register_sequence()).
 * NOTE(review): the 0x98302 offset looks out of family with the 0x9830/0x9834
 * entries used by the other tables — possibly a typo for 0x9834, but it
 * matches the upstream table, so it is left untouched; verify against the
 * upstream radeon driver before changing. */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1563
 
1564
 
1565
/**
 * cik_init_golden_registers - apply the per-family "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the family-specific register tables above (clock-gating init,
 * golden, common and SPM settings, in that order) via
 * radeon_program_register_sequence().  Unknown families are silently
 * skipped.  Note KABINI and MULLINS share the kalindi tables except for
 * the per-ASIC golden set (godavari_golden_registers for MULLINS).
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}
1645
 
1646
/**
1647
 * cik_get_xclk - get the xclk
1648
 *
1649
 * @rdev: radeon_device pointer
1650
 *
1651
 * Returns the reference clock used by the gfx engine
1652
 * (CIK).
1653
 */
1654
u32 cik_get_xclk(struct radeon_device *rdev)
1655
{
1656
        u32 reference_clock = rdev->clock.spll.reference_freq;
1657
 
1658
	if (rdev->flags & RADEON_IS_IGP) {
1659
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1660
			return reference_clock / 2;
1661
	} else {
1662
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1663
			return reference_clock / 4;
1664
	}
1665
	return reference_clock;
1666
}
1667
 
1668
/**
1669
 * cik_mm_rdoorbell - read a doorbell dword
1670
 *
1671
 * @rdev: radeon_device pointer
1672
 * @index: doorbell index
1673
 *
1674
 * Returns the value in the doorbell aperture at the
1675
 * requested doorbell index (CIK).
1676
 */
1677
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1678
{
1679
	if (index < rdev->doorbell.num_doorbells) {
1680
		return readl(rdev->doorbell.ptr + index);
1681
	} else {
1682
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1683
		return 0;
1684
	}
1685
}
1686
 
1687
/**
1688
 * cik_mm_wdoorbell - write a doorbell dword
1689
 *
1690
 * @rdev: radeon_device pointer
1691
 * @index: doorbell index
1692
 * @v: value to write
1693
 *
1694
 * Writes @v to the doorbell aperture at the
1695
 * requested doorbell index (CIK).
1696
 */
1697
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1698
{
1699
	if (index < rdev->doorbell.num_doorbells) {
1700
		writel(v, rdev->doorbell.ptr + index);
1701
	} else {
1702
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1703
	}
1704
}
1705
 
1706
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire legacy-firmware MC io-debug setup: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode(). */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1747
 
1748
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii legacy-firmware MC io-debug setup: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode(). */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1775
 
1776
 
1777
/**
1778
 * cik_srbm_select - select specific register instances
1779
 *
1780
 * @rdev: radeon_device pointer
1781
 * @me: selected ME (micro engine)
1782
 * @pipe: pipe
1783
 * @queue: queue
1784
 * @vmid: VMID
1785
 *
1786
 * Switches the currently active registers instances.  Some
1787
 * registers are instanced per VMID, others are instanced per
1788
 * me/pipe/queue combination.
1789
 */
1790
static void cik_srbm_select(struct radeon_device *rdev,
1791
			    u32 me, u32 pipe, u32 queue, u32 vmid)
1792
{
1793
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1794
			     MEID(me & 0x3) |
1795
			     VMID(vmid & 0xf) |
1796
			     QUEUEID(queue & 0x7));
1797
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1798
}
1799
 
1800
/* ucode loading */
1801
/**
1802
 * ci_mc_load_microcode - load MC ucode into the hw
1803
 *
1804
 * @rdev: radeon_device pointer
1805
 *
1806
 * Load the GDDR MC ucode into the hw (CIK).
1807
 * Returns 0 on success, error on failure.
1808
 */
1809
int ci_mc_load_microcode(struct radeon_device *rdev)
1810
{
1811
	const __be32 *fw_data = NULL;
1812
	const __le32 *new_fw_data = NULL;
5271 serge 1813
	u32 running, blackout = 0, tmp;
5078 serge 1814
	u32 *io_mc_regs = NULL;
1815
	const __le32 *new_io_mc_regs = NULL;
1816
	int i, regs_size, ucode_size;
1817
 
1818
	if (!rdev->mc_fw)
1819
		return -EINVAL;
1820
 
1821
	if (rdev->new_fw) {
1822
		const struct mc_firmware_header_v1_0 *hdr =
1823
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1824
 
1825
		radeon_ucode_print_mc_hdr(&hdr->header);
1826
 
1827
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1828
		new_io_mc_regs = (const __le32 *)
1829
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1830
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1831
		new_fw_data = (const __le32 *)
1832
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1833
	} else {
1834
	ucode_size = rdev->mc_fw->size / 4;
1835
 
1836
	switch (rdev->family) {
1837
	case CHIP_BONAIRE:
1838
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1839
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1840
		break;
1841
	case CHIP_HAWAII:
1842
		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1843
		regs_size = HAWAII_IO_MC_REGS_SIZE;
1844
		break;
1845
	default:
1846
		return -EINVAL;
1847
	}
1848
		fw_data = (const __be32 *)rdev->mc_fw->data;
1849
	}
1850
 
1851
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1852
 
1853
	if (running == 0) {
1854
		if (running) {
1855
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1856
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1857
		}
1858
 
1859
		/* reset the engine and set to writable */
1860
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1862
 
1863
		/* load mc io regs */
1864
		for (i = 0; i < regs_size; i++) {
1865
			if (rdev->new_fw) {
1866
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1867
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1868
			} else {
1869
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1870
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1871
		}
1872
		}
5271 serge 1873
 
1874
		tmp = RREG32(MC_SEQ_MISC0);
1875
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1876
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1877
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1878
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1879
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1880
		}
1881
 
5078 serge 1882
		/* load the MC ucode */
1883
		for (i = 0; i < ucode_size; i++) {
1884
			if (rdev->new_fw)
1885
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1886
			else
1887
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1888
		}
1889
 
1890
		/* put the engine back into the active state */
1891
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1892
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1893
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1894
 
1895
		/* wait for training to complete */
1896
		for (i = 0; i < rdev->usec_timeout; i++) {
1897
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1898
				break;
1899
			udelay(1);
1900
		}
1901
		for (i = 0; i < rdev->usec_timeout; i++) {
1902
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1903
				break;
1904
			udelay(1);
1905
		}
1906
 
1907
		if (running)
1908
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1909
	}
1910
 
1911
	return 0;
1912
}
1913
 
1914
/**
1915
 * cik_init_microcode - load ucode images from disk
1916
 *
1917
 * @rdev: radeon_device pointer
1918
 *
1919
 * Use the firmware interface to load the ucode images into
1920
 * the driver (not loaded into hw).
1921
 * Returns 0 on success, error on failure.
1922
 */
1923
static int cik_init_microcode(struct radeon_device *rdev)
1924
{
1925
	const char *chip_name;
1926
	const char *new_chip_name;
1927
	size_t pfp_req_size, me_req_size, ce_req_size,
1928
		mec_req_size, rlc_req_size, mc_req_size = 0,
1929
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1930
	char fw_name[30];
1931
	int new_fw = 0;
1932
	int err;
1933
	int num_fw;
1934
 
1935
	DRM_DEBUG("\n");
1936
 
1937
	switch (rdev->family) {
1938
	case CHIP_BONAIRE:
1939
		chip_name = "BONAIRE";
1940
		new_chip_name = "bonaire";
1941
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1942
		me_req_size = CIK_ME_UCODE_SIZE * 4;
1943
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1944
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1945
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1946
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1947
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1948
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1949
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1950
		num_fw = 8;
1951
		break;
1952
	case CHIP_HAWAII:
1953
		chip_name = "HAWAII";
1954
		new_chip_name = "hawaii";
1955
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1956
		me_req_size = CIK_ME_UCODE_SIZE * 4;
1957
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1958
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1959
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1960
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1961
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1962
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1963
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1964
		num_fw = 8;
1965
		break;
1966
	case CHIP_KAVERI:
1967
		chip_name = "KAVERI";
1968
		new_chip_name = "kaveri";
1969
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1970
		me_req_size = CIK_ME_UCODE_SIZE * 4;
1971
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1972
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1973
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1974
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1975
		num_fw = 7;
1976
		break;
1977
	case CHIP_KABINI:
1978
		chip_name = "KABINI";
1979
		new_chip_name = "kabini";
1980
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1981
		me_req_size = CIK_ME_UCODE_SIZE * 4;
1982
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1983
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1984
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1985
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1986
		num_fw = 6;
1987
		break;
1988
	case CHIP_MULLINS:
1989
		chip_name = "MULLINS";
1990
		new_chip_name = "mullins";
1991
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1992
		me_req_size = CIK_ME_UCODE_SIZE * 4;
1993
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1994
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1995
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1996
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997
		num_fw = 6;
1998
		break;
1999
	default: BUG();
2000
	}
2001
 
2002
	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2003
 
2004
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2005
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2006
	if (err) {
2007
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2008
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2009
	if (err)
2010
		goto out;
2011
	if (rdev->pfp_fw->size != pfp_req_size) {
2012
		printk(KERN_ERR
2013
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2014
		       rdev->pfp_fw->size, fw_name);
2015
		err = -EINVAL;
2016
		goto out;
2017
	}
2018
	} else {
2019
		err = radeon_ucode_validate(rdev->pfp_fw);
2020
		if (err) {
2021
			printk(KERN_ERR
2022
			       "cik_fw: validation failed for firmware \"%s\"\n",
2023
			       fw_name);
2024
			goto out;
2025
		} else {
2026
			new_fw++;
2027
		}
2028
	}
2029
 
2030
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2031
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2032
	if (err) {
2033
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2034
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2035
	if (err)
2036
		goto out;
2037
	if (rdev->me_fw->size != me_req_size) {
2038
		printk(KERN_ERR
2039
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2040
		       rdev->me_fw->size, fw_name);
2041
		err = -EINVAL;
2042
	}
2043
	} else {
2044
		err = radeon_ucode_validate(rdev->me_fw);
2045
		if (err) {
2046
			printk(KERN_ERR
2047
			       "cik_fw: validation failed for firmware \"%s\"\n",
2048
			       fw_name);
2049
			goto out;
2050
		} else {
2051
			new_fw++;
2052
		}
2053
	}
2054
 
2055
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2056
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2057
	if (err) {
2058
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2059
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2060
	if (err)
2061
		goto out;
2062
	if (rdev->ce_fw->size != ce_req_size) {
2063
		printk(KERN_ERR
2064
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2065
		       rdev->ce_fw->size, fw_name);
2066
		err = -EINVAL;
2067
	}
2068
	} else {
2069
		err = radeon_ucode_validate(rdev->ce_fw);
2070
		if (err) {
2071
			printk(KERN_ERR
2072
			       "cik_fw: validation failed for firmware \"%s\"\n",
2073
			       fw_name);
2074
			goto out;
2075
		} else {
2076
			new_fw++;
2077
		}
2078
	}
2079
 
2080
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2081
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2082
	if (err) {
2083
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2084
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2085
	if (err)
2086
		goto out;
2087
	if (rdev->mec_fw->size != mec_req_size) {
2088
		printk(KERN_ERR
2089
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2090
		       rdev->mec_fw->size, fw_name);
2091
		err = -EINVAL;
2092
	}
2093
	} else {
2094
		err = radeon_ucode_validate(rdev->mec_fw);
2095
		if (err) {
2096
			printk(KERN_ERR
2097
			       "cik_fw: validation failed for firmware \"%s\"\n",
2098
			       fw_name);
2099
			goto out;
2100
		} else {
2101
			new_fw++;
2102
		}
2103
	}
2104
 
2105
	if (rdev->family == CHIP_KAVERI) {
2106
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2107
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2108
		if (err) {
2109
			goto out;
2110
		} else {
2111
			err = radeon_ucode_validate(rdev->mec2_fw);
2112
			if (err) {
2113
				goto out;
2114
			} else {
2115
				new_fw++;
2116
			}
2117
		}
2118
	}
2119
 
2120
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2121
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2122
	if (err) {
2123
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2124
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2125
	if (err)
2126
		goto out;
2127
	if (rdev->rlc_fw->size != rlc_req_size) {
2128
		printk(KERN_ERR
2129
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2130
		       rdev->rlc_fw->size, fw_name);
2131
		err = -EINVAL;
2132
	}
2133
	} else {
2134
		err = radeon_ucode_validate(rdev->rlc_fw);
2135
		if (err) {
2136
			printk(KERN_ERR
2137
			       "cik_fw: validation failed for firmware \"%s\"\n",
2138
			       fw_name);
2139
			goto out;
2140
		} else {
2141
			new_fw++;
2142
		}
2143
	}
2144
 
2145
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2146
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2147
	if (err) {
2148
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2149
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2150
	if (err)
2151
		goto out;
2152
	if (rdev->sdma_fw->size != sdma_req_size) {
2153
		printk(KERN_ERR
2154
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2155
		       rdev->sdma_fw->size, fw_name);
2156
		err = -EINVAL;
2157
	}
2158
	} else {
2159
		err = radeon_ucode_validate(rdev->sdma_fw);
2160
		if (err) {
2161
			printk(KERN_ERR
2162
			       "cik_fw: validation failed for firmware \"%s\"\n",
2163
			       fw_name);
2164
			goto out;
2165
		} else {
2166
			new_fw++;
2167
		}
2168
	}
2169
 
2170
	/* No SMC, MC ucode on APUs */
2171
	if (!(rdev->flags & RADEON_IS_IGP)) {
2172
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2173
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2174
		if (err) {
2175
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2176
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2177
		if (err) {
2178
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2179
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2180
			if (err)
2181
				goto out;
2182
		}
2183
		if ((rdev->mc_fw->size != mc_req_size) &&
2184
		    (rdev->mc_fw->size != mc2_req_size)){
2185
			printk(KERN_ERR
2186
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2187
			       rdev->mc_fw->size, fw_name);
2188
			err = -EINVAL;
2189
		}
2190
		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2191
		} else {
2192
			err = radeon_ucode_validate(rdev->mc_fw);
2193
			if (err) {
2194
				printk(KERN_ERR
2195
				       "cik_fw: validation failed for firmware \"%s\"\n",
2196
				       fw_name);
2197
				goto out;
2198
			} else {
2199
				new_fw++;
2200
			}
2201
		}
2202
 
2203
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2204
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2205
		if (err) {
2206
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2207
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2208
		if (err) {
2209
			printk(KERN_ERR
2210
			       "smc: error loading firmware \"%s\"\n",
2211
			       fw_name);
2212
			release_firmware(rdev->smc_fw);
2213
			rdev->smc_fw = NULL;
2214
			err = 0;
2215
		} else if (rdev->smc_fw->size != smc_req_size) {
2216
			printk(KERN_ERR
2217
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2218
			       rdev->smc_fw->size, fw_name);
2219
			err = -EINVAL;
2220
		}
2221
		} else {
2222
			err = radeon_ucode_validate(rdev->smc_fw);
2223
			if (err) {
2224
				printk(KERN_ERR
2225
				       "cik_fw: validation failed for firmware \"%s\"\n",
2226
				       fw_name);
2227
				goto out;
2228
			} else {
2229
				new_fw++;
2230
			}
2231
		}
2232
	}
2233
 
2234
	if (new_fw == 0) {
2235
		rdev->new_fw = false;
2236
	} else if (new_fw < num_fw) {
2237
		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2238
		err = -EINVAL;
2239
	} else {
2240
		rdev->new_fw = true;
2241
	}
2242
 
2243
out:
2244
	if (err) {
2245
		if (err != -EINVAL)
2246
			printk(KERN_ERR
2247
			       "cik_cp: Failed to load firmware \"%s\"\n",
2248
			       fw_name);
2249
		release_firmware(rdev->pfp_fw);
2250
		rdev->pfp_fw = NULL;
2251
		release_firmware(rdev->me_fw);
2252
		rdev->me_fw = NULL;
2253
		release_firmware(rdev->ce_fw);
2254
		rdev->ce_fw = NULL;
2255
		release_firmware(rdev->mec_fw);
2256
		rdev->mec_fw = NULL;
2257
		release_firmware(rdev->mec2_fw);
2258
		rdev->mec2_fw = NULL;
2259
		release_firmware(rdev->rlc_fw);
2260
		rdev->rlc_fw = NULL;
2261
		release_firmware(rdev->sdma_fw);
2262
		rdev->sdma_fw = NULL;
2263
		release_firmware(rdev->mc_fw);
2264
		rdev->mc_fw = NULL;
2265
		release_firmware(rdev->smc_fw);
2266
		rdev->smc_fw = NULL;
2267
	}
2268
	return err;
2269
}
2270
 
2271
/*
2272
 * Core functions
2273
 */
2274
/**
2275
 * cik_tiling_mode_table_init - init the hw tiling table
2276
 *
2277
 * @rdev: radeon_device pointer
2278
 *
2279
 * Starting with SI, the tiling setup is done globally in a
2280
 * set of 32 tiling modes.  Rather than selecting each set of
2281
 * parameters per surface as on older asics, we just select
2282
 * which index in the tiling table we want to use, and the
2283
 * surface uses those parameters (CIK).
2284
 */
2285
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2286
{
2287
	const u32 num_tile_mode_states = 32;
2288
	const u32 num_secondary_tile_mode_states = 16;
2289
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2290
	u32 num_pipe_configs;
2291
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2292
		rdev->config.cik.max_shader_engines;
2293
 
2294
	switch (rdev->config.cik.mem_row_size_in_kb) {
2295
	case 1:
2296
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2297
		break;
2298
	case 2:
2299
	default:
2300
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2301
		break;
2302
	case 4:
2303
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2304
		break;
2305
	}
2306
 
2307
	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2308
	if (num_pipe_configs > 8)
2309
		num_pipe_configs = 16;
2310
 
2311
	if (num_pipe_configs == 16) {
2312
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2313
			switch (reg_offset) {
2314
			case 0:
2315
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2317
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2319
				break;
2320
			case 1:
2321
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2323
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2325
				break;
2326
			case 2:
2327
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2331
				break;
2332
			case 3:
2333
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2337
				break;
2338
			case 4:
2339
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342
						 TILE_SPLIT(split_equal_to_row_size));
2343
				break;
2344
			case 5:
2345
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2346
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348
				break;
2349
			case 6:
2350
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2352
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354
				break;
2355
			case 7:
2356
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2357
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2358
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359
						 TILE_SPLIT(split_equal_to_row_size));
2360
				break;
2361
			case 8:
2362
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2363
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2364
				break;
2365
			case 9:
2366
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2369
				break;
2370
			case 10:
2371
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2372
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375
				break;
2376
			case 11:
2377
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2380
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381
				break;
2382
			case 12:
2383
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2384
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387
				break;
2388
			case 13:
2389
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2392
				break;
2393
			case 14:
2394
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398
				break;
2399
			case 16:
2400
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2403
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404
				break;
2405
			case 17:
2406
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410
				break;
2411
			case 27:
2412
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2415
				break;
2416
			case 28:
2417
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421
				break;
2422
			case 29:
2423
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2426
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427
				break;
2428
			case 30:
2429
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431
						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433
				break;
2434
			default:
2435
				gb_tile_moden = 0;
2436
				break;
2437
			}
2438
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2439
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2440
		}
2441
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2442
			switch (reg_offset) {
2443
			case 0:
2444
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447
						 NUM_BANKS(ADDR_SURF_16_BANK));
2448
				break;
2449
			case 1:
2450
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453
						 NUM_BANKS(ADDR_SURF_16_BANK));
2454
				break;
2455
			case 2:
2456
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459
						 NUM_BANKS(ADDR_SURF_16_BANK));
2460
				break;
2461
			case 3:
2462
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465
						 NUM_BANKS(ADDR_SURF_16_BANK));
2466
				break;
2467
			case 4:
2468
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471
						 NUM_BANKS(ADDR_SURF_8_BANK));
2472
				break;
2473
			case 5:
2474
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477
						 NUM_BANKS(ADDR_SURF_4_BANK));
2478
				break;
2479
			case 6:
2480
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483
						 NUM_BANKS(ADDR_SURF_2_BANK));
2484
				break;
2485
			case 8:
2486
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489
						 NUM_BANKS(ADDR_SURF_16_BANK));
2490
				break;
2491
			case 9:
2492
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495
						 NUM_BANKS(ADDR_SURF_16_BANK));
2496
				break;
2497
			case 10:
2498
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501
						 NUM_BANKS(ADDR_SURF_16_BANK));
2502
				break;
2503
			case 11:
2504
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507
						 NUM_BANKS(ADDR_SURF_8_BANK));
2508
				break;
2509
			case 12:
2510
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513
						 NUM_BANKS(ADDR_SURF_4_BANK));
2514
				break;
2515
			case 13:
2516
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519
						 NUM_BANKS(ADDR_SURF_2_BANK));
2520
				break;
2521
			case 14:
2522
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2525
						 NUM_BANKS(ADDR_SURF_2_BANK));
2526
				break;
2527
			default:
2528
				gb_tile_moden = 0;
2529
				break;
2530
			}
2531
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2532
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2533
		}
2534
	} else if (num_pipe_configs == 8) {
2535
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2536
			switch (reg_offset) {
2537
			case 0:
2538
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2540
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2542
				break;
2543
			case 1:
2544
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2548
				break;
2549
			case 2:
2550
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2552
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2554
				break;
2555
			case 3:
2556
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2558
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2560
				break;
2561
			case 4:
2562
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2564
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565
						 TILE_SPLIT(split_equal_to_row_size));
2566
				break;
2567
			case 5:
2568
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571
				break;
2572
			case 6:
2573
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2574
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2577
				break;
2578
			case 7:
2579
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2580
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2581
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582
						 TILE_SPLIT(split_equal_to_row_size));
2583
				break;
2584
			case 8:
2585
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2587
				break;
2588
			case 9:
2589
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2592
				break;
2593
			case 10:
2594
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598
				break;
2599
			case 11:
2600
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2601
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2603
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604
				break;
2605
			case 12:
2606
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2607
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610
				break;
2611
			case 13:
2612
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2615
				break;
2616
			case 14:
2617
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2619
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621
				break;
2622
			case 16:
2623
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2625
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627
				break;
2628
			case 17:
2629
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2630
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633
				break;
2634
			case 27:
2635
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2638
				break;
2639
			case 28:
2640
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2642
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644
				break;
2645
			case 29:
2646
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650
				break;
2651
			case 30:
2652
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656
				break;
2657
			default:
2658
				gb_tile_moden = 0;
2659
				break;
2660
			}
2661
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2662
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2663
		}
2664
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2665
			switch (reg_offset) {
2666
			case 0:
2667
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670
						 NUM_BANKS(ADDR_SURF_16_BANK));
2671
				break;
2672
			case 1:
2673
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2675
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676
						 NUM_BANKS(ADDR_SURF_16_BANK));
2677
				break;
2678
			case 2:
2679
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2682
						 NUM_BANKS(ADDR_SURF_16_BANK));
2683
				break;
2684
			case 3:
2685
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2688
						 NUM_BANKS(ADDR_SURF_16_BANK));
2689
				break;
2690
			case 4:
2691
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2694
						 NUM_BANKS(ADDR_SURF_8_BANK));
2695
				break;
2696
			case 5:
2697
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2700
						 NUM_BANKS(ADDR_SURF_4_BANK));
2701
				break;
2702
			case 6:
2703
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2706
						 NUM_BANKS(ADDR_SURF_2_BANK));
2707
				break;
2708
			case 8:
2709
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2711
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712
						 NUM_BANKS(ADDR_SURF_16_BANK));
2713
				break;
2714
			case 9:
2715
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718
						 NUM_BANKS(ADDR_SURF_16_BANK));
2719
				break;
2720
			case 10:
2721
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2723
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724
						 NUM_BANKS(ADDR_SURF_16_BANK));
2725
				break;
2726
			case 11:
2727
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730
						 NUM_BANKS(ADDR_SURF_16_BANK));
2731
				break;
2732
			case 12:
2733
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2736
						 NUM_BANKS(ADDR_SURF_8_BANK));
2737
				break;
2738
			case 13:
2739
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2742
						 NUM_BANKS(ADDR_SURF_4_BANK));
2743
				break;
2744
			case 14:
2745
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748
						 NUM_BANKS(ADDR_SURF_2_BANK));
2749
				break;
2750
			default:
2751
				gb_tile_moden = 0;
2752
				break;
2753
			}
2754
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2755
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2756
		}
2757
	} else if (num_pipe_configs == 4) {
2758
		if (num_rbs == 4) {
2759
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2760
				switch (reg_offset) {
2761
				case 0:
2762
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2764
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2766
					break;
2767
				case 1:
2768
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2772
					break;
2773
				case 2:
2774
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2776
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2778
					break;
2779
				case 3:
2780
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2782
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2784
					break;
2785
				case 4:
2786
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2788
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789
							 TILE_SPLIT(split_equal_to_row_size));
2790
					break;
2791
				case 5:
2792
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2793
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795
					break;
2796
				case 6:
2797
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2798
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2799
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2801
					break;
2802
				case 7:
2803
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806
							 TILE_SPLIT(split_equal_to_row_size));
2807
					break;
2808
				case 8:
2809
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2810
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2811
					break;
2812
				case 9:
2813
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2816
					break;
2817
				case 10:
2818
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2820
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822
					break;
2823
				case 11:
2824
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2828
					break;
2829
				case 12:
2830
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2831
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834
					break;
2835
				case 13:
2836
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2837
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2839
					break;
2840
				case 14:
2841
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845
					break;
2846
				case 16:
2847
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851
					break;
2852
				case 17:
2853
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2854
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857
					break;
2858
				case 27:
2859
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2862
					break;
2863
				case 28:
2864
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868
					break;
2869
				case 29:
2870
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874
					break;
2875
				case 30:
2876
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2877
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880
					break;
2881
				default:
2882
					gb_tile_moden = 0;
2883
					break;
2884
				}
2885
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2886
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2887
			}
2888
		} else if (num_rbs < 4) {
2889
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2890
				switch (reg_offset) {
2891
				case 0:
2892
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2895
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896
					break;
2897
				case 1:
2898
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2900
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2901
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2902
					break;
2903
				case 2:
2904
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908
					break;
2909
				case 3:
2910
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2913
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2914
					break;
2915
				case 4:
2916
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2919
							 TILE_SPLIT(split_equal_to_row_size));
2920
					break;
2921
				case 5:
2922
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2924
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925
					break;
2926
				case 6:
2927
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2929
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2930
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2931
					break;
2932
				case 7:
2933
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2934
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2935
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2936
							 TILE_SPLIT(split_equal_to_row_size));
2937
					break;
2938
				case 8:
2939
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940
						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2941
					break;
2942
				case 9:
2943
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2945
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2946
					break;
2947
				case 10:
2948
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2949
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2951
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952
					break;
2953
				case 11:
2954
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2955
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2957
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958
					break;
2959
				case 12:
2960
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2961
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2962
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2963
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964
					break;
2965
				case 13:
2966
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2967
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2968
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2969
					break;
2970
				case 14:
2971
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2974
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975
					break;
2976
				case 16:
2977
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2979
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2980
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981
					break;
2982
				case 17:
2983
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2984
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2985
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2986
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987
					break;
2988
				case 27:
2989
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2990
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2992
					break;
2993
				case 28:
2994
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998
					break;
2999
				case 29:
3000
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3002
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3003
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3004
					break;
3005
				case 30:
3006
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3007
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3009
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010
					break;
3011
				default:
3012
					gb_tile_moden = 0;
3013
					break;
3014
				}
3015
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3016
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3017
			}
3018
		}
3019
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3020
			switch (reg_offset) {
3021
			case 0:
3022
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3024
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3025
						 NUM_BANKS(ADDR_SURF_16_BANK));
3026
				break;
3027
			case 1:
3028
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031
						 NUM_BANKS(ADDR_SURF_16_BANK));
3032
				break;
3033
			case 2:
3034
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3036
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3037
						 NUM_BANKS(ADDR_SURF_16_BANK));
3038
				break;
3039
			case 3:
3040
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3043
						 NUM_BANKS(ADDR_SURF_16_BANK));
3044
				break;
3045
			case 4:
3046
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049
						 NUM_BANKS(ADDR_SURF_16_BANK));
3050
				break;
3051
			case 5:
3052
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055
						 NUM_BANKS(ADDR_SURF_8_BANK));
3056
				break;
3057
			case 6:
3058
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061
						 NUM_BANKS(ADDR_SURF_4_BANK));
3062
				break;
3063
			case 8:
3064
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3065
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3066
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067
						 NUM_BANKS(ADDR_SURF_16_BANK));
3068
				break;
3069
			case 9:
3070
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3071
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073
						 NUM_BANKS(ADDR_SURF_16_BANK));
3074
				break;
3075
			case 10:
3076
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3078
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3079
						 NUM_BANKS(ADDR_SURF_16_BANK));
3080
				break;
3081
			case 11:
3082
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3084
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085
						 NUM_BANKS(ADDR_SURF_16_BANK));
3086
				break;
3087
			case 12:
3088
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3091
						 NUM_BANKS(ADDR_SURF_16_BANK));
3092
				break;
3093
			case 13:
3094
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3096
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3097
						 NUM_BANKS(ADDR_SURF_8_BANK));
3098
				break;
3099
			case 14:
3100
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3102
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3103
						 NUM_BANKS(ADDR_SURF_4_BANK));
3104
				break;
3105
			default:
3106
				gb_tile_moden = 0;
3107
				break;
3108
			}
3109
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3110
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3111
		}
3112
	} else if (num_pipe_configs == 2) {
3113
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3114
			switch (reg_offset) {
3115
			case 0:
3116
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3118
						 PIPE_CONFIG(ADDR_SURF_P2) |
3119
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3120
				break;
3121
			case 1:
3122
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3124
						 PIPE_CONFIG(ADDR_SURF_P2) |
3125
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3126
				break;
3127
			case 2:
3128
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3129
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3130
						 PIPE_CONFIG(ADDR_SURF_P2) |
3131
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3132
				break;
3133
			case 3:
3134
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3136
						 PIPE_CONFIG(ADDR_SURF_P2) |
3137
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3138
				break;
3139
			case 4:
3140
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3142
						 PIPE_CONFIG(ADDR_SURF_P2) |
3143
						 TILE_SPLIT(split_equal_to_row_size));
3144
				break;
3145
			case 5:
3146
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3147
						 PIPE_CONFIG(ADDR_SURF_P2) |
3148
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3149
				break;
3150
			case 6:
3151
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3152
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3153
						 PIPE_CONFIG(ADDR_SURF_P2) |
3154
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3155
				break;
3156
			case 7:
3157
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3158
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3159
						 PIPE_CONFIG(ADDR_SURF_P2) |
3160
						 TILE_SPLIT(split_equal_to_row_size));
3161
				break;
3162
			case 8:
3163
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3164
						PIPE_CONFIG(ADDR_SURF_P2);
3165
				break;
3166
			case 9:
3167
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3168
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3169
						 PIPE_CONFIG(ADDR_SURF_P2));
3170
				break;
3171
			case 10:
3172
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3174
						 PIPE_CONFIG(ADDR_SURF_P2) |
3175
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176
				break;
3177
			case 11:
3178
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3179
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3180
						 PIPE_CONFIG(ADDR_SURF_P2) |
3181
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182
				break;
3183
			case 12:
3184
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3185
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3186
						 PIPE_CONFIG(ADDR_SURF_P2) |
3187
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188
				break;
3189
			case 13:
3190
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191
						 PIPE_CONFIG(ADDR_SURF_P2) |
3192
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3193
				break;
3194
			case 14:
3195
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3197
						 PIPE_CONFIG(ADDR_SURF_P2) |
3198
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199
				break;
3200
			case 16:
3201
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3203
						 PIPE_CONFIG(ADDR_SURF_P2) |
3204
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205
				break;
3206
			case 17:
3207
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3208
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3209
						 PIPE_CONFIG(ADDR_SURF_P2) |
3210
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211
				break;
3212
			case 27:
3213
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3214
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3215
						 PIPE_CONFIG(ADDR_SURF_P2));
3216
				break;
3217
			case 28:
3218
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3220
						 PIPE_CONFIG(ADDR_SURF_P2) |
3221
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222
				break;
3223
			case 29:
3224
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3225
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3226
						 PIPE_CONFIG(ADDR_SURF_P2) |
3227
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3228
				break;
3229
			case 30:
3230
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3231
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232
						 PIPE_CONFIG(ADDR_SURF_P2) |
3233
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3234
				break;
3235
			default:
3236
				gb_tile_moden = 0;
3237
				break;
3238
			}
3239
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3240
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3241
		}
3242
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3243
			switch (reg_offset) {
3244
			case 0:
3245
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3247
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248
						 NUM_BANKS(ADDR_SURF_16_BANK));
3249
				break;
3250
			case 1:
3251
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3252
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3253
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254
						 NUM_BANKS(ADDR_SURF_16_BANK));
3255
				break;
3256
			case 2:
3257
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3259
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3260
						 NUM_BANKS(ADDR_SURF_16_BANK));
3261
				break;
3262
			case 3:
3263
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3264
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3265
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3266
						 NUM_BANKS(ADDR_SURF_16_BANK));
3267
				break;
3268
			case 4:
3269
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3271
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272
						 NUM_BANKS(ADDR_SURF_16_BANK));
3273
				break;
3274
			case 5:
3275
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3276
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3277
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278
						 NUM_BANKS(ADDR_SURF_16_BANK));
3279
				break;
3280
			case 6:
3281
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284
						 NUM_BANKS(ADDR_SURF_8_BANK));
3285
				break;
3286
			case 8:
3287
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3288
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3289
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3290
						 NUM_BANKS(ADDR_SURF_16_BANK));
3291
				break;
3292
			case 9:
3293
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3295
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296
						 NUM_BANKS(ADDR_SURF_16_BANK));
3297
				break;
3298
			case 10:
3299
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3300
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3301
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3302
						 NUM_BANKS(ADDR_SURF_16_BANK));
3303
				break;
3304
			case 11:
3305
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3306
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308
						 NUM_BANKS(ADDR_SURF_16_BANK));
3309
				break;
3310
			case 12:
3311
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3312
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3313
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314
						 NUM_BANKS(ADDR_SURF_16_BANK));
3315
				break;
3316
			case 13:
3317
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320
						 NUM_BANKS(ADDR_SURF_16_BANK));
3321
				break;
3322
			case 14:
3323
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326
						 NUM_BANKS(ADDR_SURF_8_BANK));
3327
				break;
3328
			default:
3329
				gb_tile_moden = 0;
3330
				break;
3331
			}
3332
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3333
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3334
		}
3335
	} else
3336
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3337
}
3338
 
3339
/**
3340
 * cik_select_se_sh - select which SE, SH to address
3341
 *
3342
 * @rdev: radeon_device pointer
3343
 * @se_num: shader engine to address
3344
 * @sh_num: sh block to address
3345
 *
3346
 * Select which SE, SH combinations to address. Certain
3347
 * registers are instanced per SE or SH.  0xffffffff means
3348
 * broadcast to all SEs or SHs (CIK).
3349
 */
3350
static void cik_select_se_sh(struct radeon_device *rdev,
3351
			     u32 se_num, u32 sh_num)
3352
{
3353
	u32 data = INSTANCE_BROADCAST_WRITES;
3354
 
3355
	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3356
		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3357
	else if (se_num == 0xffffffff)
3358
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3359
	else if (sh_num == 0xffffffff)
3360
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3361
	else
3362
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3363
	WREG32(GRBM_GFX_INDEX, data);
3364
}
3365
 
3366
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a contiguous mask of @bit_width low-order set bits (CIK).
 * Widths of 32 or more saturate to an all-ones mask, which matches
 * what the previous shift-and-or loop produced on a 32-bit u32.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* (1u << 32) is undefined behavior in C, so saturate explicitly
	 * instead of shifting by the full (or larger) type width.
	 */
	if (bit_width >= 32)
		return 0xffffffffu;
	return (1u << bit_width) - 1;
}
3384
 
3385
/**
3386
 * cik_get_rb_disabled - computes the mask of disabled RBs
3387
 *
3388
 * @rdev: radeon_device pointer
3389
 * @max_rb_num: max RBs (render backends) for the asic
3390
 * @se_num: number of SEs (shader engines) for the asic
3391
 * @sh_per_se: number of SH blocks per SE for the asic
3392
 *
3393
 * Calculates the bitmask of disabled RBs (CIK).
3394
 * Returns the disabled RB bitmask.
3395
 */
3396
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3397
			      u32 max_rb_num_per_se,
3398
			      u32 sh_per_se)
3399
{
3400
	u32 data, mask;
3401
 
3402
	data = RREG32(CC_RB_BACKEND_DISABLE);
3403
	if (data & 1)
3404
		data &= BACKEND_DISABLE_MASK;
3405
	else
3406
		data = 0;
3407
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3408
 
3409
	data >>= BACKEND_DISABLE_SHIFT;
3410
 
3411
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3412
 
3413
	return data & mask;
3414
}
3415
 
3416
/**
3417
 * cik_setup_rb - setup the RBs on the asic
3418
 *
3419
 * @rdev: radeon_device pointer
3420
 * @se_num: number of SEs (shader engines) for the asic
3421
 * @sh_per_se: number of SH blocks per SE for the asic
3422
 * @max_rb_num: max RBs (render backends) for the asic
3423
 *
3424
 * Configures per-SE/SH RB registers (CIK).
3425
 */
3426
static void cik_setup_rb(struct radeon_device *rdev,
3427
			 u32 se_num, u32 sh_per_se,
3428
			 u32 max_rb_num_per_se)
3429
{
3430
	int i, j;
3431
	u32 data, mask;
3432
	u32 disabled_rbs = 0;
3433
	u32 enabled_rbs = 0;
3434
 
5271 serge 3435
	mutex_lock(&rdev->grbm_idx_mutex);
5078 serge 3436
	for (i = 0; i < se_num; i++) {
3437
		for (j = 0; j < sh_per_se; j++) {
3438
			cik_select_se_sh(rdev, i, j);
3439
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3440
			if (rdev->family == CHIP_HAWAII)
3441
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3442
			else
3443
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3444
		}
3445
	}
3446
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5271 serge 3447
	mutex_unlock(&rdev->grbm_idx_mutex);
5078 serge 3448
 
3449
	mask = 1;
3450
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3451
		if (!(disabled_rbs & mask))
3452
			enabled_rbs |= mask;
3453
		mask <<= 1;
3454
	}
3455
 
3456
	rdev->config.cik.backend_enable_mask = enabled_rbs;
3457
 
5271 serge 3458
	mutex_lock(&rdev->grbm_idx_mutex);
5078 serge 3459
	for (i = 0; i < se_num; i++) {
3460
		cik_select_se_sh(rdev, i, 0xffffffff);
3461
		data = 0;
3462
		for (j = 0; j < sh_per_se; j++) {
3463
			switch (enabled_rbs & 3) {
3464
			case 0:
3465
				if (j == 0)
3466
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3467
				else
3468
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3469
				break;
3470
			case 1:
3471
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3472
				break;
3473
			case 2:
3474
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3475
				break;
3476
			case 3:
3477
			default:
3478
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3479
				break;
3480
			}
3481
			enabled_rbs >>= 2;
3482
		}
3483
		WREG32(PA_SC_RASTER_CONFIG, data);
3484
	}
3485
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5271 serge 3486
	mutex_unlock(&rdev->grbm_idx_mutex);
5078 serge 3487
}
3488
 
3489
/**
3490
 * cik_gpu_init - setup the 3D engine
3491
 *
3492
 * @rdev: radeon_device pointer
3493
 *
3494
 * Configures the 3D engine and tiling configuration
3495
 * registers so that the 3D engine is usable.
3496
 */
3497
static void cik_gpu_init(struct radeon_device *rdev)
3498
{
3499
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3500
	u32 mc_shared_chmap, mc_arb_ramcfg;
3501
	u32 hdp_host_path_cntl;
3502
	u32 tmp;
3503
	int i, j;
3504
 
3505
	switch (rdev->family) {
3506
	case CHIP_BONAIRE:
3507
		rdev->config.cik.max_shader_engines = 2;
3508
		rdev->config.cik.max_tile_pipes = 4;
3509
		rdev->config.cik.max_cu_per_sh = 7;
3510
		rdev->config.cik.max_sh_per_se = 1;
3511
		rdev->config.cik.max_backends_per_se = 2;
3512
		rdev->config.cik.max_texture_channel_caches = 4;
3513
		rdev->config.cik.max_gprs = 256;
3514
		rdev->config.cik.max_gs_threads = 32;
3515
		rdev->config.cik.max_hw_contexts = 8;
3516
 
3517
		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3522
		break;
3523
	case CHIP_HAWAII:
3524
		rdev->config.cik.max_shader_engines = 4;
3525
		rdev->config.cik.max_tile_pipes = 16;
3526
		rdev->config.cik.max_cu_per_sh = 11;
3527
		rdev->config.cik.max_sh_per_se = 1;
3528
		rdev->config.cik.max_backends_per_se = 4;
3529
		rdev->config.cik.max_texture_channel_caches = 16;
3530
		rdev->config.cik.max_gprs = 256;
3531
		rdev->config.cik.max_gs_threads = 32;
3532
		rdev->config.cik.max_hw_contexts = 8;
3533
 
3534
		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3535
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3536
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3537
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3538
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3539
		break;
3540
	case CHIP_KAVERI:
3541
		rdev->config.cik.max_shader_engines = 1;
3542
		rdev->config.cik.max_tile_pipes = 4;
3543
		if ((rdev->pdev->device == 0x1304) ||
3544
		    (rdev->pdev->device == 0x1305) ||
3545
		    (rdev->pdev->device == 0x130C) ||
3546
		    (rdev->pdev->device == 0x130F) ||
3547
		    (rdev->pdev->device == 0x1310) ||
3548
		    (rdev->pdev->device == 0x1311) ||
3549
		    (rdev->pdev->device == 0x131C)) {
3550
			rdev->config.cik.max_cu_per_sh = 8;
3551
			rdev->config.cik.max_backends_per_se = 2;
3552
		} else if ((rdev->pdev->device == 0x1309) ||
3553
			   (rdev->pdev->device == 0x130A) ||
3554
			   (rdev->pdev->device == 0x130D) ||
3555
			   (rdev->pdev->device == 0x1313) ||
3556
			   (rdev->pdev->device == 0x131D)) {
3557
			rdev->config.cik.max_cu_per_sh = 6;
3558
			rdev->config.cik.max_backends_per_se = 2;
3559
		} else if ((rdev->pdev->device == 0x1306) ||
3560
			   (rdev->pdev->device == 0x1307) ||
3561
			   (rdev->pdev->device == 0x130B) ||
3562
			   (rdev->pdev->device == 0x130E) ||
3563
			   (rdev->pdev->device == 0x1315) ||
3564
			   (rdev->pdev->device == 0x1318) ||
3565
			   (rdev->pdev->device == 0x131B)) {
3566
			rdev->config.cik.max_cu_per_sh = 4;
3567
			rdev->config.cik.max_backends_per_se = 1;
3568
		} else {
3569
			rdev->config.cik.max_cu_per_sh = 3;
3570
			rdev->config.cik.max_backends_per_se = 1;
3571
		}
3572
		rdev->config.cik.max_sh_per_se = 1;
3573
		rdev->config.cik.max_texture_channel_caches = 4;
3574
		rdev->config.cik.max_gprs = 256;
3575
		rdev->config.cik.max_gs_threads = 16;
3576
		rdev->config.cik.max_hw_contexts = 8;
3577
 
3578
		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3579
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3580
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3581
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3582
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3583
		break;
3584
	case CHIP_KABINI:
3585
	case CHIP_MULLINS:
3586
	default:
3587
		rdev->config.cik.max_shader_engines = 1;
3588
		rdev->config.cik.max_tile_pipes = 2;
3589
		rdev->config.cik.max_cu_per_sh = 2;
3590
		rdev->config.cik.max_sh_per_se = 1;
3591
		rdev->config.cik.max_backends_per_se = 1;
3592
		rdev->config.cik.max_texture_channel_caches = 2;
3593
		rdev->config.cik.max_gprs = 256;
3594
		rdev->config.cik.max_gs_threads = 16;
3595
		rdev->config.cik.max_hw_contexts = 8;
3596
 
3597
		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3598
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3599
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3600
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3601
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3602
		break;
3603
	}
3604
 
3605
	/* Initialize HDP */
3606
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3607
		WREG32((0x2c14 + j), 0x00000000);
3608
		WREG32((0x2c18 + j), 0x00000000);
3609
		WREG32((0x2c1c + j), 0x00000000);
3610
		WREG32((0x2c20 + j), 0x00000000);
3611
		WREG32((0x2c24 + j), 0x00000000);
3612
	}
3613
 
3614
	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3615
 
3616
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3617
 
3618
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3619
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3620
 
3621
	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3622
	rdev->config.cik.mem_max_burst_length_bytes = 256;
3623
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3624
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3625
	if (rdev->config.cik.mem_row_size_in_kb > 4)
3626
		rdev->config.cik.mem_row_size_in_kb = 4;
3627
	/* XXX use MC settings? */
3628
	rdev->config.cik.shader_engine_tile_size = 32;
3629
	rdev->config.cik.num_gpus = 1;
3630
	rdev->config.cik.multi_gpu_tile_size = 64;
3631
 
3632
	/* fix up row size */
3633
	gb_addr_config &= ~ROW_SIZE_MASK;
3634
	switch (rdev->config.cik.mem_row_size_in_kb) {
3635
	case 1:
3636
	default:
3637
		gb_addr_config |= ROW_SIZE(0);
3638
		break;
3639
	case 2:
3640
		gb_addr_config |= ROW_SIZE(1);
3641
		break;
3642
	case 4:
3643
		gb_addr_config |= ROW_SIZE(2);
3644
		break;
3645
	}
3646
 
3647
	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3648
	 * not have bank info, so create a custom tiling dword.
3649
	 * bits 3:0   num_pipes
3650
	 * bits 7:4   num_banks
3651
	 * bits 11:8  group_size
3652
	 * bits 15:12 row_size
3653
	 */
3654
	rdev->config.cik.tile_config = 0;
3655
	switch (rdev->config.cik.num_tile_pipes) {
3656
	case 1:
3657
		rdev->config.cik.tile_config |= (0 << 0);
3658
		break;
3659
	case 2:
3660
		rdev->config.cik.tile_config |= (1 << 0);
3661
		break;
3662
	case 4:
3663
		rdev->config.cik.tile_config |= (2 << 0);
3664
		break;
3665
	case 8:
3666
	default:
3667
		/* XXX what about 12? */
3668
		rdev->config.cik.tile_config |= (3 << 0);
3669
		break;
3670
	}
3671
	rdev->config.cik.tile_config |=
3672
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3673
	rdev->config.cik.tile_config |=
3674
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3675
	rdev->config.cik.tile_config |=
3676
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3677
 
3678
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3679
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3680
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3681
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3682
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3683
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3684
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3685
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3686
 
3687
	cik_tiling_mode_table_init(rdev);
3688
 
3689
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3690
		     rdev->config.cik.max_sh_per_se,
3691
		     rdev->config.cik.max_backends_per_se);
3692
 
3693
	rdev->config.cik.active_cus = 0;
3694
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3695
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3696
				rdev->config.cik.active_cus +=
3697
					hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3698
			}
3699
		}
3700
 
3701
	/* set HW defaults for 3D engine */
3702
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3703
 
5271 serge 3704
	mutex_lock(&rdev->grbm_idx_mutex);
3705
	/*
3706
	 * making sure that the following register writes will be broadcasted
3707
	 * to all the shaders
3708
	 */
3709
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5078 serge 3710
	WREG32(SX_DEBUG_1, 0x20);
3711
 
3712
	WREG32(TA_CNTL_AUX, 0x00010000);
3713
 
3714
	tmp = RREG32(SPI_CONFIG_CNTL);
3715
	tmp |= 0x03000000;
3716
	WREG32(SPI_CONFIG_CNTL, tmp);
3717
 
3718
	WREG32(SQ_CONFIG, 1);
3719
 
3720
	WREG32(DB_DEBUG, 0);
3721
 
3722
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3723
	tmp |= 0x00000400;
3724
	WREG32(DB_DEBUG2, tmp);
3725
 
3726
	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3727
	tmp |= 0x00020200;
3728
	WREG32(DB_DEBUG3, tmp);
3729
 
3730
	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3731
	tmp |= 0x00018208;
3732
	WREG32(CB_HW_CONTROL, tmp);
3733
 
3734
	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3735
 
3736
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3737
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3738
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3739
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3740
 
3741
	WREG32(VGT_NUM_INSTANCES, 1);
3742
 
3743
	WREG32(CP_PERFMON_CNTL, 0);
3744
 
3745
	WREG32(SQ_CONFIG, 0);
3746
 
3747
	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3748
					  FORCE_EOV_MAX_REZ_CNT(255)));
3749
 
3750
	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3751
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3752
 
3753
	WREG32(VGT_GS_VERTEX_REUSE, 16);
3754
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3755
 
3756
	tmp = RREG32(HDP_MISC_CNTL);
3757
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3758
	WREG32(HDP_MISC_CNTL, tmp);
3759
 
3760
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3761
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3762
 
3763
	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3764
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
5271 serge 3765
	mutex_unlock(&rdev->grbm_idx_mutex);
5078 serge 3766
 
3767
	udelay(50);
3768
}
3769
 
3770
/*
3771
 * GPU scratch registers helpers function.
3772
 */
3773
/**
3774
 * cik_scratch_init - setup driver info for CP scratch regs
3775
 *
3776
 * @rdev: radeon_device pointer
3777
 *
3778
 * Set up the number and offset of the CP scratch registers.
3779
 * NOTE: use of CP scratch registers is a legacy inferface and
3780
 * is not used by default on newer asics (r6xx+).  On newer asics,
3781
 * memory buffers are used for fences rather than scratch regs.
3782
 */
3783
static void cik_scratch_init(struct radeon_device *rdev)
3784
{
3785
	int i;
3786
 
3787
	rdev->scratch.num_reg = 7;
3788
	rdev->scratch.reg_base = SCRATCH_REG0;
3789
	for (i = 0; i < rdev->scratch.num_reg; i++) {
3790
		rdev->scratch.free[i] = true;
3791
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3792
	}
3793
}
3794
 
3795
/**
3796
 * cik_ring_test - basic gfx ring test
3797
 *
3798
 * @rdev: radeon_device pointer
3799
 * @ring: radeon_ring structure holding ring information
3800
 *
3801
 * Allocate a scratch register and write to it using the gfx ring (CIK).
3802
 * Provides a basic gfx ring test to verify that the ring is working.
3803
 * Used by cik_cp_gfx_resume();
3804
 * Returns 0 on success, error on failure.
3805
 */
3806
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3807
{
3808
	uint32_t scratch;
3809
	uint32_t tmp = 0;
3810
	unsigned i;
3811
	int r;
3812
 
3813
	r = radeon_scratch_get(rdev, &scratch);
3814
	if (r) {
3815
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3816
		return r;
3817
	}
3818
	WREG32(scratch, 0xCAFEDEAD);
3819
	r = radeon_ring_lock(rdev, ring, 3);
3820
	if (r) {
3821
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3822
		radeon_scratch_free(rdev, scratch);
3823
		return r;
3824
	}
3825
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3826
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3827
	radeon_ring_write(ring, 0xDEADBEEF);
3828
	radeon_ring_unlock_commit(rdev, ring, false);
3829
 
3830
	for (i = 0; i < rdev->usec_timeout; i++) {
3831
		tmp = RREG32(scratch);
3832
		if (tmp == 0xDEADBEEF)
3833
			break;
3834
		DRM_UDELAY(1);
3835
	}
3836
	if (i < rdev->usec_timeout) {
3837
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3838
	} else {
3839
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3840
			  ring->idx, scratch, tmp);
3841
		r = -EINVAL;
3842
	}
3843
	radeon_scratch_free(rdev, scratch);
3844
	return r;
3845
}
3846
 
3847
/**
3848
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3849
 *
3850
 * @rdev: radeon_device pointer
3851
 * @ridx: radeon ring index
3852
 *
3853
 * Emits an hdp flush on the cp.
3854
 */
3855
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3856
				       int ridx)
3857
{
3858
	struct radeon_ring *ring = &rdev->ring[ridx];
3859
	u32 ref_and_mask;
3860
 
3861
	switch (ring->idx) {
3862
	case CAYMAN_RING_TYPE_CP1_INDEX:
3863
	case CAYMAN_RING_TYPE_CP2_INDEX:
3864
	default:
3865
		switch (ring->me) {
3866
		case 0:
3867
			ref_and_mask = CP2 << ring->pipe;
3868
			break;
3869
		case 1:
3870
			ref_and_mask = CP6 << ring->pipe;
3871
			break;
3872
		default:
3873
			return;
3874
		}
3875
		break;
3876
	case RADEON_RING_TYPE_GFX_INDEX:
3877
		ref_and_mask = CP0;
3878
		break;
3879
	}
3880
 
3881
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3882
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3883
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3884
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3885
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3886
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3887
	radeon_ring_write(ring, ref_and_mask);
3888
	radeon_ring_write(ring, ref_and_mask);
3889
	radeon_ring_write(ring, 0x20); /* poll interval */
3890
}
3891
 
3892
/**
3893
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3894
 *
3895
 * @rdev: radeon_device pointer
3896
 * @fence: radeon fence object
3897
 *
3898
 * Emits a fence sequnce number on the gfx ring and flushes
3899
 * GPU caches.
3900
 */
3901
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3902
			     struct radeon_fence *fence)
3903
{
3904
	struct radeon_ring *ring = &rdev->ring[fence->ring];
3905
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3906
 
3907
	/* EVENT_WRITE_EOP - flush caches, send int */
3908
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3909
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3910
				 EOP_TC_ACTION_EN |
3911
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3912
				 EVENT_INDEX(5)));
3913
	radeon_ring_write(ring, addr & 0xfffffffc);
3914
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3915
	radeon_ring_write(ring, fence->seq);
3916
	radeon_ring_write(ring, 0);
3917
}
3918
 
3919
/**
3920
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3921
 *
3922
 * @rdev: radeon_device pointer
3923
 * @fence: radeon fence object
3924
 *
3925
 * Emits a fence sequnce number on the compute ring and flushes
3926
 * GPU caches.
3927
 */
3928
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3929
				 struct radeon_fence *fence)
3930
{
3931
	struct radeon_ring *ring = &rdev->ring[fence->ring];
3932
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3933
 
3934
	/* RELEASE_MEM - flush caches, send int */
3935
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3936
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3937
				 EOP_TC_ACTION_EN |
3938
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3939
				 EVENT_INDEX(5)));
3940
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3941
	radeon_ring_write(ring, addr & 0xfffffffc);
3942
	radeon_ring_write(ring, upper_32_bits(addr));
3943
	radeon_ring_write(ring, fence->seq);
3944
	radeon_ring_write(ring, 0);
3945
}
3946
 
3947
/**
3948
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3949
 *
3950
 * @rdev: radeon_device pointer
3951
 * @ring: radeon ring buffer object
3952
 * @semaphore: radeon semaphore object
3953
 * @emit_wait: Is this a sempahore wait?
3954
 *
3955
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3956
 * from running ahead of semaphore waits.
3957
 */
3958
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3959
			     struct radeon_ring *ring,
3960
			     struct radeon_semaphore *semaphore,
3961
			     bool emit_wait)
3962
{
3963
	uint64_t addr = semaphore->gpu_addr;
3964
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3965
 
3966
	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3967
	radeon_ring_write(ring, lower_32_bits(addr));
3968
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3969
 
3970
	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3971
		/* Prevent the PFP from running ahead of the semaphore wait */
3972
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3973
		radeon_ring_write(ring, 0x0);
3974
	}
3975
 
3976
	return true;
3977
}
3978
 
3979
/**
3980
 * cik_copy_cpdma - copy pages using the CP DMA engine
3981
 *
3982
 * @rdev: radeon_device pointer
3983
 * @src_offset: src GPU address
3984
 * @dst_offset: dst GPU address
3985
 * @num_gpu_pages: number of GPU pages to xfer
5271 serge 3986
 * @resv: reservation object to sync to
5078 serge 3987
 *
3988
 * Copy GPU paging using the CP DMA engine (CIK+).
3989
 * Used by the radeon ttm implementation to move pages if
3990
 * registered as the asic copy callback.
3991
 */
5271 serge 3992
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
5078 serge 3993
		   uint64_t src_offset, uint64_t dst_offset,
3994
		   unsigned num_gpu_pages,
5271 serge 3995
				    struct reservation_object *resv)
5078 serge 3996
{
5271 serge 3997
	struct radeon_fence *fence;
3998
	struct radeon_sync sync;
5078 serge 3999
	int ring_index = rdev->asic->copy.blit_ring_index;
4000
	struct radeon_ring *ring = &rdev->ring[ring_index];
4001
	u32 size_in_bytes, cur_size_in_bytes, control;
4002
	int i, num_loops;
4003
	int r = 0;
4004
 
5271 serge 4005
	radeon_sync_create(&sync);
5078 serge 4006
 
4007
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4008
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4009
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4010
	if (r) {
4011
		DRM_ERROR("radeon: moving bo (%d).\n", r);
5271 serge 4012
		radeon_sync_free(rdev, &sync, NULL);
4013
		return ERR_PTR(r);
5078 serge 4014
	}
4015
 
5271 serge 4016
	radeon_sync_resv(rdev, &sync, resv, false);
4017
	radeon_sync_rings(rdev, &sync, ring->idx);
5078 serge 4018
 
4019
	for (i = 0; i < num_loops; i++) {
4020
		cur_size_in_bytes = size_in_bytes;
4021
		if (cur_size_in_bytes > 0x1fffff)
4022
			cur_size_in_bytes = 0x1fffff;
4023
		size_in_bytes -= cur_size_in_bytes;
4024
		control = 0;
4025
		if (size_in_bytes == 0)
4026
			control |= PACKET3_DMA_DATA_CP_SYNC;
4027
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4028
		radeon_ring_write(ring, control);
4029
		radeon_ring_write(ring, lower_32_bits(src_offset));
4030
		radeon_ring_write(ring, upper_32_bits(src_offset));
4031
		radeon_ring_write(ring, lower_32_bits(dst_offset));
4032
		radeon_ring_write(ring, upper_32_bits(dst_offset));
4033
		radeon_ring_write(ring, cur_size_in_bytes);
4034
		src_offset += cur_size_in_bytes;
4035
		dst_offset += cur_size_in_bytes;
4036
	}
4037
 
5271 serge 4038
	r = radeon_fence_emit(rdev, &fence, ring->idx);
5078 serge 4039
	if (r) {
4040
		radeon_ring_unlock_undo(rdev, ring);
5271 serge 4041
		radeon_sync_free(rdev, &sync, NULL);
4042
		return ERR_PTR(r);
5078 serge 4043
	}
4044
 
4045
	radeon_ring_unlock_commit(rdev, ring, false);
5271 serge 4046
	radeon_sync_free(rdev, &sync, fence);
5078 serge 4047
 
5271 serge 4048
	return fence;
5078 serge 4049
}
4050
 
4051
/*
4052
 * IB stuff
4053
 */
4054
/**
4055
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4056
 *
4057
 * @rdev: radeon_device pointer
4058
 * @ib: radeon indirect buffer object
4059
 *
4060
 * Emits an DE (drawing engine) or CE (constant engine) IB
4061
 * on the gfx ring.  IBs are usually generated by userspace
4062
 * acceleration drivers and submitted to the kernel for
4063
 * sheduling on the ring.  This function schedules the IB
4064
 * on the gfx ring for execution by the GPU.
4065
 */
4066
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4067
{
4068
	struct radeon_ring *ring = &rdev->ring[ib->ring];
5271 serge 4069
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
5078 serge 4070
	u32 header, control = INDIRECT_BUFFER_VALID;
4071
 
4072
	if (ib->is_const_ib) {
4073
		/* set switch buffer packet before const IB */
4074
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4075
		radeon_ring_write(ring, 0);
4076
 
4077
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4078
	} else {
4079
		u32 next_rptr;
4080
		if (ring->rptr_save_reg) {
4081
			next_rptr = ring->wptr + 3 + 4;
4082
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4083
			radeon_ring_write(ring, ((ring->rptr_save_reg -
4084
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4085
			radeon_ring_write(ring, next_rptr);
4086
		} else if (rdev->wb.enabled) {
4087
			next_rptr = ring->wptr + 5 + 4;
4088
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4089
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4090
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4091
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4092
			radeon_ring_write(ring, next_rptr);
4093
		}
4094
 
4095
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4096
	}
4097
 
5271 serge 4098
	control |= ib->length_dw | (vm_id << 24);
5078 serge 4099
 
4100
	radeon_ring_write(ring, header);
4101
	radeon_ring_write(ring,
4102
#ifdef __BIG_ENDIAN
4103
			  (2 << 0) |
4104
#endif
4105
			  (ib->gpu_addr & 0xFFFFFFFC));
4106
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4107
	radeon_ring_write(ring, control);
4108
}
4109
 
4110
/**
4111
 * cik_ib_test - basic gfx ring IB test
4112
 *
4113
 * @rdev: radeon_device pointer
4114
 * @ring: radeon_ring structure holding ring information
4115
 *
4116
 * Allocate an IB and execute it on the gfx ring (CIK).
4117
 * Provides a basic gfx ring test to verify that IBs are working.
4118
 * Returns 0 on success, error on failure.
4119
 */
4120
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4121
{
4122
	struct radeon_ib ib;
4123
	uint32_t scratch;
4124
	uint32_t tmp = 0;
4125
	unsigned i;
4126
	int r;
4127
 
4128
	r = radeon_scratch_get(rdev, &scratch);
4129
	if (r) {
4130
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4131
		return r;
4132
	}
4133
	WREG32(scratch, 0xCAFEDEAD);
4134
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4135
	if (r) {
4136
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4137
		radeon_scratch_free(rdev, scratch);
4138
		return r;
4139
	}
4140
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4141
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4142
	ib.ptr[2] = 0xDEADBEEF;
4143
	ib.length_dw = 3;
4144
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4145
	if (r) {
4146
		radeon_scratch_free(rdev, scratch);
4147
		radeon_ib_free(rdev, &ib);
4148
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4149
		return r;
4150
	}
4151
	r = radeon_fence_wait(ib.fence, false);
4152
	if (r) {
4153
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4154
		radeon_scratch_free(rdev, scratch);
4155
		radeon_ib_free(rdev, &ib);
4156
		return r;
4157
	}
4158
	for (i = 0; i < rdev->usec_timeout; i++) {
4159
		tmp = RREG32(scratch);
4160
		if (tmp == 0xDEADBEEF)
4161
			break;
4162
		DRM_UDELAY(1);
4163
	}
4164
	if (i < rdev->usec_timeout) {
4165
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4166
	} else {
4167
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4168
			  scratch, tmp);
4169
		r = -EINVAL;
4170
	}
4171
	radeon_scratch_free(rdev, scratch);
4172
	radeon_ib_free(rdev, &ib);
4173
	return r;
4174
}
4175
 
4176
/*
4177
 * CP.
4178
 * On CIK, gfx and compute now have independant command processors.
4179
 *
4180
 * GFX
4181
 * Gfx consists of a single ring and can process both gfx jobs and
4182
 * compute jobs.  The gfx CP consists of three microengines (ME):
4183
 * PFP - Pre-Fetch Parser
4184
 * ME - Micro Engine
4185
 * CE - Constant Engine
4186
 * The PFP and ME make up what is considered the Drawing Engine (DE).
4187
 * The CE is an asynchronous engine used for updating buffer desciptors
4188
 * used by the DE so that they can be loaded into cache in parallel
4189
 * while the DE is processing state update packets.
4190
 *
4191
 * Compute
4192
 * The compute CP consists of two microengines (ME):
4193
 * MEC1 - Compute MicroEngine 1
4194
 * MEC2 - Compute MicroEngine 2
4195
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4196
 * The queues are exposed to userspace and are programmed directly
4197
 * by the compute runtime.
4198
 */
4199
/**
4200
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4201
 *
4202
 * @rdev: radeon_device pointer
4203
 * @enable: enable or disable the MEs
4204
 *
4205
 * Halts or unhalts the gfx MEs.
4206
 */
4207
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4208
{
4209
	if (enable)
4210
		WREG32(CP_ME_CNTL, 0);
4211
	else {
4212
		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4213
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4214
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4215
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4216
	}
4217
	udelay(50);
4218
}
4219
 
4220
/**
4221
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4222
 *
4223
 * @rdev: radeon_device pointer
4224
 *
4225
 * Loads the gfx PFP, ME, and CE ucode.
4226
 * Returns 0 for success, -EINVAL if the ucode is not available.
4227
 */
4228
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4229
{
4230
	int i;
4231
 
4232
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4233
		return -EINVAL;
4234
 
4235
	cik_cp_gfx_enable(rdev, false);
4236
 
4237
	if (rdev->new_fw) {
4238
		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4239
			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4240
		const struct gfx_firmware_header_v1_0 *ce_hdr =
4241
			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4242
		const struct gfx_firmware_header_v1_0 *me_hdr =
4243
			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4244
		const __le32 *fw_data;
4245
		u32 fw_size;
4246
 
4247
		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4248
		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4249
		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4250
 
4251
		/* PFP */
4252
		fw_data = (const __le32 *)
4253
			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4254
		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4255
		WREG32(CP_PFP_UCODE_ADDR, 0);
4256
		for (i = 0; i < fw_size; i++)
4257
			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
5271 serge 4258
		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
5078 serge 4259
 
4260
		/* CE */
4261
		fw_data = (const __le32 *)
4262
			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4263
		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4264
		WREG32(CP_CE_UCODE_ADDR, 0);
4265
		for (i = 0; i < fw_size; i++)
4266
			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
5271 serge 4267
		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
5078 serge 4268
 
4269
		/* ME */
4270
		fw_data = (const __be32 *)
4271
			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4272
		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4273
		WREG32(CP_ME_RAM_WADDR, 0);
4274
		for (i = 0; i < fw_size; i++)
4275
			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
5271 serge 4276
		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4277
		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
5078 serge 4278
	} else {
4279
		const __be32 *fw_data;
4280
 
4281
	/* PFP */
4282
	fw_data = (const __be32 *)rdev->pfp_fw->data;
4283
	WREG32(CP_PFP_UCODE_ADDR, 0);
4284
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4285
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4286
	WREG32(CP_PFP_UCODE_ADDR, 0);
4287
 
4288
	/* CE */
4289
	fw_data = (const __be32 *)rdev->ce_fw->data;
4290
	WREG32(CP_CE_UCODE_ADDR, 0);
4291
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4292
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4293
	WREG32(CP_CE_UCODE_ADDR, 0);
4294
 
4295
	/* ME */
4296
	fw_data = (const __be32 *)rdev->me_fw->data;
4297
	WREG32(CP_ME_RAM_WADDR, 0);
4298
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4299
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4300
	WREG32(CP_ME_RAM_WADDR, 0);
4301
	}
4302
 
4303
	return 0;
4304
}
4305
 
4306
/**
4307
 * cik_cp_gfx_start - start the gfx ring
4308
 *
4309
 * @rdev: radeon_device pointer
4310
 *
4311
 * Enables the ring and loads the clear state context and other
4312
 * packets required to init the ring.
4313
 * Returns 0 for success, error for failure.
4314
 */
4315
static int cik_cp_gfx_start(struct radeon_device *rdev)
4316
{
4317
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4318
	int r, i;
4319
 
4320
	/* init the CP */
4321
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4322
	WREG32(CP_ENDIAN_SWAP, 0);
4323
	WREG32(CP_DEVICE_ID, 1);
4324
 
4325
	cik_cp_gfx_enable(rdev, true);
4326
 
4327
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4328
	if (r) {
4329
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4330
		return r;
4331
	}
4332
 
4333
	/* init the CE partitions.  CE only used for gfx on CIK */
4334
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
5271 serge 4336
	radeon_ring_write(ring, 0x8000);
4337
	radeon_ring_write(ring, 0x8000);
5078 serge 4338
 
4339
	/* setup clear context state */
4340
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4341
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4342
 
4343
	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4344
	radeon_ring_write(ring, 0x80000000);
4345
	radeon_ring_write(ring, 0x80000000);
4346
 
4347
	for (i = 0; i < cik_default_size; i++)
4348
		radeon_ring_write(ring, cik_default_state[i]);
4349
 
4350
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4351
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4352
 
4353
	/* set clear context state */
4354
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4355
	radeon_ring_write(ring, 0);
4356
 
4357
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4358
	radeon_ring_write(ring, 0x00000316);
4359
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4360
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4361
 
4362
	radeon_ring_unlock_commit(rdev, ring, false);
4363
 
4364
	return 0;
4365
}
4366
 
4367
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.  Halts the gfx CP first so the ring is idle
 * before its backing memory is released.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4380
 
4381
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers.
	 * RB_RPTR_WR_ENA is set temporarily so the zeroed rptr takes effect.
	 */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* restore CNTL without RB_RPTR_WR_ENA */
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is a 256-byte aligned GPU address */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is now usable for buffer moves; expose full VRAM */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4456
 
4457
/**
 * cik_gfx_get_rptr - get the current read pointer of the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Reads the rptr from the writeback page when writeback is enabled,
 * otherwise falls back to the CP_RB0_RPTR MMIO register.
 */
u32 cik_gfx_get_rptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled)
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	else
		rptr = RREG32(CP_RB0_RPTR);

	return rptr;
}
4469
 
4470
/**
 * cik_gfx_get_wptr - get the current write pointer of the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Always reads the wptr from the CP_RB0_WPTR MMIO register.
 */
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	u32 wptr;

	wptr = RREG32(CP_RB0_WPTR);

	return wptr;
}
4479
 
4480
/**
 * cik_gfx_set_wptr - commit the gfx ring write pointer to the hardware
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Writes ring->wptr to CP_RB0_WPTR; the read-back flushes the write
 * so the CP sees the new pointer immediately.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);
}
4486
 
4487
/**
 * cik_compute_get_rptr - get the current read pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Uses the writeback page when enabled; otherwise selects the queue's
 * me/pipe/queue via SRBM (under srbm_mutex) and reads CP_HQD_PQ_RPTR,
 * restoring the SRBM selection to 0 afterwards.
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4504
 
4505
/**
 * cik_compute_get_wptr - get the current write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Uses the writeback page when enabled; otherwise selects the queue via
 * SRBM (under srbm_mutex) and reads CP_HQD_PQ_WPTR.
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4523
 
4524
/**
 * cik_compute_set_wptr - commit a compute ring write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Mirrors the wptr into the writeback page, then rings the queue's
 * doorbell to notify the MEC.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4531
 
4532
/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.  When halting, both compute
 * rings are also marked not ready so no further work is submitted.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* give the ME state change time to take effect */
	udelay(50);
}
4551
 
4552
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.  The MEs are halted first;
 * new-format firmware carries a header describing offset/size,
 * while legacy firmware is a fixed-size big-endian blob.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 - only Kaveri has a second MEC */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 - legacy path reuses the MEC1 image (mec_fw,
			 * not mec2_fw); presumably the old firmware ships a
			 * single blob for both MEs - NOTE(review): verify. */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4622
 
4623
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues (unhalts the MEC MEs).
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4637
 
4638
/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info: halts the MEC MEs, then unpins and frees the MQD
 * buffer object of each of the two compute rings.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}
4671
 
4672
/* cik_mec_fini - unpin and free the MEC HPD EOP buffer object, if any */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4687
 
4688
/* per-pipe HPD EOP buffer size in bytes */
#define MEC_HPD_SIZE 2048

/*
 * cik_mec_init - allocate, pin and clear the MEC HPD EOP buffer
 *
 * Sets up the driver-side MEC bookkeeping (mec/pipe/queue counts) and
 * creates a GTT buffer object for the hardware pipe descriptors.
 * Returns 0 on success, error on failure (cleans up via cik_mec_fini).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4744
 
4745
/*
 * CPU-side shadow of the per-queue HQD register block, embedded in the
 * MQD (see struct bonaire_mqd).  Field order mirrors the hardware
 * CP_HQD_*/CP_MQD_* register layout; do not reorder.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4783
 
4784
/*
 * Memory Queue Descriptor for CIK compute queues.  This structure lives
 * in a GTT buffer object and is read by the MEC firmware; its layout is
 * fixed by the hardware/firmware interface - do not reorder fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4811
 
4812
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.  For each of the two driver-owned compute rings
 * this allocates/pins an MQD, disables the queue if it is still
 * active, then programs the HQD registers (mirrored in the MQD)
 * and activates the queue.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all CUs for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* select this queue's me/pipe/queue for the HQD writes below */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for the HQD to drain */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5052
 
5053
/* cik_cp_enable - enable/disable both the gfx and compute command processors */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5058
 
5059
/*
 * cik_cp_load_microcode - load gfx (PFP/ME/CE) then compute (MEC) ucode.
 * Returns 0 on success or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}
5072
 
5073
/* cik_cp_fini - tear down both the gfx and compute command processors */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5078
 
5079
/*
 * cik_cp_resume - bring up all command processors.
 *
 * Masks the GUI idle interrupt while the CPs are reprogrammed, loads
 * microcode, resumes the gfx ring then the compute queues, and finally
 * re-enables the interrupt.  Returns 0 on success, error on failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
5100
 
5101
/*
 * cik_print_gpu_status_regs - dump the main GPU status registers to the
 * kernel log.  Used when diagnosing hangs before/after a soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5140
 
5141
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
5221
 
5222
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.  The sequence is:
 * dump status, disable CG/PG and the RLC, halt the CP/MEC/SDMA
 * engines, stop the MC, pulse the GRBM/SRBM soft-reset bits, then
 * restore the MC.  Statement order matters for the hardware.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* on IGPs the MC is never soft reset */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5352
 
5353
/* GMCON register state saved across a Kaveri GPU reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5358
 
5359
/* kv_save_regs_for_reset - stash GMCON registers before a pci config reset
 *
 * Saves GMCON_RENG_EXECUTE/MISC/MISC3 into @save for later restore by
 * kv_restore_regs_for_reset(), then clears the reng execute-on-power-up
 * and execute-on-register-update triggers plus stutter enable so the MC
 * engine stays quiescent across the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5370
 
5371
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5372
				      struct kv_reset_save_regs *save)
5373
{
5374
	int i;
5375
 
5376
	WREG32(GMCON_PGFSM_WRITE, 0);
5377
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5378
 
5379
	for (i = 0; i < 5; i++)
5380
		WREG32(GMCON_PGFSM_WRITE, 0);
5381
 
5382
	WREG32(GMCON_PGFSM_WRITE, 0);
5383
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5384
 
5385
	for (i = 0; i < 5; i++)
5386
		WREG32(GMCON_PGFSM_WRITE, 0);
5387
 
5388
	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5389
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5390
 
5391
	for (i = 0; i < 5; i++)
5392
		WREG32(GMCON_PGFSM_WRITE, 0);
5393
 
5394
	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5395
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5396
 
5397
	for (i = 0; i < 5; i++)
5398
		WREG32(GMCON_PGFSM_WRITE, 0);
5399
 
5400
	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5401
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5402
 
5403
	for (i = 0; i < 5; i++)
5404
		WREG32(GMCON_PGFSM_WRITE, 0);
5405
 
5406
	WREG32(GMCON_PGFSM_WRITE, 0);
5407
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5408
 
5409
	for (i = 0; i < 5; i++)
5410
		WREG32(GMCON_PGFSM_WRITE, 0);
5411
 
5412
	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5413
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5414
 
5415
	for (i = 0; i < 5; i++)
5416
		WREG32(GMCON_PGFSM_WRITE, 0);
5417
 
5418
	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5419
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5420
 
5421
	for (i = 0; i < 5; i++)
5422
		WREG32(GMCON_PGFSM_WRITE, 0);
5423
 
5424
	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5425
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5426
 
5427
	for (i = 0; i < 5; i++)
5428
		WREG32(GMCON_PGFSM_WRITE, 0);
5429
 
5430
	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5431
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5432
 
5433
	for (i = 0; i < 5; i++)
5434
		WREG32(GMCON_PGFSM_WRITE, 0);
5435
 
5436
	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5437
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5438
 
5439
	WREG32(GMCON_MISC3, save->gmcon_misc3);
5440
	WREG32(GMCON_MISC, save->gmcon_misc);
5441
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5442
}
5443
 
5444
/**
 * cik_gpu_pci_config_reset - reset the asic via a pci config reset
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce GFX/compute (CP and MEC), both SDMA engines and the RLC,
 * stop memory access, then reset the whole asic through pci config
 * space.  On IGPs the GMCON state is saved before and restored after
 * the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset;
	 * CONFIG_MEMSIZE reads back all-ones while the asic is unresponsive */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5506
 
5507
/**
5508
 * cik_asic_reset - soft reset GPU
5509
 *
5510
 * @rdev: radeon_device pointer
5511
 *
5512
 * Look up which blocks are hung and attempt
5513
 * to reset them.
5514
 * Returns 0 for success.
5515
 */
5516
int cik_asic_reset(struct radeon_device *rdev)
5517
{
5518
	u32 reset_mask;
5519
 
5520
	reset_mask = cik_gpu_check_soft_reset(rdev);
5521
 
5522
	if (reset_mask)
5523
		r600_set_bios_scratch_engine_hung(rdev, true);
5524
 
5525
	/* try soft reset */
5526
	cik_gpu_soft_reset(rdev, reset_mask);
5527
 
5528
	reset_mask = cik_gpu_check_soft_reset(rdev);
5529
 
5530
	/* try pci config reset */
5531
	if (reset_mask && radeon_hard_reset)
5532
		cik_gpu_pci_config_reset(rdev);
5533
 
5534
	reset_mask = cik_gpu_check_soft_reset(rdev);
5535
 
5536
	if (!reset_mask)
5537
		r600_set_bios_scratch_engine_hung(rdev, false);
5538
 
5539
	return 0;
5540
}
5541
 
5542
/**
5543
 * cik_gfx_is_lockup - check if the 3D engine is locked up
5544
 *
5545
 * @rdev: radeon_device pointer
5546
 * @ring: radeon_ring structure holding ring information
5547
 *
5548
 * Check if the 3D engine is locked up (CIK).
5549
 * Returns true if the engine is locked, false if not.
5550
 */
5551
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5552
{
5553
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5554
 
5555
	if (!(reset_mask & (RADEON_RESET_GFX |
5556
			    RADEON_RESET_COMPUTE |
5557
			    RADEON_RESET_CP))) {
5558
		radeon_ring_lockup_update(rdev, ring);
5559
		return false;
5560
	}
5561
	return radeon_ring_test_lockup(rdev, ring);
5562
}
5563
 
5564
/* MC */
5565
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).  MC access is stopped while the
 * aperture registers are reprogrammed.
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs end (hi 16 bits) and start (lo 16 bits) in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* TOP == BOT presumably leaves the AGP aperture empty/disabled - verify */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5620
 
5621
/**
5622
 * cik_mc_init - initialize the memory controller driver params
5623
 *
5624
 * @rdev: radeon_device pointer
5625
 *
5626
 * Look up the amount of vram, vram width, and decide how to place
5627
 * vram and gart within the GPU's physical address space (CIK).
5628
 * Returns 0 for success.
5629
 */
5630
static int cik_mc_init(struct radeon_device *rdev)
5631
{
5632
	u32 tmp;
5633
	int chansize, numchan;
5634
 
5635
	/* Get VRAM informations */
5636
	rdev->mc.vram_is_ddr = true;
5637
	tmp = RREG32(MC_ARB_RAMCFG);
5638
	if (tmp & CHANSIZE_MASK) {
5639
		chansize = 64;
5640
	} else {
5641
		chansize = 32;
5642
	}
5643
	tmp = RREG32(MC_SHARED_CHMAP);
5644
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5645
	case 0:
5646
	default:
5647
		numchan = 1;
5648
		break;
5649
	case 1:
5650
		numchan = 2;
5651
		break;
5652
	case 2:
5653
		numchan = 4;
5654
		break;
5655
	case 3:
5656
		numchan = 8;
5657
		break;
5658
	case 4:
5659
		numchan = 3;
5660
		break;
5661
	case 5:
5662
		numchan = 6;
5663
		break;
5664
	case 6:
5665
		numchan = 10;
5666
		break;
5667
	case 7:
5668
		numchan = 12;
5669
		break;
5670
	case 8:
5671
		numchan = 16;
5672
		break;
5673
	}
5674
	rdev->mc.vram_width = numchan * chansize;
5675
	/* Could aper size report 0 ? */
5676
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5677
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5678
	/* size in MB on si */
5679
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5680
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5681
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5682
	si_vram_gtt_location(rdev, &rdev->mc);
5683
	radeon_update_bandwidth_info(rdev);
5684
 
5685
	return 0;
5686
}
5687
 
5688
/*
5689
 * GART
5690
 * VMID 0 is the physical GPU addresses as used by the kernel.
5691
 * VMIDs 1-15 are used for userspace clients and are handled
5692
 * by the radeon vm/hsa code.
5693
 */
5694
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).  The HDP cache is
 * flushed first so host writes are visible before the invalidate.
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only invalidate context 0 here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5709
 
5710
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 (kernel GART mapping) */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* page-table base registers are split across two banks of 8 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5830
 
5831
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).  The per-VMID page table
 * base addresses are saved first so cik_pcie_gart_enable() can
 * restore them on the next enable.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save contexts 1-15 page table bases for later restore */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5869
 
5870
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): disables the page
 * tables, frees the GART table VRAM and the gart structures.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5883
 
5884
/* vm parser */
5885
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5897
 
5898
/*
5899
 * vm
5900
 * VMID 0 is the physical GPU addresses as used by the kernel.
5901
 * VMIDs 1-15 are used for userspace clients and are handled
5902
 * by the radeon vm/hsa code.
5903
 */
5904
/**
5905
 * cik_vm_init - cik vm init callback
5906
 *
5907
 * @rdev: radeon_device pointer
5908
 *
5909
 * Inits cik specific vm parameters (number of VMs, base of vram for
5910
 * VMIDs 1-15) (CIK).
5911
 * Returns 0 for success.
5912
 */
5913
int cik_vm_init(struct radeon_device *rdev)
5914
{
5271 serge 5915
	/*
5916
	 * number of VMs
5917
	 * VMID 0 is reserved for System
5918
	 * radeon graphics/compute will use VMIDs 1-7
5919
	 * amdkfd will use VMIDs 8-15
5920
	 */
5921
	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5078 serge 5922
	/* base offset of vram pages */
5923
	if (rdev->flags & RADEON_IS_IGP) {
5924
		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5925
		tmp <<= 22;
5926
		rdev->vm_manager.vram_base_offset = tmp;
5927
	} else
5928
		rdev->vm_manager.vram_base_offset = 0;
5929
 
5930
	return 0;
5931
}
5932
 
5933
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do here; cik_vm_init() allocates no resources.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5943
 
5944
/**
5945
 * cik_vm_decode_fault - print human readable fault info
5946
 *
5947
 * @rdev: radeon_device pointer
5948
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5949
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5950
 *
5951
 * Print human readable fault information (CIK).
5952
 */
5953
static void cik_vm_decode_fault(struct radeon_device *rdev,
5954
				u32 status, u32 addr, u32 mc_client)
5955
{
5956
	u32 mc_id;
5957
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5958
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5959
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5960
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5961
 
5962
	if (rdev->family == CHIP_HAWAII)
5963
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5964
	else
5965
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5966
 
5967
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5968
	       protections, vmid, addr,
5969
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5970
	       block, mc_client, mc_id);
5971
}
5972
 
5973
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID whose page table base is updated and invalidated
 * @pd_addr: physical address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).  The packet order below is significant;
 * do not reorder the writes.
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP engine to write through */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page table base for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
6043
 
6044
/*
6045
 * RLC
6046
 * The RLC is a multi-purpose microengine that handles a
6047
 * variety of functions, the most important of which is
6048
 * the interrupt controller.
6049
 */
6050
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6051
					  bool enable)
6052
{
6053
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6054
 
6055
	if (enable)
6056
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6057
	else
6058
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6059
	WREG32(CP_INT_CNTL_RING0, tmp);
6060
}
6061
 
6062
/* Toggle the RLC load-balance enable bit (load balancing per wavefront). */
static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 cntl = RREG32(RLC_LB_CNTL);

	if (enable)
		cntl |= LOAD_BALANCE_ENABLE;
	else
		cntl &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, cntl);
}
6073
 
6074
/* cik_wait_for_rlc_serdes - wait for the RLC serdes units to go idle
 *
 * Polls the CU master busy register for every SE/SH combination
 * (under the grbm_idx_mutex since cik_select_se_sh() changes global
 * GRBM indexing state), then polls the non-CU master busy bits.
 * Each poll gives up after rdev->usec_timeout microseconds.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast indexing before dropping the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6100
 
6101
/* Write RLC_CNTL only if it differs from the requested value. */
static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	if (RREG32(RLC_CNTL) != rlc)
		WREG32(RLC_CNTL, rlc);
}
6109
 
6110
/* cik_halt_rlc - halt the RLC if it is running
 *
 * Clears RLC_ENABLE, waits for the RLC GPM to go idle (bounded by
 * rdev->usec_timeout) and for the serdes units to settle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6133
 
6134
/* cik_enter_rlc_safe_mode - request RLC safe mode
 *
 * Sends MSG_ENTER_RLC_SAFE_MODE through RLC_GPR_REG2, then waits for
 * GFX power/clock status to report on and for the RLC to clear the
 * REQ bit (ack).  Both waits are bounded by rdev->usec_timeout.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* REQ clearing signals the RLC has accepted the message */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6154
 
6155
void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6156
{
6157
	u32 tmp;
6158
 
6159
	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6160
	WREG32(RLC_GPR_REG2, tmp);
6161
}
6162
 
6163
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK): clear RLC_CNTL, mask the GUI
 * idle interrupts and wait for the serdes units to drain.
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}
6178
 
6179
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK), re-enable the GUI idle
 * interrupts and give the engine a short settle delay.
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
6194
 
6195
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode (either the new
 * header-described firmware format or the legacy fixed-size blobs),
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* broadcast the load-balance setup to all SE/SH */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new firmware: size and offset come from the header */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: fixed per-family sizes, big-endian words */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6286
 
6287
/* cik_enable_cgcg - toggle coarse-grained clock gating (CGCG/CGLS)
 *
 * When enabling (and the asic supports it), the RLC is halted while
 * the serdes override is broadcast to all SE/SH, then restarted.
 * The CGCG_EN/CGLS_EN bits in RLC_CGCG_CGLS_CTRL are only rewritten
 * if they actually change.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC_CNTL value saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* four dummy reads - presumably to flush/settle before
		 * disabling gating; TODO confirm against the hw docs */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6324
 
6325
/* cik_enable_mgcg - toggle medium-grained clock gating (MGCG)
 *
 * Enable path: optionally turn on CP memory light sleep, clear the
 * MGCG override, broadcast the serdes override to all SE/SH while the
 * RLC is halted, and optionally configure CGTS static/LS modes.
 * Disable path: set the overrides back, turn off RLC/CP memory light
 * sleep, and broadcast the disable override the same way.
 * Registers are only rewritten when their value actually changes.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the RLC_CNTL value saved by cik_halt_rlc() */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force RLC memory light sleep off */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* force CP memory light sleep off */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6408
 
6409
/* Memory-controller / ATC / VM clock-gating control registers that are
 * toggled as a group by cik_enable_mc_ls() and cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6421
 
6422
static void cik_enable_mc_ls(struct radeon_device *rdev,
6423
			     bool enable)
6424
{
6425
	int i;
6426
	u32 orig, data;
6427
 
6428
	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6429
		orig = data = RREG32(mc_cg_registers[i]);
6430
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6431
			data |= MC_LS_ENABLE;
6432
		else
6433
			data &= ~MC_LS_ENABLE;
6434
		if (data != orig)
6435
			WREG32(mc_cg_registers[i], data);
6436
	}
6437
}
6438
 
6439
static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6440
			       bool enable)
6441
{
6442
	int i;
6443
	u32 orig, data;
6444
 
6445
	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6446
		orig = data = RREG32(mc_cg_registers[i]);
6447
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6448
			data |= MC_CG_ENABLE;
6449
		else
6450
			data &= ~MC_CG_ENABLE;
6451
		if (data != orig)
6452
			WREG32(mc_cg_registers[i], data);
6453
	}
6454
}
6455
 
6456
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6457
				 bool enable)
6458
{
6459
	u32 orig, data;
6460
 
6461
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6462
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6463
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6464
	} else {
6465
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6466
		data |= 0xff000000;
6467
		if (data != orig)
6468
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6469
 
6470
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6471
		data |= 0xff000000;
6472
		if (data != orig)
6473
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6474
	}
6475
}
6476
 
6477
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6478
				 bool enable)
6479
{
6480
	u32 orig, data;
6481
 
6482
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6483
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6484
		data |= 0x100;
6485
		if (orig != data)
6486
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6487
 
6488
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6489
		data |= 0x100;
6490
		if (orig != data)
6491
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6492
	} else {
6493
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6494
		data &= ~0x100;
6495
		if (orig != data)
6496
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6497
 
6498
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6499
		data &= ~0x100;
6500
		if (orig != data)
6501
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6502
	}
6503
}
6504
 
6505
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6506
				bool enable)
6507
{
6508
	u32 orig, data;
6509
 
6510
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6511
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6512
		data = 0xfff;
6513
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6514
 
6515
		orig = data = RREG32(UVD_CGC_CTRL);
6516
		data |= DCM;
6517
		if (orig != data)
6518
			WREG32(UVD_CGC_CTRL, data);
6519
	} else {
6520
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6521
		data &= ~0xfff;
6522
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6523
 
6524
		orig = data = RREG32(UVD_CGC_CTRL);
6525
		data &= ~DCM;
6526
		if (orig != data)
6527
			WREG32(UVD_CGC_CTRL, data);
6528
	}
6529
}
6530
 
6531
static void cik_enable_bif_mgls(struct radeon_device *rdev,
6532
			       bool enable)
6533
{
6534
	u32 orig, data;
6535
 
6536
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6537
 
6538
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6539
		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6540
			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6541
	else
6542
		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6543
			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6544
 
6545
	if (orig != data)
6546
		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6547
}
6548
 
6549
static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6550
				bool enable)
6551
{
6552
	u32 orig, data;
6553
 
6554
	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6555
 
6556
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6557
		data &= ~CLOCK_GATING_DIS;
6558
	else
6559
		data |= CLOCK_GATING_DIS;
6560
 
6561
	if (orig != data)
6562
		WREG32(HDP_HOST_PATH_CNTL, data);
6563
}
6564
 
6565
static void cik_enable_hdp_ls(struct radeon_device *rdev,
6566
			      bool enable)
6567
{
6568
	u32 orig, data;
6569
 
6570
	orig = data = RREG32(HDP_MEM_POWER_LS);
6571
 
6572
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6573
		data |= HDP_LS_ENABLE;
6574
	else
6575
		data &= ~HDP_LS_ENABLE;
6576
 
6577
	if (orig != data)
6578
		WREG32(HDP_MEM_POWER_LS, data);
6579
}
6580
 
6581
void cik_update_cg(struct radeon_device *rdev,
6582
		   u32 block, bool enable)
6583
{
6584
 
6585
	if (block & RADEON_CG_BLOCK_GFX) {
6586
		cik_enable_gui_idle_interrupt(rdev, false);
6587
		/* order matters! */
6588
		if (enable) {
6589
			cik_enable_mgcg(rdev, true);
6590
			cik_enable_cgcg(rdev, true);
6591
		} else {
6592
			cik_enable_cgcg(rdev, false);
6593
			cik_enable_mgcg(rdev, false);
6594
		}
6595
		cik_enable_gui_idle_interrupt(rdev, true);
6596
	}
6597
 
6598
	if (block & RADEON_CG_BLOCK_MC) {
6599
		if (!(rdev->flags & RADEON_IS_IGP)) {
6600
			cik_enable_mc_mgcg(rdev, enable);
6601
			cik_enable_mc_ls(rdev, enable);
6602
		}
6603
	}
6604
 
6605
	if (block & RADEON_CG_BLOCK_SDMA) {
6606
		cik_enable_sdma_mgcg(rdev, enable);
6607
		cik_enable_sdma_mgls(rdev, enable);
6608
	}
6609
 
6610
	if (block & RADEON_CG_BLOCK_BIF) {
6611
		cik_enable_bif_mgls(rdev, enable);
6612
	}
6613
 
6614
	if (block & RADEON_CG_BLOCK_UVD) {
6615
		if (rdev->has_uvd)
6616
			cik_enable_uvd_mgcg(rdev, enable);
6617
	}
6618
 
6619
	if (block & RADEON_CG_BLOCK_HDP) {
6620
		cik_enable_hdp_mgcg(rdev, enable);
6621
		cik_enable_hdp_ls(rdev, enable);
6622
	}
6623
 
6624
	if (block & RADEON_CG_BLOCK_VCE) {
6625
		vce_v2_0_enable_mgcg(rdev, enable);
6626
    }
6627
}
6628
 
6629
static void cik_init_cg(struct radeon_device *rdev)
6630
{
6631
 
6632
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6633
 
6634
	if (rdev->has_uvd)
6635
		si_init_uvd_internal_cg(rdev);
6636
 
6637
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6638
			     RADEON_CG_BLOCK_SDMA |
6639
			     RADEON_CG_BLOCK_BIF |
6640
			     RADEON_CG_BLOCK_UVD |
6641
			     RADEON_CG_BLOCK_HDP), true);
6642
}
6643
 
6644
static void cik_fini_cg(struct radeon_device *rdev)
6645
{
6646
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6647
			     RADEON_CG_BLOCK_SDMA |
6648
			     RADEON_CG_BLOCK_BIF |
6649
			     RADEON_CG_BLOCK_UVD |
6650
			     RADEON_CG_BLOCK_HDP), false);
6651
 
6652
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6653
}
6654
 
6655
static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6656
					  bool enable)
6657
{
6658
	u32 data, orig;
6659
 
6660
	orig = data = RREG32(RLC_PG_CNTL);
6661
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6662
		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6663
	else
6664
		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6665
	if (orig != data)
6666
		WREG32(RLC_PG_CNTL, data);
6667
}
6668
 
6669
static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6670
					  bool enable)
6671
{
6672
	u32 data, orig;
6673
 
6674
	orig = data = RREG32(RLC_PG_CNTL);
6675
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6676
		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6677
	else
6678
		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6679
	if (orig != data)
6680
		WREG32(RLC_PG_CNTL, data);
6681
}
6682
 
6683
/* Toggle CP power gating. The register bit is a *disable*: it is
 * cleared to turn CP power gating on. */
static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
{
	bool on = enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP);
	u32 val = RREG32(RLC_PG_CNTL);
	u32 tmp = on ? (val & ~DISABLE_CP_PG) : (val | DISABLE_CP_PG);

	if (tmp != val)
		WREG32(RLC_PG_CNTL, tmp);
}
6695
 
6696
/* Toggle GDS power gating. The register bit is a *disable*: it is
 * cleared to turn GDS power gating on. */
static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
{
	bool on = enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS);
	u32 val = RREG32(RLC_PG_CNTL);
	u32 tmp = on ? (val & ~DISABLE_GDS_PG) : (val | DISABLE_GDS_PG);

	if (tmp != val)
		WREG32(RLC_PG_CNTL, tmp);
}
6708
 
6709
/* Jump-table layout inside the legacy (non-new_fw) CP microcode images;
 * offsets/size are in dwords, used by cik_init_cp_pg_table(). */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6712
 
6713
void cik_init_cp_pg_table(struct radeon_device *rdev)
6714
{
6715
	volatile u32 *dst_ptr;
6716
	int me, i, max_me = 4;
6717
	u32 bo_offset = 0;
6718
	u32 table_offset, table_size;
6719
 
6720
	if (rdev->family == CHIP_KAVERI)
6721
		max_me = 5;
6722
 
6723
	if (rdev->rlc.cp_table_ptr == NULL)
6724
		return;
6725
 
6726
	/* write the cp table buffer */
6727
	dst_ptr = rdev->rlc.cp_table_ptr;
6728
	for (me = 0; me < max_me; me++) {
6729
		if (rdev->new_fw) {
6730
			const __le32 *fw_data;
6731
			const struct gfx_firmware_header_v1_0 *hdr;
6732
 
6733
			if (me == 0) {
6734
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6735
				fw_data = (const __le32 *)
6736
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6737
				table_offset = le32_to_cpu(hdr->jt_offset);
6738
				table_size = le32_to_cpu(hdr->jt_size);
6739
			} else if (me == 1) {
6740
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6741
				fw_data = (const __le32 *)
6742
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6743
				table_offset = le32_to_cpu(hdr->jt_offset);
6744
				table_size = le32_to_cpu(hdr->jt_size);
6745
			} else if (me == 2) {
6746
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6747
				fw_data = (const __le32 *)
6748
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6749
				table_offset = le32_to_cpu(hdr->jt_offset);
6750
				table_size = le32_to_cpu(hdr->jt_size);
6751
			} else if (me == 3) {
6752
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6753
				fw_data = (const __le32 *)
6754
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6755
				table_offset = le32_to_cpu(hdr->jt_offset);
6756
				table_size = le32_to_cpu(hdr->jt_size);
6757
			} else {
6758
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6759
				fw_data = (const __le32 *)
6760
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6761
				table_offset = le32_to_cpu(hdr->jt_offset);
6762
				table_size = le32_to_cpu(hdr->jt_size);
6763
			}
6764
 
6765
			for (i = 0; i < table_size; i ++) {
6766
				dst_ptr[bo_offset + i] =
6767
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6768
			}
6769
			bo_offset += table_size;
6770
		} else {
6771
			const __be32 *fw_data;
6772
			table_size = CP_ME_TABLE_SIZE;
6773
 
6774
		if (me == 0) {
6775
			fw_data = (const __be32 *)rdev->ce_fw->data;
6776
			table_offset = CP_ME_TABLE_OFFSET;
6777
		} else if (me == 1) {
6778
			fw_data = (const __be32 *)rdev->pfp_fw->data;
6779
			table_offset = CP_ME_TABLE_OFFSET;
6780
		} else if (me == 2) {
6781
			fw_data = (const __be32 *)rdev->me_fw->data;
6782
			table_offset = CP_ME_TABLE_OFFSET;
6783
		} else {
6784
			fw_data = (const __be32 *)rdev->mec_fw->data;
6785
			table_offset = CP_MEC_TABLE_OFFSET;
6786
		}
6787
 
6788
			for (i = 0; i < table_size; i ++) {
6789
				dst_ptr[bo_offset + i] =
6790
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6791
			}
6792
			bo_offset += table_size;
6793
		}
6794
	}
6795
}
6796
 
6797
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6798
				bool enable)
6799
{
6800
	u32 data, orig;
6801
 
6802
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6803
		orig = data = RREG32(RLC_PG_CNTL);
6804
		data |= GFX_PG_ENABLE;
6805
		if (orig != data)
6806
			WREG32(RLC_PG_CNTL, data);
6807
 
6808
		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6809
		data |= AUTO_PG_EN;
6810
		if (orig != data)
6811
			WREG32(RLC_AUTO_PG_CTRL, data);
6812
	} else {
6813
		orig = data = RREG32(RLC_PG_CNTL);
6814
		data &= ~GFX_PG_ENABLE;
6815
		if (orig != data)
6816
			WREG32(RLC_PG_CNTL, data);
6817
 
6818
		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6819
		data &= ~AUTO_PG_EN;
6820
		if (orig != data)
6821
			WREG32(RLC_AUTO_PG_CTRL, data);
6822
 
6823
		data = RREG32(DB_RENDER_CONTROL);
6824
	}
6825
}
6826
 
6827
/**
 * cik_get_cu_active_bitmap - fetch the active-CU bitmap for one SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index
 * @sh: shader array index
 *
 * Reads the hw and user shader-array config under grbm_idx_mutex and
 * returns a bitmask with one set bit per active compute unit, limited
 * to max_cu_per_sh bits.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 disabled, user_cfg, mask;
	int i;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	disabled = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	user_cfg = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* the inactive-CU bits live in the top half of the registers */
	disabled &= 0xffff0000;
	disabled |= user_cfg;
	disabled >>= 16;

	/* build a mask covering max_cu_per_sh bits */
	mask = 0;
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++)
		mask = (mask << 1) | 1;

	return ~disabled & mask;
}
6851
 
6852
static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6853
{
6854
	u32 i, j, k, active_cu_number = 0;
6855
	u32 mask, counter, cu_bitmap;
6856
	u32 tmp = 0;
6857
 
6858
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6859
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6860
			mask = 1;
6861
			cu_bitmap = 0;
6862
			counter = 0;
6863
			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6864
				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6865
					if (counter < 2)
6866
						cu_bitmap |= mask;
6867
					counter ++;
6868
				}
6869
				mask <<= 1;
6870
			}
6871
 
6872
			active_cu_number += counter;
6873
			tmp |= (cu_bitmap << (i * 16 + j * 8));
6874
		}
6875
	}
6876
 
6877
	WREG32(RLC_PG_AO_CU_MASK, tmp);
6878
 
6879
	tmp = RREG32(RLC_MAX_PG_CU);
6880
	tmp &= ~MAX_PU_CU_MASK;
6881
	tmp |= MAX_PU_CU(active_cu_number);
6882
	WREG32(RLC_MAX_PG_CU, tmp);
6883
}
6884
 
6885
static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6886
				       bool enable)
6887
{
6888
	u32 data, orig;
6889
 
6890
	orig = data = RREG32(RLC_PG_CNTL);
6891
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6892
		data |= STATIC_PER_CU_PG_ENABLE;
6893
	else
6894
		data &= ~STATIC_PER_CU_PG_ENABLE;
6895
	if (orig != data)
6896
		WREG32(RLC_PG_CNTL, data);
6897
}
6898
 
6899
static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6900
					bool enable)
6901
{
6902
	u32 data, orig;
6903
 
6904
	orig = data = RREG32(RLC_PG_CNTL);
6905
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6906
		data |= DYN_PER_CU_PG_ENABLE;
6907
	else
6908
		data &= ~DYN_PER_CU_PG_ENABLE;
6909
	if (orig != data)
6910
		WREG32(RLC_PG_CNTL, data);
6911
}
6912
 
6913
/* RLC GPM scratch offsets written via RLC_GPM_SCRATCH_ADDR when
 * priming the power-gating state in cik_init_gfx_cgpg(). */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6915
 
6916
/**
 * cik_init_gfx_cgpg - prime the RLC for gfx power gating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear-state descriptor and save/restore register list
 * into RLC GPM scratch memory, points the RLC at the save/restore and
 * CP-table buffers, and programs the power-gating delay/idle-poll
 * parameters.  The register sequence is order-sensitive.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: clear-state address (hi, lo) and size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state data: zero out the three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffer addresses are programmed in 256-byte units (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6964
 
6965
/* Enable/disable all gfx power-gating modes (coarse-grain CGPG, static
 * and dynamic per-CU MGPG); each helper checks its own pg_flags bit. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6971
 
6972
u32 cik_get_csb_size(struct radeon_device *rdev)
6973
{
6974
	u32 count = 0;
6975
	const struct cs_section_def *sect = NULL;
6976
	const struct cs_extent_def *ext = NULL;
6977
 
6978
	if (rdev->rlc.cs_data == NULL)
6979
		return 0;
6980
 
6981
	/* begin clear state */
6982
	count += 2;
6983
	/* context control state */
6984
	count += 3;
6985
 
6986
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6987
		for (ext = sect->section; ext->extent != NULL; ++ext) {
6988
			if (sect->id == SECT_CONTEXT)
6989
				count += 2 + ext->reg_count;
6990
			else
6991
				return 0;
6992
		}
6993
	}
6994
	/* pa_sc_raster_config/pa_sc_raster_config1 */
6995
	count += 4;
6996
	/* end clear state */
6997
	count += 2;
6998
	/* clear state */
6999
	count += 2;
7000
 
7001
	return count;
7002
}
7003
 
7004
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7005
{
7006
	u32 count = 0, i;
7007
	const struct cs_section_def *sect = NULL;
7008
	const struct cs_extent_def *ext = NULL;
7009
 
7010
	if (rdev->rlc.cs_data == NULL)
7011
		return;
7012
	if (buffer == NULL)
7013
		return;
7014
 
7015
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7016
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7017
 
7018
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7019
	buffer[count++] = cpu_to_le32(0x80000000);
7020
	buffer[count++] = cpu_to_le32(0x80000000);
7021
 
7022
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7023
		for (ext = sect->section; ext->extent != NULL; ++ext) {
7024
			if (sect->id == SECT_CONTEXT) {
7025
				buffer[count++] =
7026
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7027
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7028
				for (i = 0; i < ext->reg_count; i++)
7029
					buffer[count++] = cpu_to_le32(ext->extent[i]);
7030
			} else {
7031
				return;
7032
			}
7033
		}
7034
	}
7035
 
7036
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7037
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7038
	switch (rdev->family) {
7039
	case CHIP_BONAIRE:
7040
		buffer[count++] = cpu_to_le32(0x16000012);
7041
		buffer[count++] = cpu_to_le32(0x00000000);
7042
		break;
7043
	case CHIP_KAVERI:
7044
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7045
		buffer[count++] = cpu_to_le32(0x00000000);
7046
		break;
7047
	case CHIP_KABINI:
7048
	case CHIP_MULLINS:
7049
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7050
		buffer[count++] = cpu_to_le32(0x00000000);
7051
		break;
7052
	case CHIP_HAWAII:
7053
		buffer[count++] = cpu_to_le32(0x3a00161a);
7054
		buffer[count++] = cpu_to_le32(0x0000002e);
7055
		break;
7056
	default:
7057
		buffer[count++] = cpu_to_le32(0x00000000);
7058
		buffer[count++] = cpu_to_le32(0x00000000);
7059
		break;
7060
	}
7061
 
7062
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7063
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7064
 
7065
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7066
	buffer[count++] = cpu_to_le32(0);
7067
}
7068
 
7069
static void cik_init_pg(struct radeon_device *rdev)
7070
{
7071
	if (rdev->pg_flags) {
7072
		cik_enable_sck_slowdown_on_pu(rdev, true);
7073
		cik_enable_sck_slowdown_on_pd(rdev, true);
7074
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7075
			cik_init_gfx_cgpg(rdev);
7076
			cik_enable_cp_pg(rdev, true);
7077
			cik_enable_gds_pg(rdev, true);
7078
		}
7079
		cik_init_ao_cu_mask(rdev);
7080
		cik_update_gfx_pg(rdev, true);
7081
	}
7082
}
7083
 
7084
static void cik_fini_pg(struct radeon_device *rdev)
7085
{
7086
	if (rdev->pg_flags) {
7087
		cik_update_gfx_pg(rdev, false);
7088
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7089
			cik_enable_cp_pg(rdev, false);
7090
			cik_enable_gds_pg(rdev, false);
7091
		}
7092
	}
7093
}
7094
 
7095
/*
7096
 * Interrupts
7097
 * Starting with r6xx, interrupts are handled via a ring buffer.
7098
 * Ring buffers are areas of GPU accessible memory that the GPU
7099
 * writes interrupt vectors into and the host reads vectors out of.
7100
 * There is a rptr (read pointer) that determines where the
7101
 * host is currently reading, and a wptr (write pointer)
7102
 * which determines where the GPU has written.  When the
7103
 * pointers are equal, the ring is idle.  When the GPU
7104
 * writes vectors to the ring buffer, it increments the
7105
 * wptr.  When there is an interrupt, the host then starts
7106
 * fetching commands and processing them until the pointers are
7107
 * equal again at which point it updates the rptr.
7108
 */
7109
 
7110
/**
7111
 * cik_enable_interrupts - Enable the interrupt ring buffer
7112
 *
7113
 * @rdev: radeon_device pointer
7114
 *
7115
 * Enable the interrupt ring buffer (CIK).
7116
 */
7117
static void cik_enable_interrupts(struct radeon_device *rdev)
7118
{
7119
	u32 ih_cntl = RREG32(IH_CNTL);
7120
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7121
 
7122
	ih_cntl |= ENABLE_INTR;
7123
	ih_rb_cntl |= IH_RB_ENABLE;
7124
	WREG32(IH_CNTL, ih_cntl);
7125
	WREG32(IH_RB_CNTL, ih_rb_cntl);
7126
	rdev->ih.enabled = true;
7127
}
7128
 
7129
/**
7130
 * cik_disable_interrupts - Disable the interrupt ring buffer
7131
 *
7132
 * @rdev: radeon_device pointer
7133
 *
7134
 * Disable the interrupt ring buffer (CIK).
7135
 */
7136
static void cik_disable_interrupts(struct radeon_device *rdev)
7137
{
7138
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7139
	u32 ih_cntl = RREG32(IH_CNTL);
7140
 
7141
	ih_rb_cntl &= ~IH_RB_ENABLE;
7142
	ih_cntl &= ~ENABLE_INTR;
7143
	WREG32(IH_RB_CNTL, ih_rb_cntl);
7144
	WREG32(IH_CNTL, ih_cntl);
7145
	/* set rptr, wptr to 0 */
7146
	WREG32(IH_RB_RPTR, 0);
7147
	WREG32(IH_RB_WPTR, 0);
7148
	rdev->ih.enabled = false;
7149
	rdev->ih.rptr = 0;
7150
}
7151
 
7152
/**
7153
 * cik_disable_interrupt_state - Disable all interrupt sources
7154
 *
7155
 * @rdev: radeon_device pointer
7156
 *
7157
 * Clear all interrupt enable bits used by the driver (CIK).
7158
 */
7159
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits, clear the rest */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — only touch CRTCs that exist on this part */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear the enables but preserve the configured
	 * interrupt polarity bit of each HPD pin */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7226
 
7227
/**
7228
 * cik_irq_init - init and enable the interrupt ring
7229
 *
7230
 * @rdev: radeon_device pointer
7231
 *
7232
 * Allocate a ring buffer for the interrupt controller,
7233
 * enable the RLC, disable interrupts, enable the IH
7234
 * ring buffer and enable it (CIK).
7235
 * Called at device load and reume.
7236
 * Returns 0 for success, errors for failure.
7237
 */
7238
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* tear the IH ring back down on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is programmed as log2 of the dword count */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

//   pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7308
 
7309
/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* read-modify-write: keep the context busy/empty bits, always trap
	 * privileged instruction/register faults on the gfx ring */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	/* start from the current HPD state with the enable bit cleared;
	 * it is re-set below only for connectors with hpd requested */
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	/* SDMA engine trap interrupts, default off */
	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* compute ME1 pipe0 fence interrupts, default off */
	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* thermal interrupt lives in a different SMC register on APUs */
	if (rdev->flags & RADEON_IS_IGP)
		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
			~(THERM_INTH_MASK | THERM_INTL_MASK);
	else
		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		/* only ME1 pipe0 is wired up here; other me/pipe combos are
		 * unexpected and just logged */
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank interrupts: enabled either for the vblank ioctl path or
	 * for a pending page flip on that crtc */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		if (rdev->flags & RADEON_IS_IGP)
			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
		else
			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	/* commit the accumulated enable masks to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are unconditionally unmasked per crtc */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	if (rdev->flags & RADEON_IS_IGP)
		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
	else
		WREG32_SMC(CG_THERMAL_INT, thermal_int);

	return 0;
}
7527
 
7528
/**
7529
 * cik_irq_ack - ack interrupt sources
7530
 *
7531
 * @rdev: radeon_device pointer
7532
 *
7533
 * Ack interrupt sources on the GPU (vblanks, hpd,
7534
 * etc.) (CIK).  Certain interrupts sources are sw
7535
 * generated and do not require an explicit ack.
7536
 */
7537
static inline void cik_irq_ack(struct radeon_device *rdev)
7538
{
7539
	u32 tmp;
7540
 
7541
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7542
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7543
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7544
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7545
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7546
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7547
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7548
 
7549
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7550
		EVERGREEN_CRTC0_REGISTER_OFFSET);
7551
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7552
		EVERGREEN_CRTC1_REGISTER_OFFSET);
7553
	if (rdev->num_crtc >= 4) {
7554
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7555
			EVERGREEN_CRTC2_REGISTER_OFFSET);
7556
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7557
			EVERGREEN_CRTC3_REGISTER_OFFSET);
7558
	}
7559
	if (rdev->num_crtc >= 6) {
7560
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7561
			EVERGREEN_CRTC4_REGISTER_OFFSET);
7562
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7563
			EVERGREEN_CRTC5_REGISTER_OFFSET);
7564
	}
7565
 
7566
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7567
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7568
		       GRPH_PFLIP_INT_CLEAR);
7569
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7570
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7571
		       GRPH_PFLIP_INT_CLEAR);
7572
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7573
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7574
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7575
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7576
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7577
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7578
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7579
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7580
 
7581
	if (rdev->num_crtc >= 4) {
7582
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7583
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7584
			       GRPH_PFLIP_INT_CLEAR);
7585
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7586
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7587
			       GRPH_PFLIP_INT_CLEAR);
7588
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7589
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7590
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7591
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7592
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7593
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7594
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7595
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7596
	}
7597
 
7598
	if (rdev->num_crtc >= 6) {
7599
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7600
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7601
			       GRPH_PFLIP_INT_CLEAR);
7602
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7603
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7604
			       GRPH_PFLIP_INT_CLEAR);
7605
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7606
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7607
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7608
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7609
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7610
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7611
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7612
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7613
	}
7614
 
7615
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7616
		tmp = RREG32(DC_HPD1_INT_CONTROL);
7617
		tmp |= DC_HPDx_INT_ACK;
7618
		WREG32(DC_HPD1_INT_CONTROL, tmp);
7619
	}
7620
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7621
		tmp = RREG32(DC_HPD2_INT_CONTROL);
7622
		tmp |= DC_HPDx_INT_ACK;
7623
		WREG32(DC_HPD2_INT_CONTROL, tmp);
7624
	}
7625
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7626
		tmp = RREG32(DC_HPD3_INT_CONTROL);
7627
		tmp |= DC_HPDx_INT_ACK;
7628
		WREG32(DC_HPD3_INT_CONTROL, tmp);
7629
	}
7630
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7631
		tmp = RREG32(DC_HPD4_INT_CONTROL);
7632
		tmp |= DC_HPDx_INT_ACK;
7633
		WREG32(DC_HPD4_INT_CONTROL, tmp);
7634
	}
7635
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7636
		tmp = RREG32(DC_HPD5_INT_CONTROL);
7637
		tmp |= DC_HPDx_INT_ACK;
7638
		WREG32(DC_HPD5_INT_CONTROL, tmp);
7639
	}
7640
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7641
		tmp = RREG32(DC_HPD5_INT_CONTROL);
7642
		tmp |= DC_HPDx_INT_ACK;
7643
		WREG32(DC_HPD6_INT_CONTROL, tmp);
7644
	}
7645
}
7646
 
7647
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 * Ordering matters: stop interrupt generation first, give any
 * in-flight interrupt time to land, ack it, then force the per-source
 * enable state to all-disabled.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7662
 
7663
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
/* NOTE: kernel-doc name fixed — the comment previously said
 * "cik_irq_disable" for this function. */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7676
 
7677
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* suspend path already disables interrupts and stops the RLC */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7691
 
7692
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the writeback copy (no MMIO read) when enabled */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* write-1-to-clear the overflow flag in the ring control reg */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
7727
 
7728
/*        CIK IV Ring
7729
 * Each IV ring entry is 128 bits:
7730
 * [7:0]    - interrupt source id
7731
 * [31:8]   - reserved
7732
 * [59:32]  - interrupt source data
7733
 * [63:60]  - reserved
7734
 * [71:64]  - RINGID
7735
 *            CP:
7736
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7737
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7738
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7739
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7740
 *            PIPE_ID - ME0 0=3D
7741
 *                    - ME1&2 compute dispatcher (4 pipes each)
7742
 *            SDMA:
7743
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7744
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7745
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7746
 * [79:72]  - VMID
7747
 * [95:80]  - PASID
7748
 * [127:96] - reserved
7749
 */
7750
/**
7751
 * cik_irq_process - interrupt handler
7752
 *
7753
 * @rdev: radeon_device pointer
7754
 *
7755
 * Interrupt hander (CIK).  Walk the IH ring,
7756
 * ack interrupts and schedule work to handle
7757
 * interrupt events.
7758
 * Returns irq process return code.
7759
 */
7760
int cik_irq_process(struct radeon_device *rdev)
7761
{
7762
	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7763
	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7764
	u32 wptr;
7765
	u32 rptr;
7766
	u32 src_id, src_data, ring_id;
7767
	u8 me_id, pipe_id, queue_id;
7768
	u32 ring_index;
7769
	bool queue_hotplug = false;
7770
	bool queue_reset = false;
7771
	u32 addr, status, mc_client;
7772
	bool queue_thermal = false;
7773
 
7774
	if (!rdev->ih.enabled || rdev->shutdown)
7775
		return IRQ_NONE;
7776
 
7777
	wptr = cik_get_ih_wptr(rdev);
7778
 
7779
restart_ih:
7780
	/* is somebody else already processing irqs? */
7781
	if (atomic_xchg(&rdev->ih.lock, 1))
7782
		return IRQ_NONE;
7783
 
7784
	rptr = rdev->ih.rptr;
7785
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7786
 
7787
	/* Order reading of wptr vs. reading of IH ring data */
7788
	rmb();
7789
 
7790
	/* display interrupts */
7791
	cik_irq_ack(rdev);
7792
 
7793
	while (rptr != wptr) {
7794
		/* wptr/rptr are in bytes! */
7795
		ring_index = rptr / 4;
5271 serge 7796
 
7797
//       radeon_kfd_interrupt(rdev,
7798
//               (const void *) &rdev->ih.ring[ring_index]);
7799
 
5078 serge 7800
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7801
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7802
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7803
 
7804
		switch (src_id) {
7805
		case 1: /* D1 vblank/vline */
7806
			switch (src_data) {
7807
			case 0: /* D1 vblank */
7808
				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7809
					if (rdev->irq.crtc_vblank_int[0]) {
7810
						rdev->pm.vblank_sync = true;
7811
					}
7812
					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7813
					DRM_DEBUG("IH: D1 vblank\n");
7814
				}
7815
				break;
7816
			case 1: /* D1 vline */
7817
				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7818
					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7819
					DRM_DEBUG("IH: D1 vline\n");
7820
				}
7821
				break;
7822
			default:
7823
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7824
				break;
7825
			}
7826
			break;
7827
		case 2: /* D2 vblank/vline */
7828
			switch (src_data) {
7829
			case 0: /* D2 vblank */
7830
				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7831
					if (rdev->irq.crtc_vblank_int[1]) {
7832
						rdev->pm.vblank_sync = true;
7833
					}
7834
					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7835
					DRM_DEBUG("IH: D2 vblank\n");
7836
				}
7837
				break;
7838
			case 1: /* D2 vline */
7839
				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7840
					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7841
					DRM_DEBUG("IH: D2 vline\n");
7842
				}
7843
				break;
7844
			default:
7845
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7846
				break;
7847
			}
7848
			break;
7849
		case 3: /* D3 vblank/vline */
7850
			switch (src_data) {
7851
			case 0: /* D3 vblank */
7852
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7853
					if (rdev->irq.crtc_vblank_int[2]) {
7854
						rdev->pm.vblank_sync = true;
7855
					}
7856
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7857
					DRM_DEBUG("IH: D3 vblank\n");
7858
				}
7859
				break;
7860
			case 1: /* D3 vline */
7861
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7862
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7863
					DRM_DEBUG("IH: D3 vline\n");
7864
				}
7865
				break;
7866
			default:
7867
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7868
				break;
7869
			}
7870
			break;
7871
		case 4: /* D4 vblank/vline */
7872
			switch (src_data) {
7873
			case 0: /* D4 vblank */
7874
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7875
					if (rdev->irq.crtc_vblank_int[3]) {
7876
						rdev->pm.vblank_sync = true;
7877
					}
7878
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7879
					DRM_DEBUG("IH: D4 vblank\n");
7880
				}
7881
				break;
7882
			case 1: /* D4 vline */
7883
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7884
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7885
					DRM_DEBUG("IH: D4 vline\n");
7886
				}
7887
				break;
7888
			default:
7889
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7890
				break;
7891
			}
7892
			break;
7893
		case 5: /* D5 vblank/vline */
7894
			switch (src_data) {
7895
			case 0: /* D5 vblank */
7896
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7897
					if (rdev->irq.crtc_vblank_int[4]) {
7898
						rdev->pm.vblank_sync = true;
7899
					}
7900
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7901
					DRM_DEBUG("IH: D5 vblank\n");
7902
				}
7903
				break;
7904
			case 1: /* D5 vline */
7905
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7906
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7907
					DRM_DEBUG("IH: D5 vline\n");
7908
				}
7909
				break;
7910
			default:
7911
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7912
				break;
7913
			}
7914
			break;
7915
		case 6: /* D6 vblank/vline */
7916
			switch (src_data) {
7917
			case 0: /* D6 vblank */
7918
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7919
					if (rdev->irq.crtc_vblank_int[5]) {
7920
						rdev->pm.vblank_sync = true;
7921
					}
7922
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7923
					DRM_DEBUG("IH: D6 vblank\n");
7924
				}
7925
				break;
7926
			case 1: /* D6 vline */
7927
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7928
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7929
					DRM_DEBUG("IH: D6 vline\n");
7930
				}
7931
				break;
7932
			default:
7933
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7934
				break;
7935
			}
7936
			break;
7937
		case 8: /* D1 page flip */
7938
		case 10: /* D2 page flip */
7939
		case 12: /* D3 page flip */
7940
		case 14: /* D4 page flip */
7941
		case 16: /* D5 page flip */
7942
		case 18: /* D6 page flip */
7943
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7944
			break;
7945
		case 42: /* HPD hotplug */
7946
			switch (src_data) {
7947
			case 0:
7948
				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7949
					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7950
					queue_hotplug = true;
7951
					DRM_DEBUG("IH: HPD1\n");
7952
				}
7953
				break;
7954
			case 1:
7955
				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7956
					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7957
					queue_hotplug = true;
7958
					DRM_DEBUG("IH: HPD2\n");
7959
				}
7960
				break;
7961
			case 2:
7962
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7963
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7964
					queue_hotplug = true;
7965
					DRM_DEBUG("IH: HPD3\n");
7966
				}
7967
				break;
7968
			case 3:
7969
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7970
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7971
					queue_hotplug = true;
7972
					DRM_DEBUG("IH: HPD4\n");
7973
				}
7974
				break;
7975
			case 4:
7976
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7977
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7978
					queue_hotplug = true;
7979
					DRM_DEBUG("IH: HPD5\n");
7980
				}
7981
				break;
7982
			case 5:
7983
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7984
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7985
					queue_hotplug = true;
7986
					DRM_DEBUG("IH: HPD6\n");
7987
				}
7988
				break;
7989
			default:
7990
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7991
				break;
7992
			}
7993
			break;
7994
		case 124: /* UVD */
7995
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7996
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7997
			break;
7998
		case 146:
7999
		case 147:
8000
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8001
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8002
			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8003
			/* reset addr and status */
8004
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8005
			if (addr == 0x0 && status == 0x0)
8006
				break;
8007
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8008
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8009
				addr);
8010
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8011
				status);
8012
			cik_vm_decode_fault(rdev, status, addr, mc_client);
8013
			break;
8014
		case 167: /* VCE */
8015
			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8016
			switch (src_data) {
8017
			case 0:
8018
				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8019
				break;
8020
			case 1:
8021
				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8022
				break;
8023
			default:
8024
				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8025
				break;
8026
			}
8027
			break;
8028
		case 176: /* GFX RB CP_INT */
8029
		case 177: /* GFX IB CP_INT */
8030
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8031
			break;
8032
		case 181: /* CP EOP event */
8033
			DRM_DEBUG("IH: CP EOP\n");
8034
			/* XXX check the bitfield order! */
8035
			me_id = (ring_id & 0x60) >> 5;
8036
			pipe_id = (ring_id & 0x18) >> 3;
8037
			queue_id = (ring_id & 0x7) >> 0;
8038
			switch (me_id) {
8039
			case 0:
8040
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8041
				break;
8042
			case 1:
8043
			case 2:
8044
				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8045
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8046
				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8047
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8048
				break;
8049
			}
8050
			break;
8051
		case 184: /* CP Privileged reg access */
8052
			DRM_ERROR("Illegal register access in command stream\n");
8053
			/* XXX check the bitfield order! */
8054
			me_id = (ring_id & 0x60) >> 5;
8055
			pipe_id = (ring_id & 0x18) >> 3;
8056
			queue_id = (ring_id & 0x7) >> 0;
8057
			switch (me_id) {
8058
			case 0:
8059
				/* This results in a full GPU reset, but all we need to do is soft
8060
				 * reset the CP for gfx
8061
				 */
8062
				queue_reset = true;
8063
				break;
8064
			case 1:
8065
				/* XXX compute */
8066
				queue_reset = true;
8067
				break;
8068
			case 2:
8069
				/* XXX compute */
8070
				queue_reset = true;
8071
				break;
8072
			}
8073
			break;
8074
		case 185: /* CP Privileged inst */
8075
			DRM_ERROR("Illegal instruction in command stream\n");
8076
			/* XXX check the bitfield order! */
8077
			me_id = (ring_id & 0x60) >> 5;
8078
			pipe_id = (ring_id & 0x18) >> 3;
8079
			queue_id = (ring_id & 0x7) >> 0;
8080
			switch (me_id) {
8081
			case 0:
8082
				/* This results in a full GPU reset, but all we need to do is soft
8083
				 * reset the CP for gfx
8084
				 */
8085
				queue_reset = true;
8086
				break;
8087
			case 1:
8088
				/* XXX compute */
8089
				queue_reset = true;
8090
				break;
8091
			case 2:
8092
				/* XXX compute */
8093
				queue_reset = true;
8094
				break;
8095
			}
8096
			break;
8097
		case 224: /* SDMA trap event */
8098
			/* XXX check the bitfield order! */
8099
			me_id = (ring_id & 0x3) >> 0;
8100
			queue_id = (ring_id & 0xc) >> 2;
8101
			DRM_DEBUG("IH: SDMA trap\n");
8102
			switch (me_id) {
8103
			case 0:
8104
				switch (queue_id) {
8105
				case 0:
8106
					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8107
					break;
8108
				case 1:
8109
					/* XXX compute */
8110
					break;
8111
				case 2:
8112
					/* XXX compute */
8113
					break;
8114
				}
8115
				break;
8116
			case 1:
8117
				switch (queue_id) {
8118
				case 0:
8119
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8120
					break;
8121
				case 1:
8122
					/* XXX compute */
8123
					break;
8124
				case 2:
8125
					/* XXX compute */
8126
					break;
8127
				}
8128
				break;
8129
			}
8130
			break;
8131
		case 230: /* thermal low to high */
8132
			DRM_DEBUG("IH: thermal low to high\n");
8133
			rdev->pm.dpm.thermal.high_to_low = false;
8134
			queue_thermal = true;
8135
			break;
8136
		case 231: /* thermal high to low */
8137
			DRM_DEBUG("IH: thermal high to low\n");
8138
			rdev->pm.dpm.thermal.high_to_low = true;
8139
			queue_thermal = true;
8140
			break;
8141
		case 233: /* GUI IDLE */
8142
			DRM_DEBUG("IH: GUI idle\n");
8143
			break;
8144
		case 241: /* SDMA Privileged inst */
8145
		case 247: /* SDMA Privileged inst */
8146
			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8147
			/* XXX check the bitfield order! */
8148
			me_id = (ring_id & 0x3) >> 0;
8149
			queue_id = (ring_id & 0xc) >> 2;
8150
			switch (me_id) {
8151
			case 0:
8152
				switch (queue_id) {
8153
				case 0:
8154
					queue_reset = true;
8155
					break;
8156
				case 1:
8157
					/* XXX compute */
8158
					queue_reset = true;
8159
					break;
8160
				case 2:
8161
					/* XXX compute */
8162
					queue_reset = true;
8163
					break;
8164
				}
8165
				break;
8166
			case 1:
8167
				switch (queue_id) {
8168
				case 0:
8169
					queue_reset = true;
8170
					break;
8171
				case 1:
8172
					/* XXX compute */
8173
					queue_reset = true;
8174
					break;
8175
				case 2:
8176
					/* XXX compute */
8177
					queue_reset = true;
8178
					break;
8179
				}
8180
				break;
8181
			}
8182
			break;
8183
		default:
8184
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8185
			break;
8186
		}
8187
 
8188
		/* wptr/rptr are in bytes! */
8189
		rptr += 16;
8190
		rptr &= rdev->ih.ptr_mask;
5179 serge 8191
		WREG32(IH_RB_RPTR, rptr);
5078 serge 8192
	}
8193
	rdev->ih.rptr = rptr;
8194
	atomic_set(&rdev->ih.lock, 0);
8195
 
8196
	/* make sure wptr hasn't changed while processing */
8197
	wptr = cik_get_ih_wptr(rdev);
8198
	if (wptr != rptr)
8199
		goto restart_ih;
8200
 
8201
	return IRQ_HANDLED;
8202
}
8203
 
8204
/*
8205
 * startup/shutdown callbacks
8206
 */
8207
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * Ordering matters: link/ASPM, scratch, MC, GART, GPU core, RLC/WB/MEC
 * buffers, fences, UVD, IRQ, then rings, CP/SDMA, IB pool and VM manager.
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPUs need MC microcode loaded here unless DPM already did it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers; save/restore list differs per IGP family */
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on failure only its ring is disabled, not startup */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* NOTE(review): the VCE resume calls were stripped in this port, so
	 * this block re-checks the UVD error code: a UVD failure also zeroes
	 * the VCE ring sizes and logs a misleading "VCE init error" —
	 * confirm this is intended. */
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
//       radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* older Hawaii firmware cannot use type-3 NOPs on the GFX ring */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size stays 0 when the UVD resume above failed */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}
	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	/* KFD bring-up is disabled in this port */
//   r = radeon_kfd_resume(rdev);
//   if (r)
//       return r;

	return 0;
}
8429
 
8430
 
8431
/* Plan is to move initialization in that function and use
8432
 * helper function so that radeon_device_init pretty much
8433
 * do nothing more than calling asic specific function. This
8434
 * should also allow to remove a bunch of callback function
8435
 * like vram_info.
8436
 */
8437
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load required microcode; IGPs have no separate MC firmware */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* allocate ring descriptors: GFX + 2 compute + 2 SDMA (+ UVD) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}
	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	/* NOTE(review): startup failure only clears accel_working here (the
	 * upstream fini/teardown calls were stripped in this port) */
	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
        rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
8579
 
8580
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 *
 * KolibriOS port: the upstream ring/IRQ/GART teardown was stripped;
 * only the BIOS copy allocated by radeon_get_bios() is released here.
 */
void cik_fini(struct radeon_device *rdev)
{
    kfree(rdev->bios);
	rdev->bios = NULL;
}
8594
 
8595
void dce8_program_fmt(struct drm_encoder *encoder)
8596
{
8597
	struct drm_device *dev = encoder->dev;
8598
	struct radeon_device *rdev = dev->dev_private;
8599
	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8600
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8601
	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8602
	int bpc = 0;
8603
	u32 tmp = 0;
8604
	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8605
 
8606
	if (connector) {
8607
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8608
		bpc = radeon_get_monitor_bpc(connector);
8609
		dither = radeon_connector->dither;
8610
	}
8611
 
8612
	/* LVDS/eDP FMT is set up by atom */
8613
	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8614
		return;
8615
 
8616
	/* not needed for analog */
8617
	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8618
	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8619
		return;
8620
 
8621
	if (bpc == 0)
8622
		return;
8623
 
8624
	switch (bpc) {
8625
	case 6:
8626
		if (dither == RADEON_FMT_DITHER_ENABLE)
8627
			/* XXX sort out optimal dither settings */
8628
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8629
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8630
		else
8631
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8632
		break;
8633
	case 8:
8634
		if (dither == RADEON_FMT_DITHER_ENABLE)
8635
			/* XXX sort out optimal dither settings */
8636
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8637
				FMT_RGB_RANDOM_ENABLE |
8638
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8639
		else
8640
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8641
		break;
8642
	case 10:
8643
		if (dither == RADEON_FMT_DITHER_ENABLE)
8644
			/* XXX sort out optimal dither settings */
8645
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8646
				FMT_RGB_RANDOM_ENABLE |
8647
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8648
		else
8649
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8650
		break;
8651
	default:
8652
		/* not needed */
8653
		break;
8654
	}
8655
 
8656
	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8657
}
8658
 
8659
/* display watermark setup */
8660
/**
8661
 * dce8_line_buffer_adjust - Set up the line buffer
8662
 *
8663
 * @rdev: radeon_device pointer
8664
 * @radeon_crtc: the selected display controller
8665
 * @mode: the current display mode on the selected display
8666
 * controller
8667
 *
8668
 * Setup up the line buffer allocation for
8669
 * the selected display controller (CIK).
8670
 * Returns the line buffer size in pixels.
8671
 */
8672
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8673
				   struct radeon_crtc *radeon_crtc,
8674
				   struct drm_display_mode *mode)
8675
{
8676
	u32 tmp, buffer_alloc, i;
8677
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8678
	/*
8679
	 * Line Buffer Setup
8680
	 * There are 6 line buffers, one for each display controllers.
8681
	 * There are 3 partitions per LB. Select the number of partitions
8682
	 * to enable based on the display width.  For display widths larger
8683
	 * than 4096, you need use to use 2 display controllers and combine
8684
	 * them using the stereo blender.
8685
	 */
8686
	if (radeon_crtc->base.enabled && mode) {
8687
		if (mode->crtc_hdisplay < 1920) {
8688
			tmp = 1;
8689
			buffer_alloc = 2;
8690
		} else if (mode->crtc_hdisplay < 2560) {
8691
			tmp = 2;
8692
			buffer_alloc = 2;
8693
		} else if (mode->crtc_hdisplay < 4096) {
8694
			tmp = 0;
8695
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8696
		} else {
8697
			DRM_DEBUG_KMS("Mode too big for LB!\n");
8698
			tmp = 0;
8699
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8700
		}
8701
	} else {
8702
		tmp = 1;
8703
		buffer_alloc = 0;
8704
	}
8705
 
8706
	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8707
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8708
 
8709
	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8710
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8711
	for (i = 0; i < rdev->usec_timeout; i++) {
8712
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8713
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8714
			break;
8715
		udelay(1);
8716
	}
8717
 
8718
	if (radeon_crtc->base.enabled && mode) {
8719
		switch (tmp) {
8720
		case 0:
8721
		default:
8722
			return 4096 * 2;
8723
		case 1:
8724
			return 1920 * 2;
8725
		case 2:
8726
			return 2560 * 2;
8727
		}
8728
	}
8729
 
8730
	/* controller not enabled, so no lb used */
8731
	return 0;
8732
}
8733
 
8734
/**
8735
 * cik_get_number_of_dram_channels - get the number of dram channels
8736
 *
8737
 * @rdev: radeon_device pointer
8738
 *
8739
 * Look up the number of video ram channels (CIK).
8740
 * Used for display watermark bandwidth calculations
8741
 * Returns the number of dram channels
8742
 */
8743
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8744
{
8745
	u32 tmp = RREG32(MC_SHARED_CHMAP);
8746
 
8747
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8748
	case 0:
8749
	default:
8750
		return 1;
8751
	case 1:
8752
		return 2;
8753
	case 2:
8754
		return 4;
8755
	case 3:
8756
		return 8;
8757
	case 4:
8758
		return 3;
8759
	case 5:
8760
		return 6;
8761
	case 6:
8762
		return 10;
8763
	case 7:
8764
		return 12;
8765
	case 8:
8766
		return 16;
8767
	}
8768
}
8769
 
8770
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8785
 
8786
/**
8787
 * dce8_dram_bandwidth - get the dram bandwidth
8788
 *
8789
 * @wm: watermark calculation data
8790
 *
8791
 * Calculate the raw dram bandwidth (CIK).
8792
 * Used for display watermark bandwidth calculations
8793
 * Returns the dram bandwidth in MBytes/s
8794
 */
8795
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8796
{
8797
	/* Calculate raw DRAM Bandwidth */
8798
	fixed20_12 dram_efficiency; /* 0.7 */
8799
	fixed20_12 yclk, dram_channels, bandwidth;
8800
	fixed20_12 a;
8801
 
8802
	a.full = dfixed_const(1000);
8803
	yclk.full = dfixed_const(wm->yclk);
8804
	yclk.full = dfixed_div(yclk, a);
8805
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8806
	a.full = dfixed_const(10);
8807
	dram_efficiency.full = dfixed_const(7);
8808
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8809
	bandwidth.full = dfixed_mul(dram_channels, yclk);
8810
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8811
 
8812
	return dfixed_trunc(bandwidth);
8813
}
8814
 
8815
/**
8816
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8817
 *
8818
 * @wm: watermark calculation data
8819
 *
8820
 * Calculate the dram bandwidth used for display (CIK).
8821
 * Used for display watermark bandwidth calculations
8822
 * Returns the dram bandwidth for display in MBytes/s
8823
 */
8824
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8825
{
8826
	/* Calculate DRAM Bandwidth and the part allocated to display. */
8827
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8828
	fixed20_12 yclk, dram_channels, bandwidth;
8829
	fixed20_12 a;
8830
 
8831
	a.full = dfixed_const(1000);
8832
	yclk.full = dfixed_const(wm->yclk);
8833
	yclk.full = dfixed_div(yclk, a);
8834
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8835
	a.full = dfixed_const(10);
8836
	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8837
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8838
	bandwidth.full = dfixed_mul(dram_channels, yclk);
8839
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8840
 
8841
	return dfixed_trunc(bandwidth);
8842
}
8843
 
8844
/**
8845
 * dce8_data_return_bandwidth - get the data return bandwidth
8846
 *
8847
 * @wm: watermark calculation data
8848
 *
8849
 * Calculate the data return bandwidth used for display (CIK).
8850
 * Used for display watermark bandwidth calculations
8851
 * Returns the data return bandwidth in MBytes/s
8852
 */
8853
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8854
{
8855
	/* Calculate the display Data return Bandwidth */
8856
	fixed20_12 return_efficiency; /* 0.8 */
8857
	fixed20_12 sclk, bandwidth;
8858
	fixed20_12 a;
8859
 
8860
	a.full = dfixed_const(1000);
8861
	sclk.full = dfixed_const(wm->sclk);
8862
	sclk.full = dfixed_div(sclk, a);
8863
	a.full = dfixed_const(10);
8864
	return_efficiency.full = dfixed_const(8);
8865
	return_efficiency.full = dfixed_div(return_efficiency, a);
8866
	a.full = dfixed_const(32);
8867
	bandwidth.full = dfixed_mul(a, sclk);
8868
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8869
 
8870
	return dfixed_trunc(bandwidth);
8871
}
8872
 
8873
/**
8874
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8875
 *
8876
 * @wm: watermark calculation data
8877
 *
8878
 * Calculate the dmif bandwidth used for display (CIK).
8879
 * Used for display watermark bandwidth calculations
8880
 * Returns the dmif bandwidth in MBytes/s
8881
 */
8882
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8883
{
8884
	/* Calculate the DMIF Request Bandwidth */
8885
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8886
	fixed20_12 disp_clk, bandwidth;
8887
	fixed20_12 a, b;
8888
 
8889
	a.full = dfixed_const(1000);
8890
	disp_clk.full = dfixed_const(wm->disp_clk);
8891
	disp_clk.full = dfixed_div(disp_clk, a);
8892
	a.full = dfixed_const(32);
8893
	b.full = dfixed_mul(a, disp_clk);
8894
 
8895
	a.full = dfixed_const(10);
8896
	disp_clk_request_efficiency.full = dfixed_const(8);
8897
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8898
 
8899
	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8900
 
8901
	return dfixed_trunc(bandwidth);
8902
}
8903
 
8904
/**
8905
 * dce8_available_bandwidth - get the min available bandwidth
8906
 *
8907
 * @wm: watermark calculation data
8908
 *
8909
 * Calculate the min available bandwidth used for display (CIK).
8910
 * Used for display watermark bandwidth calculations
8911
 * Returns the min available bandwidth in MBytes/s
8912
 */
8913
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8914
{
8915
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8916
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8917
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8918
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8919
 
8920
	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8921
}
8922
 
8923
/**
8924
 * dce8_average_bandwidth - get the average available bandwidth
8925
 *
8926
 * @wm: watermark calculation data
8927
 *
8928
 * Calculate the average available bandwidth used for display (CIK).
8929
 * Used for display watermark bandwidth calculations
8930
 * Returns the average available bandwidth in MBytes/s
8931
 */
8932
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8933
{
8934
	/* Calculate the display mode Average Bandwidth
8935
	 * DisplayMode should contain the source and destination dimensions,
8936
	 * timing, etc.
8937
	 */
8938
	fixed20_12 bpp;
8939
	fixed20_12 line_time;
8940
	fixed20_12 src_width;
8941
	fixed20_12 bandwidth;
8942
	fixed20_12 a;
8943
 
8944
	a.full = dfixed_const(1000);
8945
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8946
	line_time.full = dfixed_div(line_time, a);
8947
	bpp.full = dfixed_const(wm->bytes_per_pixel);
8948
	src_width.full = dfixed_const(wm->src_width);
8949
	bandwidth.full = dfixed_mul(src_width, bpp);
8950
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8951
	bandwidth.full = dfixed_div(bandwidth, line_time);
8952
 
8953
	return dfixed_trunc(bandwidth);
8954
}
8955
 
8956
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK): memory latency plus the
 * data-return time of the other active heads, extended when the line
 * buffer cannot be filled within one active-line time.
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	/* NOTE(review): available_bandwidth is used as a divisor before the
	 * num_heads == 0 early-out; presumably it is non-zero for any
	 * configured clocks — confirm. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaling factor: 4 source lines per output line for heavy
	 * downscales/many taps/interlace, otherwise 2 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* per-head share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* dmif-size-limited fill rate */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* display-clock-limited fill rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the smaller of the two limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the worst-case source lines at that bandwidth */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the fill is slower than the active time, pad the latency */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9027
 
9028
/**
9029
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9030
 * average and available dram bandwidth
9031
 *
9032
 * @wm: watermark calculation data
9033
 *
9034
 * Check if the display average bandwidth fits in the display
9035
 * dram bandwidth (CIK).
9036
 * Used for display watermark bandwidth calculations
9037
 * Returns true if the display fits, false if not.
9038
 */
9039
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9040
{
9041
	if (dce8_average_bandwidth(wm) <=
9042
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9043
		return true;
9044
	else
9045
		return false;
9046
}
9047
 
9048
/**
9049
 * dce8_average_bandwidth_vs_available_bandwidth - check
9050
 * average and available bandwidth
9051
 *
9052
 * @wm: watermark calculation data
9053
 *
9054
 * Check if the display average bandwidth fits in the display
9055
 * available bandwidth (CIK).
9056
 * Used for display watermark bandwidth calculations
9057
 * Returns true if the display fits, false if not.
9058
 */
9059
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9060
{
9061
	if (dce8_average_bandwidth(wm) <=
9062
	    (dce8_available_bandwidth(wm) / wm->num_heads))
9063
		return true;
9064
	else
9065
		return false;
9066
}
9067
 
9068
/**
9069
 * dce8_check_latency_hiding - check latency hiding
9070
 *
9071
 * @wm: watermark calculation data
9072
 *
9073
 * Check latency hiding (CIK).
9074
 * Used for display watermark bandwidth calculations
9075
 * Returns true if the display fits, false if not.
9076
 */
9077
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9078
{
9079
	u32 lb_partitions = wm->lb_size / wm->src_width;
9080
	u32 line_time = wm->active_time + wm->blank_time;
9081
	u32 latency_tolerant_lines;
9082
	u32 latency_hiding;
9083
	fixed20_12 a;
9084
 
9085
	a.full = dfixed_const(1);
9086
	if (wm->vsc.full > a.full)
9087
		latency_tolerant_lines = 1;
9088
	else {
9089
		if (lb_partitions <= (wm->vtaps + 1))
9090
			latency_tolerant_lines = 1;
9091
		else
9092
			latency_tolerant_lines = 2;
9093
	}
9094
 
9095
	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9096
 
9097
	if (dce8_latency_watermark(wm) <= latency_hiding)
9098
		return true;
9099
	else
9100
		return false;
9101
}
9102
 
9103
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK): watermark A from the high
 * (max performance) clocks, watermark B from the low clocks,
 * then both are written via the DPG watermark mask select.
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, capped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9235
 
9236
/**
9237
 * dce8_bandwidth_update - program display watermarks
9238
 *
9239
 * @rdev: radeon_device pointer
9240
 *
9241
 * Calculate and program the display watermarks and line
9242
 * buffer allocation (CIK).
9243
 */
9244
void dce8_bandwidth_update(struct radeon_device *rdev)
9245
{
9246
	struct drm_display_mode *mode = NULL;
9247
	u32 num_heads = 0, lb_size;
9248
	int i;
9249
 
5271 serge 9250
	if (!rdev->mode_info.mode_config_initialized)
9251
		return;
9252
 
5078 serge 9253
	radeon_update_display_priority(rdev);
9254
 
9255
	for (i = 0; i < rdev->num_crtc; i++) {
9256
		if (rdev->mode_info.crtcs[i]->base.enabled)
9257
			num_heads++;
9258
	}
9259
	for (i = 0; i < rdev->num_crtc; i++) {
9260
		mode = &rdev->mode_info.crtcs[i]->base.mode;
9261
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9262
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9263
	}
9264
}
9265
 
9266
/**
9267
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9268
 *
9269
 * @rdev: radeon_device pointer
9270
 *
9271
 * Fetches a GPU clock counter snapshot (SI).
9272
 * Returns the 64 bit clock counter snapshot.
9273
 */
9274
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9275
{
9276
	uint64_t clock;
9277
 
9278
	mutex_lock(&rdev->gpu_clock_mutex);
9279
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9280
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9281
	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9282
	mutex_unlock(&rdev->gpu_clock_mutex);
9283
	return clock;
9284
}
9285
 
9286
/**
 * cik_set_uvd_clock - program one UVD clock (VCLK or DCLK)
 *
 * @rdev: radeon_device pointer
 * @clock: target clock frequency
 * @cntl_reg: SMC control register for this clock (divider field)
 * @status_reg: SMC status register to poll for divider update completion
 *
 * Looks up the atom clock dividers for @clock, programs the post divider
 * into @cntl_reg and waits for @status_reg to report the change.
 * Returns 0 on success, a negative error code from the atom lookup, or
 * -ETIMEDOUT if the status bit never asserts.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			      u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	/* fix: reference was mojibake ("÷rs") — restored to &dividers */
	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	/* Switch off direct control and replace the divider field. */
	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* Poll up to ~1s (100 x 10ms) for the new divider to take effect. */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9313
 
9314
/**
 * cik_set_uvd_clocks - program the UVD VCLK and DCLK
 *
 * @rdev: radeon_device pointer
 * @vclk: target video clock frequency
 * @dclk: target display clock frequency
 *
 * Programs each clock in turn via cik_set_uvd_clock(); stops at the
 * first failure. Returns 0 on success or a negative error code.
 */
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	return cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
}
9325
 
9326
/**
 * cik_set_vce_clocks - program the VCE engine clock (ECLK)
 *
 * @rdev: radeon_device pointer
 * @evclk: target VCE video clock frequency (unused here; divider is
 *         derived from @ecclk)
 * @ecclk: target VCE engine clock frequency
 *
 * Waits for ECLK to be stable, programs the new post divider, then
 * waits for the change to complete. Returns 0 on success, a negative
 * error from the atom divider lookup, or -ETIMEDOUT on poll timeout.
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	/* fix: reference was mojibake ("÷rs") — restored to &dividers */
	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	/* Wait for a stable ECLK before touching the divider. */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	/* Disable direct control and install the new post divider. */
	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	/* Wait again for the divider change to take effect. */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9360
 
9361
/*
 * cik_pcie_gen3_enable - try to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Honors the radeon.pcie_gen2 module parameter and bails out on IGPs or
 * non-PCIE parts. Queries the platform's supported speeds, optionally
 * re-runs gen3 link equalization against the upstream bridge, then
 * requests the speed change and polls for completion. All config-space
 * accesses and their ordering follow the hardware bring-up sequence;
 * do not reorder.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* user disabled higher link speeds via module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither 5.0 GT/s nor 8.0 GT/s is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* 0 = gen1, 1 = gen2, 2 = gen3 (current negotiated rate) */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCIe capability offsets on both ends of the link */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save current link-control state on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* force hardware autonomous width disable during retraining */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its maximum if it trained narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore only the saved HAWD bit on each end */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore enter-compliance (bit 4) and
				 * compliance-de-emphasis (bits 9-11) from the saved values */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce before the next iteration */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low 4 bits of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* poll until the hardware clears the initiate bit (or time out silently) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9517
 
9518
/*
 * cik_program_aspm - configure PCIe ASPM (L0s/L1) power savings
 *
 * @rdev: radeon_device pointer
 *
 * Skipped entirely when disabled via the radeon.aspm parameter, on IGPs,
 * or on non-PCIE parts. The disable_* locals are compile-time policy
 * knobs (all false here): L0s and L1 entry are enabled, PLL power-down
 * in L1 is allowed, and CLKREQ-based clocking is used when the upstream
 * bridge advertises clock power management. Register write order matters;
 * keep the sequence as-is.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PCIe PLLs to power down while in L1
			 * (both PB0 and PB1 PIF pads, lanes 0 and 1) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ usable only if the upstream bridge
				 * advertises clock power management */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor clocks off the
				 * reference clock so it can gate with CLKREQ */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write back the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop the L0s inactivity timer again if N_FTS saturated
		 * on a reversed link — L0s is not reliable in that case */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}