Subversion Repositories Kolibri OS

Rev

Rev 6938 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5078 serge 1
/*
2
 * Copyright 2012 Advanced Micro Devices, Inc.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
 * OTHER DEALINGS IN THE SOFTWARE.
21
 *
22
 * Authors: Alex Deucher
23
 */
24
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"
5078 serge 37
 
38
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47
 
48
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49
MODULE_FIRMWARE("radeon/bonaire_me.bin");
50
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56
 
57
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66
 
67
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68
MODULE_FIRMWARE("radeon/hawaii_me.bin");
69
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75
 
76
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82
 
83
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84
MODULE_FIRMWARE("radeon/kaveri_me.bin");
85
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90
 
91
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92
MODULE_FIRMWARE("radeon/KABINI_me.bin");
93
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97
 
98
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99
MODULE_FIRMWARE("radeon/kabini_me.bin");
100
MODULE_FIRMWARE("radeon/kabini_ce.bin");
101
MODULE_FIRMWARE("radeon/kabini_mec.bin");
102
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103
MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104
 
105
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111
 
112
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113
MODULE_FIRMWARE("radeon/mullins_me.bin");
114
MODULE_FIRMWARE("radeon/mullins_ce.bin");
115
MODULE_FIRMWARE("radeon/mullins_mec.bin");
116
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118
 
119
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120
extern void r600_ih_ring_fini(struct radeon_device *rdev);
121
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124
extern void sumo_rlc_fini(struct radeon_device *rdev);
125
extern int sumo_rlc_init(struct radeon_device *rdev);
126
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127
extern void si_rlc_reset(struct radeon_device *rdev);
128
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130
extern int cik_sdma_resume(struct radeon_device *rdev);
131
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132
extern void cik_sdma_fini(struct radeon_device *rdev);
133
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134
static void cik_rlc_stop(struct radeon_device *rdev);
135
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136
static void cik_program_aspm(struct radeon_device *rdev);
137
static void cik_init_pg(struct radeon_device *rdev);
138
static void cik_init_cg(struct radeon_device *rdev);
139
static void cik_fini_pg(struct radeon_device *rdev);
140
static void cik_fini_cg(struct radeon_device *rdev);
141
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142
					  bool enable);
143
 
6104 serge 144
/**
145
 * cik_get_allowed_info_register - fetch the register for the info ioctl
146
 *
147
 * @rdev: radeon_device pointer
148
 * @reg: register offset in bytes
149
 * @val: register value
150
 *
151
 * Returns 0 for success or -EINVAL for an invalid register
152
 *
153
 */
154
int cik_get_allowed_info_register(struct radeon_device *rdev,
155
				  u32 reg, u32 *val)
156
{
157
	switch (reg) {
158
	case GRBM_STATUS:
159
	case GRBM_STATUS2:
160
	case GRBM_STATUS_SE0:
161
	case GRBM_STATUS_SE1:
162
	case GRBM_STATUS_SE2:
163
	case GRBM_STATUS_SE3:
164
	case SRBM_STATUS:
165
	case SRBM_STATUS2:
166
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168
	case UVD_STATUS:
169
	/* TODO VCE */
170
		*val = RREG32(reg);
171
		return 0;
172
	default:
173
		return -EINVAL;
174
	}
175
}
176
 
177
/*
178
 * Indirect registers accessor
179
 */
180
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
181
{
182
	unsigned long flags;
183
	u32 r;
184
 
185
	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
186
	WREG32(CIK_DIDT_IND_INDEX, (reg));
187
	r = RREG32(CIK_DIDT_IND_DATA);
188
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
189
	return r;
190
}
191
 
192
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
193
{
194
	unsigned long flags;
195
 
196
	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
197
	WREG32(CIK_DIDT_IND_INDEX, (reg));
198
	WREG32(CIK_DIDT_IND_DATA, (v));
199
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
200
}
201
 
5078 serge 202
/* get temperature in millidegrees */
203
int ci_get_temp(struct radeon_device *rdev)
204
{
205
	u32 temp;
206
	int actual_temp = 0;
207
 
208
	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
209
		CTF_TEMP_SHIFT;
210
 
211
	if (temp & 0x200)
212
		actual_temp = 255;
213
	else
214
		actual_temp = temp & 0x1ff;
215
 
216
	actual_temp = actual_temp * 1000;
217
 
218
	return actual_temp;
219
}
220
 
221
/* get temperature in millidegrees */
222
int kv_get_temp(struct radeon_device *rdev)
223
{
224
	u32 temp;
225
	int actual_temp = 0;
226
 
227
	temp = RREG32_SMC(0xC0300E0C);
228
 
229
	if (temp)
230
		actual_temp = (temp / 8) - 49;
231
	else
232
		actual_temp = 0;
233
 
234
	actual_temp = actual_temp * 1000;
235
 
236
	return actual_temp;
237
}
238
 
239
/*
240
 * Indirect registers accessor
241
 */
242
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
243
{
244
	unsigned long flags;
245
	u32 r;
246
 
247
	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
248
	WREG32(PCIE_INDEX, reg);
249
	(void)RREG32(PCIE_INDEX);
250
	r = RREG32(PCIE_DATA);
251
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
252
	return r;
253
}
254
 
255
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
256
{
257
	unsigned long flags;
258
 
259
	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
260
	WREG32(PCIE_INDEX, reg);
261
	(void)RREG32(PCIE_INDEX);
262
	WREG32(PCIE_DATA, v);
263
	(void)RREG32(PCIE_DATA);
264
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
265
}
266
 
267
static const u32 spectre_rlc_save_restore_register_list[] =
268
{
269
	(0x0e00 << 16) | (0xc12c >> 2),
270
	0x00000000,
271
	(0x0e00 << 16) | (0xc140 >> 2),
272
	0x00000000,
273
	(0x0e00 << 16) | (0xc150 >> 2),
274
	0x00000000,
275
	(0x0e00 << 16) | (0xc15c >> 2),
276
	0x00000000,
277
	(0x0e00 << 16) | (0xc168 >> 2),
278
	0x00000000,
279
	(0x0e00 << 16) | (0xc170 >> 2),
280
	0x00000000,
281
	(0x0e00 << 16) | (0xc178 >> 2),
282
	0x00000000,
283
	(0x0e00 << 16) | (0xc204 >> 2),
284
	0x00000000,
285
	(0x0e00 << 16) | (0xc2b4 >> 2),
286
	0x00000000,
287
	(0x0e00 << 16) | (0xc2b8 >> 2),
288
	0x00000000,
289
	(0x0e00 << 16) | (0xc2bc >> 2),
290
	0x00000000,
291
	(0x0e00 << 16) | (0xc2c0 >> 2),
292
	0x00000000,
293
	(0x0e00 << 16) | (0x8228 >> 2),
294
	0x00000000,
295
	(0x0e00 << 16) | (0x829c >> 2),
296
	0x00000000,
297
	(0x0e00 << 16) | (0x869c >> 2),
298
	0x00000000,
299
	(0x0600 << 16) | (0x98f4 >> 2),
300
	0x00000000,
301
	(0x0e00 << 16) | (0x98f8 >> 2),
302
	0x00000000,
303
	(0x0e00 << 16) | (0x9900 >> 2),
304
	0x00000000,
305
	(0x0e00 << 16) | (0xc260 >> 2),
306
	0x00000000,
307
	(0x0e00 << 16) | (0x90e8 >> 2),
308
	0x00000000,
309
	(0x0e00 << 16) | (0x3c000 >> 2),
310
	0x00000000,
311
	(0x0e00 << 16) | (0x3c00c >> 2),
312
	0x00000000,
313
	(0x0e00 << 16) | (0x8c1c >> 2),
314
	0x00000000,
315
	(0x0e00 << 16) | (0x9700 >> 2),
316
	0x00000000,
317
	(0x0e00 << 16) | (0xcd20 >> 2),
318
	0x00000000,
319
	(0x4e00 << 16) | (0xcd20 >> 2),
320
	0x00000000,
321
	(0x5e00 << 16) | (0xcd20 >> 2),
322
	0x00000000,
323
	(0x6e00 << 16) | (0xcd20 >> 2),
324
	0x00000000,
325
	(0x7e00 << 16) | (0xcd20 >> 2),
326
	0x00000000,
327
	(0x8e00 << 16) | (0xcd20 >> 2),
328
	0x00000000,
329
	(0x9e00 << 16) | (0xcd20 >> 2),
330
	0x00000000,
331
	(0xae00 << 16) | (0xcd20 >> 2),
332
	0x00000000,
333
	(0xbe00 << 16) | (0xcd20 >> 2),
334
	0x00000000,
335
	(0x0e00 << 16) | (0x89bc >> 2),
336
	0x00000000,
337
	(0x0e00 << 16) | (0x8900 >> 2),
338
	0x00000000,
339
	0x3,
340
	(0x0e00 << 16) | (0xc130 >> 2),
341
	0x00000000,
342
	(0x0e00 << 16) | (0xc134 >> 2),
343
	0x00000000,
344
	(0x0e00 << 16) | (0xc1fc >> 2),
345
	0x00000000,
346
	(0x0e00 << 16) | (0xc208 >> 2),
347
	0x00000000,
348
	(0x0e00 << 16) | (0xc264 >> 2),
349
	0x00000000,
350
	(0x0e00 << 16) | (0xc268 >> 2),
351
	0x00000000,
352
	(0x0e00 << 16) | (0xc26c >> 2),
353
	0x00000000,
354
	(0x0e00 << 16) | (0xc270 >> 2),
355
	0x00000000,
356
	(0x0e00 << 16) | (0xc274 >> 2),
357
	0x00000000,
358
	(0x0e00 << 16) | (0xc278 >> 2),
359
	0x00000000,
360
	(0x0e00 << 16) | (0xc27c >> 2),
361
	0x00000000,
362
	(0x0e00 << 16) | (0xc280 >> 2),
363
	0x00000000,
364
	(0x0e00 << 16) | (0xc284 >> 2),
365
	0x00000000,
366
	(0x0e00 << 16) | (0xc288 >> 2),
367
	0x00000000,
368
	(0x0e00 << 16) | (0xc28c >> 2),
369
	0x00000000,
370
	(0x0e00 << 16) | (0xc290 >> 2),
371
	0x00000000,
372
	(0x0e00 << 16) | (0xc294 >> 2),
373
	0x00000000,
374
	(0x0e00 << 16) | (0xc298 >> 2),
375
	0x00000000,
376
	(0x0e00 << 16) | (0xc29c >> 2),
377
	0x00000000,
378
	(0x0e00 << 16) | (0xc2a0 >> 2),
379
	0x00000000,
380
	(0x0e00 << 16) | (0xc2a4 >> 2),
381
	0x00000000,
382
	(0x0e00 << 16) | (0xc2a8 >> 2),
383
	0x00000000,
384
	(0x0e00 << 16) | (0xc2ac  >> 2),
385
	0x00000000,
386
	(0x0e00 << 16) | (0xc2b0 >> 2),
387
	0x00000000,
388
	(0x0e00 << 16) | (0x301d0 >> 2),
389
	0x00000000,
390
	(0x0e00 << 16) | (0x30238 >> 2),
391
	0x00000000,
392
	(0x0e00 << 16) | (0x30250 >> 2),
393
	0x00000000,
394
	(0x0e00 << 16) | (0x30254 >> 2),
395
	0x00000000,
396
	(0x0e00 << 16) | (0x30258 >> 2),
397
	0x00000000,
398
	(0x0e00 << 16) | (0x3025c >> 2),
399
	0x00000000,
400
	(0x4e00 << 16) | (0xc900 >> 2),
401
	0x00000000,
402
	(0x5e00 << 16) | (0xc900 >> 2),
403
	0x00000000,
404
	(0x6e00 << 16) | (0xc900 >> 2),
405
	0x00000000,
406
	(0x7e00 << 16) | (0xc900 >> 2),
407
	0x00000000,
408
	(0x8e00 << 16) | (0xc900 >> 2),
409
	0x00000000,
410
	(0x9e00 << 16) | (0xc900 >> 2),
411
	0x00000000,
412
	(0xae00 << 16) | (0xc900 >> 2),
413
	0x00000000,
414
	(0xbe00 << 16) | (0xc900 >> 2),
415
	0x00000000,
416
	(0x4e00 << 16) | (0xc904 >> 2),
417
	0x00000000,
418
	(0x5e00 << 16) | (0xc904 >> 2),
419
	0x00000000,
420
	(0x6e00 << 16) | (0xc904 >> 2),
421
	0x00000000,
422
	(0x7e00 << 16) | (0xc904 >> 2),
423
	0x00000000,
424
	(0x8e00 << 16) | (0xc904 >> 2),
425
	0x00000000,
426
	(0x9e00 << 16) | (0xc904 >> 2),
427
	0x00000000,
428
	(0xae00 << 16) | (0xc904 >> 2),
429
	0x00000000,
430
	(0xbe00 << 16) | (0xc904 >> 2),
431
	0x00000000,
432
	(0x4e00 << 16) | (0xc908 >> 2),
433
	0x00000000,
434
	(0x5e00 << 16) | (0xc908 >> 2),
435
	0x00000000,
436
	(0x6e00 << 16) | (0xc908 >> 2),
437
	0x00000000,
438
	(0x7e00 << 16) | (0xc908 >> 2),
439
	0x00000000,
440
	(0x8e00 << 16) | (0xc908 >> 2),
441
	0x00000000,
442
	(0x9e00 << 16) | (0xc908 >> 2),
443
	0x00000000,
444
	(0xae00 << 16) | (0xc908 >> 2),
445
	0x00000000,
446
	(0xbe00 << 16) | (0xc908 >> 2),
447
	0x00000000,
448
	(0x4e00 << 16) | (0xc90c >> 2),
449
	0x00000000,
450
	(0x5e00 << 16) | (0xc90c >> 2),
451
	0x00000000,
452
	(0x6e00 << 16) | (0xc90c >> 2),
453
	0x00000000,
454
	(0x7e00 << 16) | (0xc90c >> 2),
455
	0x00000000,
456
	(0x8e00 << 16) | (0xc90c >> 2),
457
	0x00000000,
458
	(0x9e00 << 16) | (0xc90c >> 2),
459
	0x00000000,
460
	(0xae00 << 16) | (0xc90c >> 2),
461
	0x00000000,
462
	(0xbe00 << 16) | (0xc90c >> 2),
463
	0x00000000,
464
	(0x4e00 << 16) | (0xc910 >> 2),
465
	0x00000000,
466
	(0x5e00 << 16) | (0xc910 >> 2),
467
	0x00000000,
468
	(0x6e00 << 16) | (0xc910 >> 2),
469
	0x00000000,
470
	(0x7e00 << 16) | (0xc910 >> 2),
471
	0x00000000,
472
	(0x8e00 << 16) | (0xc910 >> 2),
473
	0x00000000,
474
	(0x9e00 << 16) | (0xc910 >> 2),
475
	0x00000000,
476
	(0xae00 << 16) | (0xc910 >> 2),
477
	0x00000000,
478
	(0xbe00 << 16) | (0xc910 >> 2),
479
	0x00000000,
480
	(0x0e00 << 16) | (0xc99c >> 2),
481
	0x00000000,
482
	(0x0e00 << 16) | (0x9834 >> 2),
483
	0x00000000,
484
	(0x0000 << 16) | (0x30f00 >> 2),
485
	0x00000000,
486
	(0x0001 << 16) | (0x30f00 >> 2),
487
	0x00000000,
488
	(0x0000 << 16) | (0x30f04 >> 2),
489
	0x00000000,
490
	(0x0001 << 16) | (0x30f04 >> 2),
491
	0x00000000,
492
	(0x0000 << 16) | (0x30f08 >> 2),
493
	0x00000000,
494
	(0x0001 << 16) | (0x30f08 >> 2),
495
	0x00000000,
496
	(0x0000 << 16) | (0x30f0c >> 2),
497
	0x00000000,
498
	(0x0001 << 16) | (0x30f0c >> 2),
499
	0x00000000,
500
	(0x0600 << 16) | (0x9b7c >> 2),
501
	0x00000000,
502
	(0x0e00 << 16) | (0x8a14 >> 2),
503
	0x00000000,
504
	(0x0e00 << 16) | (0x8a18 >> 2),
505
	0x00000000,
506
	(0x0600 << 16) | (0x30a00 >> 2),
507
	0x00000000,
508
	(0x0e00 << 16) | (0x8bf0 >> 2),
509
	0x00000000,
510
	(0x0e00 << 16) | (0x8bcc >> 2),
511
	0x00000000,
512
	(0x0e00 << 16) | (0x8b24 >> 2),
513
	0x00000000,
514
	(0x0e00 << 16) | (0x30a04 >> 2),
515
	0x00000000,
516
	(0x0600 << 16) | (0x30a10 >> 2),
517
	0x00000000,
518
	(0x0600 << 16) | (0x30a14 >> 2),
519
	0x00000000,
520
	(0x0600 << 16) | (0x30a18 >> 2),
521
	0x00000000,
522
	(0x0600 << 16) | (0x30a2c >> 2),
523
	0x00000000,
524
	(0x0e00 << 16) | (0xc700 >> 2),
525
	0x00000000,
526
	(0x0e00 << 16) | (0xc704 >> 2),
527
	0x00000000,
528
	(0x0e00 << 16) | (0xc708 >> 2),
529
	0x00000000,
530
	(0x0e00 << 16) | (0xc768 >> 2),
531
	0x00000000,
532
	(0x0400 << 16) | (0xc770 >> 2),
533
	0x00000000,
534
	(0x0400 << 16) | (0xc774 >> 2),
535
	0x00000000,
536
	(0x0400 << 16) | (0xc778 >> 2),
537
	0x00000000,
538
	(0x0400 << 16) | (0xc77c >> 2),
539
	0x00000000,
540
	(0x0400 << 16) | (0xc780 >> 2),
541
	0x00000000,
542
	(0x0400 << 16) | (0xc784 >> 2),
543
	0x00000000,
544
	(0x0400 << 16) | (0xc788 >> 2),
545
	0x00000000,
546
	(0x0400 << 16) | (0xc78c >> 2),
547
	0x00000000,
548
	(0x0400 << 16) | (0xc798 >> 2),
549
	0x00000000,
550
	(0x0400 << 16) | (0xc79c >> 2),
551
	0x00000000,
552
	(0x0400 << 16) | (0xc7a0 >> 2),
553
	0x00000000,
554
	(0x0400 << 16) | (0xc7a4 >> 2),
555
	0x00000000,
556
	(0x0400 << 16) | (0xc7a8 >> 2),
557
	0x00000000,
558
	(0x0400 << 16) | (0xc7ac >> 2),
559
	0x00000000,
560
	(0x0400 << 16) | (0xc7b0 >> 2),
561
	0x00000000,
562
	(0x0400 << 16) | (0xc7b4 >> 2),
563
	0x00000000,
564
	(0x0e00 << 16) | (0x9100 >> 2),
565
	0x00000000,
566
	(0x0e00 << 16) | (0x3c010 >> 2),
567
	0x00000000,
568
	(0x0e00 << 16) | (0x92a8 >> 2),
569
	0x00000000,
570
	(0x0e00 << 16) | (0x92ac >> 2),
571
	0x00000000,
572
	(0x0e00 << 16) | (0x92b4 >> 2),
573
	0x00000000,
574
	(0x0e00 << 16) | (0x92b8 >> 2),
575
	0x00000000,
576
	(0x0e00 << 16) | (0x92bc >> 2),
577
	0x00000000,
578
	(0x0e00 << 16) | (0x92c0 >> 2),
579
	0x00000000,
580
	(0x0e00 << 16) | (0x92c4 >> 2),
581
	0x00000000,
582
	(0x0e00 << 16) | (0x92c8 >> 2),
583
	0x00000000,
584
	(0x0e00 << 16) | (0x92cc >> 2),
585
	0x00000000,
586
	(0x0e00 << 16) | (0x92d0 >> 2),
587
	0x00000000,
588
	(0x0e00 << 16) | (0x8c00 >> 2),
589
	0x00000000,
590
	(0x0e00 << 16) | (0x8c04 >> 2),
591
	0x00000000,
592
	(0x0e00 << 16) | (0x8c20 >> 2),
593
	0x00000000,
594
	(0x0e00 << 16) | (0x8c38 >> 2),
595
	0x00000000,
596
	(0x0e00 << 16) | (0x8c3c >> 2),
597
	0x00000000,
598
	(0x0e00 << 16) | (0xae00 >> 2),
599
	0x00000000,
600
	(0x0e00 << 16) | (0x9604 >> 2),
601
	0x00000000,
602
	(0x0e00 << 16) | (0xac08 >> 2),
603
	0x00000000,
604
	(0x0e00 << 16) | (0xac0c >> 2),
605
	0x00000000,
606
	(0x0e00 << 16) | (0xac10 >> 2),
607
	0x00000000,
608
	(0x0e00 << 16) | (0xac14 >> 2),
609
	0x00000000,
610
	(0x0e00 << 16) | (0xac58 >> 2),
611
	0x00000000,
612
	(0x0e00 << 16) | (0xac68 >> 2),
613
	0x00000000,
614
	(0x0e00 << 16) | (0xac6c >> 2),
615
	0x00000000,
616
	(0x0e00 << 16) | (0xac70 >> 2),
617
	0x00000000,
618
	(0x0e00 << 16) | (0xac74 >> 2),
619
	0x00000000,
620
	(0x0e00 << 16) | (0xac78 >> 2),
621
	0x00000000,
622
	(0x0e00 << 16) | (0xac7c >> 2),
623
	0x00000000,
624
	(0x0e00 << 16) | (0xac80 >> 2),
625
	0x00000000,
626
	(0x0e00 << 16) | (0xac84 >> 2),
627
	0x00000000,
628
	(0x0e00 << 16) | (0xac88 >> 2),
629
	0x00000000,
630
	(0x0e00 << 16) | (0xac8c >> 2),
631
	0x00000000,
632
	(0x0e00 << 16) | (0x970c >> 2),
633
	0x00000000,
634
	(0x0e00 << 16) | (0x9714 >> 2),
635
	0x00000000,
636
	(0x0e00 << 16) | (0x9718 >> 2),
637
	0x00000000,
638
	(0x0e00 << 16) | (0x971c >> 2),
639
	0x00000000,
640
	(0x0e00 << 16) | (0x31068 >> 2),
641
	0x00000000,
642
	(0x4e00 << 16) | (0x31068 >> 2),
643
	0x00000000,
644
	(0x5e00 << 16) | (0x31068 >> 2),
645
	0x00000000,
646
	(0x6e00 << 16) | (0x31068 >> 2),
647
	0x00000000,
648
	(0x7e00 << 16) | (0x31068 >> 2),
649
	0x00000000,
650
	(0x8e00 << 16) | (0x31068 >> 2),
651
	0x00000000,
652
	(0x9e00 << 16) | (0x31068 >> 2),
653
	0x00000000,
654
	(0xae00 << 16) | (0x31068 >> 2),
655
	0x00000000,
656
	(0xbe00 << 16) | (0x31068 >> 2),
657
	0x00000000,
658
	(0x0e00 << 16) | (0xcd10 >> 2),
659
	0x00000000,
660
	(0x0e00 << 16) | (0xcd14 >> 2),
661
	0x00000000,
662
	(0x0e00 << 16) | (0x88b0 >> 2),
663
	0x00000000,
664
	(0x0e00 << 16) | (0x88b4 >> 2),
665
	0x00000000,
666
	(0x0e00 << 16) | (0x88b8 >> 2),
667
	0x00000000,
668
	(0x0e00 << 16) | (0x88bc >> 2),
669
	0x00000000,
670
	(0x0400 << 16) | (0x89c0 >> 2),
671
	0x00000000,
672
	(0x0e00 << 16) | (0x88c4 >> 2),
673
	0x00000000,
674
	(0x0e00 << 16) | (0x88c8 >> 2),
675
	0x00000000,
676
	(0x0e00 << 16) | (0x88d0 >> 2),
677
	0x00000000,
678
	(0x0e00 << 16) | (0x88d4 >> 2),
679
	0x00000000,
680
	(0x0e00 << 16) | (0x88d8 >> 2),
681
	0x00000000,
682
	(0x0e00 << 16) | (0x8980 >> 2),
683
	0x00000000,
684
	(0x0e00 << 16) | (0x30938 >> 2),
685
	0x00000000,
686
	(0x0e00 << 16) | (0x3093c >> 2),
687
	0x00000000,
688
	(0x0e00 << 16) | (0x30940 >> 2),
689
	0x00000000,
690
	(0x0e00 << 16) | (0x89a0 >> 2),
691
	0x00000000,
692
	(0x0e00 << 16) | (0x30900 >> 2),
693
	0x00000000,
694
	(0x0e00 << 16) | (0x30904 >> 2),
695
	0x00000000,
696
	(0x0e00 << 16) | (0x89b4 >> 2),
697
	0x00000000,
698
	(0x0e00 << 16) | (0x3c210 >> 2),
699
	0x00000000,
700
	(0x0e00 << 16) | (0x3c214 >> 2),
701
	0x00000000,
702
	(0x0e00 << 16) | (0x3c218 >> 2),
703
	0x00000000,
704
	(0x0e00 << 16) | (0x8904 >> 2),
705
	0x00000000,
706
	0x5,
707
	(0x0e00 << 16) | (0x8c28 >> 2),
708
	(0x0e00 << 16) | (0x8c2c >> 2),
709
	(0x0e00 << 16) | (0x8c30 >> 2),
710
	(0x0e00 << 16) | (0x8c34 >> 2),
711
	(0x0e00 << 16) | (0x9600 >> 2),
712
};
713
 
714
static const u32 kalindi_rlc_save_restore_register_list[] =
715
{
716
	(0x0e00 << 16) | (0xc12c >> 2),
717
	0x00000000,
718
	(0x0e00 << 16) | (0xc140 >> 2),
719
	0x00000000,
720
	(0x0e00 << 16) | (0xc150 >> 2),
721
	0x00000000,
722
	(0x0e00 << 16) | (0xc15c >> 2),
723
	0x00000000,
724
	(0x0e00 << 16) | (0xc168 >> 2),
725
	0x00000000,
726
	(0x0e00 << 16) | (0xc170 >> 2),
727
	0x00000000,
728
	(0x0e00 << 16) | (0xc204 >> 2),
729
	0x00000000,
730
	(0x0e00 << 16) | (0xc2b4 >> 2),
731
	0x00000000,
732
	(0x0e00 << 16) | (0xc2b8 >> 2),
733
	0x00000000,
734
	(0x0e00 << 16) | (0xc2bc >> 2),
735
	0x00000000,
736
	(0x0e00 << 16) | (0xc2c0 >> 2),
737
	0x00000000,
738
	(0x0e00 << 16) | (0x8228 >> 2),
739
	0x00000000,
740
	(0x0e00 << 16) | (0x829c >> 2),
741
	0x00000000,
742
	(0x0e00 << 16) | (0x869c >> 2),
743
	0x00000000,
744
	(0x0600 << 16) | (0x98f4 >> 2),
745
	0x00000000,
746
	(0x0e00 << 16) | (0x98f8 >> 2),
747
	0x00000000,
748
	(0x0e00 << 16) | (0x9900 >> 2),
749
	0x00000000,
750
	(0x0e00 << 16) | (0xc260 >> 2),
751
	0x00000000,
752
	(0x0e00 << 16) | (0x90e8 >> 2),
753
	0x00000000,
754
	(0x0e00 << 16) | (0x3c000 >> 2),
755
	0x00000000,
756
	(0x0e00 << 16) | (0x3c00c >> 2),
757
	0x00000000,
758
	(0x0e00 << 16) | (0x8c1c >> 2),
759
	0x00000000,
760
	(0x0e00 << 16) | (0x9700 >> 2),
761
	0x00000000,
762
	(0x0e00 << 16) | (0xcd20 >> 2),
763
	0x00000000,
764
	(0x4e00 << 16) | (0xcd20 >> 2),
765
	0x00000000,
766
	(0x5e00 << 16) | (0xcd20 >> 2),
767
	0x00000000,
768
	(0x6e00 << 16) | (0xcd20 >> 2),
769
	0x00000000,
770
	(0x7e00 << 16) | (0xcd20 >> 2),
771
	0x00000000,
772
	(0x0e00 << 16) | (0x89bc >> 2),
773
	0x00000000,
774
	(0x0e00 << 16) | (0x8900 >> 2),
775
	0x00000000,
776
	0x3,
777
	(0x0e00 << 16) | (0xc130 >> 2),
778
	0x00000000,
779
	(0x0e00 << 16) | (0xc134 >> 2),
780
	0x00000000,
781
	(0x0e00 << 16) | (0xc1fc >> 2),
782
	0x00000000,
783
	(0x0e00 << 16) | (0xc208 >> 2),
784
	0x00000000,
785
	(0x0e00 << 16) | (0xc264 >> 2),
786
	0x00000000,
787
	(0x0e00 << 16) | (0xc268 >> 2),
788
	0x00000000,
789
	(0x0e00 << 16) | (0xc26c >> 2),
790
	0x00000000,
791
	(0x0e00 << 16) | (0xc270 >> 2),
792
	0x00000000,
793
	(0x0e00 << 16) | (0xc274 >> 2),
794
	0x00000000,
795
	(0x0e00 << 16) | (0xc28c >> 2),
796
	0x00000000,
797
	(0x0e00 << 16) | (0xc290 >> 2),
798
	0x00000000,
799
	(0x0e00 << 16) | (0xc294 >> 2),
800
	0x00000000,
801
	(0x0e00 << 16) | (0xc298 >> 2),
802
	0x00000000,
803
	(0x0e00 << 16) | (0xc2a0 >> 2),
804
	0x00000000,
805
	(0x0e00 << 16) | (0xc2a4 >> 2),
806
	0x00000000,
807
	(0x0e00 << 16) | (0xc2a8 >> 2),
808
	0x00000000,
809
	(0x0e00 << 16) | (0xc2ac >> 2),
810
	0x00000000,
811
	(0x0e00 << 16) | (0x301d0 >> 2),
812
	0x00000000,
813
	(0x0e00 << 16) | (0x30238 >> 2),
814
	0x00000000,
815
	(0x0e00 << 16) | (0x30250 >> 2),
816
	0x00000000,
817
	(0x0e00 << 16) | (0x30254 >> 2),
818
	0x00000000,
819
	(0x0e00 << 16) | (0x30258 >> 2),
820
	0x00000000,
821
	(0x0e00 << 16) | (0x3025c >> 2),
822
	0x00000000,
823
	(0x4e00 << 16) | (0xc900 >> 2),
824
	0x00000000,
825
	(0x5e00 << 16) | (0xc900 >> 2),
826
	0x00000000,
827
	(0x6e00 << 16) | (0xc900 >> 2),
828
	0x00000000,
829
	(0x7e00 << 16) | (0xc900 >> 2),
830
	0x00000000,
831
	(0x4e00 << 16) | (0xc904 >> 2),
832
	0x00000000,
833
	(0x5e00 << 16) | (0xc904 >> 2),
834
	0x00000000,
835
	(0x6e00 << 16) | (0xc904 >> 2),
836
	0x00000000,
837
	(0x7e00 << 16) | (0xc904 >> 2),
838
	0x00000000,
839
	(0x4e00 << 16) | (0xc908 >> 2),
840
	0x00000000,
841
	(0x5e00 << 16) | (0xc908 >> 2),
842
	0x00000000,
843
	(0x6e00 << 16) | (0xc908 >> 2),
844
	0x00000000,
845
	(0x7e00 << 16) | (0xc908 >> 2),
846
	0x00000000,
847
	(0x4e00 << 16) | (0xc90c >> 2),
848
	0x00000000,
849
	(0x5e00 << 16) | (0xc90c >> 2),
850
	0x00000000,
851
	(0x6e00 << 16) | (0xc90c >> 2),
852
	0x00000000,
853
	(0x7e00 << 16) | (0xc90c >> 2),
854
	0x00000000,
855
	(0x4e00 << 16) | (0xc910 >> 2),
856
	0x00000000,
857
	(0x5e00 << 16) | (0xc910 >> 2),
858
	0x00000000,
859
	(0x6e00 << 16) | (0xc910 >> 2),
860
	0x00000000,
861
	(0x7e00 << 16) | (0xc910 >> 2),
862
	0x00000000,
863
	(0x0e00 << 16) | (0xc99c >> 2),
864
	0x00000000,
865
	(0x0e00 << 16) | (0x9834 >> 2),
866
	0x00000000,
867
	(0x0000 << 16) | (0x30f00 >> 2),
868
	0x00000000,
869
	(0x0000 << 16) | (0x30f04 >> 2),
870
	0x00000000,
871
	(0x0000 << 16) | (0x30f08 >> 2),
872
	0x00000000,
873
	(0x0000 << 16) | (0x30f0c >> 2),
874
	0x00000000,
875
	(0x0600 << 16) | (0x9b7c >> 2),
876
	0x00000000,
877
	(0x0e00 << 16) | (0x8a14 >> 2),
878
	0x00000000,
879
	(0x0e00 << 16) | (0x8a18 >> 2),
880
	0x00000000,
881
	(0x0600 << 16) | (0x30a00 >> 2),
882
	0x00000000,
883
	(0x0e00 << 16) | (0x8bf0 >> 2),
884
	0x00000000,
885
	(0x0e00 << 16) | (0x8bcc >> 2),
886
	0x00000000,
887
	(0x0e00 << 16) | (0x8b24 >> 2),
888
	0x00000000,
889
	(0x0e00 << 16) | (0x30a04 >> 2),
890
	0x00000000,
891
	(0x0600 << 16) | (0x30a10 >> 2),
892
	0x00000000,
893
	(0x0600 << 16) | (0x30a14 >> 2),
894
	0x00000000,
895
	(0x0600 << 16) | (0x30a18 >> 2),
896
	0x00000000,
897
	(0x0600 << 16) | (0x30a2c >> 2),
898
	0x00000000,
899
	(0x0e00 << 16) | (0xc700 >> 2),
900
	0x00000000,
901
	(0x0e00 << 16) | (0xc704 >> 2),
902
	0x00000000,
903
	(0x0e00 << 16) | (0xc708 >> 2),
904
	0x00000000,
905
	(0x0e00 << 16) | (0xc768 >> 2),
906
	0x00000000,
907
	(0x0400 << 16) | (0xc770 >> 2),
908
	0x00000000,
909
	(0x0400 << 16) | (0xc774 >> 2),
910
	0x00000000,
911
	(0x0400 << 16) | (0xc798 >> 2),
912
	0x00000000,
913
	(0x0400 << 16) | (0xc79c >> 2),
914
	0x00000000,
915
	(0x0e00 << 16) | (0x9100 >> 2),
916
	0x00000000,
917
	(0x0e00 << 16) | (0x3c010 >> 2),
918
	0x00000000,
919
	(0x0e00 << 16) | (0x8c00 >> 2),
920
	0x00000000,
921
	(0x0e00 << 16) | (0x8c04 >> 2),
922
	0x00000000,
923
	(0x0e00 << 16) | (0x8c20 >> 2),
924
	0x00000000,
925
	(0x0e00 << 16) | (0x8c38 >> 2),
926
	0x00000000,
927
	(0x0e00 << 16) | (0x8c3c >> 2),
928
	0x00000000,
929
	(0x0e00 << 16) | (0xae00 >> 2),
930
	0x00000000,
931
	(0x0e00 << 16) | (0x9604 >> 2),
932
	0x00000000,
933
	(0x0e00 << 16) | (0xac08 >> 2),
934
	0x00000000,
935
	(0x0e00 << 16) | (0xac0c >> 2),
936
	0x00000000,
937
	(0x0e00 << 16) | (0xac10 >> 2),
938
	0x00000000,
939
	(0x0e00 << 16) | (0xac14 >> 2),
940
	0x00000000,
941
	(0x0e00 << 16) | (0xac58 >> 2),
942
	0x00000000,
943
	(0x0e00 << 16) | (0xac68 >> 2),
944
	0x00000000,
945
	(0x0e00 << 16) | (0xac6c >> 2),
946
	0x00000000,
947
	(0x0e00 << 16) | (0xac70 >> 2),
948
	0x00000000,
949
	(0x0e00 << 16) | (0xac74 >> 2),
950
	0x00000000,
951
	(0x0e00 << 16) | (0xac78 >> 2),
952
	0x00000000,
953
	(0x0e00 << 16) | (0xac7c >> 2),
954
	0x00000000,
955
	(0x0e00 << 16) | (0xac80 >> 2),
956
	0x00000000,
957
	(0x0e00 << 16) | (0xac84 >> 2),
958
	0x00000000,
959
	(0x0e00 << 16) | (0xac88 >> 2),
960
	0x00000000,
961
	(0x0e00 << 16) | (0xac8c >> 2),
962
	0x00000000,
963
	(0x0e00 << 16) | (0x970c >> 2),
964
	0x00000000,
965
	(0x0e00 << 16) | (0x9714 >> 2),
966
	0x00000000,
967
	(0x0e00 << 16) | (0x9718 >> 2),
968
	0x00000000,
969
	(0x0e00 << 16) | (0x971c >> 2),
970
	0x00000000,
971
	(0x0e00 << 16) | (0x31068 >> 2),
972
	0x00000000,
973
	(0x4e00 << 16) | (0x31068 >> 2),
974
	0x00000000,
975
	(0x5e00 << 16) | (0x31068 >> 2),
976
	0x00000000,
977
	(0x6e00 << 16) | (0x31068 >> 2),
978
	0x00000000,
979
	(0x7e00 << 16) | (0x31068 >> 2),
980
	0x00000000,
981
	(0x0e00 << 16) | (0xcd10 >> 2),
982
	0x00000000,
983
	(0x0e00 << 16) | (0xcd14 >> 2),
984
	0x00000000,
985
	(0x0e00 << 16) | (0x88b0 >> 2),
986
	0x00000000,
987
	(0x0e00 << 16) | (0x88b4 >> 2),
988
	0x00000000,
989
	(0x0e00 << 16) | (0x88b8 >> 2),
990
	0x00000000,
991
	(0x0e00 << 16) | (0x88bc >> 2),
992
	0x00000000,
993
	(0x0400 << 16) | (0x89c0 >> 2),
994
	0x00000000,
995
	(0x0e00 << 16) | (0x88c4 >> 2),
996
	0x00000000,
997
	(0x0e00 << 16) | (0x88c8 >> 2),
998
	0x00000000,
999
	(0x0e00 << 16) | (0x88d0 >> 2),
1000
	0x00000000,
1001
	(0x0e00 << 16) | (0x88d4 >> 2),
1002
	0x00000000,
1003
	(0x0e00 << 16) | (0x88d8 >> 2),
1004
	0x00000000,
1005
	(0x0e00 << 16) | (0x8980 >> 2),
1006
	0x00000000,
1007
	(0x0e00 << 16) | (0x30938 >> 2),
1008
	0x00000000,
1009
	(0x0e00 << 16) | (0x3093c >> 2),
1010
	0x00000000,
1011
	(0x0e00 << 16) | (0x30940 >> 2),
1012
	0x00000000,
1013
	(0x0e00 << 16) | (0x89a0 >> 2),
1014
	0x00000000,
1015
	(0x0e00 << 16) | (0x30900 >> 2),
1016
	0x00000000,
1017
	(0x0e00 << 16) | (0x30904 >> 2),
1018
	0x00000000,
1019
	(0x0e00 << 16) | (0x89b4 >> 2),
1020
	0x00000000,
1021
	(0x0e00 << 16) | (0x3e1fc >> 2),
1022
	0x00000000,
1023
	(0x0e00 << 16) | (0x3c210 >> 2),
1024
	0x00000000,
1025
	(0x0e00 << 16) | (0x3c214 >> 2),
1026
	0x00000000,
1027
	(0x0e00 << 16) | (0x3c218 >> 2),
1028
	0x00000000,
1029
	(0x0e00 << 16) | (0x8904 >> 2),
1030
	0x00000000,
1031
	0x5,
1032
	(0x0e00 << 16) | (0x8c28 >> 2),
1033
	(0x0e00 << 16) | (0x8c2c >> 2),
1034
	(0x0e00 << 16) | (0x8c30 >> 2),
1035
	(0x0e00 << 16) | (0x8c34 >> 2),
1036
	(0x0e00 << 16) | (0x9600 >> 2),
1037
};
1038
 
1039
static const u32 bonaire_golden_spm_registers[] =
1040
{
1041
	0x30800, 0xe0ffffff, 0xe0000000
1042
};
1043
 
1044
static const u32 bonaire_golden_common_registers[] =
1045
{
1046
	0xc770, 0xffffffff, 0x00000800,
1047
	0xc774, 0xffffffff, 0x00000800,
1048
	0xc798, 0xffffffff, 0x00007fbf,
1049
	0xc79c, 0xffffffff, 0x00007faf
1050
};
1051
 
1052
static const u32 bonaire_golden_registers[] =
1053
{
1054
	0x3354, 0x00000333, 0x00000333,
1055
	0x3350, 0x000c0fc0, 0x00040200,
1056
	0x9a10, 0x00010000, 0x00058208,
1057
	0x3c000, 0xffff1fff, 0x00140000,
1058
	0x3c200, 0xfdfc0fff, 0x00000100,
1059
	0x3c234, 0x40000000, 0x40000200,
1060
	0x9830, 0xffffffff, 0x00000000,
1061
	0x9834, 0xf00fffff, 0x00000400,
1062
	0x9838, 0x0002021c, 0x00020200,
1063
	0xc78, 0x00000080, 0x00000000,
1064
	0x5bb0, 0x000000f0, 0x00000070,
1065
	0x5bc0, 0xf0311fff, 0x80300000,
1066
	0x98f8, 0x73773777, 0x12010001,
1067
	0x350c, 0x00810000, 0x408af000,
1068
	0x7030, 0x31000111, 0x00000011,
1069
	0x2f48, 0x73773777, 0x12010001,
1070
	0x220c, 0x00007fb6, 0x0021a1b1,
1071
	0x2210, 0x00007fb6, 0x002021b1,
1072
	0x2180, 0x00007fb6, 0x00002191,
1073
	0x2218, 0x00007fb6, 0x002121b1,
1074
	0x221c, 0x00007fb6, 0x002021b1,
1075
	0x21dc, 0x00007fb6, 0x00002191,
1076
	0x21e0, 0x00007fb6, 0x00002191,
1077
	0x3628, 0x0000003f, 0x0000000a,
1078
	0x362c, 0x0000003f, 0x0000000a,
1079
	0x2ae4, 0x00073ffe, 0x000022a2,
1080
	0x240c, 0x000007ff, 0x00000000,
1081
	0x8a14, 0xf000003f, 0x00000007,
1082
	0x8bf0, 0x00002001, 0x00000001,
1083
	0x8b24, 0xffffffff, 0x00ffffff,
1084
	0x30a04, 0x0000ff0f, 0x00000000,
1085
	0x28a4c, 0x07ffffff, 0x06000000,
1086
	0x4d8, 0x00000fff, 0x00000100,
1087
	0x3e78, 0x00000001, 0x00000002,
1088
	0x9100, 0x03000000, 0x0362c688,
1089
	0x8c00, 0x000000ff, 0x00000001,
1090
	0xe40, 0x00001fff, 0x00001fff,
1091
	0x9060, 0x0000007f, 0x00000020,
1092
	0x9508, 0x00010000, 0x00010000,
1093
	0xac14, 0x000003ff, 0x000000f3,
1094
	0xac0c, 0xffffffff, 0x00001032
1095
};
1096
 
1097
/* MGCG/CGCG (clockgating) init sequence for Bonaire:
 * {offset, mask, value} triplets applied by cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
/* SPM golden settings for Spectre (Kaveri): {offset, mask, value}. */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Common golden settings for Spectre (Kaveri): {offset, mask, value}. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
/* Golden register settings for Spectre (Kaveri): {offset, mask, value}
 * triplets applied by cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
/* MGCG/CGCG (clockgating) init sequence for Spectre (Kaveri):
 * {offset, mask, value} triplets applied by cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
/* SPM golden settings for Kalindi (Kabini): {offset, mask, value}. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Common golden settings for Kalindi (Kabini): {offset, mask, value}. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
/* Golden register settings for Kalindi (Kabini): {offset, mask, value}
 * triplets applied by cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
/* MGCG/CGCG (clockgating) init sequence for Kalindi (Kabini/Mullins):
 * {offset, mask, value} triplets applied by cik_init_golden_registers().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
/* SPM golden settings for Hawaii: {offset, mask, value}. */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Common golden settings for Hawaii: {offset, mask, value}. */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
/* Golden register settings for Hawaii: {offset, mask, value} triplets
 * applied by cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
/* MGCG/CGCG (clockgating) init sequence for Hawaii:
 * {offset, mask, value} triplets applied by cik_init_golden_registers().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
/* Golden register settings for Godavari (Mullins): {offset, mask, value}
 * triplets applied by cik_init_golden_registers().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned and breaks the pattern of
	 * the other offsets (the sibling tables use 0x9834 with this mask/value);
	 * looks like a typo inherited from upstream — confirm against the
	 * hardware register map before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
/**
 * cik_init_golden_registers - program per-asic "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the MGCG/CGCG clockgating init sequence plus the golden,
 * common and SPM register tables matching rdev->family (tables defined
 * above).  Families not listed are left untouched.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		/* Mullins shares the kalindi tables but has its own golden regs */
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}
/**
1706
 * cik_get_xclk - get the xclk
1707
 *
1708
 * @rdev: radeon_device pointer
1709
 *
1710
 * Returns the reference clock used by the gfx engine
1711
 * (CIK).
1712
 */
1713
u32 cik_get_xclk(struct radeon_device *rdev)
1714
{
7146 serge 1715
	u32 reference_clock = rdev->clock.spll.reference_freq;
5078 serge 1716
 
1717
	if (rdev->flags & RADEON_IS_IGP) {
1718
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719
			return reference_clock / 2;
1720
	} else {
1721
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722
			return reference_clock / 4;
1723
	}
1724
	return reference_clock;
1725
}
1726
 
1727
/**
1728
 * cik_mm_rdoorbell - read a doorbell dword
1729
 *
1730
 * @rdev: radeon_device pointer
1731
 * @index: doorbell index
1732
 *
1733
 * Returns the value in the doorbell aperture at the
1734
 * requested doorbell index (CIK).
1735
 */
1736
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737
{
1738
	if (index < rdev->doorbell.num_doorbells) {
1739
		return readl(rdev->doorbell.ptr + index);
1740
	} else {
1741
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742
		return 0;
1743
	}
1744
}
1745
 
1746
/**
1747
 * cik_mm_wdoorbell - write a doorbell dword
1748
 *
1749
 * @rdev: radeon_device pointer
1750
 * @index: doorbell index
1751
 * @v: value to write
1752
 *
1753
 * Writes @v to the doorbell aperture at the
1754
 * requested doorbell index (CIK).
1755
 */
1756
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757
{
1758
	if (index < rdev->doorbell.num_doorbells) {
1759
		writel(v, rdev->doorbell.ptr + index);
1760
	} else {
1761
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762
	}
1763
}
1764
 
1765
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC io debug {index, data} pairs, programmed through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() for
 * old-style (non-header) firmware.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC io debug {index, data} pairs, programmed through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() for
 * old-style (non-header) firmware.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
/**
1837
 * cik_srbm_select - select specific register instances
1838
 *
1839
 * @rdev: radeon_device pointer
1840
 * @me: selected ME (micro engine)
1841
 * @pipe: pipe
1842
 * @queue: queue
1843
 * @vmid: VMID
1844
 *
1845
 * Switches the currently active registers instances.  Some
1846
 * registers are instanced per VMID, others are instanced per
1847
 * me/pipe/queue combination.
1848
 */
1849
static void cik_srbm_select(struct radeon_device *rdev,
1850
			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851
{
1852
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853
			     MEID(me & 0x3) |
1854
			     VMID(vmid & 0xf) |
1855
			     QUEUEID(queue & 0x7));
1856
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857
}
1858
 
1859
/* ucode loading */
1860
/**
1861
 * ci_mc_load_microcode - load MC ucode into the hw
1862
 *
1863
 * @rdev: radeon_device pointer
1864
 *
1865
 * Load the GDDR MC ucode into the hw (CIK).
1866
 * Returns 0 on success, error on failure.
1867
 */
1868
int ci_mc_load_microcode(struct radeon_device *rdev)
1869
{
1870
	const __be32 *fw_data = NULL;
1871
	const __le32 *new_fw_data = NULL;
5271 serge 1872
	u32 running, blackout = 0, tmp;
5078 serge 1873
	u32 *io_mc_regs = NULL;
1874
	const __le32 *new_io_mc_regs = NULL;
1875
	int i, regs_size, ucode_size;
1876
 
1877
	if (!rdev->mc_fw)
1878
		return -EINVAL;
1879
 
1880
	if (rdev->new_fw) {
1881
		const struct mc_firmware_header_v1_0 *hdr =
1882
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883
 
1884
		radeon_ucode_print_mc_hdr(&hdr->header);
1885
 
1886
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887
		new_io_mc_regs = (const __le32 *)
1888
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890
		new_fw_data = (const __le32 *)
1891
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892
	} else {
6104 serge 1893
		ucode_size = rdev->mc_fw->size / 4;
5078 serge 1894
 
6104 serge 1895
		switch (rdev->family) {
1896
		case CHIP_BONAIRE:
1897
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899
			break;
1900
		case CHIP_HAWAII:
1901
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902
			regs_size = HAWAII_IO_MC_REGS_SIZE;
1903
			break;
1904
		default:
1905
			return -EINVAL;
1906
		}
5078 serge 1907
		fw_data = (const __be32 *)rdev->mc_fw->data;
1908
	}
1909
 
1910
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911
 
1912
	if (running == 0) {
1913
		if (running) {
1914
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1915
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1916
		}
1917
 
1918
		/* reset the engine and set to writable */
1919
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
 
1922
		/* load mc io regs */
1923
		for (i = 0; i < regs_size; i++) {
1924
			if (rdev->new_fw) {
1925
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927
			} else {
6104 serge 1928
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930
			}
5078 serge 1931
		}
5271 serge 1932
 
1933
		tmp = RREG32(MC_SEQ_MISC0);
1934
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939
		}
1940
 
5078 serge 1941
		/* load the MC ucode */
1942
		for (i = 0; i < ucode_size; i++) {
1943
			if (rdev->new_fw)
1944
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945
			else
6104 serge 1946
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
5078 serge 1947
		}
1948
 
1949
		/* put the engine back into the active state */
1950
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
 
1954
		/* wait for training to complete */
1955
		for (i = 0; i < rdev->usec_timeout; i++) {
1956
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957
				break;
1958
			udelay(1);
1959
		}
1960
		for (i = 0; i < rdev->usec_timeout; i++) {
1961
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962
				break;
1963
			udelay(1);
1964
		}
1965
 
1966
		if (running)
1967
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1968
	}
1969
 
1970
	return 0;
1971
}
1972
 
1973
/**
1974
 * cik_init_microcode - load ucode images from disk
1975
 *
1976
 * @rdev: radeon_device pointer
1977
 *
1978
 * Use the firmware interface to load the ucode images into
1979
 * the driver (not loaded into hw).
1980
 * Returns 0 on success, error on failure.
1981
 */
1982
static int cik_init_microcode(struct radeon_device *rdev)
1983
{
1984
	const char *chip_name;
1985
	const char *new_chip_name;
1986
	size_t pfp_req_size, me_req_size, ce_req_size,
1987
		mec_req_size, rlc_req_size, mc_req_size = 0,
1988
		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989
	char fw_name[30];
1990
	int new_fw = 0;
1991
	int err;
1992
	int num_fw;
1993
 
1994
	DRM_DEBUG("\n");
1995
 
1996
	switch (rdev->family) {
1997
	case CHIP_BONAIRE:
1998
		chip_name = "BONAIRE";
1999
		new_chip_name = "bonaire";
2000
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001
		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005
		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006
		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009
		num_fw = 8;
2010
		break;
2011
	case CHIP_HAWAII:
2012
		chip_name = "HAWAII";
2013
		new_chip_name = "hawaii";
2014
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015
		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019
		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020
		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022
		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023
		num_fw = 8;
2024
		break;
2025
	case CHIP_KAVERI:
2026
		chip_name = "KAVERI";
2027
		new_chip_name = "kaveri";
2028
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029
		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034
		num_fw = 7;
2035
		break;
2036
	case CHIP_KABINI:
2037
		chip_name = "KABINI";
2038
		new_chip_name = "kabini";
2039
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040
		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045
		num_fw = 6;
2046
		break;
2047
	case CHIP_MULLINS:
2048
		chip_name = "MULLINS";
2049
		new_chip_name = "mullins";
2050
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051
		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054
		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056
		num_fw = 6;
2057
		break;
2058
	default: BUG();
2059
	}
2060
 
2061
	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
 
2063
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065
	if (err) {
6104 serge 2066
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068
		if (err)
2069
			goto out;
2070
		if (rdev->pfp_fw->size != pfp_req_size) {
2071
			printk(KERN_ERR
2072
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073
			       rdev->pfp_fw->size, fw_name);
2074
			err = -EINVAL;
2075
			goto out;
2076
		}
5078 serge 2077
	} else {
2078
		err = radeon_ucode_validate(rdev->pfp_fw);
2079
		if (err) {
2080
			printk(KERN_ERR
2081
			       "cik_fw: validation failed for firmware \"%s\"\n",
2082
			       fw_name);
2083
			goto out;
2084
		} else {
2085
			new_fw++;
2086
		}
2087
	}
2088
 
2089
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091
	if (err) {
6104 serge 2092
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093
		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094
		if (err)
2095
			goto out;
2096
		if (rdev->me_fw->size != me_req_size) {
2097
			printk(KERN_ERR
2098
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099
			       rdev->me_fw->size, fw_name);
2100
			err = -EINVAL;
2101
		}
5078 serge 2102
	} else {
2103
		err = radeon_ucode_validate(rdev->me_fw);
2104
		if (err) {
2105
			printk(KERN_ERR
2106
			       "cik_fw: validation failed for firmware \"%s\"\n",
2107
			       fw_name);
2108
			goto out;
2109
		} else {
2110
			new_fw++;
2111
		}
2112
	}
2113
 
2114
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116
	if (err) {
6104 serge 2117
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118
		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119
		if (err)
2120
			goto out;
2121
		if (rdev->ce_fw->size != ce_req_size) {
2122
			printk(KERN_ERR
2123
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124
			       rdev->ce_fw->size, fw_name);
2125
			err = -EINVAL;
2126
		}
5078 serge 2127
	} else {
2128
		err = radeon_ucode_validate(rdev->ce_fw);
2129
		if (err) {
2130
			printk(KERN_ERR
2131
			       "cik_fw: validation failed for firmware \"%s\"\n",
2132
			       fw_name);
2133
			goto out;
2134
		} else {
2135
			new_fw++;
2136
		}
2137
	}
2138
 
2139
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141
	if (err) {
6104 serge 2142
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143
		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144
		if (err)
2145
			goto out;
2146
		if (rdev->mec_fw->size != mec_req_size) {
2147
			printk(KERN_ERR
2148
			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149
			       rdev->mec_fw->size, fw_name);
2150
			err = -EINVAL;
2151
		}
5078 serge 2152
	} else {
2153
		err = radeon_ucode_validate(rdev->mec_fw);
2154
		if (err) {
2155
			printk(KERN_ERR
2156
			       "cik_fw: validation failed for firmware \"%s\"\n",
2157
			       fw_name);
2158
			goto out;
2159
		} else {
2160
			new_fw++;
2161
		}
2162
	}
2163
 
2164
	if (rdev->family == CHIP_KAVERI) {
2165
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167
		if (err) {
2168
			goto out;
2169
		} else {
2170
			err = radeon_ucode_validate(rdev->mec2_fw);
2171
			if (err) {
2172
				goto out;
2173
			} else {
2174
				new_fw++;
2175
			}
2176
		}
2177
	}
2178
 
2179
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181
	if (err) {
6104 serge 2182
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183
		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184
		if (err)
2185
			goto out;
2186
		if (rdev->rlc_fw->size != rlc_req_size) {
2187
			printk(KERN_ERR
2188
			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189
			       rdev->rlc_fw->size, fw_name);
2190
			err = -EINVAL;
2191
		}
5078 serge 2192
	} else {
2193
		err = radeon_ucode_validate(rdev->rlc_fw);
2194
		if (err) {
2195
			printk(KERN_ERR
2196
			       "cik_fw: validation failed for firmware \"%s\"\n",
2197
			       fw_name);
2198
			goto out;
2199
		} else {
2200
			new_fw++;
2201
		}
2202
	}
2203
 
2204
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206
	if (err) {
6104 serge 2207
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208
		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209
		if (err)
2210
			goto out;
2211
		if (rdev->sdma_fw->size != sdma_req_size) {
2212
			printk(KERN_ERR
2213
			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214
			       rdev->sdma_fw->size, fw_name);
2215
			err = -EINVAL;
2216
		}
5078 serge 2217
	} else {
2218
		err = radeon_ucode_validate(rdev->sdma_fw);
2219
		if (err) {
2220
			printk(KERN_ERR
2221
			       "cik_fw: validation failed for firmware \"%s\"\n",
2222
			       fw_name);
2223
			goto out;
2224
		} else {
2225
			new_fw++;
2226
		}
2227
	}
2228
 
2229
	/* No SMC, MC ucode on APUs */
2230
	if (!(rdev->flags & RADEON_IS_IGP)) {
2231
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233
		if (err) {
6104 serge 2234
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
5078 serge 2235
			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
6104 serge 2236
			if (err) {
2237
				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238
				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239
				if (err)
2240
					goto out;
2241
			}
2242
			if ((rdev->mc_fw->size != mc_req_size) &&
2243
			    (rdev->mc_fw->size != mc2_req_size)){
2244
				printk(KERN_ERR
2245
				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246
				       rdev->mc_fw->size, fw_name);
2247
				err = -EINVAL;
2248
			}
2249
			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
5078 serge 2250
		} else {
2251
			err = radeon_ucode_validate(rdev->mc_fw);
2252
			if (err) {
2253
				printk(KERN_ERR
2254
				       "cik_fw: validation failed for firmware \"%s\"\n",
2255
				       fw_name);
2256
				goto out;
2257
			} else {
2258
				new_fw++;
2259
			}
2260
		}
2261
 
2262
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264
		if (err) {
6104 serge 2265
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266
			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267
			if (err) {
2268
				printk(KERN_ERR
2269
				       "smc: error loading firmware \"%s\"\n",
2270
				       fw_name);
2271
				release_firmware(rdev->smc_fw);
2272
				rdev->smc_fw = NULL;
2273
				err = 0;
2274
			} else if (rdev->smc_fw->size != smc_req_size) {
2275
				printk(KERN_ERR
2276
				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277
				       rdev->smc_fw->size, fw_name);
2278
				err = -EINVAL;
2279
			}
5078 serge 2280
		} else {
2281
			err = radeon_ucode_validate(rdev->smc_fw);
2282
			if (err) {
2283
				printk(KERN_ERR
2284
				       "cik_fw: validation failed for firmware \"%s\"\n",
2285
				       fw_name);
2286
				goto out;
2287
			} else {
2288
				new_fw++;
2289
			}
2290
		}
2291
	}
2292
 
2293
	if (new_fw == 0) {
2294
		rdev->new_fw = false;
2295
	} else if (new_fw < num_fw) {
2296
		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297
		err = -EINVAL;
2298
	} else {
2299
		rdev->new_fw = true;
2300
	}
2301
 
2302
out:
2303
	if (err) {
2304
		if (err != -EINVAL)
2305
			printk(KERN_ERR
2306
			       "cik_cp: Failed to load firmware \"%s\"\n",
2307
			       fw_name);
2308
		release_firmware(rdev->pfp_fw);
2309
		rdev->pfp_fw = NULL;
2310
		release_firmware(rdev->me_fw);
2311
		rdev->me_fw = NULL;
2312
		release_firmware(rdev->ce_fw);
2313
		rdev->ce_fw = NULL;
2314
		release_firmware(rdev->mec_fw);
2315
		rdev->mec_fw = NULL;
2316
		release_firmware(rdev->mec2_fw);
2317
		rdev->mec2_fw = NULL;
2318
		release_firmware(rdev->rlc_fw);
2319
		rdev->rlc_fw = NULL;
2320
		release_firmware(rdev->sdma_fw);
2321
		rdev->sdma_fw = NULL;
2322
		release_firmware(rdev->mc_fw);
2323
		rdev->mc_fw = NULL;
2324
		release_firmware(rdev->smc_fw);
2325
		rdev->smc_fw = NULL;
2326
	}
2327
	return err;
2328
}
2329
 
2330
/*
2331
 * Core functions
2332
 */
2333
/**
2334
 * cik_tiling_mode_table_init - init the hw tiling table
2335
 *
2336
 * @rdev: radeon_device pointer
2337
 *
2338
 * Starting with SI, the tiling setup is done globally in a
2339
 * set of 32 tiling modes.  Rather than selecting each set of
2340
 * parameters per surface as on older asics, we just select
2341
 * which index in the tiling table we want to use, and the
2342
 * surface uses those parameters (CIK).
2343
 */
2344
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345
{
7146 serge 2346
	u32 *tile = rdev->config.cik.tile_mode_array;
2347
	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2348
	const u32 num_tile_mode_states =
2349
			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2350
	const u32 num_secondary_tile_mode_states =
2351
			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2352
	u32 reg_offset, split_equal_to_row_size;
5078 serge 2353
	u32 num_pipe_configs;
2354
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2355
		rdev->config.cik.max_shader_engines;
2356
 
2357
	switch (rdev->config.cik.mem_row_size_in_kb) {
2358
	case 1:
2359
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2360
		break;
2361
	case 2:
2362
	default:
2363
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2364
		break;
2365
	case 4:
2366
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2367
		break;
2368
	}
2369
 
2370
	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2371
	if (num_pipe_configs > 8)
2372
		num_pipe_configs = 16;
2373
 
7146 serge 2374
	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2375
		tile[reg_offset] = 0;
2376
	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2377
		macrotile[reg_offset] = 0;
2378
 
2379
	switch(num_pipe_configs) {
2380
	case 16:
2381
		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2383
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2385
		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2389
		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393
		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2397
		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400
			   TILE_SPLIT(split_equal_to_row_size));
2401
		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2402
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404
		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2405
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2406
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2408
		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411
			   TILE_SPLIT(split_equal_to_row_size));
2412
		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2413
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2414
		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415
			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2417
		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421
		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425
		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429
		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2432
		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2434
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436
		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440
		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444
		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2445
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2447
		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2449
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451
		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2454
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455
		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2456
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457
			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459
 
2460
		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2463
			   NUM_BANKS(ADDR_SURF_16_BANK));
2464
		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467
			   NUM_BANKS(ADDR_SURF_16_BANK));
2468
		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471
			   NUM_BANKS(ADDR_SURF_16_BANK));
2472
		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475
			   NUM_BANKS(ADDR_SURF_16_BANK));
2476
		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479
			   NUM_BANKS(ADDR_SURF_8_BANK));
2480
		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483
			   NUM_BANKS(ADDR_SURF_4_BANK));
2484
		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487
			   NUM_BANKS(ADDR_SURF_2_BANK));
2488
		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491
			   NUM_BANKS(ADDR_SURF_16_BANK));
2492
		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493
			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494
			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495
			   NUM_BANKS(ADDR_SURF_16_BANK));
2496
		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497
			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498
			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499
			    NUM_BANKS(ADDR_SURF_16_BANK));
2500
		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501
			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502
			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503
			    NUM_BANKS(ADDR_SURF_8_BANK));
2504
		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505
			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506
			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507
			    NUM_BANKS(ADDR_SURF_4_BANK));
2508
		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509
			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510
			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511
			    NUM_BANKS(ADDR_SURF_2_BANK));
2512
		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513
			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514
			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515
			    NUM_BANKS(ADDR_SURF_2_BANK));
2516
 
2517
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2518
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2519
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2520
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2521
		break;
2522
 
2523
	case 8:
2524
		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2528
		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2532
		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536
		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2540
		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543
			   TILE_SPLIT(split_equal_to_row_size));
2544
		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2547
		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2548
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2549
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2551
		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554
			   TILE_SPLIT(split_equal_to_row_size));
2555
		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2556
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2557
		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558
			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2560
		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2562
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564
		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568
		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572
		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2575
		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579
		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583
		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587
		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2590
		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2594
		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2595
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2597
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598
		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600
			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602
 
2603
		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2606
				NUM_BANKS(ADDR_SURF_16_BANK));
2607
		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2609
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2610
				NUM_BANKS(ADDR_SURF_16_BANK));
2611
		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614
				NUM_BANKS(ADDR_SURF_16_BANK));
2615
		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618
				NUM_BANKS(ADDR_SURF_16_BANK));
2619
		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2622
				NUM_BANKS(ADDR_SURF_8_BANK));
2623
		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626
				NUM_BANKS(ADDR_SURF_4_BANK));
2627
		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630
				NUM_BANKS(ADDR_SURF_2_BANK));
2631
		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2633
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634
				NUM_BANKS(ADDR_SURF_16_BANK));
2635
		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638
				NUM_BANKS(ADDR_SURF_16_BANK));
2639
		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2641
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2642
				NUM_BANKS(ADDR_SURF_16_BANK));
2643
		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646
				NUM_BANKS(ADDR_SURF_16_BANK));
2647
		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650
				NUM_BANKS(ADDR_SURF_8_BANK));
2651
		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654
				NUM_BANKS(ADDR_SURF_4_BANK));
2655
		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658
				NUM_BANKS(ADDR_SURF_2_BANK));
2659
 
2660
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2661
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2662
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2664
		break;
2665
 
2666
	case 4:
5078 serge 2667
		if (num_rbs == 4) {
7146 serge 2668
		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2672
		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2676
		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680
		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2684
		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687
			   TILE_SPLIT(split_equal_to_row_size));
2688
		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2689
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2691
		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2692
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2693
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2695
		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698
			   TILE_SPLIT(split_equal_to_row_size));
2699
		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2700
			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2701
		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702
			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2704
		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706
			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708
		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712
		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714
			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716
		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717
			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2719
		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2720
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2721
			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723
		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727
		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729
			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731
		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2732
			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2734
		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2735
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2736
			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738
		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742
		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2743
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744
			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746
 
5078 serge 2747
		} else if (num_rbs < 4) {
7146 serge 2748
		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2752
		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2756
		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760
		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2764
		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767
			   TILE_SPLIT(split_equal_to_row_size));
2768
		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2769
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2771
		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2772
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2773
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2775
		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778
			   TILE_SPLIT(split_equal_to_row_size));
2779
		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2780
			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2781
		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782
			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2784
		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2786
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788
		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792
		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796
		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2799
		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2801
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803
		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807
		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811
		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2814
		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2815
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2818
		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2819
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822
		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2823
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824
			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
5078 serge 2826
		}
7146 serge 2827
 
2828
		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831
				NUM_BANKS(ADDR_SURF_16_BANK));
2832
		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835
				NUM_BANKS(ADDR_SURF_16_BANK));
2836
		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839
				NUM_BANKS(ADDR_SURF_16_BANK));
2840
		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843
				NUM_BANKS(ADDR_SURF_16_BANK));
2844
		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847
				NUM_BANKS(ADDR_SURF_16_BANK));
2848
		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851
				NUM_BANKS(ADDR_SURF_8_BANK));
2852
		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2855
				NUM_BANKS(ADDR_SURF_4_BANK));
2856
		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2858
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859
				NUM_BANKS(ADDR_SURF_16_BANK));
2860
		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863
				NUM_BANKS(ADDR_SURF_16_BANK));
2864
		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867
				NUM_BANKS(ADDR_SURF_16_BANK));
2868
		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2870
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871
				NUM_BANKS(ADDR_SURF_16_BANK));
2872
		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875
				NUM_BANKS(ADDR_SURF_16_BANK));
2876
		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879
				NUM_BANKS(ADDR_SURF_8_BANK));
2880
		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2883
				NUM_BANKS(ADDR_SURF_4_BANK));
2884
 
2885
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2886
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2887
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2888
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2889
		break;
2890
 
2891
	case 2:
2892
		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894
			   PIPE_CONFIG(ADDR_SURF_P2) |
2895
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896
		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898
			   PIPE_CONFIG(ADDR_SURF_P2) |
2899
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2900
		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902
			   PIPE_CONFIG(ADDR_SURF_P2) |
2903
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904
		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906
			   PIPE_CONFIG(ADDR_SURF_P2) |
2907
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2908
		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910
			   PIPE_CONFIG(ADDR_SURF_P2) |
2911
			   TILE_SPLIT(split_equal_to_row_size));
2912
		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913
			   PIPE_CONFIG(ADDR_SURF_P2) |
2914
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915
		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2916
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2917
			   PIPE_CONFIG(ADDR_SURF_P2) |
2918
			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2919
		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921
			   PIPE_CONFIG(ADDR_SURF_P2) |
2922
			   TILE_SPLIT(split_equal_to_row_size));
2923
		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2924
			   PIPE_CONFIG(ADDR_SURF_P2);
2925
		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926
			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927
			   PIPE_CONFIG(ADDR_SURF_P2));
2928
		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930
			    PIPE_CONFIG(ADDR_SURF_P2) |
2931
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932
		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934
			    PIPE_CONFIG(ADDR_SURF_P2) |
2935
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936
		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937
			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938
			    PIPE_CONFIG(ADDR_SURF_P2) |
2939
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940
		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941
			    PIPE_CONFIG(ADDR_SURF_P2) |
2942
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2943
		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2944
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2945
			    PIPE_CONFIG(ADDR_SURF_P2) |
2946
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947
		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949
			    PIPE_CONFIG(ADDR_SURF_P2) |
2950
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951
		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952
			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953
			    PIPE_CONFIG(ADDR_SURF_P2) |
2954
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955
		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2956
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957
			    PIPE_CONFIG(ADDR_SURF_P2));
2958
		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2959
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2960
			    PIPE_CONFIG(ADDR_SURF_P2) |
2961
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962
		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964
			    PIPE_CONFIG(ADDR_SURF_P2) |
2965
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966
		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2967
			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968
			    PIPE_CONFIG(ADDR_SURF_P2) |
2969
			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970
 
2971
		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2972
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2973
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2974
				NUM_BANKS(ADDR_SURF_16_BANK));
2975
		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2977
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978
				NUM_BANKS(ADDR_SURF_16_BANK));
2979
		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982
				NUM_BANKS(ADDR_SURF_16_BANK));
2983
		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986
				NUM_BANKS(ADDR_SURF_16_BANK));
2987
		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990
				NUM_BANKS(ADDR_SURF_16_BANK));
2991
		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994
				NUM_BANKS(ADDR_SURF_16_BANK));
2995
		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2998
				NUM_BANKS(ADDR_SURF_8_BANK));
2999
		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3000
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3001
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002
				NUM_BANKS(ADDR_SURF_16_BANK));
3003
		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3005
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006
				NUM_BANKS(ADDR_SURF_16_BANK));
3007
		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3008
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010
				NUM_BANKS(ADDR_SURF_16_BANK));
3011
		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3013
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014
				NUM_BANKS(ADDR_SURF_16_BANK));
3015
		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018
				NUM_BANKS(ADDR_SURF_16_BANK));
3019
		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022
				NUM_BANKS(ADDR_SURF_16_BANK));
3023
		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3026
				NUM_BANKS(ADDR_SURF_8_BANK));
3027
 
3028
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3029
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3030
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3031
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3032
		break;
3033
 
3034
	default:
5078 serge 3035
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
7146 serge 3036
	}
5078 serge 3037
}
3038
 
3039
/**
3040
 * cik_select_se_sh - select which SE, SH to address
3041
 *
3042
 * @rdev: radeon_device pointer
3043
 * @se_num: shader engine to address
3044
 * @sh_num: sh block to address
3045
 *
3046
 * Select which SE, SH combinations to address. Certain
3047
 * registers are instanced per SE or SH.  0xffffffff means
3048
 * broadcast to all SEs or SHs (CIK).
3049
 */
3050
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	/* Start from instance broadcast; SE and SH selection are independent
	 * fields, so each one is either an explicit index or a broadcast bit.
	 */
	u32 data = INSTANCE_BROADCAST_WRITES;

	data |= (se_num == 0xffffffff) ? SE_BROADCAST_WRITES : SE_INDEX(se_num);
	data |= (sh_num == 0xffffffff) ? SH_BROADCAST_WRITES : SH_INDEX(sh_num);

	WREG32(GRBM_GFX_INDEX, data);
}
3065
 
3066
/**
3067
 * cik_create_bitmask - create a bitmask
3068
 *
3069
 * @bit_width: length of the mask
3070
 *
3071
 * create a variable length bit mask (CIK).
3072
 * Returns the bitmask.
3073
 */
3074
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a variable length bit mask (CIK).
 * Replaces the former O(bit_width) shift loop with a closed-form
 * expression.  Widths of 32 or more saturate to all-ones, exactly as
 * the old loop did, and the explicit guard avoids the undefined
 * behavior of shifting a 32-bit value by 32.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	if (bit_width >= 32)
		return 0xffffffff;
	return (1u << bit_width) - 1;
}
3084
 
3085
/**
3086
 * cik_get_rb_disabled - computes the mask of disabled RBs
3087
 *
3088
 * @rdev: radeon_device pointer
3089
 * @max_rb_num: max RBs (render backends) for the asic
3090
 * @se_num: number of SEs (shader engines) for the asic
3091
 * @sh_per_se: number of SH blocks per SE for the asic
3092
 *
3093
 * Calculates the bitmask of disabled RBs (CIK).
3094
 * Returns the disabled RB bitmask.
3095
 */
3096
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 disabled, valid_mask;

	/* Hardware-disabled RBs only count when the enable bit (bit 0)
	 * is set; otherwise treat the register as reporting none.
	 */
	disabled = RREG32(CC_RB_BACKEND_DISABLE);
	disabled = (disabled & 1) ? (disabled & BACKEND_DISABLE_MASK) : 0;

	/* Merge in any RBs additionally disabled by the user/driver. */
	disabled |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	disabled >>= BACKEND_DISABLE_SHIFT;

	/* Limit the result to the RBs this SH can actually have. */
	valid_mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return disabled & valid_mask;
}
3115
 
3116
/**
3117
 * cik_setup_rb - setup the RBs on the asic
3118
 *
3119
 * @rdev: radeon_device pointer
3120
 * @se_num: number of SEs (shader engines) for the asic
3121
 * @sh_per_se: number of SH blocks per SE for the asic
3122
 * @max_rb_num: max RBs (render backends) for the asic
3123
 *
3124
 * Configures per-SE/SH RB registers (CIK).
3125
 */
3126
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* GRBM_GFX_INDEX is shared state: hold grbm_idx_mutex across every
	 * select/read sequence so another thread cannot retarget it mid-read.
	 */
	mutex_lock(&rdev->grbm_idx_mutex);
	/* Gather the per-SE/SH disabled-RB bitmaps into one global bitmap.
	 * Hawaii reports a wider per-SH bitmap than the other CIK parts,
	 * hence the two shift widths.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast addressing before dropping the lock. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert the disabled bitmap into an enabled bitmap. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Exported so userspace/acceleration code can see which RBs exist. */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	mutex_lock(&rdev->grbm_idx_mutex);
	/* Program PA_SC_RASTER_CONFIG per SE: two bits of enabled_rbs are
	 * consumed per SH and mapped to an RB-map encoding.  The case values
	 * (0..3) are the two-bit enabled-RB pattern for that SH.
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RBs enabled in this SH: steer per-packer */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3188
 
3189
/**
3190
 * cik_gpu_init - setup the 3D engine
3191
 *
3192
 * @rdev: radeon_device pointer
3193
 *
3194
 * Configures the 3D engine and tiling configuration
3195
 * registers so that the 3D engine is usable.
3196
 */
3197
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC shader/pipe topology limits.  These mirror the hardware
	 * configuration of each CIK variant and drive the rest of the setup.
	 */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri ships in several CU/RB configurations; the PCI
		 * device ID identifies which bin this part belongs to.
		 */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	/* enable and ack SRBM interrupts */
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below;
	 * presumably kept for parity with upstream - confirm before removing.
	 */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column-count field, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same address config must be mirrored into every client block */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count the active compute units across all SE/SH pairs */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* size the scan-converter FIFOs from the per-ASIC values chosen above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no change: the write itself latches the value */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* let the register writes settle before the CP is started */
	udelay(50);
}
3471
 
3472
/*
3473
 * GPU scratch registers helpers function.
3474
 */
3475
/**
3476
 * cik_scratch_init - setup driver info for CP scratch regs
3477
 *
3478
 * @rdev: radeon_device pointer
3479
 *
3480
 * Set up the number and offset of the CP scratch registers.
3481
 * NOTE: use of CP scratch registers is a legacy inferface and
3482
 * is not used by default on newer asics (r6xx+).  On newer asics,
3483
 * memory buffers are used for fences rather than scratch regs.
3484
 */
3485
static void cik_scratch_init(struct radeon_device *rdev)
3486
{
3487
	int i;
3488
 
3489
	rdev->scratch.num_reg = 7;
3490
	rdev->scratch.reg_base = SCRATCH_REG0;
3491
	for (i = 0; i < rdev->scratch.num_reg; i++) {
3492
		rdev->scratch.free[i] = true;
3493
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3494
	}
3495
}
3496
 
3497
/**
3498
 * cik_ring_test - basic gfx ring test
3499
 *
3500
 * @rdev: radeon_device pointer
3501
 * @ring: radeon_ring structure holding ring information
3502
 *
3503
 * Allocate a scratch register and write to it using the gfx ring (CIK).
3504
 * Provides a basic gfx ring test to verify that the ring is working.
3505
 * Used by cik_cp_gfx_resume();
3506
 * Returns 0 on success, error on failure.
3507
 */
3508
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell whether the CP overwrote it */
	WREG32(scratch, 0xCAFEDEAD);
	/* 3 dwords: one SET_UCONFIG_REG packet */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* ask the CP to write 0xDEADBEEF into the scratch register */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll until the CP has executed the write or we time out */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3548
 
3549
/**
3550
 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3551
 *
3552
 * @rdev: radeon_device pointer
3553
 * @ridx: radeon ring index
3554
 *
3555
 * Emits an hdp flush on the cp.
3556
 */
3557
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* pick the GPU_HDP_FLUSH_REQ/DONE bit corresponding to this
	 * micro engine (ME) and pipe */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown ME: nothing sensible to flush */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* write REQ, then busy-wait until DONE matches the same mask */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3593
 
3594
/**
3595
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3596
 *
3597
 * @rdev: radeon_device pointer
3598
 * @fence: radeon fence object
3599
 *
3600
 * Emits a fence sequnce number on the gfx ring and flushes
3601
 * GPU caches.
3602
 */
3603
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(0): no interrupt for
	 * the dummy event */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): raise the fence interrupt after writing the seq */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3634
 
3635
/**
3636
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3637
 *
3638
 * @rdev: radeon_device pointer
3639
 * @fence: radeon fence object
3640
 *
3641
 * Emits a fence sequnce number on the compute ring and flushes
3642
 * GPU caches.
3643
 */
3644
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3662
 
3663
/**
3664
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3665
 *
3666
 * @rdev: radeon_device pointer
3667
 * @ring: radeon ring buffer object
3668
 * @semaphore: radeon semaphore object
3669
 * @emit_wait: Is this a sempahore wait?
3670
 *
3671
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3672
 * from running ahead of semaphore waits.
3673
 */
3674
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	/* packet select field: wait on vs. signal the semaphore */
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	/* emission never fails here */
	return true;
}
3694
 
3695
/**
3696
 * cik_copy_cpdma - copy pages using the CP DMA engine
3697
 *
3698
 * @rdev: radeon_device pointer
3699
 * @src_offset: src GPU address
3700
 * @dst_offset: dst GPU address
3701
 * @num_gpu_pages: number of GPU pages to xfer
5271 serge 3702
 * @resv: reservation object to sync to
5078 serge 3703
 *
3704
 * Copy GPU paging using the CP DMA engine (CIK+).
3705
 * Used by the radeon ttm implementation to move pages if
3706
 * registered as the asic copy callback.
3707
 */
5271 serge 3708
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* one DMA_DATA packet moves at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per copy packet plus headroom for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for the fences attached to the reservation object */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* raise CP_SYNC on the final chunk only */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3766
 
3767
/*
3768
 * IB stuff
3769
 */
3770
/**
3771
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3772
 *
3773
 * @rdev: radeon_device pointer
3774
 * @ib: radeon indirect buffer object
3775
 *
6938 serge 3776
 * Emits a DE (drawing engine) or CE (constant engine) IB
5078 serge 3777
 * on the gfx ring.  IBs are usually generated by userspace
3778
 * acceleration drivers and submitted to the kernel for
6938 serge 3779
 * scheduling on the ring.  This function schedules the IB
5078 serge 3780
 * on the gfx ring for execution by the GPU.
3781
 */
3782
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* record the post-IB rptr in the dedicated save register;
			 * +3 for this packet, +4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* otherwise let the CP write it to the writeback buffer;
			 * +5 for this packet, +4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the vm id in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3821
 
3822
/**
3823
 * cik_ib_test - basic gfx ring IB test
3824
 *
3825
 * @rdev: radeon_device pointer
3826
 * @ring: radeon_ring structure holding ring information
3827
 *
3828
 * Allocate an IB and execute it on the gfx ring (CIK).
3829
 * Provides a basic gfx ring test to verify that IBs are working.
3830
 * Returns 0 on success, error on failure.
3831
 */
3832
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg so we can tell whether the IB wrote it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* IB body: a single register write of 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence: <0 = error, 0 = timeout, >0 = signaled */
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return -ETIMEDOUT;
	}
	r = 0;
	/* fence signaled; poll until the value lands in the register */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3894
 
3895
/*
3896
 * CP.
3897
 * On CIK, gfx and compute now have independant command processors.
3898
 *
3899
 * GFX
3900
 * Gfx consists of a single ring and can process both gfx jobs and
3901
 * compute jobs.  The gfx CP consists of three microengines (ME):
3902
 * PFP - Pre-Fetch Parser
3903
 * ME - Micro Engine
3904
 * CE - Constant Engine
3905
 * The PFP and ME make up what is considered the Drawing Engine (DE).
3906
 * The CE is an asynchronous engine used for updating buffer desciptors
3907
 * used by the DE so that they can be loaded into cache in parallel
3908
 * while the DE is processing state update packets.
3909
 *
3910
 * Compute
3911
 * The compute CP consists of two microengines (ME):
3912
 * MEC1 - Compute MicroEngine 1
3913
 * MEC2 - Compute MicroEngine 2
3914
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3915
 * The queues are exposed to userspace and are programmed directly
3916
 * by the compute runtime.
3917
 */
3918
/**
3919
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3920
 *
3921
 * @rdev: radeon_device pointer
3922
 * @enable: enable or disable the MEs
3923
 *
3924
 * Halts or unhalts the gfx MEs.
3925
 */
3926
/* Halt (enable == false) or unhalt (enable == true) the gfx PFP/ME/CE
 * micro engines, then give the hardware 50us to settle.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (!enable) {
		/* gfx is going away; if it was the active copy ring, shrink
		 * the active VRAM window back to the visible aperture */
		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	} else {
		/* clear all halt bits */
		WREG32(CP_ME_CNTL, 0);
	}
	udelay(50);
}
3938
 
3939
/**
3940
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3941
 *
3942
 * @rdev: radeon_device pointer
3943
 *
3944
 * Loads the gfx PFP, ME, and CE ucode.
3945
 * Returns 0 for success, -EINVAL if the ucode is not available.
3946
 */
3947
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3948
{
3949
	int i;
3950
 
3951
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3952
		return -EINVAL;
3953
 
3954
	cik_cp_gfx_enable(rdev, false);
3955
 
3956
	if (rdev->new_fw) {
3957
		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3958
			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3959
		const struct gfx_firmware_header_v1_0 *ce_hdr =
3960
			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3961
		const struct gfx_firmware_header_v1_0 *me_hdr =
3962
			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3963
		const __le32 *fw_data;
3964
		u32 fw_size;
3965
 
3966
		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3967
		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3968
		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3969
 
3970
		/* PFP */
3971
		fw_data = (const __le32 *)
3972
			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3973
		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3974
		WREG32(CP_PFP_UCODE_ADDR, 0);
3975
		for (i = 0; i < fw_size; i++)
3976
			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
5271 serge 3977
		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
5078 serge 3978
 
3979
		/* CE */
3980
		fw_data = (const __le32 *)
3981
			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3982
		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3983
		WREG32(CP_CE_UCODE_ADDR, 0);
3984
		for (i = 0; i < fw_size; i++)
3985
			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
5271 serge 3986
		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
5078 serge 3987
 
3988
		/* ME */
3989
		fw_data = (const __be32 *)
3990
			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3991
		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3992
		WREG32(CP_ME_RAM_WADDR, 0);
3993
		for (i = 0; i < fw_size; i++)
3994
			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
5271 serge 3995
		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3996
		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
5078 serge 3997
	} else {
3998
		const __be32 *fw_data;
3999
 
6104 serge 4000
		/* PFP */
4001
		fw_data = (const __be32 *)rdev->pfp_fw->data;
4002
		WREG32(CP_PFP_UCODE_ADDR, 0);
4003
		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4004
			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4005
		WREG32(CP_PFP_UCODE_ADDR, 0);
5078 serge 4006
 
6104 serge 4007
		/* CE */
4008
		fw_data = (const __be32 *)rdev->ce_fw->data;
4009
		WREG32(CP_CE_UCODE_ADDR, 0);
4010
		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4011
			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4012
		WREG32(CP_CE_UCODE_ADDR, 0);
5078 serge 4013
 
6104 serge 4014
		/* ME */
4015
		fw_data = (const __be32 *)rdev->me_fw->data;
4016
		WREG32(CP_ME_RAM_WADDR, 0);
4017
		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4018
			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4019
		WREG32(CP_ME_RAM_WADDR, 0);
5078 serge 4020
	}
4021
 
4022
	return 0;
4023
}
4024
 
4025
/**
4026
 * cik_cp_gfx_start - start the gfx ring
4027
 *
4028
 * @rdev: radeon_device pointer
4029
 *
4030
 * Enables the ring and loads the clear state context and other
4031
 * packets required to init the ring.
4032
 * Returns 0 for success, error for failure.
4033
 */
4034
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state dwords plus 17 for the fixed packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4085
 
4086
/**
4087
 * cik_cp_gfx_fini - stop the gfx ring
4088
 *
4089
 * @rdev: radeon_device pointer
4090
 *
4091
 * Stop the gfx ring and tear down the driver ring
4092
 * info.
4093
 */
4094
static void cik_cp_gfx_fini(struct radeon_device *rdev)
4095
{
4096
	cik_cp_gfx_enable(rdev, false);
4097
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4098
}
4099
 
4100
/**
4101
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4102
 *
4103
 * @rdev: radeon_device pointer
4104
 *
4105
 * Program the location and size of the gfx ring buffer
4106
 * and test it to make sure it's working.
4107
 * Returns 0 for success, error for failure.
4108
 */
4109
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* drop RB_RPTR_WR_ENA again now that the pointers are reset */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx is alive; expose all of VRAM for copies if gfx is the copy ring */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4175
 
4176
u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4177
		     struct radeon_ring *ring)
4178
{
4179
	u32 rptr;
4180
 
4181
	if (rdev->wb.enabled)
4182
		rptr = rdev->wb.wb[ring->rptr_offs/4];
4183
	else
4184
		rptr = RREG32(CP_RB0_RPTR);
4185
 
4186
	return rptr;
4187
}
4188
 
4189
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4190
		     struct radeon_ring *ring)
4191
{
4192
	u32 wptr;
4193
 
4194
	wptr = RREG32(CP_RB0_WPTR);
4195
 
4196
	return wptr;
4197
}
4198
 
4199
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to post the write */
	(void)RREG32(CP_RB0_WPTR);
}
4205
 
4206
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		/* fast path: CPU-visible writeback copy */
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* select this ring's me/pipe/queue under the srbm lock and
		 * read the HQD register directly */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4223
 
4224
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* select this ring's me/pipe/queue under the srbm lock and
		 * read the HQD register directly */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4242
 
4243
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	/* ring the doorbell so the CP picks up the new wptr */
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4250
 
6104 serge 4251
/* Quiesce one compute ring's hardware queue (HQD): stop wptr polling,
 * request a dequeue, wait for the HQD to go idle, and zero the queue
 * pointers.  Callers (see cik_cp_compute_enable) hold srbm_mutex around
 * the srbm_select window.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* poll until the queue reports inactive or we time out */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* deselect back to the default srbm state */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4275
 
5078 serge 4276
/**
4277
 * cik_cp_compute_enable - enable/disable the compute CP MEs
4278
 *
4279
 * @rdev: radeon_device pointer
4280
 * @enable: enable or disable the MEs
4281
 *
4282
 * Halts or unhalts the compute MEs.
4283
 */
4284
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* halt both compute micro engines and mark the rings down */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	udelay(50);
}
4304
 
4305
/**
4306
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4307
 *
4308
 * @rdev: radeon_device pointer
4309
 *
4310
 * Loads the compute MEC1&2 ucode.
4311
 * Returns 0 for success, -EINVAL if the ucode is not available.
4312
 */
4313
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		/* only Kaveri has a second compute micro engine */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			/* legacy firmware: the same image feeds both MECs */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4375
 
4376
/**
4377
 * cik_cp_compute_start - start the compute queues
4378
 *
4379
 * @rdev: radeon_device pointer
4380
 *
4381
 * Enable the compute queues.
4382
 * Returns 0 for success, error for failure.
4383
 */
4384
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* just unhalt the compute micro engines */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4390
 
4391
/**
4392
 * cik_cp_compute_fini - stop the compute queues
4393
 *
4394
 * @rdev: radeon_device pointer
4395
 *
4396
 * Stop the compute queues and tear down the driver queue
4397
 * info.
4398
 */
4399
static void cik_cp_compute_fini(struct radeon_device *rdev)
4400
{
4401
	int i, idx, r;
4402
 
4403
	cik_cp_compute_enable(rdev, false);
4404
 
4405
	for (i = 0; i < 2; i++) {
4406
		if (i == 0)
4407
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4408
		else
4409
			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4410
 
4411
		if (rdev->ring[idx].mqd_obj) {
4412
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4413
			if (unlikely(r != 0))
4414
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4415
 
4416
			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4417
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4418
 
4419
			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4420
			rdev->ring[idx].mqd_obj = NULL;
4421
		}
4422
	}
4423
}
4424
 
4425
/* Tear down the MEC HPD EOP buffer allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4440
 
4441
#define MEC_HPD_SIZE 2048
4442
 
4443
/*
 * Allocate and map the HPD EOP buffer used by the MEC (compute
 * microengine) and record how many MEC/pipe/queue resources the
 * driver manages.  Returns 0 on success, negative error code on
 * failure (the buffer is cleaned up via cik_mec_fini() on error).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* NOTE(review): size is doubled (" * 2") relative to one
		 * MEC_HPD_SIZE per pipe — presumably headroom; confirm
		 * against the CP HPD EOP programming in compute_resume. */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4497
 
4498
/*
 * Shadow of the CP HQD (hardware queue descriptor) register block as
 * stored inside the MQD.  NOTE(review): this layout is consumed by the
 * CP via the MQD buffer object — do not reorder or resize fields.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4536
 
4537
/*
 * Memory queue descriptor (MQD) for Bonaire-class compute queues.  One
 * instance lives in a GPU-visible BO per compute ring (see
 * cik_cp_compute_resume()).  NOTE(review): layout is read by the CP —
 * do not reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4564
 
4565
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all CUs for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* SRBM select routes the HQD register writes below to this
		 * ring's me/pipe/queue; must stay selected until the final
		 * cik_srbm_select(0,0,0,0) */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded) for the dequeue to take effect */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* ring test flips ready back off if the queue doesn't respond */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4805
 
4806
/* Enable/disable both CP blocks (gfx microengine and compute MECs). */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4811
 
4812
/*
 * Load the CP microcode: first the gfx (PFP/ME/CE) images, then the
 * compute MEC images.  Returns 0 on success or the first error hit.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4825
 
4826
/* Tear down both CP blocks: gfx first, then compute. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4831
 
4832
/*
 * Bring the whole CP back up: load microcode, then resume the gfx and
 * compute queues.  GUI idle interrupts are masked for the duration of
 * the bring-up.  Returns 0 on success, negative error code on failure
 * (note: interrupts stay masked on the error paths).
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4853
 
4854
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log
 * (used before and after a GPU soft reset for diagnostics). */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4893
 
4894
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4974
 
4975
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before asserting resets */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		/* readback flushes the posted write before the delay */
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5105
 
5106
/* GMCON register state saved across a KV/KB (IGP) pci config reset. */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5111
 
5112
/* Save the GMCON registers and stop the render engine / stutter mode
 * before resetting an IGP (see kv_restore_regs_for_reset()). */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5123
 
5124
/*
 * Replay the GMCON PGFSM programming sequence after an IGP pci config
 * reset, then restore the registers saved by kv_save_regs_for_reset().
 * NOTE(review): the WRITE/CONFIG value pairs and the 5 dummy writes
 * between them are a fixed hardware sequence — do not reorder or
 * "simplify"; the final pair intentionally has no trailing writes.
 */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* restore the state captured before the reset */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5196
 
5197
/*
 * Perform a full pci config reset of the GPU: quiesce all engines,
 * save IGP-specific state, hit the pci config reset, then wait for
 * the asic to come back and restore the saved state.  Used as the
 * heavier fallback when cik_gpu_soft_reset() did not clear the hang.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* GMCON state does not survive the reset on IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the device is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5259
 
5260
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* mark the engine hung in the BIOS scratch regs so the vbios
	 * knows a reset is in progress */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	/* NOTE(review): returns 0 even when blocks are still busy; the
	 * remaining hang is reflected only in the scratch register */
	return 0;
}
5294
 
5295
/**
5296
 * cik_gfx_is_lockup - check if the 3D engine is locked up
5297
 *
5298
 * @rdev: radeon_device pointer
5299
 * @ring: radeon_ring structure holding ring information
5300
 *
5301
 * Check if the 3D engine is locked up (CIK).
5302
 * Returns true if the engine is locked, false if not.
5303
 */
5304
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5305
{
5306
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5307
 
5308
	if (!(reset_mask & (RADEON_RESET_GFX |
5309
			    RADEON_RESET_COMPUTE |
5310
			    RADEON_RESET_CP))) {
5311
		radeon_ring_lockup_update(rdev, ring);
5312
		return false;
5313
	}
5314
	return radeon_ring_test_lockup(rdev, ring);
5315
}
5316
 
5317
/* MC */
5318
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (bits 31:16) and base (bits 15:0),
	 * both in units of 16MB */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on CIK: set BOT above TOP to disable it */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5373
 
5374
/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	/* NOOFCHAN encodes the channel count non-linearly; see the
	 * case values below */
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	/* CIK reuses the SI vram/gtt placement logic */
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
5440
 
5441
/*
5442
 * GART
5443
 * VMID 0 is the physical GPU addresses as used by the kernel.
5444
 * VMIDs 1-15 are used for userspace clients and are handled
5445
 * by the radeon vm/hsa code.
5446
 */
5447
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5462
 
6104 serge 5463
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5464
{
5465
	int i;
5466
	uint32_t sh_mem_bases, sh_mem_config;
5467
 
5468
	sh_mem_bases = 0x6000 | 0x6000 << 16;
5469
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5470
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5471
 
5472
	mutex_lock(&rdev->srbm_mutex);
5473
	for (i = 8; i < 16; i++) {
5474
		cik_srbm_select(rdev, 0, 0, 0, i);
5475
		/* CP and shaders */
5476
		WREG32(SH_MEM_CONFIG, sh_mem_config);
5477
		WREG32(SH_MEM_APE1_BASE, 1);
5478
		WREG32(SH_MEM_APE1_LIMIT, 0);
5479
		WREG32(SH_MEM_BASES, sh_mem_bases);
5480
	}
5481
	cik_srbm_select(rdev, 0, 0, 0, 0);
5482
	mutex_unlock(&rdev->srbm_mutex);
5483
}
5484
 
5078 serge 5485
/**
5486
 * cik_pcie_gart_enable - gart enable
5487
 *
5488
 * @rdev: radeon_device pointer
5489
 *
5490
 * This sets up the TLBs, programs the page tables for VMID0,
5491
 * sets up the hw for VMIDs 1-15 which are allocated on
5492
 * demand, and sets up the global locations for the LDS, GDS,
5493
 * and GPUVM for FSA64 clients (CIK).
5494
 * Returns 0 for success, errors for failure.
5495
 */
5496
static int cik_pcie_gart_enable(struct radeon_device *rdev)
5497
{
5498
	int r, i;
5499
 
5500
	if (rdev->gart.robj == NULL) {
5501
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5502
		return -EINVAL;
5503
	}
5504
	r = radeon_gart_table_vram_pin(rdev);
5505
	if (r)
5506
		return r;
5507
	/* Setup TLB control */
5508
	WREG32(MC_VM_MX_L1_TLB_CNTL,
5509
	       (0xA << 7) |
5510
	       ENABLE_L1_TLB |
5511
	       ENABLE_L1_FRAGMENT_PROCESSING |
5512
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5513
	       ENABLE_ADVANCED_DRIVER_MODEL |
5514
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5515
	/* Setup L2 cache */
5516
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5517
	       ENABLE_L2_FRAGMENT_PROCESSING |
5518
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5519
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5520
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5521
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5522
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5523
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5524
	       BANK_SELECT(4) |
5525
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5526
	/* setup context0 */
5527
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5528
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5529
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5530
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5531
			(u32)(rdev->dummy_page.addr >> 12));
5532
	WREG32(VM_CONTEXT0_CNTL2, 0);
5533
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5534
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5535
 
5536
	WREG32(0x15D4, 0);
5537
	WREG32(0x15D8, 0);
5538
	WREG32(0x15DC, 0);
5539
 
5540
	/* restore context1-15 */
5541
	/* set vm size, must be a multiple of 4 */
5542
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
6104 serge 5543
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5078 serge 5544
	for (i = 1; i < 16; i++) {
5545
		if (i < 8)
5546
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5547
			       rdev->vm_manager.saved_table_addr[i]);
5548
		else
5549
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5550
			       rdev->vm_manager.saved_table_addr[i]);
5551
	}
5552
 
5553
	/* enable context1-15 */
5554
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5555
	       (u32)(rdev->dummy_page.addr >> 12));
5556
	WREG32(VM_CONTEXT1_CNTL2, 4);
5557
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5558
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5559
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5560
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5561
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5562
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5563
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5564
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5565
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5566
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5567
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5568
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5569
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5570
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5571
 
5572
	if (rdev->family == CHIP_KAVERI) {
5573
		u32 tmp = RREG32(CHUB_CONTROL);
5574
		tmp &= ~BYPASS_VM;
5575
		WREG32(CHUB_CONTROL, tmp);
5576
	}
5577
 
5578
	/* XXX SH_MEM regs */
5579
	/* where to put LDS, scratch, GPUVM in FSA64 space */
5580
	mutex_lock(&rdev->srbm_mutex);
5581
	for (i = 0; i < 16; i++) {
5582
		cik_srbm_select(rdev, 0, 0, 0, i);
5583
		/* CP and shaders */
5584
		WREG32(SH_MEM_CONFIG, 0);
5585
		WREG32(SH_MEM_APE1_BASE, 1);
5586
		WREG32(SH_MEM_APE1_LIMIT, 0);
5587
		WREG32(SH_MEM_BASES, 0);
5588
		/* SDMA GFX */
5589
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5590
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5591
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5592
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5593
		/* XXX SDMA RLC - todo */
5594
	}
5595
	cik_srbm_select(rdev, 0, 0, 0, 0);
5596
	mutex_unlock(&rdev->srbm_mutex);
5597
 
6104 serge 5598
	cik_pcie_init_compute_vmid(rdev);
5599
 
5078 serge 5600
	cik_pcie_gart_tlb_flush(rdev);
5601
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5602
		 (unsigned)(rdev->mc.gtt_size >> 20),
5603
		 (unsigned long long)rdev->gart.table_addr);
5604
	rdev->gart.ready = true;
5605
	return 0;
5606
}
5607
 
5608
/**
5609
 * cik_pcie_gart_disable - gart disable
5610
 *
5611
 * @rdev: radeon_device pointer
5612
 *
5613
 * This disables all VM page table (CIK).
5614
 */
5615
static void cik_pcie_gart_disable(struct radeon_device *rdev)
5616
{
5617
	unsigned i;
5618
 
5619
	for (i = 1; i < 16; ++i) {
5620
		uint32_t reg;
5621
		if (i < 8)
5622
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5623
		else
5624
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5625
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5626
	}
5627
 
5628
	/* Disable all tables */
5629
	WREG32(VM_CONTEXT0_CNTL, 0);
5630
	WREG32(VM_CONTEXT1_CNTL, 0);
5631
	/* Setup TLB control */
5632
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5633
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5634
	/* Setup L2 cache */
5635
	WREG32(VM_L2_CNTL,
5636
	       ENABLE_L2_FRAGMENT_PROCESSING |
5637
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5638
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5639
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5640
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5641
	WREG32(VM_L2_CNTL2, 0);
5642
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5643
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5644
	radeon_gart_table_vram_unpin(rdev);
5645
}
5646
 
5647
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5660
 
5661
/* vm parser */
5662
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5674
 
5675
/*
5676
 * vm
5677
 * VMID 0 is the physical GPU addresses as used by the kernel.
5678
 * VMIDs 1-15 are used for userspace clients and are handled
5679
 * by the radeon vm/hsa code.
5680
 */
5681
/**
5682
 * cik_vm_init - cik vm init callback
5683
 *
5684
 * @rdev: radeon_device pointer
5685
 *
5686
 * Inits cik specific vm parameters (number of VMs, base of vram for
5687
 * VMIDs 1-15) (CIK).
5688
 * Returns 0 for success.
5689
 */
5690
int cik_vm_init(struct radeon_device *rdev)
5691
{
5271 serge 5692
	/*
5693
	 * number of VMs
5694
	 * VMID 0 is reserved for System
5695
	 * radeon graphics/compute will use VMIDs 1-7
5696
	 * amdkfd will use VMIDs 8-15
5697
	 */
5698
	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5078 serge 5699
	/* base offset of vram pages */
5700
	if (rdev->flags & RADEON_IS_IGP) {
5701
		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5702
		tmp <<= 22;
5703
		rdev->vm_manager.vram_base_offset = tmp;
5704
	} else
5705
		rdev->vm_manager.vram_base_offset = 0;
5706
 
5707
	return 0;
5708
}
5709
 
5710
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Nothing to do on CIK; kept for the asic callback table.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5720
 
5721
/**
5722
 * cik_vm_decode_fault - print human readable fault info
5723
 *
5724
 * @rdev: radeon_device pointer
5725
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5726
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5727
 *
5728
 * Print human readable fault information (CIK).
5729
 */
5730
static void cik_vm_decode_fault(struct radeon_device *rdev,
5731
				u32 status, u32 addr, u32 mc_client)
5732
{
5733
	u32 mc_id;
5734
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5735
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5736
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5737
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5738
 
5739
	if (rdev->family == CHIP_HAWAII)
5740
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5741
	else
5742
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5743
 
5744
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5745
	       protections, vmid, addr,
5746
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5747
	       block, mc_client, mc_id);
5748
}
5749
 
5750
/**
5751
 * cik_vm_flush - cik vm flush using the CP
5752
 *
5753
 * @rdev: radeon_device pointer
5754
 *
5755
 * Update the page table base and flush the VM TLB
5756
 * using the CP (CIK).
5757
 */
5271 serge 5758
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5759
		  unsigned vm_id, uint64_t pd_addr)
5078 serge 5760
{
5271 serge 5761
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5078 serge 5762
 
5763
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5764
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5765
				 WRITE_DATA_DST_SEL(0)));
5271 serge 5766
	if (vm_id < 8) {
5078 serge 5767
		radeon_ring_write(ring,
5271 serge 5768
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5078 serge 5769
	} else {
5770
		radeon_ring_write(ring,
5271 serge 5771
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5078 serge 5772
	}
5773
	radeon_ring_write(ring, 0);
5271 serge 5774
	radeon_ring_write(ring, pd_addr >> 12);
5078 serge 5775
 
5776
	/* update SH_MEM_* regs */
5777
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5778
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5779
				 WRITE_DATA_DST_SEL(0)));
5780
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5781
	radeon_ring_write(ring, 0);
5271 serge 5782
	radeon_ring_write(ring, VMID(vm_id));
5078 serge 5783
 
5784
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5785
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5786
				 WRITE_DATA_DST_SEL(0)));
5787
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5788
	radeon_ring_write(ring, 0);
5789
 
5790
	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5791
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5792
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5793
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5794
 
5795
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5796
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5797
				 WRITE_DATA_DST_SEL(0)));
5798
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5799
	radeon_ring_write(ring, 0);
5800
	radeon_ring_write(ring, VMID(0));
5801
 
5802
	/* HDP flush */
5271 serge 5803
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5078 serge 5804
 
5805
	/* bits 0-15 are the VM contexts0-15 */
5806
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5807
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5808
				 WRITE_DATA_DST_SEL(0)));
5809
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5810
	radeon_ring_write(ring, 0);
5271 serge 5811
	radeon_ring_write(ring, 1 << vm_id);
5078 serge 5812
 
6104 serge 5813
	/* wait for the invalidate to complete */
5814
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5815
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5816
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5817
				 WAIT_REG_MEM_ENGINE(0))); /* me */
5818
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5819
	radeon_ring_write(ring, 0);
5820
	radeon_ring_write(ring, 0); /* ref */
5821
	radeon_ring_write(ring, 0); /* mask */
5822
	radeon_ring_write(ring, 0x20); /* poll interval */
5823
 
5078 serge 5824
	/* compute doesn't have PFP */
5825
	if (usepfp) {
5826
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5827
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5828
		radeon_ring_write(ring, 0x0);
5829
	}
5830
}
5831
 
5832
/*
5833
 * RLC
5834
 * The RLC is a multi-purpose microengine that handles a
5835
 * variety of functions, the most important of which is
5836
 * the interrupt controller.
5837
 */
5838
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5839
					  bool enable)
5840
{
5841
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5842
 
5843
	if (enable)
5844
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5845
	else
5846
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5847
	WREG32(CP_INT_CNTL_RING0, tmp);
5848
}
5849
 
5850
/* Enable/disable RLC load balancing (LBPW). */
static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 tmp;

	tmp = RREG32(RLC_LB_CNTL);
	if (enable)
		tmp |= LOAD_BALANCE_ENABLE;
	else
		tmp &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, tmp);
}
5861
 
5862
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5863
{
5864
	u32 i, j, k;
5865
	u32 mask;
5866
 
5271 serge 5867
	mutex_lock(&rdev->grbm_idx_mutex);
5078 serge 5868
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5869
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5870
			cik_select_se_sh(rdev, i, j);
5871
			for (k = 0; k < rdev->usec_timeout; k++) {
5872
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5873
					break;
5874
				udelay(1);
5875
			}
5876
		}
5877
	}
5878
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5271 serge 5879
	mutex_unlock(&rdev->grbm_idx_mutex);
5078 serge 5880
 
5881
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5882
	for (k = 0; k < rdev->usec_timeout; k++) {
5883
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5884
			break;
5885
		udelay(1);
5886
	}
5887
}
5888
 
5889
/* Write RLC_CNTL only if it differs from the requested value. */
static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	u32 tmp;

	tmp = RREG32(RLC_CNTL);
	if (tmp != rlc)
		WREG32(RLC_CNTL, rlc);
}
5897
 
5898
static u32 cik_halt_rlc(struct radeon_device *rdev)
5899
{
5900
	u32 data, orig;
5901
 
5902
	orig = data = RREG32(RLC_CNTL);
5903
 
5904
	if (data & RLC_ENABLE) {
5905
		u32 i;
5906
 
5907
		data &= ~RLC_ENABLE;
5908
		WREG32(RLC_CNTL, data);
5909
 
5910
		for (i = 0; i < rdev->usec_timeout; i++) {
5911
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5912
				break;
5913
			udelay(1);
5914
		}
5915
 
5916
		cik_wait_for_rlc_serdes(rdev);
5917
	}
5918
 
5919
	return orig;
5920
}
5921
 
5922
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5923
{
5924
	u32 tmp, i, mask;
5925
 
5926
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5927
	WREG32(RLC_GPR_REG2, tmp);
5928
 
5929
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5930
	for (i = 0; i < rdev->usec_timeout; i++) {
5931
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5932
			break;
5933
		udelay(1);
5934
	}
5935
 
5936
	for (i = 0; i < rdev->usec_timeout; i++) {
5937
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5938
			break;
5939
		udelay(1);
5940
	}
5941
}
5942
 
5943
void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5944
{
5945
	u32 tmp;
5946
 
5947
	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5948
	WREG32(RLC_GPR_REG2, tmp);
5949
}
5950
 
5951
/**
5952
 * cik_rlc_stop - stop the RLC ME
5953
 *
5954
 * @rdev: radeon_device pointer
5955
 *
5956
 * Halt the RLC ME (MicroEngine) (CIK).
5957
 */
5958
static void cik_rlc_stop(struct radeon_device *rdev)
5959
{
5960
	WREG32(RLC_CNTL, 0);
5961
 
5962
	cik_enable_gui_idle_interrupt(rdev, false);
5963
 
5964
	cik_wait_for_rlc_serdes(rdev);
5965
}
5966
 
5967
/**
5968
 * cik_rlc_start - start the RLC ME
5969
 *
5970
 * @rdev: radeon_device pointer
5971
 *
5972
 * Unhalt the RLC ME (MicroEngine) (CIK).
5973
 */
5974
static void cik_rlc_start(struct radeon_device *rdev)
5975
{
5976
	WREG32(RLC_CNTL, RLC_ENABLE);
5977
 
5978
	cik_enable_gui_idle_interrupt(rdev, true);
5979
 
5980
	udelay(50);
5981
}
5982
 
5983
/**
5984
 * cik_rlc_resume - setup the RLC hw
5985
 *
5986
 * @rdev: radeon_device pointer
5987
 *
5988
 * Initialize the RLC registers, load the ucode,
5989
 * and start the RLC (CIK).
5990
 * Returns 0 for success, -EINVAL if the ucode is not available.
5991
 */
5992
static int cik_rlc_resume(struct radeon_device *rdev)
5993
{
5994
	u32 i, size, tmp;
5995
 
5996
	if (!rdev->rlc_fw)
5997
		return -EINVAL;
5998
 
5999
	cik_rlc_stop(rdev);
6000
 
6001
	/* disable CG */
6002
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6003
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6004
 
6005
	si_rlc_reset(rdev);
6006
 
6007
	cik_init_pg(rdev);
6008
 
6009
	cik_init_cg(rdev);
6010
 
6011
	WREG32(RLC_LB_CNTR_INIT, 0);
6012
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6013
 
5271 serge 6014
	mutex_lock(&rdev->grbm_idx_mutex);
5078 serge 6015
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6016
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6017
	WREG32(RLC_LB_PARAMS, 0x00600408);
6018
	WREG32(RLC_LB_CNTL, 0x80000004);
5271 serge 6019
	mutex_unlock(&rdev->grbm_idx_mutex);
5078 serge 6020
 
6021
	WREG32(RLC_MC_CNTL, 0);
6022
	WREG32(RLC_UCODE_CNTL, 0);
6023
 
6024
	if (rdev->new_fw) {
6025
		const struct rlc_firmware_header_v1_0 *hdr =
6026
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6027
		const __le32 *fw_data = (const __le32 *)
6028
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6029
 
6030
		radeon_ucode_print_rlc_hdr(&hdr->header);
6031
 
6032
		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6033
		WREG32(RLC_GPM_UCODE_ADDR, 0);
6034
		for (i = 0; i < size; i++)
6035
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5271 serge 6036
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5078 serge 6037
	} else {
6038
		const __be32 *fw_data;
6039
 
6040
		switch (rdev->family) {
6041
		case CHIP_BONAIRE:
6042
		case CHIP_HAWAII:
6043
		default:
6044
			size = BONAIRE_RLC_UCODE_SIZE;
6045
			break;
6046
		case CHIP_KAVERI:
6047
			size = KV_RLC_UCODE_SIZE;
6048
			break;
6049
		case CHIP_KABINI:
6050
			size = KB_RLC_UCODE_SIZE;
6051
			break;
6052
		case CHIP_MULLINS:
6053
			size = ML_RLC_UCODE_SIZE;
6054
			break;
6055
		}
6056
 
6104 serge 6057
		fw_data = (const __be32 *)rdev->rlc_fw->data;
5078 serge 6058
		WREG32(RLC_GPM_UCODE_ADDR, 0);
6104 serge 6059
		for (i = 0; i < size; i++)
6060
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6061
		WREG32(RLC_GPM_UCODE_ADDR, 0);
5078 serge 6062
	}
6063
 
6064
	/* XXX - find out what chips support lbpw */
6065
	cik_enable_lbpw(rdev, false);
6066
 
6067
	if (rdev->family == CHIP_BONAIRE)
6068
		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6069
 
6070
	cik_rlc_start(rdev);
6071
 
6072
	return 0;
6073
}
6074
 
6075
/* Enable/disable coarse grain clock gating (CGCG/CGLS) for GFX. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads to flush pending writes before disabling */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6112
 
6113
/* Enable/disable medium grain clock gating (MGCG) and the related
 * memory light sleep features for GFX.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6196
 
6197
static const u32 mc_cg_registers[] =
6198
{
6199
	MC_HUB_MISC_HUB_CG,
6200
	MC_HUB_MISC_SIP_CG,
6201
	MC_HUB_MISC_VM_CG,
6202
	MC_XPB_CLK_GAT,
6203
	ATC_MISC_CG,
6204
	MC_CITF_MISC_WR_CG,
6205
	MC_CITF_MISC_RD_CG,
6206
	MC_CITF_MISC_VM_CG,
6207
	VM_L2_CG,
6208
};
6209
 
6210
static void cik_enable_mc_ls(struct radeon_device *rdev,
6211
			     bool enable)
6212
{
6213
	int i;
6214
	u32 orig, data;
6215
 
6216
	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6217
		orig = data = RREG32(mc_cg_registers[i]);
6218
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6219
			data |= MC_LS_ENABLE;
6220
		else
6221
			data &= ~MC_LS_ENABLE;
6222
		if (data != orig)
6223
			WREG32(mc_cg_registers[i], data);
6224
	}
6225
}
6226
 
6227
static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6228
			       bool enable)
6229
{
6230
	int i;
6231
	u32 orig, data;
6232
 
6233
	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6234
		orig = data = RREG32(mc_cg_registers[i]);
6235
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6236
			data |= MC_CG_ENABLE;
6237
		else
6238
			data &= ~MC_CG_ENABLE;
6239
		if (data != orig)
6240
			WREG32(mc_cg_registers[i], data);
6241
	}
6242
}
6243
 
6244
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6245
				 bool enable)
6246
{
6247
	u32 orig, data;
6248
 
6249
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6250
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6251
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6252
	} else {
6253
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6254
		data |= 0xff000000;
6255
		if (data != orig)
6256
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6257
 
6258
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6259
		data |= 0xff000000;
6260
		if (data != orig)
6261
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6262
	}
6263
}
6264
 
6265
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6266
				 bool enable)
6267
{
6268
	u32 orig, data;
6269
 
6270
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6271
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6272
		data |= 0x100;
6273
		if (orig != data)
6274
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6275
 
6276
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6277
		data |= 0x100;
6278
		if (orig != data)
6279
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6280
	} else {
6281
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6282
		data &= ~0x100;
6283
		if (orig != data)
6284
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6285
 
6286
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6287
		data &= ~0x100;
6288
		if (orig != data)
6289
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6290
	}
6291
}
6292
 
6293
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6294
				bool enable)
6295
{
6296
	u32 orig, data;
6297
 
6298
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6299
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6300
		data = 0xfff;
6301
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6302
 
6303
		orig = data = RREG32(UVD_CGC_CTRL);
6304
		data |= DCM;
6305
		if (orig != data)
6306
			WREG32(UVD_CGC_CTRL, data);
6307
	} else {
6308
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6309
		data &= ~0xfff;
6310
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6311
 
6312
		orig = data = RREG32(UVD_CGC_CTRL);
6313
		data &= ~DCM;
6314
		if (orig != data)
6315
			WREG32(UVD_CGC_CTRL, data);
6316
	}
6317
}
6318
 
6319
static void cik_enable_bif_mgls(struct radeon_device *rdev,
6320
			       bool enable)
6321
{
6322
	u32 orig, data;
6323
 
6324
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6325
 
6326
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6327
		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6328
			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6329
	else
6330
		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6331
			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6332
 
6333
	if (orig != data)
6334
		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6335
}
6336
 
6337
static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6338
				bool enable)
6339
{
6340
	u32 orig, data;
6341
 
6342
	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6343
 
6344
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6345
		data &= ~CLOCK_GATING_DIS;
6346
	else
6347
		data |= CLOCK_GATING_DIS;
6348
 
6349
	if (orig != data)
6350
		WREG32(HDP_HOST_PATH_CNTL, data);
6351
}
6352
 
6353
static void cik_enable_hdp_ls(struct radeon_device *rdev,
6354
			      bool enable)
6355
{
6356
	u32 orig, data;
6357
 
6358
	orig = data = RREG32(HDP_MEM_POWER_LS);
6359
 
6360
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6361
		data |= HDP_LS_ENABLE;
6362
	else
6363
		data &= ~HDP_LS_ENABLE;
6364
 
6365
	if (orig != data)
6366
		WREG32(HDP_MEM_POWER_LS, data);
6367
}
6368
 
6369
void cik_update_cg(struct radeon_device *rdev,
6370
		   u32 block, bool enable)
6371
{
6372
 
6373
	if (block & RADEON_CG_BLOCK_GFX) {
6374
		cik_enable_gui_idle_interrupt(rdev, false);
6375
		/* order matters! */
6376
		if (enable) {
6377
			cik_enable_mgcg(rdev, true);
6378
			cik_enable_cgcg(rdev, true);
6379
		} else {
6380
			cik_enable_cgcg(rdev, false);
6381
			cik_enable_mgcg(rdev, false);
6382
		}
6383
		cik_enable_gui_idle_interrupt(rdev, true);
6384
	}
6385
 
6386
	if (block & RADEON_CG_BLOCK_MC) {
6387
		if (!(rdev->flags & RADEON_IS_IGP)) {
6388
			cik_enable_mc_mgcg(rdev, enable);
6389
			cik_enable_mc_ls(rdev, enable);
6390
		}
6391
	}
6392
 
6393
	if (block & RADEON_CG_BLOCK_SDMA) {
6394
		cik_enable_sdma_mgcg(rdev, enable);
6395
		cik_enable_sdma_mgls(rdev, enable);
6396
	}
6397
 
6398
	if (block & RADEON_CG_BLOCK_BIF) {
6399
		cik_enable_bif_mgls(rdev, enable);
6400
	}
6401
 
6402
	if (block & RADEON_CG_BLOCK_UVD) {
6403
		if (rdev->has_uvd)
6404
			cik_enable_uvd_mgcg(rdev, enable);
6405
	}
6406
 
6407
	if (block & RADEON_CG_BLOCK_HDP) {
6408
		cik_enable_hdp_mgcg(rdev, enable);
6409
		cik_enable_hdp_ls(rdev, enable);
6410
	}
6411
 
6412
	if (block & RADEON_CG_BLOCK_VCE) {
6413
		vce_v2_0_enable_mgcg(rdev, enable);
6104 serge 6414
	}
5078 serge 6415
}
6416
 
6417
static void cik_init_cg(struct radeon_device *rdev)
6418
{
6419
 
6420
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6421
 
6422
	if (rdev->has_uvd)
6423
		si_init_uvd_internal_cg(rdev);
6424
 
6425
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6426
			     RADEON_CG_BLOCK_SDMA |
6427
			     RADEON_CG_BLOCK_BIF |
6428
			     RADEON_CG_BLOCK_UVD |
6429
			     RADEON_CG_BLOCK_HDP), true);
6430
}
6431
 
6432
static void cik_fini_cg(struct radeon_device *rdev)
6433
{
6434
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6435
			     RADEON_CG_BLOCK_SDMA |
6436
			     RADEON_CG_BLOCK_BIF |
6437
			     RADEON_CG_BLOCK_UVD |
6438
			     RADEON_CG_BLOCK_HDP), false);
6439
 
6440
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6441
}
6442
 
6443
static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6444
					  bool enable)
6445
{
6446
	u32 data, orig;
6447
 
6448
	orig = data = RREG32(RLC_PG_CNTL);
6449
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6450
		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6451
	else
6452
		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6453
	if (orig != data)
6454
		WREG32(RLC_PG_CNTL, data);
6455
}
6456
 
6457
static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6458
					  bool enable)
6459
{
6460
	u32 data, orig;
6461
 
6462
	orig = data = RREG32(RLC_PG_CNTL);
6463
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6464
		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6465
	else
6466
		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6467
	if (orig != data)
6468
		WREG32(RLC_PG_CNTL, data);
6469
}
6470
 
6471
/* Toggle CP powergating. Note the bit is a *disable* bit, so enabling
 * PG clears DISABLE_CP_PG and vice versa.
 */
static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
{
	u32 old, val;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
		val &= ~DISABLE_CP_PG;
	else
		val |= DISABLE_CP_PG;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6483
 
6484
/* Toggle GDS powergating. Like CP PG, the hardware bit is inverted
 * (DISABLE_GDS_PG), so enable clears it.
 */
static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
{
	u32 old, val;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
		val &= ~DISABLE_GDS_PG;
	else
		val |= DISABLE_GDS_PG;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6496
 
6497
#define CP_ME_TABLE_SIZE    96
6498
#define CP_ME_TABLE_OFFSET  2048
6499
#define CP_MEC_TABLE_OFFSET 4096
6500
 
6501
void cik_init_cp_pg_table(struct radeon_device *rdev)
6502
{
6503
	volatile u32 *dst_ptr;
6504
	int me, i, max_me = 4;
6505
	u32 bo_offset = 0;
6506
	u32 table_offset, table_size;
6507
 
6508
	if (rdev->family == CHIP_KAVERI)
6509
		max_me = 5;
6510
 
6511
	if (rdev->rlc.cp_table_ptr == NULL)
6512
		return;
6513
 
6514
	/* write the cp table buffer */
6515
	dst_ptr = rdev->rlc.cp_table_ptr;
6516
	for (me = 0; me < max_me; me++) {
6517
		if (rdev->new_fw) {
6518
			const __le32 *fw_data;
6519
			const struct gfx_firmware_header_v1_0 *hdr;
6520
 
6521
			if (me == 0) {
6522
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6523
				fw_data = (const __le32 *)
6524
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6525
				table_offset = le32_to_cpu(hdr->jt_offset);
6526
				table_size = le32_to_cpu(hdr->jt_size);
6527
			} else if (me == 1) {
6528
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6529
				fw_data = (const __le32 *)
6530
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6531
				table_offset = le32_to_cpu(hdr->jt_offset);
6532
				table_size = le32_to_cpu(hdr->jt_size);
6533
			} else if (me == 2) {
6534
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6535
				fw_data = (const __le32 *)
6536
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6537
				table_offset = le32_to_cpu(hdr->jt_offset);
6538
				table_size = le32_to_cpu(hdr->jt_size);
6539
			} else if (me == 3) {
6540
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6541
				fw_data = (const __le32 *)
6542
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6543
				table_offset = le32_to_cpu(hdr->jt_offset);
6544
				table_size = le32_to_cpu(hdr->jt_size);
6545
			} else {
6546
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6547
				fw_data = (const __le32 *)
6548
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6549
				table_offset = le32_to_cpu(hdr->jt_offset);
6550
				table_size = le32_to_cpu(hdr->jt_size);
6551
			}
6552
 
6553
			for (i = 0; i < table_size; i ++) {
6554
				dst_ptr[bo_offset + i] =
6555
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6556
			}
6557
			bo_offset += table_size;
6558
		} else {
6559
			const __be32 *fw_data;
6560
			table_size = CP_ME_TABLE_SIZE;
6561
 
6104 serge 6562
			if (me == 0) {
6563
				fw_data = (const __be32 *)rdev->ce_fw->data;
6564
				table_offset = CP_ME_TABLE_OFFSET;
6565
			} else if (me == 1) {
6566
				fw_data = (const __be32 *)rdev->pfp_fw->data;
6567
				table_offset = CP_ME_TABLE_OFFSET;
6568
			} else if (me == 2) {
6569
				fw_data = (const __be32 *)rdev->me_fw->data;
6570
				table_offset = CP_ME_TABLE_OFFSET;
6571
			} else {
6572
				fw_data = (const __be32 *)rdev->mec_fw->data;
6573
				table_offset = CP_MEC_TABLE_OFFSET;
6574
			}
5078 serge 6575
 
6576
			for (i = 0; i < table_size; i ++) {
6577
				dst_ptr[bo_offset + i] =
6578
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6579
			}
6580
			bo_offset += table_size;
6581
		}
6582
	}
6583
}
6584
 
6585
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6586
				bool enable)
6587
{
6588
	u32 data, orig;
6589
 
6590
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6591
		orig = data = RREG32(RLC_PG_CNTL);
6592
		data |= GFX_PG_ENABLE;
6593
		if (orig != data)
6594
			WREG32(RLC_PG_CNTL, data);
6595
 
6596
		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6597
		data |= AUTO_PG_EN;
6598
		if (orig != data)
6599
			WREG32(RLC_AUTO_PG_CTRL, data);
6600
	} else {
6601
		orig = data = RREG32(RLC_PG_CNTL);
6602
		data &= ~GFX_PG_ENABLE;
6603
		if (orig != data)
6604
			WREG32(RLC_PG_CNTL, data);
6605
 
6606
		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6607
		data &= ~AUTO_PG_EN;
6608
		if (orig != data)
6609
			WREG32(RLC_AUTO_PG_CTRL, data);
6610
 
6611
		data = RREG32(DB_RENDER_CONTROL);
6612
	}
6613
}
6614
 
6615
/* cik_get_cu_active_bitmap - bitmap of active CUs for one SE/SH pair.
 *
 * Reads the fuse (CC) and user (GC_USER) shader-array configs with the
 * GRBM index pointed at the requested SE/SH (serialized by
 * grbm_idx_mutex), merges them, and returns a mask of enabled CUs
 * limited to max_cu_per_sh bits.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 disabled, user_disabled, valid = 0;
	int i;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	disabled = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	user_disabled = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* disable bits live in [31:16]; fold in the user override and
	 * shift them down to bit 0 */
	disabled &= 0xffff0000;
	disabled |= user_disabled;
	disabled >>= 16;

	/* build a mask covering every CU physically present in an SH */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++)
		valid = (valid << 1) | 1;

	return (~disabled) & valid;
}
6639
 
6640
static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6641
{
6642
	u32 i, j, k, active_cu_number = 0;
6643
	u32 mask, counter, cu_bitmap;
6644
	u32 tmp = 0;
6645
 
6646
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6647
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6648
			mask = 1;
6649
			cu_bitmap = 0;
6650
			counter = 0;
6651
			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6652
				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6653
					if (counter < 2)
6654
						cu_bitmap |= mask;
6655
					counter ++;
6656
				}
6657
				mask <<= 1;
6658
			}
6659
 
6660
			active_cu_number += counter;
6661
			tmp |= (cu_bitmap << (i * 16 + j * 8));
6662
		}
6663
	}
6664
 
6665
	WREG32(RLC_PG_AO_CU_MASK, tmp);
6666
 
6667
	tmp = RREG32(RLC_MAX_PG_CU);
6668
	tmp &= ~MAX_PU_CU_MASK;
6669
	tmp |= MAX_PU_CU(active_cu_number);
6670
	WREG32(RLC_MAX_PG_CU, tmp);
6671
}
6672
 
6673
static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6674
				       bool enable)
6675
{
6676
	u32 data, orig;
6677
 
6678
	orig = data = RREG32(RLC_PG_CNTL);
6679
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6680
		data |= STATIC_PER_CU_PG_ENABLE;
6681
	else
6682
		data &= ~STATIC_PER_CU_PG_ENABLE;
6683
	if (orig != data)
6684
		WREG32(RLC_PG_CNTL, data);
6685
}
6686
 
6687
static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6688
					bool enable)
6689
{
6690
	u32 data, orig;
6691
 
6692
	orig = data = RREG32(RLC_PG_CNTL);
6693
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6694
		data |= DYN_PER_CU_PG_ENABLE;
6695
	else
6696
		data &= ~DYN_PER_CU_PG_ENABLE;
6697
	if (orig != data)
6698
		WREG32(RLC_PG_CNTL, data);
6699
}
6700
 
6701
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6702
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6703
 
6704
/* cik_init_gfx_cgpg - one-time setup for GFX coarse-grain powergating.
 *
 * Writes the clear-state descriptor and save/restore register list into
 * RLC scratch space, points the RLC at the save/restore and CP table
 * buffers, and programs the PG delay / idle-poll tuning values.
 * NOTE(review): the exact register ordering below follows the hardware
 * programming sequence — do not reorder.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi/lo GPU address + size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffers the RLC uses during power transitions (256-byte aligned) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* PG delay tuning values */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* GRBM register save/restore idle gap threshold */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6752
 
6753
/* cik_update_gfx_pg - switch all three GFX powergating modes together:
 * coarse-grain, static medium-grain, dynamic medium-grain.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6759
 
6760
u32 cik_get_csb_size(struct radeon_device *rdev)
6761
{
6762
	u32 count = 0;
6763
	const struct cs_section_def *sect = NULL;
6764
	const struct cs_extent_def *ext = NULL;
6765
 
6766
	if (rdev->rlc.cs_data == NULL)
6767
		return 0;
6768
 
6769
	/* begin clear state */
6770
	count += 2;
6771
	/* context control state */
6772
	count += 3;
6773
 
6774
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6775
		for (ext = sect->section; ext->extent != NULL; ++ext) {
6776
			if (sect->id == SECT_CONTEXT)
6777
				count += 2 + ext->reg_count;
6778
			else
6779
				return 0;
6780
		}
6781
	}
6782
	/* pa_sc_raster_config/pa_sc_raster_config1 */
6783
	count += 4;
6784
	/* end clear state */
6785
	count += 2;
6786
	/* clear state */
6787
	count += 2;
6788
 
6789
	return count;
6790
}
6791
 
6792
/* cik_get_csb_buffer - emit the PM4 clear-state packet stream.
 *
 * Fills @buffer with the golden-register clear-state sequence the RLC
 * replays on power transitions. The packet layout must match the dword
 * count computed by cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control: load both CE/DE enable masks */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* golden context register extents */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are valid here */
				return;
			}
		}
	}

	/* per-family pa_sc_raster_config / pa_sc_raster_config1 */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6856
 
6857
static void cik_init_pg(struct radeon_device *rdev)
6858
{
6859
	if (rdev->pg_flags) {
6860
		cik_enable_sck_slowdown_on_pu(rdev, true);
6861
		cik_enable_sck_slowdown_on_pd(rdev, true);
6862
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6863
			cik_init_gfx_cgpg(rdev);
6864
			cik_enable_cp_pg(rdev, true);
6865
			cik_enable_gds_pg(rdev, true);
6866
		}
6867
		cik_init_ao_cu_mask(rdev);
6868
		cik_update_gfx_pg(rdev, true);
6869
	}
6870
}
6871
 
6872
static void cik_fini_pg(struct radeon_device *rdev)
6873
{
6874
	if (rdev->pg_flags) {
6875
		cik_update_gfx_pg(rdev, false);
6876
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6877
			cik_enable_cp_pg(rdev, false);
6878
			cik_enable_gds_pg(rdev, false);
6879
		}
6880
	}
6881
}
6882
 
6883
/*
6884
 * Interrupts
6885
 * Starting with r6xx, interrupts are handled via a ring buffer.
6886
 * Ring buffers are areas of GPU accessible memory that the GPU
6887
 * writes interrupt vectors into and the host reads vectors out of.
6888
 * There is a rptr (read pointer) that determines where the
6889
 * host is currently reading, and a wptr (write pointer)
6890
 * which determines where the GPU has written.  When the
6891
 * pointers are equal, the ring is idle.  When the GPU
6892
 * writes vectors to the ring buffer, it increments the
6893
 * wptr.  When there is an interrupt, the host then starts
6894
 * fetching commands and processing them until the pointers are
6895
 * equal again at which point it updates the rptr.
6896
 */
6897
 
6898
/**
6899
 * cik_enable_interrupts - Enable the interrupt ring buffer
6900
 *
6901
 * @rdev: radeon_device pointer
6902
 *
6903
 * Enable the interrupt ring buffer (CIK).
6904
 */
6905
static void cik_enable_interrupts(struct radeon_device *rdev)
6906
{
6907
	u32 ih_cntl = RREG32(IH_CNTL);
6908
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6909
 
6910
	ih_cntl |= ENABLE_INTR;
6911
	ih_rb_cntl |= IH_RB_ENABLE;
6912
	WREG32(IH_CNTL, ih_cntl);
6913
	WREG32(IH_RB_CNTL, ih_rb_cntl);
6914
	rdev->ih.enabled = true;
6915
}
6916
 
6917
/**
6918
 * cik_disable_interrupts - Disable the interrupt ring buffer
6919
 *
6920
 * @rdev: radeon_device pointer
6921
 *
6922
 * Disable the interrupt ring buffer (CIK).
6923
 */
6924
static void cik_disable_interrupts(struct radeon_device *rdev)
6925
{
6926
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6927
	u32 ih_cntl = RREG32(IH_CNTL);
6928
 
6929
	ih_rb_cntl &= ~IH_RB_ENABLE;
6930
	ih_cntl &= ~ENABLE_INTR;
6931
	WREG32(IH_RB_CNTL, ih_rb_cntl);
6932
	WREG32(IH_CNTL, ih_cntl);
6933
	/* set rptr, wptr to 0 */
6934
	WREG32(IH_RB_RPTR, 0);
6935
	WREG32(IH_RB_WPTR, 0);
6936
	rdev->ih.enabled = false;
6937
	rdev->ih.rptr = 0;
6938
}
6939
 
6940
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear every interrupt enable bit used by the driver: GFX ring, both
 * SDMA engines, all MEC compute pipes, GRBM/SRBM, per-CRTC vblank and
 * pageflip sources, and the digital hotplug pads (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear enables but preserve the polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7016
 
7017
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* NOTE(port): bus mastering is handled elsewhere in this port */
//   pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7098
 
7099
/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Builds the enable masks from the current driver irq
 * state and writes them all out in one pass, ending with a posting
 * read to flush the register writes.
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from current hw state with the driver-managed bits cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	/* compute rings: only ME1 pipe 0 is wired up here */
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank: enabled for either a vblank client or a pending pageflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	/* write out all the accumulated masks */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always left unmasked */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
7299
 
7300
/**
7301
 * cik_irq_ack - ack interrupt sources
7302
 *
7303
 * @rdev: radeon_device pointer
7304
 *
7305
 * Ack interrupt sources on the GPU (vblanks, hpd,
7306
 * etc.) (CIK).  Certain interrupts sources are sw
7307
 * generated and do not require an explicit ack.
7308
 */
7309
static inline void cik_irq_ack(struct radeon_device *rdev)
7310
{
7311
	u32 tmp;
7312
 
7313
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7314
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7315
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7316
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7317
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7318
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7319
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7320
 
7321
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7322
		EVERGREEN_CRTC0_REGISTER_OFFSET);
7323
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7324
		EVERGREEN_CRTC1_REGISTER_OFFSET);
7325
	if (rdev->num_crtc >= 4) {
7326
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7327
			EVERGREEN_CRTC2_REGISTER_OFFSET);
7328
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7329
			EVERGREEN_CRTC3_REGISTER_OFFSET);
7330
	}
7331
	if (rdev->num_crtc >= 6) {
7332
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7333
			EVERGREEN_CRTC4_REGISTER_OFFSET);
7334
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7335
			EVERGREEN_CRTC5_REGISTER_OFFSET);
7336
	}
7337
 
7338
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7339
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7340
		       GRPH_PFLIP_INT_CLEAR);
7341
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7342
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7343
		       GRPH_PFLIP_INT_CLEAR);
7344
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7345
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7346
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7347
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7348
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7349
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7350
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7351
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7352
 
7353
	if (rdev->num_crtc >= 4) {
7354
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7355
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7356
			       GRPH_PFLIP_INT_CLEAR);
7357
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7358
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7359
			       GRPH_PFLIP_INT_CLEAR);
7360
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7361
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7362
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7363
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7364
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7365
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7366
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7367
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7368
	}
7369
 
7370
	if (rdev->num_crtc >= 6) {
7371
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7372
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7373
			       GRPH_PFLIP_INT_CLEAR);
7374
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7375
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7376
			       GRPH_PFLIP_INT_CLEAR);
7377
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7378
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7379
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7380
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7381
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7382
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7383
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7384
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7385
	}
7386
 
7387
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7388
		tmp = RREG32(DC_HPD1_INT_CONTROL);
7389
		tmp |= DC_HPDx_INT_ACK;
7390
		WREG32(DC_HPD1_INT_CONTROL, tmp);
7391
	}
7392
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7393
		tmp = RREG32(DC_HPD2_INT_CONTROL);
7394
		tmp |= DC_HPDx_INT_ACK;
7395
		WREG32(DC_HPD2_INT_CONTROL, tmp);
7396
	}
7397
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7398
		tmp = RREG32(DC_HPD3_INT_CONTROL);
7399
		tmp |= DC_HPDx_INT_ACK;
7400
		WREG32(DC_HPD3_INT_CONTROL, tmp);
7401
	}
7402
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7403
		tmp = RREG32(DC_HPD4_INT_CONTROL);
7404
		tmp |= DC_HPDx_INT_ACK;
7405
		WREG32(DC_HPD4_INT_CONTROL, tmp);
7406
	}
7407
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7408
		tmp = RREG32(DC_HPD5_INT_CONTROL);
7409
		tmp |= DC_HPDx_INT_ACK;
7410
		WREG32(DC_HPD5_INT_CONTROL, tmp);
7411
	}
7412
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7413
		tmp = RREG32(DC_HPD5_INT_CONTROL);
7414
		tmp |= DC_HPDx_INT_ACK;
7415
		WREG32(DC_HPD6_INT_CONTROL, tmp);
7416
	}
6104 serge 7417
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7418
		tmp = RREG32(DC_HPD1_INT_CONTROL);
7419
		tmp |= DC_HPDx_RX_INT_ACK;
7420
		WREG32(DC_HPD1_INT_CONTROL, tmp);
7421
	}
7422
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7423
		tmp = RREG32(DC_HPD2_INT_CONTROL);
7424
		tmp |= DC_HPDx_RX_INT_ACK;
7425
		WREG32(DC_HPD2_INT_CONTROL, tmp);
7426
	}
7427
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7428
		tmp = RREG32(DC_HPD3_INT_CONTROL);
7429
		tmp |= DC_HPDx_RX_INT_ACK;
7430
		WREG32(DC_HPD3_INT_CONTROL, tmp);
7431
	}
7432
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7433
		tmp = RREG32(DC_HPD4_INT_CONTROL);
7434
		tmp |= DC_HPDx_RX_INT_ACK;
7435
		WREG32(DC_HPD4_INT_CONTROL, tmp);
7436
	}
7437
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7438
		tmp = RREG32(DC_HPD5_INT_CONTROL);
7439
		tmp |= DC_HPDx_RX_INT_ACK;
7440
		WREG32(DC_HPD5_INT_CONTROL, tmp);
7441
	}
7442
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7443
		tmp = RREG32(DC_HPD5_INT_CONTROL);
7444
		tmp |= DC_HPDx_RX_INT_ACK;
7445
		WREG32(DC_HPD6_INT_CONTROL, tmp);
7446
	}
5078 serge 7447
}
7448
 
7449
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* mask interrupt generation at the controller first */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	/* clear anything that fired while we were masking */
	cik_irq_ack(rdev);
	/* then disable the per-source interrupt state registers */
	cik_disable_interrupt_state(rdev);
}
7464
 
7465
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts must be fully quiesced before stopping the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7478
 
7479
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before freeing the ring it writes into */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7493
 
7494
/**
7495
 * cik_get_ih_wptr - get the IH ring buffer wptr
7496
 *
7497
 * @rdev: radeon_device pointer
7498
 *
7499
 * Get the IH ring buffer wptr from either the register
7500
 * or the writeback memory buffer (CIK).  Also check for
7501
 * ring buffer overflow and deal with it.
7502
 * Used by cik_irq_process().
7503
 * Returns the value of the wptr.
7504
 */
7505
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7506
{
7507
	u32 wptr, tmp;
7508
 
7509
	if (rdev->wb.enabled)
7510
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7511
	else
7512
		wptr = RREG32(IH_RB_WPTR);
7513
 
7514
	if (wptr & RB_OVERFLOW) {
5179 serge 7515
		wptr &= ~RB_OVERFLOW;
5078 serge 7516
		/* When a ring buffer overflow happen start parsing interrupt
7517
		 * from the last not overwritten vector (wptr + 16). Hopefully
7518
		 * this should allow us to catchup.
7519
		 */
5179 serge 7520
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7521
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5078 serge 7522
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7523
		tmp = RREG32(IH_RB_CNTL);
7524
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7525
		WREG32(IH_RB_CNTL, tmp);
7526
	}
7527
	return (wptr & rdev->ih.ptr_mask);
7528
}
7529
 
7530
/*        CIK IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt hander (CIK).  Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 *
 * NOTE(review): in this port the queue_hotplug/queue_dp/queue_reset/
 * queue_thermal flags are set below but never consumed — upstream
 * schedules work items from them at the end of this function;
 * presumably that was stripped for KolibriOS.  Verify against the
 * original Linux driver before relying on hotplug/reset handling.
 */
int cik_irq_process(struct radeon_device *rdev)
{
	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u8 me_id, pipe_id, queue_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_reset = false;
	u32 addr, status, mc_client;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = cik_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	cik_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;

		/* KFD interrupt forwarding is disabled in this port */
//       radeon_kfd_interrupt(rdev,
//               (const void *) &rdev->ih.ring[ring_index]);

		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[0]) {
					drm_handle_vblank(rdev->ddev, 0);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[0]))
					radeon_crtc_handle_vblank(rdev, 0);
				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D1 vblank\n");

				break;
			case 1: /* D1 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D1 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[1]) {
					drm_handle_vblank(rdev->ddev, 1);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[1]))
					radeon_crtc_handle_vblank(rdev, 1);
				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D2 vblank\n");

				break;
			case 1: /* D2 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D2 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[2]) {
					drm_handle_vblank(rdev->ddev, 2);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[2]))
					radeon_crtc_handle_vblank(rdev, 2);
				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D3 vblank\n");

				break;
			case 1: /* D3 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D3 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[3]) {
					drm_handle_vblank(rdev->ddev, 3);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[3]))
					radeon_crtc_handle_vblank(rdev, 3);
				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D4 vblank\n");

				break;
			case 1: /* D4 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D4 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[4]) {
					drm_handle_vblank(rdev->ddev, 4);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[4]))
					radeon_crtc_handle_vblank(rdev, 4);
				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D5 vblank\n");

				break;
			case 1: /* D5 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D5 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[5]) {
					drm_handle_vblank(rdev->ddev, 5);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[5]))
					radeon_crtc_handle_vblank(rdev, 5);
				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D6 vblank\n");

				break;
			case 1: /* D6 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D6 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD1\n");

				break;
			case 1:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD2\n");

				break;
			case 2:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD3\n");

				break;
			case 3:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD4\n");

				break;
			case 4:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD5\n");

				break;
			case 5:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD6\n");

				break;
			case 6:
				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 1\n");

				break;
			case 7:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 2\n");

				break;
			case 8:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 3\n");

				break;
			case 9:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 4\n");

				break;
			case 10:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 5\n");

				break;
			case 11:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 6\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 96:
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			cik_vm_decode_fault(rdev, status, addr, mc_client);
			break;
		case 167: /* VCE */
			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
			switch (src_data) {
			case 0:
				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
				break;
			default:
				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
			case 2:
				/* bitwise & acts as logical AND here: both
				 * operands are 0/1 results of == comparisons */
				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 224: /* SDMA trap event */
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			DRM_DEBUG("IH: SDMA trap\n");
			switch (me_id) {
			case 0:
				switch (queue_id) {
				case 0:
					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
					break;
				case 1:
					/* XXX compute */
					break;
				case 2:
					/* XXX compute */
					break;
				}
				break;
			case 1:
				switch (queue_id) {
				case 0:
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
					break;
				case 1:
					/* XXX compute */
					break;
				case 2:
					/* XXX compute */
					break;
				}
				break;
			}
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 241: /* SDMA Privileged inst */
		case 247: /* SDMA Privileged inst */
			DRM_ERROR("Illegal instruction in SDMA command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			switch (me_id) {
			case 0:
				switch (queue_id) {
				case 0:
					queue_reset = true;
					break;
				case 1:
					/* XXX compute */
					queue_reset = true;
					break;
				case 2:
					/* XXX compute */
					queue_reset = true;
					break;
				}
				break;
			case 1:
				switch (queue_id) {
				case 0:
					queue_reset = true;
					break;
				case 1:
					/* XXX compute */
					queue_reset = true;
					break;
				case 2:
					/* XXX compute */
					queue_reset = true;
					break;
				}
				break;
			}
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = cik_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
8124
 
8125
/*
8126
 * startup/shutdown callbacks
8127
 */
8128
/**
8129
 * cik_startup - program the asic to a functional state
8130
 *
8131
 * @rdev: radeon_device pointer
8132
 *
8133
 * Programs the asic to a functional state (CIK).
8134
 * Called by cik_init() and cik_resume().
8135
 * Returns 0 for success, error for failure.
8136
 */
8137
static int cik_startup(struct radeon_device *rdev)
8138
{
8139
	struct radeon_ring *ring;
8140
	u32 nop;
8141
	int r;
8142
 
8143
	/* enable pcie gen2/3 link */
8144
	cik_pcie_gen3_enable(rdev);
8145
	/* enable aspm */
8146
	cik_program_aspm(rdev);
8147
 
8148
	/* scratch needs to be initialized before MC */
8149
	r = r600_vram_scratch_init(rdev);
8150
	if (r)
8151
		return r;
8152
 
8153
	cik_mc_program(rdev);
8154
 
8155
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8156
		r = ci_mc_load_microcode(rdev);
8157
		if (r) {
8158
			DRM_ERROR("Failed to load MC firmware!\n");
8159
			return r;
8160
		}
8161
	}
8162
 
8163
	r = cik_pcie_gart_enable(rdev);
8164
	if (r)
8165
		return r;
8166
	cik_gpu_init(rdev);
8167
 
8168
	/* allocate rlc buffers */
8169
	if (rdev->flags & RADEON_IS_IGP) {
8170
		if (rdev->family == CHIP_KAVERI) {
8171
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8172
			rdev->rlc.reg_list_size =
8173
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8174
		} else {
8175
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8176
			rdev->rlc.reg_list_size =
8177
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8178
		}
8179
	}
8180
	rdev->rlc.cs_data = ci_cs_data;
8181
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8182
	r = sumo_rlc_init(rdev);
8183
	if (r) {
8184
		DRM_ERROR("Failed to init rlc BOs!\n");
8185
		return r;
8186
	}
8187
 
8188
	/* allocate wb buffer */
8189
	r = radeon_wb_init(rdev);
8190
	if (r)
8191
		return r;
8192
 
8193
	/* allocate mec buffers */
8194
	r = cik_mec_init(rdev);
8195
	if (r) {
8196
		DRM_ERROR("Failed to init MEC BOs!\n");
8197
		return r;
8198
	}
8199
 
8200
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8201
	if (r) {
8202
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8203
		return r;
8204
	}
8205
 
8206
	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8207
	if (r) {
8208
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8209
		return r;
8210
	}
8211
 
8212
	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8213
	if (r) {
8214
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8215
		return r;
8216
	}
8217
 
8218
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8219
	if (r) {
8220
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8221
		return r;
8222
	}
8223
 
8224
	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8225
	if (r) {
8226
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8227
		return r;
8228
	}
8229
 
8230
	r = radeon_uvd_resume(rdev);
8231
	if (!r) {
8232
		r = uvd_v4_2_resume(rdev);
8233
		if (!r) {
8234
			r = radeon_fence_driver_start_ring(rdev,
8235
							   R600_RING_TYPE_UVD_INDEX);
8236
			if (r)
8237
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8238
		}
8239
	}
8240
	if (r)
8241
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8242
 
6104 serge 8243
	r = radeon_vce_resume(rdev);
8244
	if (!r) {
8245
		r = vce_v2_0_resume(rdev);
8246
		if (!r)
8247
			r = radeon_fence_driver_start_ring(rdev,
8248
							   TN_RING_TYPE_VCE1_INDEX);
8249
		if (!r)
8250
			r = radeon_fence_driver_start_ring(rdev,
8251
							   TN_RING_TYPE_VCE2_INDEX);
8252
	}
5078 serge 8253
	if (r) {
8254
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8255
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8256
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8257
	}
8258
 
8259
	/* Enable IRQ */
8260
	if (!rdev->irq.installed) {
8261
		r = radeon_irq_kms_init(rdev);
8262
		if (r)
8263
			return r;
8264
	}
8265
 
8266
	r = cik_irq_init(rdev);
8267
	if (r) {
8268
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8269
//       radeon_irq_kms_fini(rdev);
8270
		return r;
8271
	}
8272
	cik_irq_set(rdev);
8273
 
8274
	if (rdev->family == CHIP_HAWAII) {
8275
		if (rdev->new_fw)
8276
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8277
		else
8278
			nop = RADEON_CP_PACKET2;
8279
	} else {
8280
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8281
	}
8282
 
8283
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8284
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8285
			     nop);
8286
	if (r)
8287
		return r;
8288
 
8289
	/* set up the compute queues */
8290
	/* type-2 packets are deprecated on MEC, use type-3 instead */
8291
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8292
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8293
			     nop);
8294
	if (r)
8295
		return r;
8296
	ring->me = 1; /* first MEC */
8297
	ring->pipe = 0; /* first pipe */
8298
	ring->queue = 0; /* first queue */
8299
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8300
 
8301
	/* type-2 packets are deprecated on MEC, use type-3 instead */
8302
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8303
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8304
			     nop);
8305
	if (r)
8306
		return r;
8307
	/* dGPU only have 1 MEC */
8308
	ring->me = 1; /* first MEC */
8309
	ring->pipe = 0; /* first pipe */
8310
	ring->queue = 1; /* second queue */
8311
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8312
 
8313
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8314
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8315
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8316
	if (r)
8317
		return r;
8318
 
8319
	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8320
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8321
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8322
	if (r)
8323
		return r;
8324
 
8325
	r = cik_cp_resume(rdev);
8326
	if (r)
8327
		return r;
8328
 
8329
	r = cik_sdma_resume(rdev);
8330
	if (r)
8331
		return r;
8332
 
8333
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8334
	if (ring->ring_size) {
8335
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8336
				     RADEON_CP_PACKET2);
8337
		if (!r)
8338
			r = uvd_v1_0_init(rdev);
8339
		if (r)
8340
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8341
	}
6104 serge 8342
 
8343
	r = -ENOENT;
8344
 
8345
	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8346
	if (ring->ring_size)
8347
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8348
				     VCE_CMD_NO_OP);
8349
 
8350
	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8351
	if (ring->ring_size)
8352
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8353
				     VCE_CMD_NO_OP);
8354
 
8355
	if (!r)
8356
		r = vce_v1_0_init(rdev);
8357
	else if (r != -ENOENT)
8358
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8359
 
5078 serge 8360
	r = radeon_ib_pool_init(rdev);
8361
	if (r) {
8362
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8363
		return r;
8364
	}
8365
 
8366
	r = radeon_vm_manager_init(rdev);
8367
	if (r) {
8368
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8369
		return r;
8370
	}
8371
 
5271 serge 8372
//   r = radeon_kfd_resume(rdev);
8373
//   if (r)
8374
//       return r;
8375
 
5078 serge 8376
	return 0;
8377
}
8378
 
8379
 
8380
/* Plan is to move initialization in that function and use
8381
 * helper function so that radeon_device_init pretty much
8382
 * do nothing more than calling asic specific function. This
8383
 * should also allow to remove a bunch of callback function
8384
 * like vram_info.
8385
 */
8386
/**
8387
 * cik_init - asic specific driver and hw init
8388
 *
8389
 * @rdev: radeon_device pointer
8390
 *
8391
 * Setup asic specific driver variables and program the hw
8392
 * to a functional state (CIK).
8393
 * Called at driver startup.
8394
 * Returns 0 for success, errors for failure.
8395
 */
8396
int cik_init(struct radeon_device *rdev)
8397
{
8398
	struct radeon_ring *ring;
8399
	int r;
8400
 
8401
	/* Read BIOS */
8402
	if (!radeon_get_bios(rdev)) {
8403
		if (ASIC_IS_AVIVO(rdev))
8404
			return -EINVAL;
8405
	}
8406
	/* Must be an ATOMBIOS */
8407
	if (!rdev->is_atom_bios) {
8408
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8409
		return -EINVAL;
8410
	}
8411
	r = radeon_atombios_init(rdev);
8412
	if (r)
8413
		return r;
8414
 
8415
	/* Post card if necessary */
8416
	if (!radeon_card_posted(rdev)) {
8417
		if (!rdev->bios) {
8418
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8419
			return -EINVAL;
8420
		}
8421
		DRM_INFO("GPU not posted. posting now...\n");
8422
		atom_asic_init(rdev->mode_info.atom_context);
8423
	}
8424
	/* init golden registers */
8425
	cik_init_golden_registers(rdev);
8426
	/* Initialize scratch registers */
8427
	cik_scratch_init(rdev);
8428
	/* Initialize surface registers */
8429
	radeon_surface_init(rdev);
8430
	/* Initialize clocks */
8431
	radeon_get_clock_info(rdev->ddev);
8432
 
8433
	/* Fence driver */
8434
	r = radeon_fence_driver_init(rdev);
8435
	if (r)
8436
		return r;
8437
 
8438
	/* initialize memory controller */
8439
	r = cik_mc_init(rdev);
8440
	if (r)
8441
		return r;
8442
	/* Memory manager */
8443
	r = radeon_bo_init(rdev);
8444
	if (r)
8445
		return r;
8446
 
8447
	if (rdev->flags & RADEON_IS_IGP) {
8448
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8449
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8450
			r = cik_init_microcode(rdev);
8451
			if (r) {
8452
				DRM_ERROR("Failed to load firmware!\n");
8453
				return r;
8454
			}
8455
		}
8456
	} else {
8457
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8458
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8459
		    !rdev->mc_fw) {
8460
			r = cik_init_microcode(rdev);
8461
			if (r) {
8462
				DRM_ERROR("Failed to load firmware!\n");
8463
				return r;
8464
			}
8465
		}
8466
	}
8467
 
8468
	/* Initialize power management */
8469
	radeon_pm_init(rdev);
8470
 
8471
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8472
	ring->ring_obj = NULL;
8473
	r600_ring_init(rdev, ring, 1024 * 1024);
8474
 
8475
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8476
	ring->ring_obj = NULL;
8477
	r600_ring_init(rdev, ring, 1024 * 1024);
8478
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8479
	if (r)
8480
		return r;
8481
 
8482
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8483
	ring->ring_obj = NULL;
8484
	r600_ring_init(rdev, ring, 1024 * 1024);
8485
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8486
	if (r)
8487
		return r;
8488
 
8489
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8490
	ring->ring_obj = NULL;
8491
	r600_ring_init(rdev, ring, 256 * 1024);
8492
 
8493
	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8494
	ring->ring_obj = NULL;
8495
	r600_ring_init(rdev, ring, 256 * 1024);
8496
 
8497
	r = radeon_uvd_init(rdev);
8498
	if (!r) {
8499
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8500
		ring->ring_obj = NULL;
8501
		r600_ring_init(rdev, ring, 4096);
8502
	}
6104 serge 8503
 
8504
	r = radeon_vce_init(rdev);
8505
	if (!r) {
8506
		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8507
		ring->ring_obj = NULL;
8508
		r600_ring_init(rdev, ring, 4096);
8509
 
8510
		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8511
		ring->ring_obj = NULL;
8512
		r600_ring_init(rdev, ring, 4096);
8513
	}
8514
 
5078 serge 8515
	rdev->ih.ring_obj = NULL;
8516
	r600_ih_ring_init(rdev, 64 * 1024);
8517
 
8518
	r = r600_pcie_gart_init(rdev);
8519
	if (r)
8520
		return r;
8521
 
8522
	rdev->accel_working = true;
8523
	r = cik_startup(rdev);
8524
	if (r) {
8525
		dev_err(rdev->dev, "disabling GPU acceleration\n");
6104 serge 8526
		cik_cp_fini(rdev);
8527
		cik_sdma_fini(rdev);
8528
		cik_irq_fini(rdev);
8529
		sumo_rlc_fini(rdev);
8530
		cik_mec_fini(rdev);
8531
		radeon_wb_fini(rdev);
8532
		radeon_ib_pool_fini(rdev);
8533
		radeon_vm_manager_fini(rdev);
8534
		radeon_irq_kms_fini(rdev);
8535
		cik_pcie_gart_fini(rdev);
8536
		rdev->accel_working = false;
5078 serge 8537
	}
8538
 
8539
	/* Don't start up if the MC ucode is missing.
8540
	 * The default clocks and voltages before the MC ucode
8541
	 * is loaded are not suffient for advanced operations.
8542
	 */
8543
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8544
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8545
		return -EINVAL;
8546
	}
8547
 
8548
	return 0;
8549
}
8550
 
8551
/**
8552
 * cik_fini - asic specific driver and hw fini
8553
 *
8554
 * @rdev: radeon_device pointer
8555
 *
8556
 * Tear down the asic specific driver variables and program the hw
8557
 * to an idle state (CIK).
8558
 * Called at driver unload.
8559
 */
8560
void cik_fini(struct radeon_device *rdev)
8561
{
6104 serge 8562
	radeon_pm_fini(rdev);
8563
	cik_cp_fini(rdev);
8564
	cik_sdma_fini(rdev);
8565
	cik_fini_pg(rdev);
8566
	cik_fini_cg(rdev);
8567
	cik_irq_fini(rdev);
8568
	sumo_rlc_fini(rdev);
8569
	cik_mec_fini(rdev);
8570
	radeon_wb_fini(rdev);
8571
	radeon_vm_manager_fini(rdev);
8572
	radeon_ib_pool_fini(rdev);
8573
	radeon_irq_kms_fini(rdev);
8574
	uvd_v1_0_fini(rdev);
8575
	radeon_uvd_fini(rdev);
8576
	radeon_vce_fini(rdev);
8577
	cik_pcie_gart_fini(rdev);
8578
	r600_vram_scratch_fini(rdev);
8579
	radeon_gem_fini(rdev);
8580
	radeon_fence_driver_fini(rdev);
8581
	radeon_bo_fini(rdev);
8582
	radeon_atombios_fini(rdev);
8583
	kfree(rdev->bios);
5078 serge 8584
	rdev->bios = NULL;
8585
}
8586
 
8587
void dce8_program_fmt(struct drm_encoder *encoder)
8588
{
8589
	struct drm_device *dev = encoder->dev;
8590
	struct radeon_device *rdev = dev->dev_private;
8591
	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8592
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8593
	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8594
	int bpc = 0;
8595
	u32 tmp = 0;
8596
	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8597
 
8598
	if (connector) {
8599
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8600
		bpc = radeon_get_monitor_bpc(connector);
8601
		dither = radeon_connector->dither;
8602
	}
8603
 
8604
	/* LVDS/eDP FMT is set up by atom */
8605
	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8606
		return;
8607
 
8608
	/* not needed for analog */
8609
	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8610
	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8611
		return;
8612
 
8613
	if (bpc == 0)
8614
		return;
8615
 
8616
	switch (bpc) {
8617
	case 6:
8618
		if (dither == RADEON_FMT_DITHER_ENABLE)
8619
			/* XXX sort out optimal dither settings */
8620
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8621
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8622
		else
8623
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8624
		break;
8625
	case 8:
8626
		if (dither == RADEON_FMT_DITHER_ENABLE)
8627
			/* XXX sort out optimal dither settings */
8628
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8629
				FMT_RGB_RANDOM_ENABLE |
8630
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8631
		else
8632
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8633
		break;
8634
	case 10:
8635
		if (dither == RADEON_FMT_DITHER_ENABLE)
8636
			/* XXX sort out optimal dither settings */
8637
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8638
				FMT_RGB_RANDOM_ENABLE |
8639
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8640
		else
8641
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8642
		break;
8643
	default:
8644
		/* not needed */
8645
		break;
8646
	}
8647
 
8648
	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8649
}
8650
 
8651
/* display watermark setup */
8652
/**
8653
 * dce8_line_buffer_adjust - Set up the line buffer
8654
 *
8655
 * @rdev: radeon_device pointer
8656
 * @radeon_crtc: the selected display controller
8657
 * @mode: the current display mode on the selected display
8658
 * controller
8659
 *
8660
 * Setup up the line buffer allocation for
8661
 * the selected display controller (CIK).
8662
 * Returns the line buffer size in pixels.
8663
 */
8664
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8665
				   struct radeon_crtc *radeon_crtc,
8666
				   struct drm_display_mode *mode)
8667
{
8668
	u32 tmp, buffer_alloc, i;
8669
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8670
	/*
8671
	 * Line Buffer Setup
8672
	 * There are 6 line buffers, one for each display controllers.
8673
	 * There are 3 partitions per LB. Select the number of partitions
8674
	 * to enable based on the display width.  For display widths larger
8675
	 * than 4096, you need use to use 2 display controllers and combine
8676
	 * them using the stereo blender.
8677
	 */
8678
	if (radeon_crtc->base.enabled && mode) {
8679
		if (mode->crtc_hdisplay < 1920) {
8680
			tmp = 1;
8681
			buffer_alloc = 2;
8682
		} else if (mode->crtc_hdisplay < 2560) {
8683
			tmp = 2;
8684
			buffer_alloc = 2;
8685
		} else if (mode->crtc_hdisplay < 4096) {
8686
			tmp = 0;
8687
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8688
		} else {
8689
			DRM_DEBUG_KMS("Mode too big for LB!\n");
8690
			tmp = 0;
8691
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8692
		}
8693
	} else {
8694
		tmp = 1;
8695
		buffer_alloc = 0;
8696
	}
8697
 
8698
	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8699
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8700
 
8701
	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8702
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8703
	for (i = 0; i < rdev->usec_timeout; i++) {
8704
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8705
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8706
			break;
8707
		udelay(1);
8708
	}
8709
 
8710
	if (radeon_crtc->base.enabled && mode) {
8711
		switch (tmp) {
8712
		case 0:
8713
		default:
8714
			return 4096 * 2;
8715
		case 1:
8716
			return 1920 * 2;
8717
		case 2:
8718
			return 2560 * 2;
8719
		}
8720
	}
8721
 
8722
	/* controller not enabled, so no lb used */
8723
	return 0;
8724
}
8725
 
8726
/**
8727
 * cik_get_number_of_dram_channels - get the number of dram channels
8728
 *
8729
 * @rdev: radeon_device pointer
8730
 *
8731
 * Look up the number of video ram channels (CIK).
8732
 * Used for display watermark bandwidth calculations
8733
 * Returns the number of dram channels
8734
 */
8735
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8736
{
8737
	u32 tmp = RREG32(MC_SHARED_CHMAP);
8738
 
8739
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8740
	case 0:
8741
	default:
8742
		return 1;
8743
	case 1:
8744
		return 2;
8745
	case 2:
8746
		return 4;
8747
	case 3:
8748
		return 8;
8749
	case 4:
8750
		return 3;
8751
	case 5:
8752
		return 6;
8753
	case 6:
8754
		return 10;
8755
	case 7:
8756
		return 12;
8757
	case 8:
8758
		return 16;
8759
	}
8760
}
8761
 
8762
struct dce8_wm_params {
8763
	u32 dram_channels; /* number of dram channels */
8764
	u32 yclk;          /* bandwidth per dram data pin in kHz */
8765
	u32 sclk;          /* engine clock in kHz */
8766
	u32 disp_clk;      /* display clock in kHz */
8767
	u32 src_width;     /* viewport width */
8768
	u32 active_time;   /* active display time in ns */
8769
	u32 blank_time;    /* blank time in ns */
8770
	bool interlaced;    /* mode is interlaced */
8771
	fixed20_12 vsc;    /* vertical scale ratio */
8772
	u32 num_heads;     /* number of active crtcs */
8773
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8774
	u32 lb_size;       /* line buffer allocated to pipe */
8775
	u32 vtaps;         /* vertical scaler taps */
8776
};
8777
 
8778
/**
8779
 * dce8_dram_bandwidth - get the dram bandwidth
8780
 *
8781
 * @wm: watermark calculation data
8782
 *
8783
 * Calculate the raw dram bandwidth (CIK).
8784
 * Used for display watermark bandwidth calculations
8785
 * Returns the dram bandwidth in MBytes/s
8786
 */
8787
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8788
{
8789
	/* Calculate raw DRAM Bandwidth */
8790
	fixed20_12 dram_efficiency; /* 0.7 */
8791
	fixed20_12 yclk, dram_channels, bandwidth;
8792
	fixed20_12 a;
8793
 
8794
	a.full = dfixed_const(1000);
8795
	yclk.full = dfixed_const(wm->yclk);
8796
	yclk.full = dfixed_div(yclk, a);
8797
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8798
	a.full = dfixed_const(10);
8799
	dram_efficiency.full = dfixed_const(7);
8800
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8801
	bandwidth.full = dfixed_mul(dram_channels, yclk);
8802
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8803
 
8804
	return dfixed_trunc(bandwidth);
8805
}
8806
 
8807
/**
8808
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8809
 *
8810
 * @wm: watermark calculation data
8811
 *
8812
 * Calculate the dram bandwidth used for display (CIK).
8813
 * Used for display watermark bandwidth calculations
8814
 * Returns the dram bandwidth for display in MBytes/s
8815
 */
8816
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8817
{
8818
	/* Calculate DRAM Bandwidth and the part allocated to display. */
8819
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8820
	fixed20_12 yclk, dram_channels, bandwidth;
8821
	fixed20_12 a;
8822
 
8823
	a.full = dfixed_const(1000);
8824
	yclk.full = dfixed_const(wm->yclk);
8825
	yclk.full = dfixed_div(yclk, a);
8826
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8827
	a.full = dfixed_const(10);
8828
	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8829
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8830
	bandwidth.full = dfixed_mul(dram_channels, yclk);
8831
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8832
 
8833
	return dfixed_trunc(bandwidth);
8834
}
8835
 
8836
/**
8837
 * dce8_data_return_bandwidth - get the data return bandwidth
8838
 *
8839
 * @wm: watermark calculation data
8840
 *
8841
 * Calculate the data return bandwidth used for display (CIK).
8842
 * Used for display watermark bandwidth calculations
8843
 * Returns the data return bandwidth in MBytes/s
8844
 */
8845
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8846
{
8847
	/* Calculate the display Data return Bandwidth */
8848
	fixed20_12 return_efficiency; /* 0.8 */
8849
	fixed20_12 sclk, bandwidth;
8850
	fixed20_12 a;
8851
 
8852
	a.full = dfixed_const(1000);
8853
	sclk.full = dfixed_const(wm->sclk);
8854
	sclk.full = dfixed_div(sclk, a);
8855
	a.full = dfixed_const(10);
8856
	return_efficiency.full = dfixed_const(8);
8857
	return_efficiency.full = dfixed_div(return_efficiency, a);
8858
	a.full = dfixed_const(32);
8859
	bandwidth.full = dfixed_mul(a, sclk);
8860
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8861
 
8862
	return dfixed_trunc(bandwidth);
8863
}
8864
 
8865
/**
8866
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8867
 *
8868
 * @wm: watermark calculation data
8869
 *
8870
 * Calculate the dmif bandwidth used for display (CIK).
8871
 * Used for display watermark bandwidth calculations
8872
 * Returns the dmif bandwidth in MBytes/s
8873
 */
8874
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8875
{
8876
	/* Calculate the DMIF Request Bandwidth */
8877
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8878
	fixed20_12 disp_clk, bandwidth;
8879
	fixed20_12 a, b;
8880
 
8881
	a.full = dfixed_const(1000);
8882
	disp_clk.full = dfixed_const(wm->disp_clk);
8883
	disp_clk.full = dfixed_div(disp_clk, a);
8884
	a.full = dfixed_const(32);
8885
	b.full = dfixed_mul(a, disp_clk);
8886
 
8887
	a.full = dfixed_const(10);
8888
	disp_clk_request_efficiency.full = dfixed_const(8);
8889
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8890
 
8891
	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8892
 
8893
	return dfixed_trunc(bandwidth);
8894
}
8895
 
8896
/**
8897
 * dce8_available_bandwidth - get the min available bandwidth
8898
 *
8899
 * @wm: watermark calculation data
8900
 *
8901
 * Calculate the min available bandwidth used for display (CIK).
8902
 * Used for display watermark bandwidth calculations
8903
 * Returns the min available bandwidth in MBytes/s
8904
 */
8905
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8906
{
8907
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8908
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8909
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8910
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8911
 
8912
	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8913
}
8914
 
8915
/**
8916
 * dce8_average_bandwidth - get the average available bandwidth
8917
 *
8918
 * @wm: watermark calculation data
8919
 *
8920
 * Calculate the average available bandwidth used for display (CIK).
8921
 * Used for display watermark bandwidth calculations
8922
 * Returns the average available bandwidth in MBytes/s
8923
 */
8924
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8925
{
8926
	/* Calculate the display mode Average Bandwidth
8927
	 * DisplayMode should contain the source and destination dimensions,
8928
	 * timing, etc.
8929
	 */
8930
	fixed20_12 bpp;
8931
	fixed20_12 line_time;
8932
	fixed20_12 src_width;
8933
	fixed20_12 bandwidth;
8934
	fixed20_12 a;
8935
 
8936
	a.full = dfixed_const(1000);
8937
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8938
	line_time.full = dfixed_div(line_time, a);
8939
	bpp.full = dfixed_const(wm->bytes_per_pixel);
8940
	src_width.full = dfixed_const(wm->src_width);
8941
	bandwidth.full = dfixed_mul(src_width, bpp);
8942
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8943
	bandwidth.full = dfixed_div(bandwidth, line_time);
8944
 
8945
	return dfixed_trunc(bandwidth);
8946
}
8947
 
8948
/**
8949
 * dce8_latency_watermark - get the latency watermark
8950
 *
8951
 * @wm: watermark calculation data
8952
 *
8953
 * Calculate the latency watermark (CIK).
8954
 * Used for display watermark bandwidth calculations
8955
 * Returns the latency watermark in ns
8956
 */
8957
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8958
{
8959
	/* First calculate the latency in ns */
8960
	u32 mc_latency = 2000; /* 2000 ns. */
8961
	u32 available_bandwidth = dce8_available_bandwidth(wm);
8962
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8963
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8964
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8965
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8966
		(wm->num_heads * cursor_line_pair_return_time);
8967
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8968
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8969
	u32 tmp, dmif_size = 12288;
8970
	fixed20_12 a, b, c;
8971
 
8972
	if (wm->num_heads == 0)
8973
		return 0;
8974
 
8975
	a.full = dfixed_const(2);
8976
	b.full = dfixed_const(1);
8977
	if ((wm->vsc.full > a.full) ||
8978
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8979
	    (wm->vtaps >= 5) ||
8980
	    ((wm->vsc.full >= a.full) && wm->interlaced))
8981
		max_src_lines_per_dst_line = 4;
8982
	else
8983
		max_src_lines_per_dst_line = 2;
8984
 
8985
	a.full = dfixed_const(available_bandwidth);
8986
	b.full = dfixed_const(wm->num_heads);
8987
	a.full = dfixed_div(a, b);
8988
 
8989
	b.full = dfixed_const(mc_latency + 512);
8990
	c.full = dfixed_const(wm->disp_clk);
8991
	b.full = dfixed_div(b, c);
8992
 
8993
	c.full = dfixed_const(dmif_size);
8994
	b.full = dfixed_div(c, b);
8995
 
8996
	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8997
 
8998
	b.full = dfixed_const(1000);
8999
	c.full = dfixed_const(wm->disp_clk);
9000
	b.full = dfixed_div(c, b);
9001
	c.full = dfixed_const(wm->bytes_per_pixel);
9002
	b.full = dfixed_mul(b, c);
9003
 
9004
	lb_fill_bw = min(tmp, dfixed_trunc(b));
9005
 
9006
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9007
	b.full = dfixed_const(1000);
9008
	c.full = dfixed_const(lb_fill_bw);
9009
	b.full = dfixed_div(c, b);
9010
	a.full = dfixed_div(a, b);
9011
	line_fill_time = dfixed_trunc(a);
9012
 
9013
	if (line_fill_time < wm->active_time)
9014
		return latency;
9015
	else
9016
		return latency + (line_fill_time - wm->active_time);
9017
 
9018
}
9019
 
9020
/**
9021
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9022
 * average and available dram bandwidth
9023
 *
9024
 * @wm: watermark calculation data
9025
 *
9026
 * Check if the display average bandwidth fits in the display
9027
 * dram bandwidth (CIK).
9028
 * Used for display watermark bandwidth calculations
9029
 * Returns true if the display fits, false if not.
9030
 */
9031
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9032
{
9033
	if (dce8_average_bandwidth(wm) <=
9034
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9035
		return true;
9036
	else
9037
		return false;
9038
}
9039
 
9040
/**
9041
 * dce8_average_bandwidth_vs_available_bandwidth - check
9042
 * average and available bandwidth
9043
 *
9044
 * @wm: watermark calculation data
9045
 *
9046
 * Check if the display average bandwidth fits in the display
9047
 * available bandwidth (CIK).
9048
 * Used for display watermark bandwidth calculations
9049
 * Returns true if the display fits, false if not.
9050
 */
9051
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9052
{
9053
	if (dce8_average_bandwidth(wm) <=
9054
	    (dce8_available_bandwidth(wm) / wm->num_heads))
9055
		return true;
9056
	else
9057
		return false;
9058
}
9059
 
9060
/**
9061
 * dce8_check_latency_hiding - check latency hiding
9062
 *
9063
 * @wm: watermark calculation data
9064
 *
9065
 * Check latency hiding (CIK).
9066
 * Used for display watermark bandwidth calculations
9067
 * Returns true if the display fits, false if not.
9068
 */
9069
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9070
{
9071
	u32 lb_partitions = wm->lb_size / wm->src_width;
9072
	u32 line_time = wm->active_time + wm->blank_time;
9073
	u32 latency_tolerant_lines;
9074
	u32 latency_hiding;
9075
	fixed20_12 a;
9076
 
9077
	a.full = dfixed_const(1);
9078
	if (wm->vsc.full > a.full)
9079
		latency_tolerant_lines = 1;
9080
	else {
9081
		if (lb_partitions <= (wm->vtaps + 1))
9082
			latency_tolerant_lines = 1;
9083
		else
9084
			latency_tolerant_lines = 2;
9085
	}
9086
 
9087
	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9088
 
9089
	if (dce8_latency_watermark(wm) <= latency_hiding)
9090
		return true;
9091
	else
9092
		return false;
9093
}
9094
 
9095
/**
9096
 * dce8_program_watermarks - program display watermarks
9097
 *
9098
 * @rdev: radeon_device pointer
9099
 * @radeon_crtc: the selected display controller
9100
 * @lb_size: line buffer size
9101
 * @num_heads: number of display controllers in use
9102
 *
9103
 * Calculate and program the display watermarks for the
9104
 * selected display controller (CIK).
9105
 */
9106
static void dce8_program_watermarks(struct radeon_device *rdev,
9107
				    struct radeon_crtc *radeon_crtc,
9108
				    u32 lb_size, u32 num_heads)
9109
{
9110
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9111
	struct dce8_wm_params wm_low, wm_high;
9112
	u32 pixel_period;
9113
	u32 line_time = 0;
9114
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9115
	u32 tmp, wm_mask;
9116
 
9117
	if (radeon_crtc->base.enabled && num_heads && mode) {
9118
		pixel_period = 1000000 / (u32)mode->clock;
9119
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9120
 
9121
		/* watermark for high clocks */
9122
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9123
		    rdev->pm.dpm_enabled) {
9124
			wm_high.yclk =
9125
				radeon_dpm_get_mclk(rdev, false) * 10;
9126
			wm_high.sclk =
9127
				radeon_dpm_get_sclk(rdev, false) * 10;
9128
		} else {
9129
			wm_high.yclk = rdev->pm.current_mclk * 10;
9130
			wm_high.sclk = rdev->pm.current_sclk * 10;
9131
		}
9132
 
9133
		wm_high.disp_clk = mode->clock;
9134
		wm_high.src_width = mode->crtc_hdisplay;
9135
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9136
		wm_high.blank_time = line_time - wm_high.active_time;
9137
		wm_high.interlaced = false;
9138
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9139
			wm_high.interlaced = true;
9140
		wm_high.vsc = radeon_crtc->vsc;
9141
		wm_high.vtaps = 1;
9142
		if (radeon_crtc->rmx_type != RMX_OFF)
9143
			wm_high.vtaps = 2;
9144
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9145
		wm_high.lb_size = lb_size;
9146
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9147
		wm_high.num_heads = num_heads;
9148
 
9149
		/* set for high clocks */
9150
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9151
 
9152
		/* possibly force display priority to high */
9153
		/* should really do this at mode validation time... */
9154
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9155
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9156
		    !dce8_check_latency_hiding(&wm_high) ||
9157
		    (rdev->disp_priority == 2)) {
9158
			DRM_DEBUG_KMS("force priority to high\n");
9159
		}
9160
 
9161
		/* watermark for low clocks */
9162
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9163
		    rdev->pm.dpm_enabled) {
9164
			wm_low.yclk =
9165
				radeon_dpm_get_mclk(rdev, true) * 10;
9166
			wm_low.sclk =
9167
				radeon_dpm_get_sclk(rdev, true) * 10;
9168
		} else {
9169
			wm_low.yclk = rdev->pm.current_mclk * 10;
9170
			wm_low.sclk = rdev->pm.current_sclk * 10;
9171
		}
9172
 
9173
		wm_low.disp_clk = mode->clock;
9174
		wm_low.src_width = mode->crtc_hdisplay;
9175
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9176
		wm_low.blank_time = line_time - wm_low.active_time;
9177
		wm_low.interlaced = false;
9178
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9179
			wm_low.interlaced = true;
9180
		wm_low.vsc = radeon_crtc->vsc;
9181
		wm_low.vtaps = 1;
9182
		if (radeon_crtc->rmx_type != RMX_OFF)
9183
			wm_low.vtaps = 2;
9184
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9185
		wm_low.lb_size = lb_size;
9186
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9187
		wm_low.num_heads = num_heads;
9188
 
9189
		/* set for low clocks */
9190
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9191
 
9192
		/* possibly force display priority to high */
9193
		/* should really do this at mode validation time... */
9194
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9195
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9196
		    !dce8_check_latency_hiding(&wm_low) ||
9197
		    (rdev->disp_priority == 2)) {
9198
			DRM_DEBUG_KMS("force priority to high\n");
9199
		}
6104 serge 9200
 
9201
		/* Save number of lines the linebuffer leads before the scanout */
9202
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
5078 serge 9203
	}
9204
 
9205
	/* select wm A */
9206
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9207
	tmp = wm_mask;
9208
	tmp &= ~LATENCY_WATERMARK_MASK(3);
9209
	tmp |= LATENCY_WATERMARK_MASK(1);
9210
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9211
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9212
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9213
		LATENCY_HIGH_WATERMARK(line_time)));
9214
	/* select wm B */
9215
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9216
	tmp &= ~LATENCY_WATERMARK_MASK(3);
9217
	tmp |= LATENCY_WATERMARK_MASK(2);
9218
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9219
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9220
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9221
		LATENCY_HIGH_WATERMARK(line_time)));
9222
	/* restore original selection */
9223
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9224
 
9225
	/* save values for DPM */
9226
	radeon_crtc->line_time = line_time;
9227
	radeon_crtc->wm_high = latency_watermark_a;
9228
	radeon_crtc->wm_low = latency_watermark_b;
9229
}
9230
 
9231
/**
9232
 * dce8_bandwidth_update - program display watermarks
9233
 *
9234
 * @rdev: radeon_device pointer
9235
 *
9236
 * Calculate and program the display watermarks and line
9237
 * buffer allocation (CIK).
9238
 */
9239
void dce8_bandwidth_update(struct radeon_device *rdev)
9240
{
9241
	struct drm_display_mode *mode = NULL;
9242
	u32 num_heads = 0, lb_size;
9243
	int i;
9244
 
5271 serge 9245
	if (!rdev->mode_info.mode_config_initialized)
9246
		return;
9247
 
5078 serge 9248
	radeon_update_display_priority(rdev);
9249
 
9250
	for (i = 0; i < rdev->num_crtc; i++) {
9251
		if (rdev->mode_info.crtcs[i]->base.enabled)
9252
			num_heads++;
9253
	}
9254
	for (i = 0; i < rdev->num_crtc; i++) {
9255
		mode = &rdev->mode_info.crtcs[i]->base.mode;
9256
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9257
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9258
	}
9259
}
9260
 
9261
/**
9262
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9263
 *
9264
 * @rdev: radeon_device pointer
9265
 *
9266
 * Fetches a GPU clock counter snapshot (SI).
9267
 * Returns the 64 bit clock counter snapshot.
9268
 */
9269
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9270
{
9271
	uint64_t clock;
9272
 
9273
	mutex_lock(&rdev->gpu_clock_mutex);
9274
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9275
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7146 serge 9276
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5078 serge 9277
	mutex_unlock(&rdev->gpu_clock_mutex);
9278
	return clock;
9279
}
9280
 
9281
/**
 * cik_set_uvd_clock - program one UVD clock (VCLK or DCLK)
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency
 * @cntl_reg: SMC offset of the clock control register
 * @status_reg: SMC offset of the clock status register
 *
 * Looks up the post divider for @clock via the atom tables,
 * programs it, then polls until the hardware reports the clock
 * as settled.
 * Returns 0 on success, -ETIMEDOUT if the status bit never sets,
 * or the error from radeon_atom_get_clock_dividers().
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	/* NOTE: the original export mangled this argument to "÷rs"
	 * (an HTML-entity decoding of "&dividers"); restored here. */
	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	/* drop the direct control bypass and program the new post divider */
	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to 100 * 10ms for the clock to report ready */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9308
 
9309
/* Program both UVD clocks; stops at the first failure. */
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	return cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
}
9320
 
9321
/**
 * cik_set_vce_clocks - program the VCE ECLK
 *
 * @rdev: radeon_device pointer
 * @evclk: requested evclk (currently unused; only ecclk is programmed)
 * @ecclk: requested ecclk
 *
 * Waits for the ECLK to be stable, programs the post divider for
 * @ecclk, then waits for it to settle again.
 * Returns 0 on success, -ETIMEDOUT if either wait expires, or the
 * error from radeon_atom_get_clock_dividers().
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	/* NOTE: the original export mangled this argument to "÷rs"
	 * (an HTML-entity decoding of "&dividers"); restored here. */
	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	/* make sure the clock is stable before touching the divider */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	/* wait for the new divider to take effect */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9355
 
9356
/**
 * cik_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If both the GPU and the upstream bridge support a faster link rate
 * than the current one, retrain the link (including the gen3
 * re-equalization sequence) and request the highest supported speed.
 * Does nothing for IGPs, non-PCIE parts, or when disabled via the
 * radeon.pcie_gen2 module parameter.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* radeon.pcie_gen2=0 disables all link speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no discrete PCIE link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither 5.0 GT/s nor 8.0 GT/s is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* current_data_rate: 1 = gen2, 2 = gen3 (per the checks below) */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends need an express capability to poke link control */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hw autonomous width disable) state
			 * on both ends so it can be restored after EQ */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the maximum detected width
			 * before changing speed, if renegotiation is possible */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore bit 4 and bits 9-11 of link control 2
				 * from the saved values */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce for the next attempt */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* request the target rate in the GPU's link control 2 register */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for hw to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9512
 
9513
/**
 * cik_program_aspm - configure PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Sets up the L0s/L1 link power state timers and, when supported,
 * PLL power-down in L1 and CLKREQ#-based clocking.  Skipped entirely
 * for IGPs, non-PCIE parts, or when disabled via the radeon.aspm
 * module parameter.  The disable_* locals act as build-time policy
 * knobs; all are currently false (everything enabled).
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS (fast training sequence) count;
	 * each write below is skipped if the register already matches */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PCIE pad PLLs to power down in the
			 * off/txs2 states on both PB0 and PB1 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				/* CLKREQ# is only used if the upstream bridge
				 * advertises clock power management */
				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* reroute the thermal monitor clock selects */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				/* NOTE(review): on a reverse xmit/rcvr link with
				 * N_FTS maxed, the L0s inactivity value is cleared
				 * — presumably to back out L0s; confirm against
				 * the register docs */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}