/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
//#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
 
#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769

MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
 
/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}
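/*
 * Worked example (illustrative): a CTF_TEMP field of 0x45 (decimal 69)
 * yields 69 * 1000 = 69000 millidegrees; any reading with bit 9 set is
 * clamped to 255 degrees, i.e. 255000.
 */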
 
84
#define TAHITI_IO_MC_REGS_SIZE 36
85
 
86
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
87
	{0x0000006f, 0x03044000},
88
	{0x00000070, 0x0480c018},
89
	{0x00000071, 0x00000040},
90
	{0x00000072, 0x01000000},
91
	{0x00000074, 0x000000ff},
92
	{0x00000075, 0x00143400},
93
	{0x00000076, 0x08ec0800},
94
	{0x00000077, 0x040000cc},
95
	{0x00000079, 0x00000000},
96
	{0x0000007a, 0x21000409},
97
	{0x0000007c, 0x00000000},
98
	{0x0000007d, 0xe8000000},
99
	{0x0000007e, 0x044408a8},
100
	{0x0000007f, 0x00000003},
101
	{0x00000080, 0x00000000},
102
	{0x00000081, 0x01000000},
103
	{0x00000082, 0x02000000},
104
	{0x00000083, 0x00000000},
105
	{0x00000084, 0xe3f3e4f4},
106
	{0x00000085, 0x00052024},
107
	{0x00000087, 0x00000000},
108
	{0x00000088, 0x66036603},
109
	{0x00000089, 0x01000000},
110
	{0x0000008b, 0x1c0a0000},
111
	{0x0000008c, 0xff010000},
112
	{0x0000008e, 0xffffefff},
113
	{0x0000008f, 0xfff3efff},
114
	{0x00000090, 0xfff3efbf},
115
	{0x00000094, 0x00101101},
116
	{0x00000095, 0x00000fff},
117
	{0x00000096, 0x00116fff},
118
	{0x00000097, 0x60010000},
119
	{0x00000098, 0x10010000},
120
	{0x00000099, 0x00006000},
121
	{0x0000009a, 0x00001000},
122
	{0x0000009f, 0x00a77400}
123
};
124
 
125
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
126
	{0x0000006f, 0x03044000},
127
	{0x00000070, 0x0480c018},
128
	{0x00000071, 0x00000040},
129
	{0x00000072, 0x01000000},
130
	{0x00000074, 0x000000ff},
131
	{0x00000075, 0x00143400},
132
	{0x00000076, 0x08ec0800},
133
	{0x00000077, 0x040000cc},
134
	{0x00000079, 0x00000000},
135
	{0x0000007a, 0x21000409},
136
	{0x0000007c, 0x00000000},
137
	{0x0000007d, 0xe8000000},
138
	{0x0000007e, 0x044408a8},
139
	{0x0000007f, 0x00000003},
140
	{0x00000080, 0x00000000},
141
	{0x00000081, 0x01000000},
142
	{0x00000082, 0x02000000},
143
	{0x00000083, 0x00000000},
144
	{0x00000084, 0xe3f3e4f4},
145
	{0x00000085, 0x00052024},
146
	{0x00000087, 0x00000000},
147
	{0x00000088, 0x66036603},
148
	{0x00000089, 0x01000000},
149
	{0x0000008b, 0x1c0a0000},
150
	{0x0000008c, 0xff010000},
151
	{0x0000008e, 0xffffefff},
152
	{0x0000008f, 0xfff3efff},
153
	{0x00000090, 0xfff3efbf},
154
	{0x00000094, 0x00101101},
155
	{0x00000095, 0x00000fff},
156
	{0x00000096, 0x00116fff},
157
	{0x00000097, 0x60010000},
158
	{0x00000098, 0x10010000},
159
	{0x00000099, 0x00006000},
160
	{0x0000009a, 0x00001000},
161
	{0x0000009f, 0x00a47400}
162
};
163
 
164
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
165
	{0x0000006f, 0x03044000},
166
	{0x00000070, 0x0480c018},
167
	{0x00000071, 0x00000040},
168
	{0x00000072, 0x01000000},
169
	{0x00000074, 0x000000ff},
170
	{0x00000075, 0x00143400},
171
	{0x00000076, 0x08ec0800},
172
	{0x00000077, 0x040000cc},
173
	{0x00000079, 0x00000000},
174
	{0x0000007a, 0x21000409},
175
	{0x0000007c, 0x00000000},
176
	{0x0000007d, 0xe8000000},
177
	{0x0000007e, 0x044408a8},
178
	{0x0000007f, 0x00000003},
179
	{0x00000080, 0x00000000},
180
	{0x00000081, 0x01000000},
181
	{0x00000082, 0x02000000},
182
	{0x00000083, 0x00000000},
183
	{0x00000084, 0xe3f3e4f4},
184
	{0x00000085, 0x00052024},
185
	{0x00000087, 0x00000000},
186
	{0x00000088, 0x66036603},
187
	{0x00000089, 0x01000000},
188
	{0x0000008b, 0x1c0a0000},
189
	{0x0000008c, 0xff010000},
190
	{0x0000008e, 0xffffefff},
191
	{0x0000008f, 0xfff3efff},
192
	{0x00000090, 0xfff3efbf},
193
	{0x00000094, 0x00101101},
194
	{0x00000095, 0x00000fff},
195
	{0x00000096, 0x00116fff},
196
	{0x00000097, 0x60010000},
197
	{0x00000098, 0x10010000},
198
	{0x00000099, 0x00006000},
199
	{0x0000009a, 0x00001000},
200
	{0x0000009f, 0x00a37400}
201
};
202
 
203
/* ucode loading */
204
static int si_mc_load_microcode(struct radeon_device *rdev)
205
{
206
	const __be32 *fw_data;
207
	u32 running, blackout = 0;
208
	u32 *io_mc_regs;
209
	int i, ucode_size, regs_size;
210
 
211
	if (!rdev->mc_fw)
212
		return -EINVAL;
213
 
214
	switch (rdev->family) {
215
	case CHIP_TAHITI:
216
		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
217
		ucode_size = SI_MC_UCODE_SIZE;
218
		regs_size = TAHITI_IO_MC_REGS_SIZE;
219
		break;
220
	case CHIP_PITCAIRN:
221
		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
222
		ucode_size = SI_MC_UCODE_SIZE;
223
		regs_size = TAHITI_IO_MC_REGS_SIZE;
224
		break;
225
	case CHIP_VERDE:
226
	default:
227
		io_mc_regs = (u32 *)&verde_io_mc_regs;
228
		ucode_size = SI_MC_UCODE_SIZE;
229
		regs_size = TAHITI_IO_MC_REGS_SIZE;
230
		break;
231
	}
232
 
233
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
234
 
235
	if (running == 0) {
236
		if (running) {
237
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
238
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
239
		}
240
 
241
		/* reset the engine and set to writable */
242
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
243
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
244
 
245
		/* load mc io regs */
246
		for (i = 0; i < regs_size; i++) {
247
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
248
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
249
		}
250
		/* load the MC ucode */
251
		fw_data = (const __be32 *)rdev->mc_fw->data;
252
		for (i = 0; i < ucode_size; i++)
253
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
254
 
255
		/* put the engine back into the active state */
256
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
257
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
258
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
259
 
260
		/* wait for training to complete */
261
		for (i = 0; i < rdev->usec_timeout; i++) {
262
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
263
				break;
264
			udelay(1);
265
		}
266
		for (i = 0; i < rdev->usec_timeout; i++) {
267
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
268
				break;
269
			udelay(1);
270
		}
271
 
272
		if (running)
273
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
274
	}
275
 
276
	return 0;
277
}
278
 
279
static int si_init_microcode(struct radeon_device *rdev)
280
{
281
	struct platform_device *pdev;
282
	const char *chip_name;
283
	const char *rlc_chip_name;
284
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
285
	char fw_name[30];
286
	int err;
287
 
288
	DRM_DEBUG("\n");
289
 
290
	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
291
	err = IS_ERR(pdev);
292
	if (err) {
293
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
294
		return -EINVAL;
295
	}
296
 
297
	switch (rdev->family) {
298
	case CHIP_TAHITI:
299
		chip_name = "TAHITI";
300
		rlc_chip_name = "TAHITI";
301
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
302
		me_req_size = SI_PM4_UCODE_SIZE * 4;
303
		ce_req_size = SI_CE_UCODE_SIZE * 4;
304
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
305
		mc_req_size = SI_MC_UCODE_SIZE * 4;
306
		break;
307
	case CHIP_PITCAIRN:
308
		chip_name = "PITCAIRN";
309
		rlc_chip_name = "PITCAIRN";
310
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
311
		me_req_size = SI_PM4_UCODE_SIZE * 4;
312
		ce_req_size = SI_CE_UCODE_SIZE * 4;
313
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
314
		mc_req_size = SI_MC_UCODE_SIZE * 4;
315
		break;
316
	case CHIP_VERDE:
317
		chip_name = "VERDE";
318
		rlc_chip_name = "VERDE";
319
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
320
		me_req_size = SI_PM4_UCODE_SIZE * 4;
321
		ce_req_size = SI_CE_UCODE_SIZE * 4;
322
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
323
		mc_req_size = SI_MC_UCODE_SIZE * 4;
324
		break;
325
	default: BUG();
326
	}
327
 
328
	DRM_INFO("Loading %s Microcode\n", chip_name);
329
 
330
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
331
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
332
	if (err)
333
		goto out;
334
	if (rdev->pfp_fw->size != pfp_req_size) {
335
		printk(KERN_ERR
336
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
337
		       rdev->pfp_fw->size, fw_name);
338
		err = -EINVAL;
339
		goto out;
340
	}
341
 
342
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
343
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
344
	if (err)
345
		goto out;
346
	if (rdev->me_fw->size != me_req_size) {
347
		printk(KERN_ERR
348
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
349
		       rdev->me_fw->size, fw_name);
350
		err = -EINVAL;
351
	}
352
 
353
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
354
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
355
	if (err)
356
		goto out;
357
	if (rdev->ce_fw->size != ce_req_size) {
358
		printk(KERN_ERR
359
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
360
		       rdev->ce_fw->size, fw_name);
361
		err = -EINVAL;
362
	}
363
 
364
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
365
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
366
	if (err)
367
		goto out;
368
	if (rdev->rlc_fw->size != rlc_req_size) {
369
		printk(KERN_ERR
370
		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
371
		       rdev->rlc_fw->size, fw_name);
372
		err = -EINVAL;
373
	}
374
 
375
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
376
	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
377
	if (err)
378
		goto out;
379
	if (rdev->mc_fw->size != mc_req_size) {
380
		printk(KERN_ERR
381
		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
382
		       rdev->mc_fw->size, fw_name);
383
		err = -EINVAL;
384
	}
385
 
386
out:
387
	platform_device_unregister(pdev);
388
 
389
	if (err) {
390
		if (err != -EINVAL)
391
			printk(KERN_ERR
392
			       "si_cp: Failed to load firmware \"%s\"\n",
393
			       fw_name);
394
		release_firmware(rdev->pfp_fw);
395
		rdev->pfp_fw = NULL;
396
		release_firmware(rdev->me_fw);
397
		rdev->me_fw = NULL;
398
		release_firmware(rdev->ce_fw);
399
		rdev->ce_fw = NULL;
400
		release_firmware(rdev->rlc_fw);
401
		rdev->rlc_fw = NULL;
402
		release_firmware(rdev->mc_fw);
403
		rdev->mc_fw = NULL;
404
	}
405
	return err;
406
}
407
 
/* watermark setup */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode)
			tmp = 0; /* 1/2 */
		else
			tmp = 2; /* whole */
	} else
		tmp = 0;

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
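/*
 * Example (illustrative): when both crtcs of a linked pair drive a mode,
 * each one is given half of the shared line buffer and the function
 * returns 4096 * 2; when the partner crtc is disabled the whole buffer is
 * claimed and 8192 * 2 is returned; a disabled crtc gets no allocation.
 */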
 
453
static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
454
{
455
	u32 tmp = RREG32(MC_SHARED_CHMAP);
456
 
457
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
458
	case 0:
459
	default:
460
		return 1;
461
	case 1:
462
		return 2;
463
	case 2:
464
		return 4;
465
	case 3:
466
		return 8;
467
	case 4:
468
		return 3;
469
	case 5:
470
		return 6;
471
	case 6:
472
		return 10;
473
	case 7:
474
		return 12;
475
	case 8:
476
		return 16;
477
	}
478
}
479
 
480
struct dce6_wm_params {
481
	u32 dram_channels; /* number of dram channels */
482
	u32 yclk;          /* bandwidth per dram data pin in kHz */
483
	u32 sclk;          /* engine clock in kHz */
484
	u32 disp_clk;      /* display clock in kHz */
485
	u32 src_width;     /* viewport width */
486
	u32 active_time;   /* active display time in ns */
487
	u32 blank_time;    /* blank time in ns */
488
	bool interlaced;    /* mode is interlaced */
489
	fixed20_12 vsc;    /* vertical scale ratio */
490
	u32 num_heads;     /* number of active crtcs */
491
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
492
	u32 lb_size;       /* line buffer allocated to pipe */
493
	u32 vtaps;         /* vertical scaler taps */
494
};
495
 
static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
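/*
 * Worked example (illustrative numbers): wm->yclk = 96000 and
 * wm->dram_channels = 2 give yclk / 1000 = 96 and 2 * 4 = 8 data paths,
 * so the raw bandwidth is 96 * 8 * 0.7 = 537.6, truncated to 537.
 */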
 
516
static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
517
{
518
	/* Calculate DRAM Bandwidth and the part allocated to display. */
519
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
520
	fixed20_12 yclk, dram_channels, bandwidth;
521
	fixed20_12 a;
522
 
523
	a.full = dfixed_const(1000);
524
	yclk.full = dfixed_const(wm->yclk);
525
	yclk.full = dfixed_div(yclk, a);
526
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
527
	a.full = dfixed_const(10);
528
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
529
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
530
	bandwidth.full = dfixed_mul(dram_channels, yclk);
531
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
532
 
533
	return dfixed_trunc(bandwidth);
534
}
535
 
536
static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
537
{
538
	/* Calculate the display Data return Bandwidth */
539
	fixed20_12 return_efficiency; /* 0.8 */
540
	fixed20_12 sclk, bandwidth;
541
	fixed20_12 a;
542
 
543
	a.full = dfixed_const(1000);
544
	sclk.full = dfixed_const(wm->sclk);
545
	sclk.full = dfixed_div(sclk, a);
546
	a.full = dfixed_const(10);
547
	return_efficiency.full = dfixed_const(8);
548
	return_efficiency.full = dfixed_div(return_efficiency, a);
549
	a.full = dfixed_const(32);
550
	bandwidth.full = dfixed_mul(a, sclk);
551
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
552
 
553
	return dfixed_trunc(bandwidth);
554
}
555
 
556
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
557
{
558
	return 32;
559
}
560
 
561
static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
562
{
563
	/* Calculate the DMIF Request Bandwidth */
564
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
565
	fixed20_12 disp_clk, sclk, bandwidth;
566
	fixed20_12 a, b1, b2;
567
	u32 min_bandwidth;
568
 
569
	a.full = dfixed_const(1000);
570
	disp_clk.full = dfixed_const(wm->disp_clk);
571
	disp_clk.full = dfixed_div(disp_clk, a);
572
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
573
	b1.full = dfixed_mul(a, disp_clk);
574
 
575
	a.full = dfixed_const(1000);
576
	sclk.full = dfixed_const(wm->sclk);
577
	sclk.full = dfixed_div(sclk, a);
578
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
579
	b2.full = dfixed_mul(a, sclk);
580
 
581
	a.full = dfixed_const(10);
582
	disp_clk_request_efficiency.full = dfixed_const(8);
583
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
584
 
585
	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
586
 
587
	a.full = dfixed_const(min_bandwidth);
588
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
589
 
590
	return dfixed_trunc(bandwidth);
591
}
592
 
593
static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
594
{
595
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
596
	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
597
	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
598
	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
599
 
600
	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
601
}
602
 
603
static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
604
{
605
	/* Calculate the display mode Average Bandwidth
606
	 * DisplayMode should contain the source and destination dimensions,
607
	 * timing, etc.
608
	 */
609
	fixed20_12 bpp;
610
	fixed20_12 line_time;
611
	fixed20_12 src_width;
612
	fixed20_12 bandwidth;
613
	fixed20_12 a;
614
 
615
	a.full = dfixed_const(1000);
616
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
617
	line_time.full = dfixed_div(line_time, a);
618
	bpp.full = dfixed_const(wm->bytes_per_pixel);
619
	src_width.full = dfixed_const(wm->src_width);
620
	bandwidth.full = dfixed_mul(src_width, bpp);
621
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
622
	bandwidth.full = dfixed_div(bandwidth, line_time);
623
 
624
	return dfixed_trunc(bandwidth);
625
}
626
 
627
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
628
{
629
	/* First calculate the latency in ns */
630
	u32 mc_latency = 2000; /* 2000 ns. */
631
	u32 available_bandwidth = dce6_available_bandwidth(wm);
632
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
633
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
634
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
635
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
636
		(wm->num_heads * cursor_line_pair_return_time);
637
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
638
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
639
	u32 tmp, dmif_size = 12288;
640
	fixed20_12 a, b, c;
641
 
642
	if (wm->num_heads == 0)
643
		return 0;
644
 
645
	a.full = dfixed_const(2);
646
	b.full = dfixed_const(1);
647
	if ((wm->vsc.full > a.full) ||
648
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
649
	    (wm->vtaps >= 5) ||
650
	    ((wm->vsc.full >= a.full) && wm->interlaced))
651
		max_src_lines_per_dst_line = 4;
652
	else
653
		max_src_lines_per_dst_line = 2;
654
 
655
	a.full = dfixed_const(available_bandwidth);
656
	b.full = dfixed_const(wm->num_heads);
657
	a.full = dfixed_div(a, b);
658
 
659
	b.full = dfixed_const(mc_latency + 512);
660
	c.full = dfixed_const(wm->disp_clk);
661
	b.full = dfixed_div(b, c);
662
 
663
	c.full = dfixed_const(dmif_size);
664
	b.full = dfixed_div(c, b);
665
 
666
	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
667
 
668
	b.full = dfixed_const(1000);
669
	c.full = dfixed_const(wm->disp_clk);
670
	b.full = dfixed_div(c, b);
671
	c.full = dfixed_const(wm->bytes_per_pixel);
672
	b.full = dfixed_mul(b, c);
673
 
674
	lb_fill_bw = min(tmp, dfixed_trunc(b));
675
 
676
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
677
	b.full = dfixed_const(1000);
678
	c.full = dfixed_const(lb_fill_bw);
679
	b.full = dfixed_div(c, b);
680
	a.full = dfixed_div(a, b);
681
	line_fill_time = dfixed_trunc(a);
682
 
683
	if (line_fill_time < wm->active_time)
684
		return latency;
685
	else
686
		return latency + (line_fill_time - wm->active_time);
687
 
688
}
689
 
690
static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
691
{
692
	if (dce6_average_bandwidth(wm) <=
693
	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
694
		return true;
695
	else
696
		return false;
697
};
698
 
699
static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
700
{
701
	if (dce6_average_bandwidth(wm) <=
702
	    (dce6_available_bandwidth(wm) / wm->num_heads))
703
		return true;
704
	else
705
		return false;
706
};
707
 
708
static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
709
{
710
	u32 lb_partitions = wm->lb_size / wm->src_width;
711
	u32 line_time = wm->active_time + wm->blank_time;
712
	u32 latency_tolerant_lines;
713
	u32 latency_hiding;
714
	fixed20_12 a;
715
 
716
	a.full = dfixed_const(1);
717
	if (wm->vsc.full > a.full)
718
		latency_tolerant_lines = 1;
719
	else {
720
		if (lb_partitions <= (wm->vtaps + 1))
721
			latency_tolerant_lines = 1;
722
		else
723
			latency_tolerant_lines = 2;
724
	}
725
 
726
	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
727
 
728
	if (dce6_latency_watermark(wm) <= latency_hiding)
729
		return true;
730
	else
731
		return false;
732
}
733
 
734
static void dce6_program_watermarks(struct radeon_device *rdev,
735
					 struct radeon_crtc *radeon_crtc,
736
					 u32 lb_size, u32 num_heads)
737
{
738
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
739
	struct dce6_wm_params wm;
740
	u32 pixel_period;
741
	u32 line_time = 0;
742
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
743
	u32 priority_a_mark = 0, priority_b_mark = 0;
744
	u32 priority_a_cnt = PRIORITY_OFF;
745
	u32 priority_b_cnt = PRIORITY_OFF;
746
	u32 tmp, arb_control3;
747
	fixed20_12 a, b, c;
748
 
749
	if (radeon_crtc->base.enabled && num_heads && mode) {
750
		pixel_period = 1000000 / (u32)mode->clock;
751
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
752
		priority_a_cnt = 0;
753
		priority_b_cnt = 0;
754
 
755
		wm.yclk = rdev->pm.current_mclk * 10;
756
		wm.sclk = rdev->pm.current_sclk * 10;
757
		wm.disp_clk = mode->clock;
758
		wm.src_width = mode->crtc_hdisplay;
759
		wm.active_time = mode->crtc_hdisplay * pixel_period;
760
		wm.blank_time = line_time - wm.active_time;
761
		wm.interlaced = false;
762
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
763
			wm.interlaced = true;
764
		wm.vsc = radeon_crtc->vsc;
765
		wm.vtaps = 1;
766
		if (radeon_crtc->rmx_type != RMX_OFF)
767
			wm.vtaps = 2;
768
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
769
		wm.lb_size = lb_size;
770
		if (rdev->family == CHIP_ARUBA)
771
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
772
		else
773
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
774
		wm.num_heads = num_heads;
775
 
776
		/* set for high clocks */
777
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
778
		/* set for low clocks */
779
		/* wm.yclk = low clk; wm.sclk = low clk */
780
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
781
 
782
		/* possibly force display priority to high */
783
		/* should really do this at mode validation time... */
784
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
785
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
786
		    !dce6_check_latency_hiding(&wm) ||
787
		    (rdev->disp_priority == 2)) {
788
			DRM_DEBUG_KMS("force priority to high\n");
789
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
790
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
791
		}
792
 
793
		a.full = dfixed_const(1000);
794
		b.full = dfixed_const(mode->clock);
795
		b.full = dfixed_div(b, a);
796
		c.full = dfixed_const(latency_watermark_a);
797
		c.full = dfixed_mul(c, b);
798
		c.full = dfixed_mul(c, radeon_crtc->hsc);
799
		c.full = dfixed_div(c, a);
800
		a.full = dfixed_const(16);
801
		c.full = dfixed_div(c, a);
802
		priority_a_mark = dfixed_trunc(c);
803
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
804
 
805
		a.full = dfixed_const(1000);
806
		b.full = dfixed_const(mode->clock);
807
		b.full = dfixed_div(b, a);
808
		c.full = dfixed_const(latency_watermark_b);
809
		c.full = dfixed_mul(c, b);
810
		c.full = dfixed_mul(c, radeon_crtc->hsc);
811
		c.full = dfixed_div(c, a);
812
		a.full = dfixed_const(16);
813
		c.full = dfixed_div(c, a);
814
		priority_b_mark = dfixed_trunc(c);
815
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
816
	}
817
 
818
	/* select wm A */
819
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
820
	tmp = arb_control3;
821
	tmp &= ~LATENCY_WATERMARK_MASK(3);
822
	tmp |= LATENCY_WATERMARK_MASK(1);
823
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
824
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
825
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
826
		LATENCY_HIGH_WATERMARK(line_time)));
827
	/* select wm B */
828
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
829
	tmp &= ~LATENCY_WATERMARK_MASK(3);
830
	tmp |= LATENCY_WATERMARK_MASK(2);
831
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
832
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
833
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
834
		LATENCY_HIGH_WATERMARK(line_time)));
835
	/* restore original selection */
836
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
837
 
838
	/* write the priority marks */
839
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
840
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
841
 
842
}
843
 
844
void dce6_bandwidth_update(struct radeon_device *rdev)
845
{
846
	struct drm_display_mode *mode0 = NULL;
847
	struct drm_display_mode *mode1 = NULL;
848
	u32 num_heads = 0, lb_size;
849
	int i;
850
 
851
	radeon_update_display_priority(rdev);
852
 
853
	for (i = 0; i < rdev->num_crtc; i++) {
854
		if (rdev->mode_info.crtcs[i]->base.enabled)
855
			num_heads++;
856
	}
857
	for (i = 0; i < rdev->num_crtc; i += 2) {
858
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
859
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
860
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
861
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
862
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
863
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
864
	}
865
}
866
 
867
/*
868
 * Core functions
869
 */
870
static void si_tiling_mode_table_init(struct radeon_device *rdev)
871
{
872
	const u32 num_tile_mode_states = 32;
873
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
874
 
875
	switch (rdev->config.si.mem_row_size_in_kb) {
876
	case 1:
877
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
878
		break;
879
	case 2:
880
	default:
881
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
882
		break;
883
	case 4:
884
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
885
		break;
886
	}
887
 
888
	if ((rdev->family == CHIP_TAHITI) ||
889
	    (rdev->family == CHIP_PITCAIRN)) {
890
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
891
			switch (reg_offset) {
892
			case 0:  /* non-AA compressed depth or any compressed stencil */
893
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
894
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
895
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
896
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
897
						 NUM_BANKS(ADDR_SURF_16_BANK) |
898
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
899
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
900
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
901
				break;
902
			case 1:  /* 2xAA/4xAA compressed depth only */
903
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
904
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
905
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
906
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
907
						 NUM_BANKS(ADDR_SURF_16_BANK) |
908
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
909
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
910
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
911
				break;
912
			case 2:  /* 8xAA compressed depth only */
913
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
914
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
915
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
916
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
917
						 NUM_BANKS(ADDR_SURF_16_BANK) |
918
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
919
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
920
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
921
				break;
922
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
923
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
924
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
925
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
926
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
927
						 NUM_BANKS(ADDR_SURF_16_BANK) |
928
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
929
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
930
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
931
				break;
932
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
933
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
934
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
935
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
936
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
937
						 NUM_BANKS(ADDR_SURF_16_BANK) |
938
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
939
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
940
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
941
				break;
942
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
943
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
944
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
945
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
946
						 TILE_SPLIT(split_equal_to_row_size) |
947
						 NUM_BANKS(ADDR_SURF_16_BANK) |
948
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
949
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
950
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
951
				break;
952
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
953
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
954
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
955
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
956
						 TILE_SPLIT(split_equal_to_row_size) |
957
						 NUM_BANKS(ADDR_SURF_16_BANK) |
958
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
959
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
960
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
961
				break;
962
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
963
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
964
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
965
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
966
						 TILE_SPLIT(split_equal_to_row_size) |
967
						 NUM_BANKS(ADDR_SURF_16_BANK) |
968
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
969
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
970
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
971
				break;
972
			case 8:  /* 1D and 1D Array Surfaces */
973
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
974
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
975
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
976
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
977
						 NUM_BANKS(ADDR_SURF_16_BANK) |
978
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
979
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
980
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
981
				break;
982
			case 9:  /* Displayable maps. */
983
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
984
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
985
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
986
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
987
						 NUM_BANKS(ADDR_SURF_16_BANK) |
988
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
989
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
990
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
991
				break;
992
			case 10:  /* Display 8bpp. */
993
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
994
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
995
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
996
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
997
						 NUM_BANKS(ADDR_SURF_16_BANK) |
998
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
999
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1000
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1001
				break;
1002
			case 11:  /* Display 16bpp. */
1003
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1004
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1005
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1006
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1007
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1008
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1009
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1010
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1011
				break;
1012
			case 12:  /* Display 32bpp. */
1013
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1014
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1015
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1016
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1017
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1018
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1019
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1020
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1021
				break;
1022
			case 13:  /* Thin. */
1023
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1024
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1025
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1026
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1027
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1028
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1029
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1030
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1031
				break;
1032
			case 14:  /* Thin 8 bpp. */
1033
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1034
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1035
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1036
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1037
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1038
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1039
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1040
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1041
				break;
1042
			case 15:  /* Thin 16 bpp. */
1043
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1044
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1045
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1046
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1047
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1048
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1049
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1050
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1051
				break;
1052
			case 16:  /* Thin 32 bpp. */
1053
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1054
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1055
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1056
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1057
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1058
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1059
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1060
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1061
				break;
1062
			case 17:  /* Thin 64 bpp. */
1063
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1064
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1065
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1066
						 TILE_SPLIT(split_equal_to_row_size) |
1067
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1068
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1069
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1070
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1071
				break;
1072
			case 21:  /* 8 bpp PRT. */
1073
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1074
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1075
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1076
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1077
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1078
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1079
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1080
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1081
				break;
1082
			case 22:  /* 16 bpp PRT */
1083
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1084
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1085
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1086
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1087
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1088
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1089
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1090
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1091
				break;
1092
			case 23:  /* 32 bpp PRT */
1093
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1094
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1095
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1096
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1097
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1098
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1099
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1100
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1101
				break;
1102
			case 24:  /* 64 bpp PRT */
1103
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1104
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1105
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1106
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1107
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1108
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1109
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1110
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1111
				break;
1112
			case 25:  /* 128 bpp PRT */
1113
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1114
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1115
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1116
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1117
						 NUM_BANKS(ADDR_SURF_8_BANK) |
1118
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1119
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1120
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1121
				break;
1122
			default:
1123
				gb_tile_moden = 0;
1124
				break;
1125
			}
1126
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1127
		}
1128
	} else if (rdev->family == CHIP_VERDE) {
1129
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1130
			switch (reg_offset) {
1131
			case 0:  /* non-AA compressed depth or any compressed stencil */
1132
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1133
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1134
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1135
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1136
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1137
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1138
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1139
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1140
				break;
1141
			case 1:  /* 2xAA/4xAA compressed depth only */
1142
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1143
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1144
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1145
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1146
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1147
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1148
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1149
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1150
				break;
1151
			case 2:  /* 8xAA compressed depth only */
1152
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1153
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1154
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1155
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1156
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1157
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1158
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1159
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1160
				break;
1161
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1162
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1163
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1164
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1165
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1166
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1167
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1168
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1169
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1170
				break;
1171
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1172
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1173
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1174
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1175
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1176
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1177
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1178
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1179
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1180
				break;
1181
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1182
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1183
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1184
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1185
						 TILE_SPLIT(split_equal_to_row_size) |
1186
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1187
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1188
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1189
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1190
				break;
1191
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1192
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1193
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1194
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1195
						 TILE_SPLIT(split_equal_to_row_size) |
1196
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1197
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1198
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1199
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1200
				break;
1201
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1202
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1203
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1204
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1205
						 TILE_SPLIT(split_equal_to_row_size) |
1206
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1207
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1208
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1209
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1210
				break;
1211
			case 8:  /* 1D and 1D Array Surfaces */
1212
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1213
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1214
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1215
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1216
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1217
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1218
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1219
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1220
				break;
1221
			case 9:  /* Displayable maps. */
1222
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1223
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1224
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1225
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1226
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1227
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1228
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1229
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1230
				break;
1231
			case 10:  /* Display 8bpp. */
1232
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1233
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1234
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1235
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1236
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1237
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1238
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1239
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1240
				break;
1241
			case 11:  /* Display 16bpp. */
1242
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1243
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1244
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1245
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1246
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1247
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1248
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1249
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1250
				break;
1251
			case 12:  /* Display 32bpp. */
1252
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1253
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1254
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1255
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1256
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1257
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1258
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1259
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1260
				break;
1261
			case 13:  /* Thin. */
1262
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1263
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1264
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1265
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1266
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1267
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1268
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1269
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1270
				break;
1271
			case 14:  /* Thin 8 bpp. */
1272
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1274
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1275
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1276
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1277
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1278
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1279
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1280
				break;
1281
			case 15:  /* Thin 16 bpp. */
1282
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1283
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1284
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1285
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1286
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1287
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1288
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1289
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1290
				break;
1291
			case 16:  /* Thin 32 bpp. */
1292
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1293
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1294
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1295
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1296
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1297
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1298
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1299
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1300
				break;
1301
			case 17:  /* Thin 64 bpp. */
1302
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1303
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1304
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1305
						 TILE_SPLIT(split_equal_to_row_size) |
1306
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1307
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1308
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1309
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1310
				break;
1311
			case 21:  /* 8 bpp PRT. */
1312
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1314
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1315
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1316
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1317
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1318
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1319
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1320
				break;
1321
			case 22:  /* 16 bpp PRT */
1322
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1323
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1324
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1325
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1326
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1327
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1328
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1329
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1330
				break;
1331
			case 23:  /* 32 bpp PRT */
1332
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1333
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1334
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1335
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1336
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1337
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1338
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1339
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1340
				break;
1341
			case 24:  /* 64 bpp PRT */
1342
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1343
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1344
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1345
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1346
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1347
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1348
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1349
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1350
				break;
1351
			case 25:  /* 128 bpp PRT */
1352
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1353
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1354
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1355
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1356
						 NUM_BANKS(ADDR_SURF_8_BANK) |
1357
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1358
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1359
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1360
				break;
1361
			default:
1362
				gb_tile_moden = 0;
1363
				break;
1364
			}
1365
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1366
		}
1367
	} else
1368
		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1369
}
1370
 
1371
static void si_select_se_sh(struct radeon_device *rdev,
1372
			    u32 se_num, u32 sh_num)
1373
{
1374
	u32 data = INSTANCE_BROADCAST_WRITES;
1375
 
1376
	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1377
		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1378
	else if (se_num == 0xffffffff)
1379
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1380
	else if (sh_num == 0xffffffff)
1381
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1382
	else
1383
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1384
	WREG32(GRBM_GFX_INDEX, data);
1385
}
1386
 
static u32 si_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
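/* Example: si_create_bitmask(3) == 0x7, si_create_bitmask(8) == 0xff. */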
 
static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
{
	u32 data, mask;

	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	if (data & 1)
		data &= INACTIVE_CUS_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);

	data >>= INACTIVE_CUS_SHIFT;

	mask = si_create_bitmask(cu_per_sh);

	return ~data & mask;
}
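/*
 * Example (illustrative): with cu_per_sh = 8 and CUs 6 and 7 reported
 * inactive (data = 0xc0 after the shift), the function returns
 * ~0xc0 & 0xff = 0x3f, i.e. CUs 0-5 are available.
 */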
 
1416
static void si_setup_spi(struct radeon_device *rdev,
1417
			 u32 se_num, u32 sh_per_se,
1418
			 u32 cu_per_sh)
1419
{
1420
	int i, j, k;
1421
	u32 data, mask, active_cu;
1422
 
1423
	for (i = 0; i < se_num; i++) {
1424
		for (j = 0; j < sh_per_se; j++) {
1425
			si_select_se_sh(rdev, i, j);
1426
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1427
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
1428
 
1429
			mask = 1;
1430
			for (k = 0; k < 16; k++) {
1431
				mask <<= k;
1432
				if (active_cu & mask) {
1433
					data &= ~mask;
1434
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1435
					break;
1436
				}
1437
			}
1438
		}
1439
	}
1440
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1441
}
1442
 
1443
static u32 si_get_rb_disabled(struct radeon_device *rdev,
1444
			      u32 max_rb_num, u32 se_num,
1445
			      u32 sh_per_se)
1446
{
1447
	u32 data, mask;
1448
 
1449
	data = RREG32(CC_RB_BACKEND_DISABLE);
1450
	if (data & 1)
1451
		data &= BACKEND_DISABLE_MASK;
1452
	else
1453
		data = 0;
1454
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1455
 
1456
	data >>= BACKEND_DISABLE_SHIFT;
1457
 
1458
	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
1459
 
1460
	return data & mask;
1461
}
1462
 
1463
static void si_setup_rb(struct radeon_device *rdev,
1464
			u32 se_num, u32 sh_per_se,
1465
			u32 max_rb_num)
1466
{
1467
	int i, j;
1468
	u32 data, mask;
1469
	u32 disabled_rbs = 0;
1470
	u32 enabled_rbs = 0;
1471
 
1472
	for (i = 0; i < se_num; i++) {
1473
		for (j = 0; j < sh_per_se; j++) {
1474
			si_select_se_sh(rdev, i, j);
1475
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1476
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1477
		}
1478
	}
1479
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1480
 
1481
	mask = 1;
1482
	for (i = 0; i < max_rb_num; i++) {
1483
		if (!(disabled_rbs & mask))
1484
			enabled_rbs |= mask;
1485
		mask <<= 1;
1486
	}
1487
 
1488
	for (i = 0; i < se_num; i++) {
1489
		si_select_se_sh(rdev, i, 0xffffffff);
1490
		data = 0;
1491
		for (j = 0; j < sh_per_se; j++) {
1492
			switch (enabled_rbs & 3) {
1493
			case 1:
1494
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1495
				break;
1496
			case 2:
1497
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1498
				break;
1499
			case 3:
1500
			default:
1501
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1502
				break;
1503
			}
1504
			enabled_rbs >>= 2;
1505
		}
1506
		WREG32(PA_SC_RASTER_CONFIG, data);
1507
	}
1508
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1509
}
1510
 
1511
static void si_gpu_init(struct radeon_device *rdev)
1512
{
1513
	u32 gb_addr_config = 0;
1514
	u32 mc_shared_chmap, mc_arb_ramcfg;
1515
	u32 sx_debug_1;
1516
	u32 hdp_host_path_cntl;
1517
	u32 tmp;
1518
	int i, j;
1519
 
1520
	switch (rdev->family) {
1521
	case CHIP_TAHITI:
1522
		rdev->config.si.max_shader_engines = 2;
1523
		rdev->config.si.max_tile_pipes = 12;
1524
		rdev->config.si.max_cu_per_sh = 8;
1525
		rdev->config.si.max_sh_per_se = 2;
1526
		rdev->config.si.max_backends_per_se = 4;
1527
		rdev->config.si.max_texture_channel_caches = 12;
1528
		rdev->config.si.max_gprs = 256;
1529
		rdev->config.si.max_gs_threads = 32;
1530
		rdev->config.si.max_hw_contexts = 8;
1531
 
1532
		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1533
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1534
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1535
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1536
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1537
		break;
1538
	case CHIP_PITCAIRN:
1539
		rdev->config.si.max_shader_engines = 2;
1540
		rdev->config.si.max_tile_pipes = 8;
1541
		rdev->config.si.max_cu_per_sh = 5;
1542
		rdev->config.si.max_sh_per_se = 2;
1543
		rdev->config.si.max_backends_per_se = 4;
1544
		rdev->config.si.max_texture_channel_caches = 8;
1545
		rdev->config.si.max_gprs = 256;
1546
		rdev->config.si.max_gs_threads = 32;
1547
		rdev->config.si.max_hw_contexts = 8;
1548
 
1549
		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1550
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1551
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1552
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1553
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1554
		break;
1555
	case CHIP_VERDE:
1556
	default:
1557
		rdev->config.si.max_shader_engines = 1;
1558
		rdev->config.si.max_tile_pipes = 4;
1559
		rdev->config.si.max_cu_per_sh = 2;
1560
		rdev->config.si.max_sh_per_se = 2;
1561
		rdev->config.si.max_backends_per_se = 4;
1562
		rdev->config.si.max_texture_channel_caches = 4;
1563
		rdev->config.si.max_gprs = 256;
1564
		rdev->config.si.max_gs_threads = 32;
1565
		rdev->config.si.max_hw_contexts = 8;
1566
 
1567
		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1568
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1569
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1570
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1571
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1572
		break;
1573
	}
1574
 
1575
	/* Initialize HDP */
1576
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1577
		WREG32((0x2c14 + j), 0x00000000);
1578
		WREG32((0x2c18 + j), 0x00000000);
1579
		WREG32((0x2c1c + j), 0x00000000);
1580
		WREG32((0x2c20 + j), 0x00000000);
1581
		WREG32((0x2c24 + j), 0x00000000);
1582
	}
1583
 
1584
	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1585
 
1586
	evergreen_fix_pci_max_read_req_size(rdev);
1587
 
1588
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1589
 
1590
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1591
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1592
 
1593
	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1594
	rdev->config.si.mem_max_burst_length_bytes = 256;
1595
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1596
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1597
	if (rdev->config.si.mem_row_size_in_kb > 4)
1598
		rdev->config.si.mem_row_size_in_kb = 4;
1599
	/* XXX use MC settings? */
1600
	rdev->config.si.shader_engine_tile_size = 32;
1601
	rdev->config.si.num_gpus = 1;
1602
	rdev->config.si.multi_gpu_tile_size = 64;
1603
 
1604
	/* fix up row size */
1605
	gb_addr_config &= ~ROW_SIZE_MASK;
1606
	switch (rdev->config.si.mem_row_size_in_kb) {
1607
	case 1:
1608
	default:
1609
		gb_addr_config |= ROW_SIZE(0);
1610
		break;
1611
	case 2:
1612
		gb_addr_config |= ROW_SIZE(1);
1613
		break;
1614
	case 4:
1615
		gb_addr_config |= ROW_SIZE(2);
1616
		break;
1617
	}
1618
 
1619
	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1620
	 * not have bank info, so create a custom tiling dword.
1621
	 * bits 3:0   num_pipes
1622
	 * bits 7:4   num_banks
1623
	 * bits 11:8  group_size
1624
	 * bits 15:12 row_size
1625
	 */
1626
	rdev->config.si.tile_config = 0;
1627
	switch (rdev->config.si.num_tile_pipes) {
1628
	case 1:
1629
		rdev->config.si.tile_config |= (0 << 0);
1630
		break;
1631
	case 2:
1632
		rdev->config.si.tile_config |= (1 << 0);
1633
		break;
1634
	case 4:
1635
		rdev->config.si.tile_config |= (2 << 0);
1636
		break;
1637
	case 8:
1638
	default:
1639
		/* XXX what about 12? */
1640
		rdev->config.si.tile_config |= (3 << 0);
1641
		break;
1642
	}
1643
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1644
	case 0: /* four banks */
1645
		rdev->config.si.tile_config |= 0 << 4;
1646
		break;
1647
	case 1: /* eight banks */
1648
		rdev->config.si.tile_config |= 1 << 4;
1649
		break;
1650
	case 2: /* sixteen banks */
1651
	default:
1652
		rdev->config.si.tile_config |= 2 << 4;
1653
		break;
1654
	}
1655
	rdev->config.si.tile_config |=
1656
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1657
	rdev->config.si.tile_config |=
1658
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
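	/* illustrative example: 4 pipes and 8 banks contribute 0x12 in the low byte;
	 * the interleave and row-size fields taken from gb_addr_config fill
	 * bits 11:8 and 15:12 */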
1659
 
1660
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1661
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1662
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1663
 
1664
	si_tiling_mode_table_init(rdev);
1665
 
1666
	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
1667
		    rdev->config.si.max_sh_per_se,
1668
		    rdev->config.si.max_backends_per_se);
1669
 
1670
	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
1671
		     rdev->config.si.max_sh_per_se,
1672
		     rdev->config.si.max_cu_per_sh);
1673
 
1674
 
1675
	/* set HW defaults for 3D engine */
1676
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1677
				     ROQ_IB2_START(0x2b)));
1678
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1679
 
1680
	sx_debug_1 = RREG32(SX_DEBUG_1);
1681
	WREG32(SX_DEBUG_1, sx_debug_1);
1682
 
1683
	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1684
 
1685
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1686
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1687
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1688
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1689
 
1690
	WREG32(VGT_NUM_INSTANCES, 1);
1691
 
1692
	WREG32(CP_PERFMON_CNTL, 0);
1693
 
1694
	WREG32(SQ_CONFIG, 0);
1695
 
1696
	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1697
					  FORCE_EOV_MAX_REZ_CNT(255)));
1698
 
1699
	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1700
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1701
 
1702
	WREG32(VGT_GS_VERTEX_REUSE, 16);
1703
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1704
 
1705
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1706
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1707
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1708
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1709
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1710
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1711
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1712
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1713
 
1714
	tmp = RREG32(HDP_MISC_CNTL);
1715
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1716
	WREG32(HDP_MISC_CNTL, tmp);
1717
 
1718
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1719
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1720
 
1721
	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1722
 
1723
	udelay(50);
1724
}
1725
 
1726
/*
1727
 * GPU scratch register helper functions.
1728
 */
1729
static void si_scratch_init(struct radeon_device *rdev)
1730
{
1731
	int i;
1732
 
1733
	rdev->scratch.num_reg = 7;
1734
	rdev->scratch.reg_base = SCRATCH_REG0;
1735
	for (i = 0; i < rdev->scratch.num_reg; i++) {
1736
		rdev->scratch.free[i] = true;
1737
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1738
	}
1739
}
1740
 
1741
void si_fence_ring_emit(struct radeon_device *rdev,
1742
			struct radeon_fence *fence)
1743
{
1744
	struct radeon_ring *ring = &rdev->ring[fence->ring];
1745
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1746
 
1747
	/* flush read cache over gart */
1748
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1749
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1750
	radeon_ring_write(ring, 0);
1751
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1752
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1753
			  PACKET3_TC_ACTION_ENA |
1754
			  PACKET3_SH_KCACHE_ACTION_ENA |
1755
			  PACKET3_SH_ICACHE_ACTION_ENA);
1756
	radeon_ring_write(ring, 0xFFFFFFFF);
1757
	radeon_ring_write(ring, 0);
1758
	radeon_ring_write(ring, 10); /* poll interval */
1759
	/* EVENT_WRITE_EOP - flush caches, send int */
1760
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1761
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
1762
	radeon_ring_write(ring, addr & 0xffffffff);
1763
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
1764
	radeon_ring_write(ring, fence->seq);
1765
	radeon_ring_write(ring, 0);
1766
}
1767
 
1768
/*
1769
 * IB stuff
1770
 */
1771
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1772
{
1773
	struct radeon_ring *ring = &rdev->ring[ib->ring];
1774
	u32 header;
1775
 
1776
	if (ib->is_const_ib) {
1777
		/* set switch buffer packet before const IB */
1778
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1779
		radeon_ring_write(ring, 0);
1780
 
1781
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1782
	} else {
1783
		u32 next_rptr;
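		/* next_rptr points just past the packets emitted below for this IB:
		 * the rptr update itself, the IB packet and the cache flush */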
1784
		if (ring->rptr_save_reg) {
1785
			next_rptr = ring->wptr + 3 + 4 + 8;
1786
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1787
			radeon_ring_write(ring, ((ring->rptr_save_reg -
1788
						  PACKET3_SET_CONFIG_REG_START) >> 2));
1789
			radeon_ring_write(ring, next_rptr);
1790
		} else if (rdev->wb.enabled) {
1791
			next_rptr = ring->wptr + 5 + 4 + 8;
1792
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1793
			radeon_ring_write(ring, (1 << 8));
1794
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1795
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1796
			radeon_ring_write(ring, next_rptr);
1797
		}
1798
 
1799
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1800
	}
1801
 
1802
	radeon_ring_write(ring, header);
1803
	radeon_ring_write(ring,
1804
#ifdef __BIG_ENDIAN
1805
			  (2 << 0) |
1806
#endif
1807
			  (ib->gpu_addr & 0xFFFFFFFC));
1808
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1809
	radeon_ring_write(ring, ib->length_dw |
1810
			  (ib->vm ? (ib->vm->id << 24) : 0));
1811
 
1812
	if (!ib->is_const_ib) {
1813
		/* flush read cache over gart for this vmid */
1814
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1815
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1816
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
1817
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1818
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1819
				  PACKET3_TC_ACTION_ENA |
1820
				  PACKET3_SH_KCACHE_ACTION_ENA |
1821
				  PACKET3_SH_ICACHE_ACTION_ENA);
1822
		radeon_ring_write(ring, 0xFFFFFFFF);
1823
		radeon_ring_write(ring, 0);
1824
		radeon_ring_write(ring, 10); /* poll interval */
1825
	}
1826
}
1827
 
1828
/*
1829
 * CP.
1830
 */
1831
static void si_cp_enable(struct radeon_device *rdev, bool enable)
1832
{
1833
	if (enable)
1834
		WREG32(CP_ME_CNTL, 0);
1835
	else {
1836
//       radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1837
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1838
		WREG32(SCRATCH_UMSK, 0);
1839
	}
1840
	udelay(50);
1841
}
1842
 
1843
static int si_cp_load_microcode(struct radeon_device *rdev)
1844
{
1845
	const __be32 *fw_data;
1846
	int i;
1847
 
1848
	if (!rdev->me_fw || !rdev->pfp_fw)
1849
		return -EINVAL;
1850
 
1851
	si_cp_enable(rdev, false);
1852
 
1853
	/* PFP */
1854
	fw_data = (const __be32 *)rdev->pfp_fw->data;
1855
	WREG32(CP_PFP_UCODE_ADDR, 0);
1856
	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
1857
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1858
	WREG32(CP_PFP_UCODE_ADDR, 0);
1859
 
1860
	/* CE */
1861
	fw_data = (const __be32 *)rdev->ce_fw->data;
1862
	WREG32(CP_CE_UCODE_ADDR, 0);
1863
	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
1864
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1865
	WREG32(CP_CE_UCODE_ADDR, 0);
1866
 
1867
	/* ME */
1868
	fw_data = (const __be32 *)rdev->me_fw->data;
1869
	WREG32(CP_ME_RAM_WADDR, 0);
1870
	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
1871
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1872
	WREG32(CP_ME_RAM_WADDR, 0);
1873
 
1874
	WREG32(CP_PFP_UCODE_ADDR, 0);
1875
	WREG32(CP_CE_UCODE_ADDR, 0);
1876
	WREG32(CP_ME_RAM_WADDR, 0);
1877
	WREG32(CP_ME_RAM_RADDR, 0);
1878
	return 0;
1879
}
1880
 
1881
static int si_cp_start(struct radeon_device *rdev)
1882
{
1883
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1884
	int r, i;
1885
 
1886
	r = radeon_ring_lock(rdev, ring, 7 + 4);
1887
	if (r) {
1888
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1889
		return r;
1890
	}
1891
	/* init the CP */
1892
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
1893
	radeon_ring_write(ring, 0x1);
1894
	radeon_ring_write(ring, 0x0);
1895
	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
1896
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
1897
	radeon_ring_write(ring, 0);
1898
	radeon_ring_write(ring, 0);
1899
 
1900
	/* init the CE partitions */
1901
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1902
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1903
	radeon_ring_write(ring, 0xc000);
1904
	radeon_ring_write(ring, 0xe000);
1905
	radeon_ring_unlock_commit(rdev, ring);
1906
 
1907
	si_cp_enable(rdev, true);
1908
 
1909
	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
1910
	if (r) {
1911
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1912
		return r;
1913
	}
1914
 
1915
	/* setup clear context state */
1916
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1917
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1918
 
1919
	for (i = 0; i < si_default_size; i++)
1920
		radeon_ring_write(ring, si_default_state[i]);
1921
 
1922
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1923
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1924
 
1925
	/* set clear context state */
1926
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1927
	radeon_ring_write(ring, 0);
1928
 
1929
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1930
	radeon_ring_write(ring, 0x00000316);
1931
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1932
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1933
 
1934
	radeon_ring_unlock_commit(rdev, ring);
1935
 
1936
	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
1937
		ring = &rdev->ring[i];
1938
		r = radeon_ring_lock(rdev, ring, 2);
1939
 
1940
		/* clear the compute context state */
1941
		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
1942
		radeon_ring_write(ring, 0);
1943
 
1944
		radeon_ring_unlock_commit(rdev, ring);
1945
	}
1946
 
1947
	return 0;
1948
}
1949
 
1950
static void si_cp_fini(struct radeon_device *rdev)
1951
{
1952
	struct radeon_ring *ring;
1953
	si_cp_enable(rdev, false);
1954
 
1955
//   ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1956
//   radeon_ring_fini(rdev, ring);
1957
//   radeon_scratch_free(rdev, ring->rptr_save_reg);
1958
 
1959
//   ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
1960
//   radeon_ring_fini(rdev, ring);
1961
//   radeon_scratch_free(rdev, ring->rptr_save_reg);
1962
 
1963
//   ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
1964
//   radeon_ring_fini(rdev, ring);
1965
//   radeon_scratch_free(rdev, ring->rptr_save_reg);
1966
}
1967
 
1968
static int si_cp_resume(struct radeon_device *rdev)
1969
{
1970
	struct radeon_ring *ring;
1971
	u32 tmp;
1972
	u32 rb_bufsz;
1973
	int r;
1974
 
1975
	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
1976
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
1977
				 SOFT_RESET_PA |
1978
				 SOFT_RESET_VGT |
1979
				 SOFT_RESET_SPI |
1980
				 SOFT_RESET_SX));
1981
	RREG32(GRBM_SOFT_RESET);
1982
	mdelay(15);
1983
	WREG32(GRBM_SOFT_RESET, 0);
1984
	RREG32(GRBM_SOFT_RESET);
1985
 
1986
	WREG32(CP_SEM_WAIT_TIMER, 0x0);
1987
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1988
 
1989
	/* Set the write pointer delay */
1990
	WREG32(CP_RB_WPTR_DELAY, 0);
1991
 
1992
	WREG32(CP_DEBUG, 0);
1993
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1994
 
1995
	/* ring 0 - compute and gfx */
1996
	/* Set ring buffer size */
1997
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1998
	rb_bufsz = drm_order(ring->ring_size / 8);
1999
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2000
#ifdef __BIG_ENDIAN
2001
	tmp |= BUF_SWAP_32BIT;
2002
#endif
2003
	WREG32(CP_RB0_CNTL, tmp);
2004
 
2005
	/* Initialize the ring buffer's read and write pointers */
2006
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2007
	ring->wptr = 0;
2008
	WREG32(CP_RB0_WPTR, ring->wptr);
2009
 
2010
	/* set the wb address whether it's enabled or not */
2011
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2012
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2013
 
2014
	if (rdev->wb.enabled)
2015
		WREG32(SCRATCH_UMSK, 0xff);
2016
	else {
2017
		tmp |= RB_NO_UPDATE;
2018
		WREG32(SCRATCH_UMSK, 0);
2019
	}
2020
 
2021
	mdelay(1);
2022
	WREG32(CP_RB0_CNTL, tmp);
2023
 
2024
	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2025
 
2026
	ring->rptr = RREG32(CP_RB0_RPTR);
2027
 
2028
	/* ring1  - compute only */
2029
	/* Set ring buffer size */
2030
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2031
	rb_bufsz = drm_order(ring->ring_size / 8);
2032
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2033
#ifdef __BIG_ENDIAN
2034
	tmp |= BUF_SWAP_32BIT;
2035
#endif
2036
	WREG32(CP_RB1_CNTL, tmp);
2037
 
2038
	/* Initialize the ring buffer's read and write pointers */
2039
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2040
	ring->wptr = 0;
2041
	WREG32(CP_RB1_WPTR, ring->wptr);
2042
 
2043
	/* set the wb address whether it's enabled or not */
2044
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2045
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2046
 
2047
	mdelay(1);
2048
	WREG32(CP_RB1_CNTL, tmp);
2049
 
2050
	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2051
 
2052
	ring->rptr = RREG32(CP_RB1_RPTR);
2053
 
2054
	/* ring2 - compute only */
2055
	/* Set ring buffer size */
2056
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2057
	rb_bufsz = drm_order(ring->ring_size / 8);
2058
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2059
#ifdef __BIG_ENDIAN
2060
	tmp |= BUF_SWAP_32BIT;
2061
#endif
2062
	WREG32(CP_RB2_CNTL, tmp);
2063
 
2064
	/* Initialize the ring buffer's read and write pointers */
2065
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2066
	ring->wptr = 0;
2067
	WREG32(CP_RB2_WPTR, ring->wptr);
2068
 
2069
	/* set the wb address whether it's enabled or not */
2070
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2071
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2072
 
2073
	mdelay(1);
2074
	WREG32(CP_RB2_CNTL, tmp);
2075
 
2076
	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2077
 
2078
	ring->rptr = RREG32(CP_RB2_RPTR);
2079
 
2080
	/* start the rings */
2081
	si_cp_start(rdev);
2082
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2083
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2084
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2085
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2086
	if (r) {
2087
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2088
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2089
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2090
		return r;
2091
	}
2092
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2093
	if (r) {
2094
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2095
	}
2096
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
2097
	if (r) {
2098
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2099
	}
2100
 
2101
	return 0;
2102
}
2103
 
2104
bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2105
{
2106
	u32 srbm_status;
2107
	u32 grbm_status, grbm_status2;
2108
	u32 grbm_status_se0, grbm_status_se1;
2109
 
2110
	srbm_status = RREG32(SRBM_STATUS);
2111
	grbm_status = RREG32(GRBM_STATUS);
2112
	grbm_status2 = RREG32(GRBM_STATUS2);
2113
	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2114
	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2115
	if (!(grbm_status & GUI_ACTIVE)) {
2116
		radeon_ring_lockup_update(ring);
2117
		return false;
2118
	}
2119
	/* force CP activities */
2120
	radeon_ring_force_activity(rdev, ring);
2121
	return radeon_ring_test_lockup(rdev, ring);
2122
}
2123
 
2124
static int si_gpu_soft_reset(struct radeon_device *rdev)
2125
{
2126
	struct evergreen_mc_save save;
2127
	u32 grbm_reset = 0;
2128
 
2129
	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2130
		return 0;
2131
 
2132
	dev_info(rdev->dev, "GPU softreset \n");
2133
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2134
		RREG32(GRBM_STATUS));
2135
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2136
		RREG32(GRBM_STATUS2));
2137
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2138
		RREG32(GRBM_STATUS_SE0));
2139
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2140
		RREG32(GRBM_STATUS_SE1));
2141
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2142
		RREG32(SRBM_STATUS));
2143
	evergreen_mc_stop(rdev, &save);
2144
	if (radeon_mc_wait_for_idle(rdev)) {
2145
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2146
	}
2147
	/* Disable CP parsing/prefetching */
2148
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2149
 
2150
	/* reset all the gfx blocks */
2151
	grbm_reset = (SOFT_RESET_CP |
2152
		      SOFT_RESET_CB |
2153
		      SOFT_RESET_DB |
2154
		      SOFT_RESET_GDS |
2155
		      SOFT_RESET_PA |
2156
		      SOFT_RESET_SC |
2157
		      SOFT_RESET_BCI |
2158
		      SOFT_RESET_SPI |
2159
		      SOFT_RESET_SX |
2160
		      SOFT_RESET_TC |
2161
		      SOFT_RESET_TA |
2162
		      SOFT_RESET_VGT |
2163
		      SOFT_RESET_IA);
2164
 
2165
	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2166
	WREG32(GRBM_SOFT_RESET, grbm_reset);
2167
	(void)RREG32(GRBM_SOFT_RESET);
2168
	udelay(50);
2169
	WREG32(GRBM_SOFT_RESET, 0);
2170
	(void)RREG32(GRBM_SOFT_RESET);
2171
	/* Wait a little for things to settle down */
2172
	udelay(50);
2173
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2174
		RREG32(GRBM_STATUS));
2175
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2176
		RREG32(GRBM_STATUS2));
2177
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2178
		RREG32(GRBM_STATUS_SE0));
2179
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2180
		RREG32(GRBM_STATUS_SE1));
2181
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2182
		RREG32(SRBM_STATUS));
2183
	evergreen_mc_resume(rdev, &save);
2184
	return 0;
2185
}
2186
 
2187
int si_asic_reset(struct radeon_device *rdev)
2188
{
2189
	return si_gpu_soft_reset(rdev);
2190
}
2191
 
2192
/* MC */
2193
static void si_mc_program(struct radeon_device *rdev)
2194
{
2195
	struct evergreen_mc_save save;
2196
	u32 tmp;
2197
	int i, j;
2198
 
2199
	/* Initialize HDP */
2200
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2201
		WREG32((0x2c14 + j), 0x00000000);
2202
		WREG32((0x2c18 + j), 0x00000000);
2203
		WREG32((0x2c1c + j), 0x00000000);
2204
		WREG32((0x2c20 + j), 0x00000000);
2205
		WREG32((0x2c24 + j), 0x00000000);
2206
	}
2207
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2208
 
2209
	evergreen_mc_stop(rdev, &save);
2210
	if (radeon_mc_wait_for_idle(rdev)) {
2211
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2212
	}
2213
	/* Lock out access through VGA aperture */
2214
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2215
	/* Update configuration */
2216
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2217
	       rdev->mc.vram_start >> 12);
2218
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2219
	       rdev->mc.vram_end >> 12);
2220
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2221
	       rdev->vram_scratch.gpu_addr >> 12);
2222
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2223
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2224
	WREG32(MC_VM_FB_LOCATION, tmp);
2225
	/* XXX double check these! */
2226
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2227
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2228
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2229
	WREG32(MC_VM_AGP_BASE, 0);
2230
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2231
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2232
	if (radeon_mc_wait_for_idle(rdev)) {
2233
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2234
	}
2235
	evergreen_mc_resume(rdev, &save);
2236
	/* we need to own VRAM, so turn off the VGA renderer here
2237
	 * to stop it overwriting our objects */
2238
	rv515_vga_render_disable(rdev);
2239
}
2240
 
2241
/* SI MC address space is 40 bits */
2242
static void si_vram_location(struct radeon_device *rdev,
2243
			     struct radeon_mc *mc, u64 base)
2244
{
2245
	mc->vram_start = base;
2246
	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
2247
		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
2248
		mc->real_vram_size = mc->aper_size;
2249
		mc->mc_vram_size = mc->aper_size;
2250
	}
2251
	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2252
	dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
2253
			mc->mc_vram_size >> 20, mc->vram_start,
2254
			mc->vram_end, mc->real_vram_size >> 20);
2255
}
2256
 
2257
static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
2258
{
2259
	u64 size_af, size_bf;
2260
 
2261
	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
2262
	size_bf = mc->vram_start & ~mc->gtt_base_align;
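	/* place the GTT in whichever hole (below or above VRAM) is larger */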
2263
	if (size_bf > size_af) {
2264
		if (mc->gtt_size > size_bf) {
2265
			dev_warn(rdev->dev, "limiting GTT\n");
2266
			mc->gtt_size = size_bf;
2267
		}
2268
		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
2269
	} else {
2270
		if (mc->gtt_size > size_af) {
2271
			dev_warn(rdev->dev, "limiting GTT\n");
2272
			mc->gtt_size = size_af;
2273
		}
2274
		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
2275
	}
2276
	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
2277
	dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
2278
			mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
2279
}
2280
 
2281
static void si_vram_gtt_location(struct radeon_device *rdev,
2282
				 struct radeon_mc *mc)
2283
{
2284
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
2285
		/* leave room for at least 1024M GTT */
2286
		dev_warn(rdev->dev, "limiting VRAM\n");
2287
		mc->real_vram_size = 0xFFC0000000ULL;
2288
		mc->mc_vram_size = 0xFFC0000000ULL;
2289
	}
2290
	si_vram_location(rdev, &rdev->mc, 0);
2291
	rdev->mc.gtt_base_align = 0;
2292
	si_gtt_location(rdev, mc);
2293
}
2294
 
2295
static int si_mc_init(struct radeon_device *rdev)
2296
{
2297
	u32 tmp;
2298
	int chansize, numchan;
2299
 
2300
	/* Get VRAM information */
2301
	rdev->mc.vram_is_ddr = true;
2302
	tmp = RREG32(MC_ARB_RAMCFG);
2303
	if (tmp & CHANSIZE_OVERRIDE) {
2304
		chansize = 16;
2305
	} else if (tmp & CHANSIZE_MASK) {
2306
		chansize = 64;
2307
	} else {
2308
		chansize = 32;
2309
	}
2310
	tmp = RREG32(MC_SHARED_CHMAP);
2311
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2312
	case 0:
2313
	default:
2314
		numchan = 1;
2315
		break;
2316
	case 1:
2317
		numchan = 2;
2318
		break;
2319
	case 2:
2320
		numchan = 4;
2321
		break;
2322
	case 3:
2323
		numchan = 8;
2324
		break;
2325
	case 4:
2326
		numchan = 3;
2327
		break;
2328
	case 5:
2329
		numchan = 6;
2330
		break;
2331
	case 6:
2332
		numchan = 10;
2333
		break;
2334
	case 7:
2335
		numchan = 12;
2336
		break;
2337
	case 8:
2338
		numchan = 16;
2339
		break;
2340
	}
2341
	rdev->mc.vram_width = numchan * chansize;
2342
	/* Could aper size report 0 ? */
2343
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2344
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2345
	/* size in MB on si */
2346
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2347
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2348
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
2349
	si_vram_gtt_location(rdev, &rdev->mc);
2350
	radeon_update_bandwidth_info(rdev);
2351
 
2352
	return 0;
2353
}
2354
 
2355
/*
2356
 * GART
2357
 */
2358
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
2359
{
2360
	/* flush hdp cache */
2361
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2362
 
2363
	/* bits 0-15 are the VM contexts0-15 */
2364
	WREG32(VM_INVALIDATE_REQUEST, 1);
2365
}
2366
 
2367
static int si_pcie_gart_enable(struct radeon_device *rdev)
2368
{
2369
	int r, i;
2370
 
2371
	if (rdev->gart.robj == NULL) {
2372
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2373
		return -EINVAL;
2374
	}
2375
	r = radeon_gart_table_vram_pin(rdev);
2376
	if (r)
2377
		return r;
2378
	radeon_gart_restore(rdev);
2379
	/* Setup TLB control */
2380
	WREG32(MC_VM_MX_L1_TLB_CNTL,
2381
	       (0xA << 7) |
2382
	       ENABLE_L1_TLB |
2383
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2384
	       ENABLE_ADVANCED_DRIVER_MODEL |
2385
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2386
	/* Setup L2 cache */
2387
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2388
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2389
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2390
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2391
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2392
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2393
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2394
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2395
	/* setup context0 */
2396
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2397
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2398
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2399
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2400
			(u32)(rdev->dummy_page.addr >> 12));
2401
	WREG32(VM_CONTEXT0_CNTL2, 0);
2402
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2403
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2404
 
2405
	WREG32(0x15D4, 0);
2406
	WREG32(0x15D8, 0);
2407
	WREG32(0x15DC, 0);
2408
 
2409
	/* empty context1-15 */
2410
	/* set vm size, must be a multiple of 4 */
2411
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2412
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
2413
	/* Assign the pt base to something valid for now; the pts used for
2414
	 * the VMs are determined by the application and set up and assigned
2415
	 * on the fly in the vm part of radeon_gart.c
2416
	 */
2417
	for (i = 1; i < 16; i++) {
2418
		if (i < 8)
2419
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2420
			       rdev->gart.table_addr >> 12);
2421
		else
2422
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2423
			       rdev->gart.table_addr >> 12);
2424
	}
2425
 
2426
	/* enable context1-15 */
2427
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2428
	       (u32)(rdev->dummy_page.addr >> 12));
2429
	WREG32(VM_CONTEXT1_CNTL2, 0);
2430
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
2431
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
2432
 
2433
	si_pcie_gart_tlb_flush(rdev);
2434
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2435
		 (unsigned)(rdev->mc.gtt_size >> 20),
2436
		 (unsigned long long)rdev->gart.table_addr);
2437
	rdev->gart.ready = true;
2438
	return 0;
2439
}
2440
 
2441
static void si_pcie_gart_disable(struct radeon_device *rdev)
2442
{
2443
	/* Disable all tables */
2444
	WREG32(VM_CONTEXT0_CNTL, 0);
2445
	WREG32(VM_CONTEXT1_CNTL, 0);
2446
	/* Setup TLB control */
2447
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2448
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2449
	/* Setup L2 cache */
2450
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2451
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2452
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2453
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2454
	WREG32(VM_L2_CNTL2, 0);
2455
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2456
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2457
	radeon_gart_table_vram_unpin(rdev);
2458
}
2459
 
2460
static void si_pcie_gart_fini(struct radeon_device *rdev)
2461
{
2462
	si_pcie_gart_disable(rdev);
2463
	radeon_gart_table_vram_free(rdev);
2464
//   radeon_gart_fini(rdev);
2465
}
2466
 
2467
/* vm parser */
2468
static bool si_vm_reg_valid(u32 reg)
2469
{
2470
	/* context regs are fine */
2471
	if (reg >= 0x28000)
2472
		return true;
2473
 
2474
	/* check config regs */
2475
	switch (reg) {
2476
	case GRBM_GFX_INDEX:
3031 serge 2477
	case CP_STRMOUT_CNTL:
2997 Serge 2478
	case VGT_VTX_VECT_EJECT_REG:
2479
	case VGT_CACHE_INVALIDATION:
2480
	case VGT_ESGS_RING_SIZE:
2481
	case VGT_GSVS_RING_SIZE:
2482
	case VGT_GS_VERTEX_REUSE:
2483
	case VGT_PRIMITIVE_TYPE:
2484
	case VGT_INDEX_TYPE:
2485
	case VGT_NUM_INDICES:
2486
	case VGT_NUM_INSTANCES:
2487
	case VGT_TF_RING_SIZE:
2488
	case VGT_HS_OFFCHIP_PARAM:
2489
	case VGT_TF_MEMORY_BASE:
2490
	case PA_CL_ENHANCE:
2491
	case PA_SU_LINE_STIPPLE_VALUE:
2492
	case PA_SC_LINE_STIPPLE_STATE:
2493
	case PA_SC_ENHANCE:
2494
	case SQC_CACHES:
2495
	case SPI_STATIC_THREAD_MGMT_1:
2496
	case SPI_STATIC_THREAD_MGMT_2:
2497
	case SPI_STATIC_THREAD_MGMT_3:
2498
	case SPI_PS_MAX_WAVE_ID:
2499
	case SPI_CONFIG_CNTL:
2500
	case SPI_CONFIG_CNTL_1:
2501
	case TA_CNTL_AUX:
2502
		return true;
2503
	default:
2504
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2505
		return false;
2506
	}
2507
}
2508
 
2509
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2510
				  u32 *ib, struct radeon_cs_packet *pkt)
2511
{
2512
	switch (pkt->opcode) {
2513
	case PACKET3_NOP:
2514
	case PACKET3_SET_BASE:
2515
	case PACKET3_SET_CE_DE_COUNTERS:
2516
	case PACKET3_LOAD_CONST_RAM:
2517
	case PACKET3_WRITE_CONST_RAM:
2518
	case PACKET3_WRITE_CONST_RAM_OFFSET:
2519
	case PACKET3_DUMP_CONST_RAM:
2520
	case PACKET3_INCREMENT_CE_COUNTER:
2521
	case PACKET3_WAIT_ON_DE_COUNTER:
2522
	case PACKET3_CE_WRITE:
2523
		break;
2524
	default:
2525
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2526
		return -EINVAL;
2527
	}
2528
	return 0;
2529
}
2530
 
2531
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2532
				   u32 *ib, struct radeon_cs_packet *pkt)
2533
{
2534
	u32 idx = pkt->idx + 1;
2535
	u32 idx_value = ib[idx];
2536
	u32 start_reg, end_reg, reg, i;
2537
 
2538
	switch (pkt->opcode) {
2539
	case PACKET3_NOP:
2540
	case PACKET3_SET_BASE:
2541
	case PACKET3_CLEAR_STATE:
2542
	case PACKET3_INDEX_BUFFER_SIZE:
2543
	case PACKET3_DISPATCH_DIRECT:
2544
	case PACKET3_DISPATCH_INDIRECT:
2545
	case PACKET3_ALLOC_GDS:
2546
	case PACKET3_WRITE_GDS_RAM:
2547
	case PACKET3_ATOMIC_GDS:
2548
	case PACKET3_ATOMIC:
2549
	case PACKET3_OCCLUSION_QUERY:
2550
	case PACKET3_SET_PREDICATION:
2551
	case PACKET3_COND_EXEC:
2552
	case PACKET3_PRED_EXEC:
2553
	case PACKET3_DRAW_INDIRECT:
2554
	case PACKET3_DRAW_INDEX_INDIRECT:
2555
	case PACKET3_INDEX_BASE:
2556
	case PACKET3_DRAW_INDEX_2:
2557
	case PACKET3_CONTEXT_CONTROL:
2558
	case PACKET3_INDEX_TYPE:
2559
	case PACKET3_DRAW_INDIRECT_MULTI:
2560
	case PACKET3_DRAW_INDEX_AUTO:
2561
	case PACKET3_DRAW_INDEX_IMMD:
2562
	case PACKET3_NUM_INSTANCES:
2563
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
2564
	case PACKET3_STRMOUT_BUFFER_UPDATE:
2565
	case PACKET3_DRAW_INDEX_OFFSET_2:
2566
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
2567
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
2568
	case PACKET3_MPEG_INDEX:
2569
	case PACKET3_WAIT_REG_MEM:
2570
	case PACKET3_MEM_WRITE:
2571
	case PACKET3_PFP_SYNC_ME:
2572
	case PACKET3_SURFACE_SYNC:
2573
	case PACKET3_EVENT_WRITE:
2574
	case PACKET3_EVENT_WRITE_EOP:
2575
	case PACKET3_EVENT_WRITE_EOS:
2576
	case PACKET3_SET_CONTEXT_REG:
2577
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2578
	case PACKET3_SET_SH_REG:
2579
	case PACKET3_SET_SH_REG_OFFSET:
2580
	case PACKET3_INCREMENT_DE_COUNTER:
2581
	case PACKET3_WAIT_ON_CE_COUNTER:
2582
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2583
	case PACKET3_ME_WRITE:
2584
		break;
2585
	case PACKET3_COPY_DATA:
2586
		if ((idx_value & 0xf00) == 0) {
2587
			reg = ib[idx + 3] * 4;
2588
			if (!si_vm_reg_valid(reg))
2589
				return -EINVAL;
2590
		}
2591
		break;
2592
	case PACKET3_WRITE_DATA:
2593
		if ((idx_value & 0xf00) == 0) {
2594
			start_reg = ib[idx + 1] * 4;
2595
			if (idx_value & 0x10000) {
2596
				if (!si_vm_reg_valid(start_reg))
2597
					return -EINVAL;
2598
			} else {
2599
				for (i = 0; i < (pkt->count - 2); i++) {
2600
					reg = start_reg + (4 * i);
2601
					if (!si_vm_reg_valid(reg))
2602
						return -EINVAL;
2603
				}
2604
			}
2605
		}
2606
		break;
2607
	case PACKET3_COND_WRITE:
2608
		if (idx_value & 0x100) {
2609
			reg = ib[idx + 5] * 4;
2610
			if (!si_vm_reg_valid(reg))
2611
				return -EINVAL;
2612
		}
2613
		break;
2614
	case PACKET3_COPY_DW:
2615
		if (idx_value & 0x2) {
2616
			reg = ib[idx + 3] * 4;
2617
			if (!si_vm_reg_valid(reg))
2618
				return -EINVAL;
2619
		}
2620
		break;
2621
	case PACKET3_SET_CONFIG_REG:
2622
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2623
		end_reg = 4 * pkt->count + start_reg - 4;
2624
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2625
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2626
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2627
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2628
			return -EINVAL;
2629
		}
2630
		for (i = 0; i < pkt->count; i++) {
2631
			reg = start_reg + (4 * i);
2632
			if (!si_vm_reg_valid(reg))
2633
				return -EINVAL;
2634
		}
2635
		break;
2636
	default:
2637
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2638
		return -EINVAL;
2639
	}
2640
	return 0;
2641
}
2642
 
2643
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2644
				       u32 *ib, struct radeon_cs_packet *pkt)
2645
{
2646
	u32 idx = pkt->idx + 1;
2647
	u32 idx_value = ib[idx];
2648
	u32 start_reg, reg, i;
2649
 
2650
	switch (pkt->opcode) {
2651
	case PACKET3_NOP:
2652
	case PACKET3_SET_BASE:
2653
	case PACKET3_CLEAR_STATE:
2654
	case PACKET3_DISPATCH_DIRECT:
2655
	case PACKET3_DISPATCH_INDIRECT:
2656
	case PACKET3_ALLOC_GDS:
2657
	case PACKET3_WRITE_GDS_RAM:
2658
	case PACKET3_ATOMIC_GDS:
2659
	case PACKET3_ATOMIC:
2660
	case PACKET3_OCCLUSION_QUERY:
2661
	case PACKET3_SET_PREDICATION:
2662
	case PACKET3_COND_EXEC:
2663
	case PACKET3_PRED_EXEC:
2664
	case PACKET3_CONTEXT_CONTROL:
2665
	case PACKET3_STRMOUT_BUFFER_UPDATE:
2666
	case PACKET3_WAIT_REG_MEM:
2667
	case PACKET3_MEM_WRITE:
2668
	case PACKET3_PFP_SYNC_ME:
2669
	case PACKET3_SURFACE_SYNC:
2670
	case PACKET3_EVENT_WRITE:
2671
	case PACKET3_EVENT_WRITE_EOP:
2672
	case PACKET3_EVENT_WRITE_EOS:
2673
	case PACKET3_SET_CONTEXT_REG:
2674
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2675
	case PACKET3_SET_SH_REG:
2676
	case PACKET3_SET_SH_REG_OFFSET:
2677
	case PACKET3_INCREMENT_DE_COUNTER:
2678
	case PACKET3_WAIT_ON_CE_COUNTER:
2679
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2680
	case PACKET3_ME_WRITE:
2681
		break;
2682
	case PACKET3_COPY_DATA:
2683
		if ((idx_value & 0xf00) == 0) {
2684
			reg = ib[idx + 3] * 4;
2685
			if (!si_vm_reg_valid(reg))
2686
				return -EINVAL;
2687
		}
2688
		break;
2689
	case PACKET3_WRITE_DATA:
2690
		if ((idx_value & 0xf00) == 0) {
2691
			start_reg = ib[idx + 1] * 4;
2692
			if (idx_value & 0x10000) {
2693
				if (!si_vm_reg_valid(start_reg))
2694
					return -EINVAL;
2695
			} else {
2696
				for (i = 0; i < (pkt->count - 2); i++) {
2697
					reg = start_reg + (4 * i);
2698
					if (!si_vm_reg_valid(reg))
2699
						return -EINVAL;
2700
				}
2701
			}
2702
		}
2703
		break;
2704
	case PACKET3_COND_WRITE:
2705
		if (idx_value & 0x100) {
2706
			reg = ib[idx + 5] * 4;
2707
			if (!si_vm_reg_valid(reg))
2708
				return -EINVAL;
2709
		}
2710
		break;
2711
	case PACKET3_COPY_DW:
2712
		if (idx_value & 0x2) {
2713
			reg = ib[idx + 3] * 4;
2714
			if (!si_vm_reg_valid(reg))
2715
				return -EINVAL;
2716
		}
2717
		break;
2718
	default:
2719
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2720
		return -EINVAL;
2721
	}
2722
	return 0;
2723
}
2724
 
2725
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2726
{
2727
	int ret = 0;
2728
	u32 idx = 0;
2729
	struct radeon_cs_packet pkt;
2730
 
2731
	do {
2732
		pkt.idx = idx;
2733
		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2734
		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2735
		pkt.one_reg_wr = 0;
2736
		switch (pkt.type) {
2737
		case PACKET_TYPE0:
2738
			dev_err(rdev->dev, "Packet0 not allowed!\n");
2739
			ret = -EINVAL;
2740
			break;
2741
		case PACKET_TYPE2:
2742
			idx += 1;
2743
			break;
2744
		case PACKET_TYPE3:
2745
			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2746
			if (ib->is_const_ib)
2747
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2748
			else {
2749
				switch (ib->ring) {
2750
				case RADEON_RING_TYPE_GFX_INDEX:
2751
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2752
					break;
2753
				case CAYMAN_RING_TYPE_CP1_INDEX:
2754
				case CAYMAN_RING_TYPE_CP2_INDEX:
2755
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2756
					break;
2757
				default:
2758
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
2759
					ret = -EINVAL;
2760
					break;
2761
				}
2762
			}
2763
			idx += pkt.count + 2;
2764
			break;
2765
		default:
2766
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
2767
			ret = -EINVAL;
2768
			break;
2769
		}
2770
		if (ret)
2771
			break;
2772
	} while (idx < ib->length_dw);
2773
 
2774
	return ret;
2775
}
2776
 
2777
/*
2778
 * vm
2779
 */
2780
int si_vm_init(struct radeon_device *rdev)
2781
{
2782
	/* number of VMs */
2783
	rdev->vm_manager.nvm = 16;
2784
	/* base offset of vram pages */
2785
	rdev->vm_manager.vram_base_offset = 0;
2786
 
2787
	return 0;
2788
}
2789
 
2790
void si_vm_fini(struct radeon_device *rdev)
2791
{
2792
}
2793
 
2794
/**
2795
 * si_vm_set_page - update the page tables using the CP
2796
 *
2797
 * @rdev: radeon_device pointer
2798
 * @pe: addr of the page entry
2799
 * @addr: dst addr to write into pe
2800
 * @count: number of page entries to update
2801
 * @incr: increase next addr by incr bytes
2802
 * @flags: access flags
2803
 *
2804
 * Update the page tables using the CP (cayman-si).
2805
 */
2806
void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
2807
		    uint64_t addr, unsigned count,
2808
		    uint32_t incr, uint32_t flags)
2809
{
2810
	struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
2811
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2812
 
2813
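	/* a single WRITE_DATA packet can only carry a limited number of dwords,
	 * so large updates are split into chunks of at most 0x3FFE dwords */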
	while (count) {
2814
		unsigned ndw = 2 + count * 2;
2815
		if (ndw > 0x3FFE)
2816
			ndw = 0x3FFE;
2817
 
2818
		radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
2819
		radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2820
					 WRITE_DATA_DST_SEL(1)));
2821
		radeon_ring_write(ring, pe);
2822
		radeon_ring_write(ring, upper_32_bits(pe));
2823
		for (; ndw > 2; ndw -= 2, --count, pe += 8) {
2824
			uint64_t value;
2825
			if (flags & RADEON_VM_PAGE_SYSTEM) {
2826
				value = radeon_vm_map_gart(rdev, addr);
2827
				value &= 0xFFFFFFFFFFFFF000ULL;
2828
			} else if (flags & RADEON_VM_PAGE_VALID)
2829
				value = addr;
2830
			else
2831
				value = 0;
2832
			addr += incr;
2833
			value |= r600_flags;
2834
			radeon_ring_write(ring, value);
2835
			radeon_ring_write(ring, upper_32_bits(value));
2836
		}
2837
	}
2838
}
2839
 
2840
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2841
{
2842
	struct radeon_ring *ring = &rdev->ring[ridx];
2843
 
2844
	if (vm == NULL)
2845
		return;
2846
 
2847
	/* write new base address */
2848
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2849
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2850
				 WRITE_DATA_DST_SEL(0)));
2851
 
2852
	if (vm->id < 8) {
2853
		radeon_ring_write(ring,
2854
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
2855
	} else {
2856
		radeon_ring_write(ring,
2857
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
2858
	}
2859
	radeon_ring_write(ring, 0);
2860
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2861
 
2862
	/* flush hdp cache */
2863
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2864
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2865
				 WRITE_DATA_DST_SEL(0)));
2866
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2867
	radeon_ring_write(ring, 0);
2868
	radeon_ring_write(ring, 0x1);
2869
 
2870
	/* bits 0-15 are the VM contexts0-15 */
2871
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2872
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2873
				 WRITE_DATA_DST_SEL(0)));
2874
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
2875
	radeon_ring_write(ring, 0);
2876
	radeon_ring_write(ring, 1 << vm->id);
2877
 
2878
	/* sync PFP to ME, otherwise we might get invalid PFP reads */
2879
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2880
	radeon_ring_write(ring, 0x0);
2881
}
2882
 
2883
/*
2884
 * RLC
2885
 */
2886
void si_rlc_fini(struct radeon_device *rdev)
2887
{
2888
	int r;
2889
 
2890
	/* save restore block */
2891
	if (rdev->rlc.save_restore_obj) {
2892
		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
2893
		if (unlikely(r != 0))
2894
			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
2895
		radeon_bo_unpin(rdev->rlc.save_restore_obj);
2896
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
2897
 
2898
		radeon_bo_unref(&rdev->rlc.save_restore_obj);
2899
		rdev->rlc.save_restore_obj = NULL;
2900
	}
2901
 
2902
	/* clear state block */
2903
	if (rdev->rlc.clear_state_obj) {
2904
		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
2905
		if (unlikely(r != 0))
2906
			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
2907
		radeon_bo_unpin(rdev->rlc.clear_state_obj);
2908
		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
2909
 
2910
		radeon_bo_unref(&rdev->rlc.clear_state_obj);
2911
		rdev->rlc.clear_state_obj = NULL;
2912
	}
2913
}
2914
 
2915
int si_rlc_init(struct radeon_device *rdev)
2916
{
2917
	int r;
2918
 
2919
	/* save restore block */
2920
	if (rdev->rlc.save_restore_obj == NULL) {
2921
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
2922
				     RADEON_GEM_DOMAIN_VRAM, NULL,
2923
				     &rdev->rlc.save_restore_obj);
2924
		if (r) {
2925
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
2926
			return r;
2927
		}
2928
	}
2929
 
2930
	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
2931
	if (unlikely(r != 0)) {
2932
		si_rlc_fini(rdev);
2933
		return r;
2934
	}
2935
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
2936
			  &rdev->rlc.save_restore_gpu_addr);
2937
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
2938
	if (r) {
2939
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
2940
		si_rlc_fini(rdev);
2941
		return r;
2942
	}
2943
 
2944
	/* clear state block */
2945
	if (rdev->rlc.clear_state_obj == NULL) {
2946
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
2947
				     RADEON_GEM_DOMAIN_VRAM, NULL,
2948
				     &rdev->rlc.clear_state_obj);
2949
		if (r) {
2950
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
2951
			si_rlc_fini(rdev);
2952
			return r;
2953
		}
2954
	}
2955
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
2956
	if (unlikely(r != 0)) {
2957
		si_rlc_fini(rdev);
2958
		return r;
2959
	}
2960
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
2961
			  &rdev->rlc.clear_state_gpu_addr);
2962
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
2963
	if (r) {
2964
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
2965
		si_rlc_fini(rdev);
2966
		return r;
2967
	}
2968
 
2969
	return 0;
2970
}
2971
 
2972
static void si_rlc_stop(struct radeon_device *rdev)
2973
{
2974
	WREG32(RLC_CNTL, 0);
2975
}
2976
 
2977
static void si_rlc_start(struct radeon_device *rdev)
2978
{
2979
	WREG32(RLC_CNTL, RLC_ENABLE);
2980
}
2981
 
2982
static int si_rlc_resume(struct radeon_device *rdev)
2983
{
2984
	u32 i;
2985
	const __be32 *fw_data;
2986
 
2987
	if (!rdev->rlc_fw)
2988
		return -EINVAL;
2989
 
2990
	si_rlc_stop(rdev);
2991
 
2992
	WREG32(RLC_RL_BASE, 0);
2993
	WREG32(RLC_RL_SIZE, 0);
2994
	WREG32(RLC_LB_CNTL, 0);
2995
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
2996
	WREG32(RLC_LB_CNTR_INIT, 0);
2997
 
2998
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
2999
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
3000
 
3001
	WREG32(RLC_MC_CNTL, 0);
3002
	WREG32(RLC_UCODE_CNTL, 0);
3003
 
3004
	fw_data = (const __be32 *)rdev->rlc_fw->data;
3005
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
3006
		WREG32(RLC_UCODE_ADDR, i);
3007
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
3008
	}
3009
	WREG32(RLC_UCODE_ADDR, 0);
3010
 
3011
	si_rlc_start(rdev);
3012
 
3013
	return 0;
3014
}
3015
 
3016
static void si_enable_interrupts(struct radeon_device *rdev)
3017
{
3018
	u32 ih_cntl = RREG32(IH_CNTL);
3019
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3020
 
3021
	ih_cntl |= ENABLE_INTR;
3022
	ih_rb_cntl |= IH_RB_ENABLE;
3023
	WREG32(IH_CNTL, ih_cntl);
3024
	WREG32(IH_RB_CNTL, ih_rb_cntl);
3025
	rdev->ih.enabled = true;
3026
}
3027
 
3028
static void si_disable_interrupts(struct radeon_device *rdev)
3029
{
3030
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3031
	u32 ih_cntl = RREG32(IH_CNTL);
3032
 
3033
	ih_rb_cntl &= ~IH_RB_ENABLE;
3034
	ih_cntl &= ~ENABLE_INTR;
3035
	WREG32(IH_RB_CNTL, ih_rb_cntl);
3036
	WREG32(IH_CNTL, ih_cntl);
3037
	/* set rptr, wptr to 0 */
3038
	WREG32(IH_RB_RPTR, 0);
3039
	WREG32(IH_RB_WPTR, 0);
3040
	rdev->ih.enabled = false;
3041
	rdev->ih.rptr = 0;
3042
}
3043
 
3044
static void si_disable_interrupt_state(struct radeon_device *rdev)
3045
{
3046
	u32 tmp;
3047
 
3048
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3049
	WREG32(CP_INT_CNTL_RING1, 0);
3050
	WREG32(CP_INT_CNTL_RING2, 0);
3051
	WREG32(GRBM_INT_CNTL, 0);
3052
	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3053
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3054
	if (rdev->num_crtc >= 4) {
3055
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3056
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3057
	}
3058
	if (rdev->num_crtc >= 6) {
3059
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3060
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3061
	}
3062
 
3063
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3064
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3065
	if (rdev->num_crtc >= 4) {
3066
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3067
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3068
	}
3069
	if (rdev->num_crtc >= 6) {
3070
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3071
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3072
	}
3073
 
3074
	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
3075
 
3076
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3077
	WREG32(DC_HPD1_INT_CONTROL, tmp);
3078
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3079
	WREG32(DC_HPD2_INT_CONTROL, tmp);
3080
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3081
	WREG32(DC_HPD3_INT_CONTROL, tmp);
3082
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3083
	WREG32(DC_HPD4_INT_CONTROL, tmp);
3084
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3085
	WREG32(DC_HPD5_INT_CONTROL, tmp);
3086
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3087
	WREG32(DC_HPD6_INT_CONTROL, tmp);
3088
 
3089
}
3090
 
3091
static int si_irq_init(struct radeon_device *rdev)
3092
{
3093
	int ret = 0;
3094
	int rb_bufsz;
3095
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3096
 
3097
	/* allocate ring */
3098
	ret = r600_ih_ring_alloc(rdev);
3099
	if (ret)
3100
		return ret;
3101
 
3102
	/* disable irqs */
3103
	si_disable_interrupts(rdev);
3104
 
3105
	/* init rlc */
3106
	ret = si_rlc_resume(rdev);
3107
	if (ret) {
3108
		r600_ih_ring_fini(rdev);
3109
		return ret;
3110
	}
3111
 
3112
	/* setup interrupt control */
3113
	/* set dummy read address to ring address */
3114
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3115
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
3116
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3117
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3118
	 */
3119
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3120
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3121
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3122
	WREG32(INTERRUPT_CNTL, interrupt_cntl);
3123
 
3124
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3125
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3126
 
3127
	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3128
		      IH_WPTR_OVERFLOW_CLEAR |
3129
		      (rb_bufsz << 1));
3130
 
3131
	if (rdev->wb.enabled)
3132
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3133
 
3134
	/* set the writeback address whether it's enabled or not */
3135
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3136
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3137
 
3138
	WREG32(IH_RB_CNTL, ih_rb_cntl);
3139
 
3140
	/* set rptr, wptr to 0 */
3141
	WREG32(IH_RB_RPTR, 0);
3142
	WREG32(IH_RB_WPTR, 0);
3143
 
3144
	/* Default settings for IH_CNTL (disabled at first) */
3145
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3146
	/* RPTR_REARM only works if msi's are enabled */
3147
	if (rdev->msi_enabled)
3148
		ih_cntl |= RPTR_REARM;
3149
	WREG32(IH_CNTL, ih_cntl);
3150
 
3151
	/* force the active interrupt state to all disabled */
3152
	si_disable_interrupt_state(rdev);
3153
 
3154
	pci_set_master(rdev->pdev);
3155
 
3156
	/* enable irqs */
3157
	si_enable_interrupts(rdev);
3158
 
3159
	return ret;
3160
}
3161
 
3162
int si_irq_set(struct radeon_device *rdev)
3163
{
3164
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
3165
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
3166
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3167
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3168
	u32 grbm_int_cntl = 0;
3169
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3170
 
3171
	if (!rdev->irq.installed) {
3172
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
3173
		return -EINVAL;
3174
	}
3175
	/* don't enable anything if the ih is disabled */
3176
	if (!rdev->ih.enabled) {
3177
		si_disable_interrupts(rdev);
3178
		/* force the active interrupt state to all disabled */
3179
		si_disable_interrupt_state(rdev);
3180
		return 0;
3181
	}
3182
 
3183
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3184
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3185
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3186
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3187
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3188
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3189
 
3190
	/* enable CP interrupts on all rings */
3191
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3192
		DRM_DEBUG("si_irq_set: sw int gfx\n");
3193
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3194
	}
3195
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
3196
		DRM_DEBUG("si_irq_set: sw int cp1\n");
3197
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
3198
	}
3199
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
3200
		DRM_DEBUG("si_irq_set: sw int cp2\n");
3201
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3202
	}
3203
	if (rdev->irq.crtc_vblank_int[0] ||
3204
	    atomic_read(&rdev->irq.pflip[0])) {
3205
		DRM_DEBUG("si_irq_set: vblank 0\n");
3206
		crtc1 |= VBLANK_INT_MASK;
3207
	}
3208
	if (rdev->irq.crtc_vblank_int[1] ||
3209
	    atomic_read(&rdev->irq.pflip[1])) {
3210
		DRM_DEBUG("si_irq_set: vblank 1\n");
3211
		crtc2 |= VBLANK_INT_MASK;
3212
	}
3213
	if (rdev->irq.crtc_vblank_int[2] ||
3214
	    atomic_read(&rdev->irq.pflip[2])) {
3215
		DRM_DEBUG("si_irq_set: vblank 2\n");
3216
		crtc3 |= VBLANK_INT_MASK;
3217
	}
3218
	if (rdev->irq.crtc_vblank_int[3] ||
3219
	    atomic_read(&rdev->irq.pflip[3])) {
3220
		DRM_DEBUG("si_irq_set: vblank 3\n");
3221
		crtc4 |= VBLANK_INT_MASK;
3222
	}
3223
	if (rdev->irq.crtc_vblank_int[4] ||
3224
	    atomic_read(&rdev->irq.pflip[4])) {
3225
		DRM_DEBUG("si_irq_set: vblank 4\n");
3226
		crtc5 |= VBLANK_INT_MASK;
3227
	}
3228
	if (rdev->irq.crtc_vblank_int[5] ||
3229
	    atomic_read(&rdev->irq.pflip[5])) {
3230
		DRM_DEBUG("si_irq_set: vblank 5\n");
3231
		crtc6 |= VBLANK_INT_MASK;
3232
	}
3233
	if (rdev->irq.hpd[0]) {
3234
		DRM_DEBUG("si_irq_set: hpd 1\n");
3235
		hpd1 |= DC_HPDx_INT_EN;
3236
	}
3237
	if (rdev->irq.hpd[1]) {
3238
		DRM_DEBUG("si_irq_set: hpd 2\n");
3239
		hpd2 |= DC_HPDx_INT_EN;
3240
	}
3241
	if (rdev->irq.hpd[2]) {
3242
		DRM_DEBUG("si_irq_set: hpd 3\n");
3243
		hpd3 |= DC_HPDx_INT_EN;
3244
	}
3245
	if (rdev->irq.hpd[3]) {
3246
		DRM_DEBUG("si_irq_set: hpd 4\n");
3247
		hpd4 |= DC_HPDx_INT_EN;
3248
	}
3249
	if (rdev->irq.hpd[4]) {
3250
		DRM_DEBUG("si_irq_set: hpd 5\n");
3251
		hpd5 |= DC_HPDx_INT_EN;
3252
	}
3253
	if (rdev->irq.hpd[5]) {
3254
		DRM_DEBUG("si_irq_set: hpd 6\n");
3255
		hpd6 |= DC_HPDx_INT_EN;
3256
	}
3257
 
3258
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3259
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3260
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3261
 
3262
	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3263
 
3264
	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
3265
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
3266
	if (rdev->num_crtc >= 4) {
3267
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
3268
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
3269
	}
3270
	if (rdev->num_crtc >= 6) {
3271
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
3272
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
3273
	}
3274
 
3275
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
3276
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
3277
	if (rdev->num_crtc >= 4) {
3278
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
3279
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
3280
	}
3281
	if (rdev->num_crtc >= 6) {
3282
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
3283
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
3284
	}
3285
 
3286
	WREG32(DC_HPD1_INT_CONTROL, hpd1);
3287
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
3288
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
3289
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
3290
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
3291
	WREG32(DC_HPD6_INT_CONTROL, hpd6);
3292
 
3293
	return 0;
3294
}
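/*
 * Example usage sketch (for illustration only, never compiled): callers do
 * not poke the interrupt registers directly; they set the per-source flags
 * in rdev->irq (the same fields si_irq_set() reads above) and then re-run
 * the irq_set hook.  Locking is omitted here; the radeon_irq_kms helpers
 * serialize these updates in the real driver.
 */
#if 0
static void example_enable_crtc0_vblank(struct radeon_device *rdev)
{
	rdev->irq.crtc_vblank_int[0] = true;	/* request D1 vblank */
	si_irq_set(rdev);			/* fold the flags into INT_MASK etc. */
}
#endif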
3295
 
3296
static inline void si_irq_ack(struct radeon_device *rdev)
3297
{
3298
	u32 tmp;
3299
 
3300
	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3301
	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3302
	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3303
	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3304
	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3305
	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3306
	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
3307
	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
3308
	if (rdev->num_crtc >= 4) {
3309
		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
3310
		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
3311
	}
3312
	if (rdev->num_crtc >= 6) {
3313
		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
3314
		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
3315
	}
3316
 
3317
	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
3318
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3319
	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
3320
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3321
	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
3322
		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3323
	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
3324
		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3325
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3326
		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3327
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3328
		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3329
 
3330
	if (rdev->num_crtc >= 4) {
3331
		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
3332
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3333
		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
3334
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3335
		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3336
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3337
		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3338
			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3339
		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3340
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3341
		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3342
			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3343
	}
3344
 
3345
	if (rdev->num_crtc >= 6) {
3346
		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
3347
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3348
		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
3349
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3350
		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3351
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3352
		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3353
			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3354
		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3355
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3356
		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3357
			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
3358
	}
3359
 
3360
	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3361
		tmp = RREG32(DC_HPD1_INT_CONTROL);
3362
		tmp |= DC_HPDx_INT_ACK;
3363
		WREG32(DC_HPD1_INT_CONTROL, tmp);
3364
	}
3365
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3366
		tmp = RREG32(DC_HPD2_INT_CONTROL);
3367
		tmp |= DC_HPDx_INT_ACK;
3368
		WREG32(DC_HPD2_INT_CONTROL, tmp);
3369
	}
3370
	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3371
		tmp = RREG32(DC_HPD3_INT_CONTROL);
3372
		tmp |= DC_HPDx_INT_ACK;
3373
		WREG32(DC_HPD3_INT_CONTROL, tmp);
3374
	}
3375
	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3376
		tmp = RREG32(DC_HPD4_INT_CONTROL);
3377
		tmp |= DC_HPDx_INT_ACK;
3378
		WREG32(DC_HPD4_INT_CONTROL, tmp);
3379
	}
3380
	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3381
		tmp = RREG32(DC_HPD5_INT_CONTROL);
3382
		tmp |= DC_HPDx_INT_ACK;
3383
		WREG32(DC_HPD5_INT_CONTROL, tmp);
3384
	}
3385
	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3386
		tmp = RREG32(DC_HPD6_INT_CONTROL);
3387
		tmp |= DC_HPDx_INT_ACK;
3388
		WREG32(DC_HPD6_INT_CONTROL, tmp);
3389
	}
3390
}
3391
 
3392
static void si_irq_disable(struct radeon_device *rdev)
3393
{
3394
	si_disable_interrupts(rdev);
3395
	/* Wait and acknowledge irq */
3396
	mdelay(1);
3397
	si_irq_ack(rdev);
3398
	si_disable_interrupt_state(rdev);
3399
}
3400
 
3401
static void si_irq_suspend(struct radeon_device *rdev)
3402
{
3403
	si_irq_disable(rdev);
3404
	si_rlc_stop(rdev);
3405
}
3406
 
3407
static void si_irq_fini(struct radeon_device *rdev)
3408
{
3409
	si_irq_suspend(rdev);
3410
	r600_ih_ring_fini(rdev);
3411
}
3412
 
3413
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
3414
{
3415
	u32 wptr, tmp;
3416
 
3417
	if (rdev->wb.enabled)
3418
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3419
	else
3420
		wptr = RREG32(IH_RB_WPTR);
3421
 
3422
	if (wptr & RB_OVERFLOW) {
3423
		/* When a ring buffer overflow happens, start parsing interrupts
3424
		 * from the last non-overwritten vector (wptr + 16). Hopefully
3425
		 * this should allow us to catch up.
3426
		 */
3427
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3428
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
3429
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3430
		tmp = RREG32(IH_RB_CNTL);
3431
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
3432
		WREG32(IH_RB_CNTL, tmp);
3433
	}
3434
	return (wptr & rdev->ih.ptr_mask);
3435
}
3436
 
3437
/*        SI IV Ring
3438
 * Each IV ring entry is 128 bits:
3439
 * [7:0]    - interrupt source id
3440
 * [31:8]   - reserved
3441
 * [59:32]  - interrupt source data
3442
 * [63:60]  - reserved
3443
 * [71:64]  - RINGID
3444
 * [79:72]  - VMID
3445
 * [127:80] - reserved
3446
 */
3447
int si_irq_process(struct radeon_device *rdev)
3448
{
3449
	u32 wptr;
3450
	u32 rptr;
3451
	u32 src_id, src_data, ring_id;
3452
	u32 ring_index;
3453
	bool queue_hotplug = false;
3454
 
3455
	if (!rdev->ih.enabled || rdev->shutdown)
3456
		return IRQ_NONE;
3457
 
3458
	wptr = si_get_ih_wptr(rdev);
3459
 
3460
restart_ih:
3461
	/* is somebody else already processing irqs? */
3462
	if (atomic_xchg(&rdev->ih.lock, 1))
3463
		return IRQ_NONE;
3464
 
3465
	rptr = rdev->ih.rptr;
3466
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3467
 
3468
	/* Order reading of wptr vs. reading of IH ring data */
3469
	rmb();
3470
 
3471
	/* display interrupts */
3472
	si_irq_ack(rdev);
3473
 
3474
	while (rptr != wptr) {
3475
		/* wptr/rptr are in bytes! */
3476
		ring_index = rptr / 4;
3477
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3478
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3479
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3480
 
3481
		switch (src_id) {
3482
		case 1: /* D1 vblank/vline */
3483
			switch (src_data) {
3484
			case 0: /* D1 vblank */
3485
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
3486
					if (rdev->irq.crtc_vblank_int[0]) {
3487
//                       drm_handle_vblank(rdev->ddev, 0);
3488
						rdev->pm.vblank_sync = true;
3489
//                       wake_up(&rdev->irq.vblank_queue);
3490
					}
3491
//                   if (atomic_read(&rdev->irq.pflip[0]))
3492
//                       radeon_crtc_handle_flip(rdev, 0);
3493
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3494
					DRM_DEBUG("IH: D1 vblank\n");
3495
				}
3496
				break;
3497
			case 1: /* D1 vline */
3498
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
3499
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3500
					DRM_DEBUG("IH: D1 vline\n");
3501
				}
3502
				break;
3503
			default:
3504
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3505
				break;
3506
			}
3507
			break;
3508
		case 2: /* D2 vblank/vline */
3509
			switch (src_data) {
3510
			case 0: /* D2 vblank */
3511
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3512
					if (rdev->irq.crtc_vblank_int[1]) {
3513
//                       drm_handle_vblank(rdev->ddev, 1);
3514
						rdev->pm.vblank_sync = true;
3515
//                       wake_up(&rdev->irq.vblank_queue);
3516
					}
3517
//                   if (atomic_read(&rdev->irq.pflip[1]))
3518
//                       radeon_crtc_handle_flip(rdev, 1);
3519
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3520
					DRM_DEBUG("IH: D2 vblank\n");
3521
				}
3522
				break;
3523
			case 1: /* D2 vline */
3524
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3525
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3526
					DRM_DEBUG("IH: D2 vline\n");
3527
				}
3528
				break;
3529
			default:
3530
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3531
				break;
3532
			}
3533
			break;
3534
		case 3: /* D3 vblank/vline */
3535
			switch (src_data) {
3536
			case 0: /* D3 vblank */
3537
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3538
					if (rdev->irq.crtc_vblank_int[2]) {
3539
//                       drm_handle_vblank(rdev->ddev, 2);
3540
						rdev->pm.vblank_sync = true;
3541
//                       wake_up(&rdev->irq.vblank_queue);
3542
					}
3543
//                   if (atomic_read(&rdev->irq.pflip[2]))
3544
//                       radeon_crtc_handle_flip(rdev, 2);
3545
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3546
					DRM_DEBUG("IH: D3 vblank\n");
3547
				}
3548
				break;
3549
			case 1: /* D3 vline */
3550
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3551
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3552
					DRM_DEBUG("IH: D3 vline\n");
3553
				}
3554
				break;
3555
			default:
3556
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3557
				break;
3558
			}
3559
			break;
3560
		case 4: /* D4 vblank/vline */
3561
			switch (src_data) {
3562
			case 0: /* D4 vblank */
3563
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3564
					if (rdev->irq.crtc_vblank_int[3]) {
3565
//                       drm_handle_vblank(rdev->ddev, 3);
3566
						rdev->pm.vblank_sync = true;
3567
//                       wake_up(&rdev->irq.vblank_queue);
3568
					}
3569
//                   if (atomic_read(&rdev->irq.pflip[3]))
3570
//                       radeon_crtc_handle_flip(rdev, 3);
3571
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3572
					DRM_DEBUG("IH: D4 vblank\n");
3573
				}
3574
				break;
3575
			case 1: /* D4 vline */
3576
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3577
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3578
					DRM_DEBUG("IH: D4 vline\n");
3579
				}
3580
				break;
3581
			default:
3582
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3583
				break;
3584
			}
3585
			break;
3586
		case 5: /* D5 vblank/vline */
3587
			switch (src_data) {
3588
			case 0: /* D5 vblank */
3589
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3590
					if (rdev->irq.crtc_vblank_int[4]) {
3591
//                       drm_handle_vblank(rdev->ddev, 4);
3592
						rdev->pm.vblank_sync = true;
3593
//                       wake_up(&rdev->irq.vblank_queue);
3594
					}
3595
//                   if (atomic_read(&rdev->irq.pflip[4]))
3596
//                       radeon_crtc_handle_flip(rdev, 4);
3597
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3598
					DRM_DEBUG("IH: D5 vblank\n");
3599
				}
3600
				break;
3601
			case 1: /* D5 vline */
3602
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3603
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3604
					DRM_DEBUG("IH: D5 vline\n");
3605
				}
3606
				break;
3607
			default:
3608
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3609
				break;
3610
			}
3611
			break;
3612
		case 6: /* D6 vblank/vline */
3613
			switch (src_data) {
3614
			case 0: /* D6 vblank */
3615
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3616
					if (rdev->irq.crtc_vblank_int[5]) {
3617
//                       drm_handle_vblank(rdev->ddev, 5);
3618
						rdev->pm.vblank_sync = true;
3619
//                       wake_up(&rdev->irq.vblank_queue);
3620
					}
3621
//                   if (atomic_read(&rdev->irq.pflip[5]))
3622
//                       radeon_crtc_handle_flip(rdev, 5);
3623
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3624
					DRM_DEBUG("IH: D6 vblank\n");
3625
				}
3626
				break;
3627
			case 1: /* D6 vline */
3628
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3629
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3630
					DRM_DEBUG("IH: D6 vline\n");
3631
				}
3632
				break;
3633
			default:
3634
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3635
				break;
3636
			}
3637
			break;
3638
		case 42: /* HPD hotplug */
3639
			switch (src_data) {
3640
			case 0:
3641
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3642
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
3643
					queue_hotplug = true;
3644
					DRM_DEBUG("IH: HPD1\n");
3645
				}
3646
				break;
3647
			case 1:
3648
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3649
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3650
					queue_hotplug = true;
3651
					DRM_DEBUG("IH: HPD2\n");
3652
				}
3653
				break;
3654
			case 2:
3655
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3656
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3657
					queue_hotplug = true;
3658
					DRM_DEBUG("IH: HPD3\n");
3659
				}
3660
				break;
3661
			case 3:
3662
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3663
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3664
					queue_hotplug = true;
3665
					DRM_DEBUG("IH: HPD4\n");
3666
				}
3667
				break;
3668
			case 4:
3669
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3670
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3671
					queue_hotplug = true;
3672
					DRM_DEBUG("IH: HPD5\n");
3673
				}
3674
				break;
3675
			case 5:
3676
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3677
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3678
					queue_hotplug = true;
3679
					DRM_DEBUG("IH: HPD6\n");
3680
				}
3681
				break;
3682
			default:
3683
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3684
				break;
3685
			}
3686
			break;
3687
		case 176: /* RINGID0 CP_INT */
3688
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3689
			break;
3690
		case 177: /* RINGID1 CP_INT */
3691
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3692
			break;
3693
		case 178: /* RINGID2 CP_INT */
3694
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3695
			break;
3696
		case 181: /* CP EOP event */
3697
			DRM_DEBUG("IH: CP EOP\n");
3698
			switch (ring_id) {
3699
			case 0:
3700
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3701
				break;
3702
			case 1:
3703
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3704
				break;
3705
			case 2:
3706
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3707
				break;
3708
			}
3709
			break;
3710
		case 233: /* GUI IDLE */
3711
			DRM_DEBUG("IH: GUI idle\n");
3712
			break;
3713
		default:
3714
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3715
			break;
3716
		}
3717
 
3718
		/* wptr/rptr are in bytes! */
3719
		rptr += 16;
3720
		rptr &= rdev->ih.ptr_mask;
3721
	}
3722
//   if (queue_hotplug)
3723
//       schedule_work(&rdev->hotplug_work);
3724
	rdev->ih.rptr = rptr;
3725
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
3726
	atomic_set(&rdev->ih.lock, 0);
3727
 
3728
	/* make sure wptr hasn't changed while processing */
3729
	wptr = si_get_ih_wptr(rdev);
3730
	if (wptr != rptr)
3731
		goto restart_ih;
3732
 
3733
	return IRQ_HANDLED;
3734
}
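/*
 * Example sketch (for illustration only, never compiled): decoding one
 * 128-bit IV ring entry the same way si_irq_process() does, following the
 * field layout documented above the function.  dw[] holds the four dwords
 * of one entry after le32_to_cpu().
 */
#if 0
struct example_iv_entry {
	u32 src_id;	/* bits [7:0]   - interrupt source id   */
	u32 src_data;	/* bits [59:32] - interrupt source data */
	u32 ring_id;	/* bits [71:64] - RINGID                */
	u32 vm_id;	/* bits [79:72] - VMID                  */
};

static struct example_iv_entry example_decode_iv(const u32 dw[4])
{
	struct example_iv_entry e;

	e.src_id   = dw[0] & 0xff;
	e.src_data = dw[1] & 0xfffffff;
	e.ring_id  = dw[2] & 0xff;
	e.vm_id    = (dw[2] >> 8) & 0xff;

	return e;
}
#endif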
3735
 
3736
/*
3737
 * startup/shutdown callbacks
3738
 */
3739
static int si_startup(struct radeon_device *rdev)
3740
{
3741
	struct radeon_ring *ring;
3742
	int r;
3743
 
3744
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
3745
	    !rdev->rlc_fw || !rdev->mc_fw) {
3746
		r = si_init_microcode(rdev);
3747
		if (r) {
3748
			DRM_ERROR("Failed to load firmware!\n");
3749
			return r;
3750
		}
3751
	}
3752
 
3753
	r = si_mc_load_microcode(rdev);
3754
	if (r) {
3755
		DRM_ERROR("Failed to load MC firmware!\n");
3756
		return r;
3757
	}
3758
 
3759
	r = r600_vram_scratch_init(rdev);
3760
	if (r)
3761
		return r;
3762
 
3763
	si_mc_program(rdev);
3764
	r = si_pcie_gart_enable(rdev);
3765
	if (r)
3766
		return r;
3767
	si_gpu_init(rdev);
3768
 
3769
#if 0
3770
	r = evergreen_blit_init(rdev);
3771
	if (r) {
3772
		r600_blit_fini(rdev);
3773
		rdev->asic->copy = NULL;
3774
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
3775
	}
3776
#endif
3777
	/* allocate rlc buffers */
3778
	r = si_rlc_init(rdev);
3779
	if (r) {
3780
		DRM_ERROR("Failed to init rlc BOs!\n");
3781
		return r;
3782
	}
3783
 
3784
	/* allocate wb buffer */
3785
	r = radeon_wb_init(rdev);
3786
	if (r)
3787
		return r;
3788
 
3789
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3790
	if (r) {
3791
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3792
		return r;
3793
	}
3794
 
3795
	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3796
	if (r) {
3797
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3798
		return r;
3799
	}
3800
 
3801
	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3802
	if (r) {
3803
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3804
		return r;
3805
	}
3806
 
3807
	/* Enable IRQ */
3808
	r = si_irq_init(rdev);
3809
	if (r) {
3810
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
3811
		radeon_irq_kms_fini(rdev);
3812
		return r;
3813
	}
3814
	si_irq_set(rdev);
3815
 
3816
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3817
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
3818
			     CP_RB0_RPTR, CP_RB0_WPTR,
3819
			     0, 0xfffff, RADEON_CP_PACKET2);
3820
	if (r)
3821
		return r;
3822
 
3823
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3824
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
3825
			     CP_RB1_RPTR, CP_RB1_WPTR,
3826
			     0, 0xfffff, RADEON_CP_PACKET2);
3827
	if (r)
3828
		return r;
3829
 
3830
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3831
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
3832
			     CP_RB2_RPTR, CP_RB2_WPTR,
3833
			     0, 0xfffff, RADEON_CP_PACKET2);
3834
	if (r)
3835
		return r;
3836
 
3837
	r = si_cp_load_microcode(rdev);
3838
	if (r)
3839
		return r;
3840
	r = si_cp_resume(rdev);
3841
	if (r)
3842
		return r;
3843
 
3844
//   r = radeon_ib_pool_init(rdev);
3845
//   if (r) {
3846
//       dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3847
//       return r;
3848
//   }
3849
 
3850
//   r = radeon_vm_manager_init(rdev);
3851
//   if (r) {
3852
//       dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
3853
//       return r;
3854
//   }
3855
 
3856
	return 0;
3857
}
3858
 
3859
 
3860
/* The plan is to move initialization into this function and use
3861
 * helper functions so that radeon_device_init does pretty much
3862
 * nothing more than call ASIC-specific functions.  This should
3863
 * also allow us to remove a bunch of callback functions,
3864
 * like vram_info.
3865
 */
3866
int si_init(struct radeon_device *rdev)
3867
{
3868
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3869
	int r;
3870
 
3871
    ENTER();
3872
 
3873
	/* Read BIOS */
3874
	if (!radeon_get_bios(rdev)) {
3875
		if (ASIC_IS_AVIVO(rdev))
3876
			return -EINVAL;
3877
	}
3878
	/* Must be an ATOMBIOS */
3879
	if (!rdev->is_atom_bios) {
3880
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
3881
		return -EINVAL;
3882
	}
3883
	r = radeon_atombios_init(rdev);
3884
	if (r)
3885
		return r;
3886
 
3887
	/* Post card if necessary */
3888
	if (!radeon_card_posted(rdev)) {
3889
		if (!rdev->bios) {
3890
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
3891
			return -EINVAL;
3892
		}
3893
		DRM_INFO("GPU not posted. posting now...\n");
3894
		atom_asic_init(rdev->mode_info.atom_context);
3895
	}
3896
	/* Initialize scratch registers */
3897
	si_scratch_init(rdev);
3898
	/* Initialize surface registers */
3899
	radeon_surface_init(rdev);
3900
	/* Initialize clocks */
3901
	radeon_get_clock_info(rdev->ddev);
3902
 
3903
	/* Fence driver */
3904
	r = radeon_fence_driver_init(rdev);
3905
	if (r)
3906
		return r;
3907
 
3908
	/* initialize memory controller */
3909
	r = si_mc_init(rdev);
3910
	if (r)
3911
		return r;
3912
	/* Memory manager */
3913
	r = radeon_bo_init(rdev);
3914
	if (r)
3915
		return r;
3916
 
3917
	r = radeon_irq_kms_init(rdev);
3918
	if (r)
3919
		return r;
3920
 
3921
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3922
	ring->ring_obj = NULL;
3923
	r600_ring_init(rdev, ring, 1024 * 1024);
3924
 
3925
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3926
	ring->ring_obj = NULL;
3927
	r600_ring_init(rdev, ring, 1024 * 1024);
3928
 
3929
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3930
	ring->ring_obj = NULL;
3931
	r600_ring_init(rdev, ring, 1024 * 1024);
3932
 
3933
	rdev->ih.ring_obj = NULL;
3934
	r600_ih_ring_init(rdev, 64 * 1024);
3935
 
3936
	r = r600_pcie_gart_init(rdev);
3937
	if (r)
3938
		return r;
3939
 
3940
	rdev->accel_working = true;
3941
	r = si_startup(rdev);
3942
	if (r) {
3943
		dev_err(rdev->dev, "disabling GPU acceleration\n");
3944
//       si_cp_fini(rdev);
3945
//       si_irq_fini(rdev);
3946
//       si_rlc_fini(rdev);
3947
//       radeon_wb_fini(rdev);
3948
//       radeon_ib_pool_fini(rdev);
3949
//       radeon_vm_manager_fini(rdev);
3950
//       radeon_irq_kms_fini(rdev);
3951
//       si_pcie_gart_fini(rdev);
3952
		rdev->accel_working = false;
3953
	}
3954
 
3955
	/* Don't start up if the MC ucode is missing.
3956
	 * The default clocks and voltages before the MC ucode
3957
	 * is loaded are not sufficient for advanced operations.
3958
	 */
3959
	if (!rdev->mc_fw) {
3960
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
3961
		return -EINVAL;
3962
	}
3963
    LEAVE();
3964
 
3965
	return 0;
3966
}
3967
 
3968
/**
3969
 * si_get_gpu_clock - return GPU clock counter snapshot
3970
 *
3971
 * @rdev: radeon_device pointer
3972
 *
3973
 * Fetches a GPU clock counter snapshot (SI).
3974
 * Returns the 64 bit clock counter snapshot.
3975
 */
3976
uint64_t si_get_gpu_clock(struct radeon_device *rdev)
3977
{
3978
	uint64_t clock;
3979
 
3980
	mutex_lock(&rdev->gpu_clock_mutex);
3981
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3982
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
3983
	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3984
	mutex_unlock(&rdev->gpu_clock_mutex);
3985
	return clock;
3986
}
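 
/*
 * Example usage sketch (for illustration only, never compiled): two
 * snapshots of the RLC clock counter give the number of GPU clocks that
 * elapsed between them.  Converting that delta to wall time would also
 * need the current shader clock frequency, which this sketch does not
 * query.
 */
#if 0
static uint64_t example_gpu_clock_delta(struct radeon_device *rdev)
{
	uint64_t start, end;

	start = si_get_gpu_clock(rdev);
	/* ... workload being measured ... */
	end = si_get_gpu_clock(rdev);

	return end - start;
}
#endif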