/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
//#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"

#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769

MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

/* ucode loading */
static int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_TAHITI:
		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_PITCAIRN:
		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_VERDE:
	default:
		io_mc_regs = (u32 *)&verde_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

static int si_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		rlc_chip_name = "TAHITI";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		rlc_chip_name = "PITCAIRN";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		rlc_chip_name = "VERDE";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mc_fw->size != mc_req_size) {
		printk(KERN_ERR
		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mc_fw->size, fw_name);
		err = -EINVAL;
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "si_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/* watermark setup */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode)
			tmp = 0; /* 1/2 */
		else
			tmp = 2; /* whole */
	} else
		tmp = 0;

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}

static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}

struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
495
 
496
static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
497
{
498
	/* Calculate raw DRAM Bandwidth */
499
	fixed20_12 dram_efficiency; /* 0.7 */
500
	fixed20_12 yclk, dram_channels, bandwidth;
501
	fixed20_12 a;
502
 
503
	a.full = dfixed_const(1000);
504
	yclk.full = dfixed_const(wm->yclk);
505
	yclk.full = dfixed_div(yclk, a);
506
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
507
	a.full = dfixed_const(10);
508
	dram_efficiency.full = dfixed_const(7);
509
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
510
	bandwidth.full = dfixed_mul(dram_channels, yclk);
511
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
512
 
513
	return dfixed_trunc(bandwidth);
514
}
515
 
516
static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
517
{
518
	/* Calculate DRAM Bandwidth and the part allocated to display. */
519
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
520
	fixed20_12 yclk, dram_channels, bandwidth;
521
	fixed20_12 a;
522
 
523
	a.full = dfixed_const(1000);
524
	yclk.full = dfixed_const(wm->yclk);
525
	yclk.full = dfixed_div(yclk, a);
526
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
527
	a.full = dfixed_const(10);
528
	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
529
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
530
	bandwidth.full = dfixed_mul(dram_channels, yclk);
531
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
532
 
533
	return dfixed_trunc(bandwidth);
534
}
535
 
536
static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
537
{
538
	/* Calculate the display Data return Bandwidth */
539
	fixed20_12 return_efficiency; /* 0.8 */
540
	fixed20_12 sclk, bandwidth;
541
	fixed20_12 a;
542
 
543
	a.full = dfixed_const(1000);
544
	sclk.full = dfixed_const(wm->sclk);
545
	sclk.full = dfixed_div(sclk, a);
546
	a.full = dfixed_const(10);
547
	return_efficiency.full = dfixed_const(8);
548
	return_efficiency.full = dfixed_div(return_efficiency, a);
549
	a.full = dfixed_const(32);
550
	bandwidth.full = dfixed_mul(a, sclk);
551
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
552
 
553
	return dfixed_trunc(bandwidth);
554
}
555
 
556
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
557
{
558
	return 32;
559
}
560
 
561
static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
562
{
563
	/* Calculate the DMIF Request Bandwidth */
564
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
565
	fixed20_12 disp_clk, sclk, bandwidth;
566
	fixed20_12 a, b1, b2;
567
	u32 min_bandwidth;
568
 
569
	a.full = dfixed_const(1000);
570
	disp_clk.full = dfixed_const(wm->disp_clk);
571
	disp_clk.full = dfixed_div(disp_clk, a);
572
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
573
	b1.full = dfixed_mul(a, disp_clk);
574
 
575
	a.full = dfixed_const(1000);
576
	sclk.full = dfixed_const(wm->sclk);
577
	sclk.full = dfixed_div(sclk, a);
578
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
579
	b2.full = dfixed_mul(a, sclk);
580
 
581
	a.full = dfixed_const(10);
582
	disp_clk_request_efficiency.full = dfixed_const(8);
583
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
584
 
585
	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
586
 
587
	a.full = dfixed_const(min_bandwidth);
588
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
589
 
590
	return dfixed_trunc(bandwidth);
591
}
592
 
593
static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
594
{
595
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
596
	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
597
	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
598
	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
599
 
600
	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
601
}
602
 
603
static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
604
{
605
	/* Calculate the display mode Average Bandwidth
606
	 * DisplayMode should contain the source and destination dimensions,
607
	 * timing, etc.
608
	 */
609
	fixed20_12 bpp;
610
	fixed20_12 line_time;
611
	fixed20_12 src_width;
612
	fixed20_12 bandwidth;
613
	fixed20_12 a;
614
 
615
	a.full = dfixed_const(1000);
616
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
617
	line_time.full = dfixed_div(line_time, a);
618
	bpp.full = dfixed_const(wm->bytes_per_pixel);
619
	src_width.full = dfixed_const(wm->src_width);
620
	bandwidth.full = dfixed_mul(src_width, bpp);
621
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
622
	bandwidth.full = dfixed_div(bandwidth, line_time);
623
 
624
	return dfixed_trunc(bandwidth);
625
}
626
 
627
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
628
{
629
	/* First calculate the latency in ns */
630
	u32 mc_latency = 2000; /* 2000 ns. */
631
	u32 available_bandwidth = dce6_available_bandwidth(wm);
632
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
633
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
634
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
635
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
636
		(wm->num_heads * cursor_line_pair_return_time);
637
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
638
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
639
	u32 tmp, dmif_size = 12288;
640
	fixed20_12 a, b, c;
641
 
642
	if (wm->num_heads == 0)
643
		return 0;
644
 
645
	a.full = dfixed_const(2);
646
	b.full = dfixed_const(1);
647
	if ((wm->vsc.full > a.full) ||
648
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
649
	    (wm->vtaps >= 5) ||
650
	    ((wm->vsc.full >= a.full) && wm->interlaced))
651
		max_src_lines_per_dst_line = 4;
652
	else
653
		max_src_lines_per_dst_line = 2;
654
 
655
	a.full = dfixed_const(available_bandwidth);
656
	b.full = dfixed_const(wm->num_heads);
657
	a.full = dfixed_div(a, b);
658
 
659
	b.full = dfixed_const(mc_latency + 512);
660
	c.full = dfixed_const(wm->disp_clk);
661
	b.full = dfixed_div(b, c);
662
 
663
	c.full = dfixed_const(dmif_size);
664
	b.full = dfixed_div(c, b);
665
 
666
	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
667
 
668
	b.full = dfixed_const(1000);
669
	c.full = dfixed_const(wm->disp_clk);
670
	b.full = dfixed_div(c, b);
671
	c.full = dfixed_const(wm->bytes_per_pixel);
672
	b.full = dfixed_mul(b, c);
673
 
674
	lb_fill_bw = min(tmp, dfixed_trunc(b));
675
 
676
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
677
	b.full = dfixed_const(1000);
678
	c.full = dfixed_const(lb_fill_bw);
679
	b.full = dfixed_div(c, b);
680
	a.full = dfixed_div(a, b);
681
	line_fill_time = dfixed_trunc(a);
682
 
683
	if (line_fill_time < wm->active_time)
684
		return latency;
685
	else
686
		return latency + (line_fill_time - wm->active_time);
687
 
688
}
689
 
690
static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
691
{
692
	if (dce6_average_bandwidth(wm) <=
693
	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
694
		return true;
695
	else
696
		return false;
697
};
698
 
699
static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
700
{
701
	if (dce6_average_bandwidth(wm) <=
702
	    (dce6_available_bandwidth(wm) / wm->num_heads))
703
		return true;
704
	else
705
		return false;
706
};
707
 
708
static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
709
{
710
	u32 lb_partitions = wm->lb_size / wm->src_width;
711
	u32 line_time = wm->active_time + wm->blank_time;
712
	u32 latency_tolerant_lines;
713
	u32 latency_hiding;
714
	fixed20_12 a;
715
 
716
	a.full = dfixed_const(1);
717
	if (wm->vsc.full > a.full)
718
		latency_tolerant_lines = 1;
719
	else {
720
		if (lb_partitions <= (wm->vtaps + 1))
721
			latency_tolerant_lines = 1;
722
		else
723
			latency_tolerant_lines = 2;
724
	}
725
 
726
	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
727
 
728
	if (dce6_latency_watermark(wm) <= latency_hiding)
729
		return true;
730
	else
731
		return false;
732
}
733
 
734
static void dce6_program_watermarks(struct radeon_device *rdev,
735
					 struct radeon_crtc *radeon_crtc,
736
					 u32 lb_size, u32 num_heads)
737
{
738
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
739
	struct dce6_wm_params wm;
740
	u32 pixel_period;
741
	u32 line_time = 0;
742
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
743
	u32 priority_a_mark = 0, priority_b_mark = 0;
744
	u32 priority_a_cnt = PRIORITY_OFF;
745
	u32 priority_b_cnt = PRIORITY_OFF;
746
	u32 tmp, arb_control3;
747
	fixed20_12 a, b, c;
748
 
749
	if (radeon_crtc->base.enabled && num_heads && mode) {
750
		pixel_period = 1000000 / (u32)mode->clock;
751
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
752
		priority_a_cnt = 0;
753
		priority_b_cnt = 0;
754
 
755
		wm.yclk = rdev->pm.current_mclk * 10;
756
		wm.sclk = rdev->pm.current_sclk * 10;
757
		wm.disp_clk = mode->clock;
758
		wm.src_width = mode->crtc_hdisplay;
759
		wm.active_time = mode->crtc_hdisplay * pixel_period;
760
		wm.blank_time = line_time - wm.active_time;
761
		wm.interlaced = false;
762
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
763
			wm.interlaced = true;
764
		wm.vsc = radeon_crtc->vsc;
765
		wm.vtaps = 1;
766
		if (radeon_crtc->rmx_type != RMX_OFF)
767
			wm.vtaps = 2;
768
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
769
		wm.lb_size = lb_size;
770
		if (rdev->family == CHIP_ARUBA)
771
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
772
		else
773
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
774
		wm.num_heads = num_heads;
775
 
776
		/* set for high clocks */
777
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
778
		/* set for low clocks */
779
		/* wm.yclk = low clk; wm.sclk = low clk */
780
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
781
 
782
		/* possibly force display priority to high */
783
		/* should really do this at mode validation time... */
784
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
785
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
786
		    !dce6_check_latency_hiding(&wm) ||
787
		    (rdev->disp_priority == 2)) {
788
			DRM_DEBUG_KMS("force priority to high\n");
789
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
790
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
791
		}
792
 
793
		a.full = dfixed_const(1000);
794
		b.full = dfixed_const(mode->clock);
795
		b.full = dfixed_div(b, a);
796
		c.full = dfixed_const(latency_watermark_a);
797
		c.full = dfixed_mul(c, b);
798
		c.full = dfixed_mul(c, radeon_crtc->hsc);
799
		c.full = dfixed_div(c, a);
800
		a.full = dfixed_const(16);
801
		c.full = dfixed_div(c, a);
802
		priority_a_mark = dfixed_trunc(c);
803
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
804
 
805
		a.full = dfixed_const(1000);
806
		b.full = dfixed_const(mode->clock);
807
		b.full = dfixed_div(b, a);
808
		c.full = dfixed_const(latency_watermark_b);
809
		c.full = dfixed_mul(c, b);
810
		c.full = dfixed_mul(c, radeon_crtc->hsc);
811
		c.full = dfixed_div(c, a);
812
		a.full = dfixed_const(16);
813
		c.full = dfixed_div(c, a);
814
		priority_b_mark = dfixed_trunc(c);
815
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
816
	}
817
 
818
	/* select wm A */
819
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
820
	tmp = arb_control3;
821
	tmp &= ~LATENCY_WATERMARK_MASK(3);
822
	tmp |= LATENCY_WATERMARK_MASK(1);
823
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
824
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
825
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
826
		LATENCY_HIGH_WATERMARK(line_time)));
827
	/* select wm B */
828
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
829
	tmp &= ~LATENCY_WATERMARK_MASK(3);
830
	tmp |= LATENCY_WATERMARK_MASK(2);
831
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
832
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
833
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
834
		LATENCY_HIGH_WATERMARK(line_time)));
835
	/* restore original selection */
836
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
837
 
838
	/* write the priority marks */
839
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
840
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
841
 
842
}
843
 
844
void dce6_bandwidth_update(struct radeon_device *rdev)
845
{
846
	struct drm_display_mode *mode0 = NULL;
847
	struct drm_display_mode *mode1 = NULL;
848
	u32 num_heads = 0, lb_size;
849
	int i;
850
 
851
	radeon_update_display_priority(rdev);
852
 
853
	for (i = 0; i < rdev->num_crtc; i++) {
854
		if (rdev->mode_info.crtcs[i]->base.enabled)
855
			num_heads++;
856
	}
857
	for (i = 0; i < rdev->num_crtc; i += 2) {
858
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
859
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
860
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
861
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
862
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
863
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
864
	}
865
}
866
 
867
/*
868
 * Core functions
869
 */
870
static void si_tiling_mode_table_init(struct radeon_device *rdev)
871
{
872
	const u32 num_tile_mode_states = 32;
873
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
874
 
875
	switch (rdev->config.si.mem_row_size_in_kb) {
876
	case 1:
877
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
878
		break;
879
	case 2:
880
	default:
881
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
882
		break;
883
	case 4:
884
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
885
		break;
886
	}
887
 
888
	if ((rdev->family == CHIP_TAHITI) ||
889
	    (rdev->family == CHIP_PITCAIRN)) {
890
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
891
			switch (reg_offset) {
892
			case 0:  /* non-AA compressed depth or any compressed stencil */
893
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
894
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
895
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
896
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
897
						 NUM_BANKS(ADDR_SURF_16_BANK) |
898
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
899
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
900
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
901
				break;
902
			case 1:  /* 2xAA/4xAA compressed depth only */
903
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
904
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
905
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
906
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
907
						 NUM_BANKS(ADDR_SURF_16_BANK) |
908
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
909
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
910
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
911
				break;
912
			case 2:  /* 8xAA compressed depth only */
913
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
914
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
915
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
916
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
917
						 NUM_BANKS(ADDR_SURF_16_BANK) |
918
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
919
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
920
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
921
				break;
922
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
923
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
924
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
925
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
926
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
927
						 NUM_BANKS(ADDR_SURF_16_BANK) |
928
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
929
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
930
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
931
				break;
932
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
933
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
934
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
935
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
936
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
937
						 NUM_BANKS(ADDR_SURF_16_BANK) |
938
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
939
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
940
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
941
				break;
942
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
943
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
944
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
945
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
946
						 TILE_SPLIT(split_equal_to_row_size) |
947
						 NUM_BANKS(ADDR_SURF_16_BANK) |
948
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
949
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
950
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
951
				break;
952
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
953
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
954
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
955
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
956
						 TILE_SPLIT(split_equal_to_row_size) |
957
						 NUM_BANKS(ADDR_SURF_16_BANK) |
958
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
959
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
960
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
961
				break;
962
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
963
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
964
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
965
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
966
						 TILE_SPLIT(split_equal_to_row_size) |
967
						 NUM_BANKS(ADDR_SURF_16_BANK) |
968
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
969
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
970
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
971
				break;
972
			case 8:  /* 1D and 1D Array Surfaces */
973
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
974
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
975
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
976
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
977
						 NUM_BANKS(ADDR_SURF_16_BANK) |
978
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
979
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
980
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
981
				break;
982
			case 9:  /* Displayable maps. */
983
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
984
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
985
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
986
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
987
						 NUM_BANKS(ADDR_SURF_16_BANK) |
988
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
989
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
990
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
991
				break;
992
			case 10:  /* Display 8bpp. */
993
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
994
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
995
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
996
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
997
						 NUM_BANKS(ADDR_SURF_16_BANK) |
998
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
999
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1000
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1001
				break;
1002
			case 11:  /* Display 16bpp. */
1003
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1004
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1005
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1006
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1007
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1008
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1009
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1010
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1011
				break;
1012
			case 12:  /* Display 32bpp. */
1013
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1014
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1015
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1016
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1017
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1018
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1019
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1020
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1021
				break;
1022
			case 13:  /* Thin. */
1023
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1024
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1025
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1026
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1027
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1028
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1029
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1030
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1031
				break;
1032
			case 14:  /* Thin 8 bpp. */
1033
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1034
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1035
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1036
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1037
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1038
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1039
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1040
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1041
				break;
1042
			case 15:  /* Thin 16 bpp. */
1043
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1044
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1045
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1046
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1047
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1048
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1049
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1050
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1051
				break;
1052
			case 16:  /* Thin 32 bpp. */
1053
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1054
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1055
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1056
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1057
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1058
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1059
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1060
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1061
				break;
1062
			case 17:  /* Thin 64 bpp. */
1063
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1064
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1065
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1066
						 TILE_SPLIT(split_equal_to_row_size) |
1067
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1068
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1069
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1070
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1071
				break;
1072
			case 21:  /* 8 bpp PRT. */
1073
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1074
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1075
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1076
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1077
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1078
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1079
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1080
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1081
				break;
1082
			case 22:  /* 16 bpp PRT */
1083
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1084
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1085
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1086
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1087
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1088
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1089
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1090
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1091
				break;
1092
			case 23:  /* 32 bpp PRT */
1093
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1094
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1095
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1096
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1097
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1098
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1099
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1100
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1101
				break;
1102
			case 24:  /* 64 bpp PRT */
1103
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1104
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1105
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1106
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1107
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1108
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1109
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1110
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1111
				break;
1112
			case 25:  /* 128 bpp PRT */
1113
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1114
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1115
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1116
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1117
						 NUM_BANKS(ADDR_SURF_8_BANK) |
1118
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1119
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1120
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1121
				break;
1122
			default:
1123
				gb_tile_moden = 0;
1124
				break;
1125
			}
1126
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1127
		}
1128
	} else if (rdev->family == CHIP_VERDE) {
1129
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1130
			switch (reg_offset) {
1131
			case 0:  /* non-AA compressed depth or any compressed stencil */
1132
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1133
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1134
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1135
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1136
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1137
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1138
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1139
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1140
				break;
1141
			case 1:  /* 2xAA/4xAA compressed depth only */
1142
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1143
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1144
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1145
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1146
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1147
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1148
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1149
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1150
				break;
1151
			case 2:  /* 8xAA compressed depth only */
1152
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1153
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1154
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1155
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1156
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1157
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1158
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1159
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1160
				break;
1161
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1162
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1163
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1164
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1165
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1166
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1167
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1168
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1169
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1170
				break;
1171
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1172
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1173
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1174
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1175
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1176
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1177
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1178
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1179
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1180
				break;
1181
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1182
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1183
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1184
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1185
						 TILE_SPLIT(split_equal_to_row_size) |
1186
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1187
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1188
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1189
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1190
				break;
1191
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1192
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1193
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1194
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1195
						 TILE_SPLIT(split_equal_to_row_size) |
1196
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1197
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1198
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1199
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1200
				break;
1201
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1202
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1203
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1204
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1205
						 TILE_SPLIT(split_equal_to_row_size) |
1206
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1207
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1208
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1209
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1210
				break;
1211
			case 8:  /* 1D and 1D Array Surfaces */
1212
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1213
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1214
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1215
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1216
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1217
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1218
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1219
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1220
				break;
1221
			case 9:  /* Displayable maps. */
1222
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1223
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1224
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1225
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1226
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1227
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1228
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1229
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1230
				break;
1231
			case 10:  /* Display 8bpp. */
1232
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1233
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1234
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1235
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1236
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1237
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1238
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1239
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1240
				break;
1241
			case 11:  /* Display 16bpp. */
1242
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1243
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1244
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1245
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1246
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1247
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1248
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1249
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1250
				break;
1251
			case 12:  /* Display 32bpp. */
1252
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1253
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1254
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1255
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1256
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1257
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1258
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1259
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1260
				break;
1261
			case 13:  /* Thin. */
1262
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1263
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1264
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1265
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1266
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1267
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1268
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1269
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1270
				break;
1271
			case 14:  /* Thin 8 bpp. */
1272
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1274
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1275
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1276
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1277
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1278
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1279
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1280
				break;
1281
			case 15:  /* Thin 16 bpp. */
1282
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1283
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1284
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1285
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1286
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1287
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1288
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1289
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1290
				break;
1291
			case 16:  /* Thin 32 bpp. */
1292
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1293
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1294
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1295
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1296
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1297
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1298
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1299
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1300
				break;
1301
			case 17:  /* Thin 64 bpp. */
1302
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1303
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1304
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1305
						 TILE_SPLIT(split_equal_to_row_size) |
1306
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1307
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1308
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1309
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1310
				break;
1311
			case 21:  /* 8 bpp PRT. */
1312
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1314
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1315
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1316
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1317
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1318
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1319
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1320
				break;
1321
			case 22:  /* 16 bpp PRT */
1322
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1323
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1324
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1325
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1326
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1327
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1328
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1329
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1330
				break;
1331
			case 23:  /* 32 bpp PRT */
1332
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1333
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1334
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1335
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1336
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1337
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1338
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1339
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1340
				break;
1341
			case 24:  /* 64 bpp PRT */
1342
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1343
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1344
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1345
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1346
						 NUM_BANKS(ADDR_SURF_16_BANK) |
1347
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1348
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1349
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1350
				break;
1351
			case 25:  /* 128 bpp PRT */
1352
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1353
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1354
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1355
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1356
						 NUM_BANKS(ADDR_SURF_8_BANK) |
1357
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1358
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1359
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1360
				break;
1361
			default:
1362
				gb_tile_moden = 0;
1363
				break;
1364
			}
1365
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1366
		}
1367
	} else
1368
		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1369
}
1370
 
1371
static void si_select_se_sh(struct radeon_device *rdev,
1372
			    u32 se_num, u32 sh_num)
1373
{
1374
	u32 data = INSTANCE_BROADCAST_WRITES;
1375
 
1376
	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1377
		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1378
	else if (se_num == 0xffffffff)
1379
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1380
	else if (sh_num == 0xffffffff)
1381
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1382
	else
1383
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1384
	WREG32(GRBM_GFX_INDEX, data);
1385
}
1386
 
1387
static u32 si_create_bitmask(u32 bit_width)
1388
{
1389
	u32 i, mask = 0;
1390
 
1391
	for (i = 0; i < bit_width; i++) {
1392
		mask <<= 1;
1393
		mask |= 1;
1394
	}
1395
	return mask;
1396
}
1397
 
1398
static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
1399
{
1400
	u32 data, mask;
1401
 
1402
	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1403
	if (data & 1)
1404
		data &= INACTIVE_CUS_MASK;
1405
	else
1406
		data = 0;
1407
	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1408
 
1409
	data >>= INACTIVE_CUS_SHIFT;
1410
 
1411
	mask = si_create_bitmask(cu_per_sh);
1412
 
1413
	return ~data & mask;
1414
}
1415
 
1416
static void si_setup_spi(struct radeon_device *rdev,
1417
			 u32 se_num, u32 sh_per_se,
1418
			 u32 cu_per_sh)
1419
{
1420
	int i, j, k;
1421
	u32 data, mask, active_cu;
1422
 
1423
	for (i = 0; i < se_num; i++) {
1424
		for (j = 0; j < sh_per_se; j++) {
1425
			si_select_se_sh(rdev, i, j);
1426
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1427
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
1428
 
1429
			mask = 1;
1430
			for (k = 0; k < 16; k++) {
1431
				mask <<= k;
1432
				if (active_cu & mask) {
1433
					data &= ~mask;
1434
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1435
					break;
1436
				}
1437
			}
1438
		}
1439
	}
1440
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1441
}
1442
 
1443
static u32 si_get_rb_disabled(struct radeon_device *rdev,
1444
			      u32 max_rb_num, u32 se_num,
1445
			      u32 sh_per_se)
1446
{
1447
	u32 data, mask;
1448
 
1449
	data = RREG32(CC_RB_BACKEND_DISABLE);
1450
	if (data & 1)
1451
		data &= BACKEND_DISABLE_MASK;
1452
	else
1453
		data = 0;
1454
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1455
 
1456
	data >>= BACKEND_DISABLE_SHIFT;
1457
 
1458
	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
1459
 
1460
	return data & mask;
1461
}
1462
 
1463
static void si_setup_rb(struct radeon_device *rdev,
1464
			u32 se_num, u32 sh_per_se,
1465
			u32 max_rb_num)
1466
{
1467
	int i, j;
1468
	u32 data, mask;
1469
	u32 disabled_rbs = 0;
1470
	u32 enabled_rbs = 0;
1471
 
1472
	for (i = 0; i < se_num; i++) {
1473
		for (j = 0; j < sh_per_se; j++) {
1474
			si_select_se_sh(rdev, i, j);
1475
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1476
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1477
		}
1478
	}
1479
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1480
 
1481
	mask = 1;
1482
	for (i = 0; i < max_rb_num; i++) {
1483
		if (!(disabled_rbs & mask))
1484
			enabled_rbs |= mask;
1485
		mask <<= 1;
1486
	}
1487
 
1488
	for (i = 0; i < se_num; i++) {
1489
		si_select_se_sh(rdev, i, 0xffffffff);
1490
		data = 0;
1491
		for (j = 0; j < sh_per_se; j++) {
1492
			switch (enabled_rbs & 3) {
1493
			case 1:
1494
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1495
				break;
1496
			case 2:
1497
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1498
				break;
1499
			case 3:
1500
			default:
1501
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1502
				break;
1503
			}
1504
			enabled_rbs >>= 2;
1505
		}
1506
		WREG32(PA_SC_RASTER_CONFIG, data);
1507
	}
1508
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1509
}
1510
 
1511
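/*
 * One-time gfx block setup: pick the per-family limits (shader
 * engines, tile pipes, CUs, FIFO sizes), derive the tiling
 * configuration from the memory controller settings, initialize the
 * tiling mode table, RBs and SPI, and program the 3D engine defaults.
 */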
static void si_gpu_init(struct radeon_device *rdev)
1512
{
1513
	u32 gb_addr_config = 0;
1514
	u32 mc_shared_chmap, mc_arb_ramcfg;
1515
	u32 sx_debug_1;
1516
	u32 hdp_host_path_cntl;
1517
	u32 tmp;
1518
	int i, j;
1519
 
1520
	switch (rdev->family) {
1521
	case CHIP_TAHITI:
1522
		rdev->config.si.max_shader_engines = 2;
1523
		rdev->config.si.max_tile_pipes = 12;
1524
		rdev->config.si.max_cu_per_sh = 8;
1525
		rdev->config.si.max_sh_per_se = 2;
1526
		rdev->config.si.max_backends_per_se = 4;
1527
		rdev->config.si.max_texture_channel_caches = 12;
1528
		rdev->config.si.max_gprs = 256;
1529
		rdev->config.si.max_gs_threads = 32;
1530
		rdev->config.si.max_hw_contexts = 8;
1531
 
1532
		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1533
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1534
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1535
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1536
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1537
		break;
1538
	case CHIP_PITCAIRN:
1539
		rdev->config.si.max_shader_engines = 2;
1540
		rdev->config.si.max_tile_pipes = 8;
1541
		rdev->config.si.max_cu_per_sh = 5;
1542
		rdev->config.si.max_sh_per_se = 2;
1543
		rdev->config.si.max_backends_per_se = 4;
1544
		rdev->config.si.max_texture_channel_caches = 8;
1545
		rdev->config.si.max_gprs = 256;
1546
		rdev->config.si.max_gs_threads = 32;
1547
		rdev->config.si.max_hw_contexts = 8;
1548
 
1549
		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1550
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1551
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1552
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1553
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1554
		break;
1555
	case CHIP_VERDE:
1556
	default:
1557
		rdev->config.si.max_shader_engines = 1;
1558
		rdev->config.si.max_tile_pipes = 4;
1559
		rdev->config.si.max_cu_per_sh = 2;
1560
		rdev->config.si.max_sh_per_se = 2;
1561
		rdev->config.si.max_backends_per_se = 4;
1562
		rdev->config.si.max_texture_channel_caches = 4;
1563
		rdev->config.si.max_gprs = 256;
1564
		rdev->config.si.max_gs_threads = 32;
1565
		rdev->config.si.max_hw_contexts = 8;
1566
 
1567
		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1568
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1569
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1570
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1571
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1572
		break;
1573
	}
1574
 
1575
	/* Initialize HDP */
1576
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1577
		WREG32((0x2c14 + j), 0x00000000);
1578
		WREG32((0x2c18 + j), 0x00000000);
1579
		WREG32((0x2c1c + j), 0x00000000);
1580
		WREG32((0x2c20 + j), 0x00000000);
1581
		WREG32((0x2c24 + j), 0x00000000);
1582
	}
1583
 
1584
	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1585
 
1586
	evergreen_fix_pci_max_read_req_size(rdev);
1587
 
1588
	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1589
 
1590
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1591
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1592
 
1593
	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1594
	rdev->config.si.mem_max_burst_length_bytes = 256;
1595
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1596
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1597
	if (rdev->config.si.mem_row_size_in_kb > 4)
1598
		rdev->config.si.mem_row_size_in_kb = 4;
1599
	/* XXX use MC settings? */
1600
	rdev->config.si.shader_engine_tile_size = 32;
1601
	rdev->config.si.num_gpus = 1;
1602
	rdev->config.si.multi_gpu_tile_size = 64;
1603
 
1604
	/* fix up row size */
1605
	gb_addr_config &= ~ROW_SIZE_MASK;
1606
	switch (rdev->config.si.mem_row_size_in_kb) {
1607
	case 1:
1608
	default:
1609
		gb_addr_config |= ROW_SIZE(0);
1610
		break;
1611
	case 2:
1612
		gb_addr_config |= ROW_SIZE(1);
1613
		break;
1614
	case 4:
1615
		gb_addr_config |= ROW_SIZE(2);
1616
		break;
1617
	}
1618
 
1619
	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1620
	 * not have bank info, so create a custom tiling dword.
1621
	 * bits 3:0   num_pipes
1622
	 * bits 7:4   num_banks
1623
	 * bits 11:8  group_size
1624
	 * bits 15:12 row_size
1625
	 */
1626
	rdev->config.si.tile_config = 0;
1627
	switch (rdev->config.si.num_tile_pipes) {
1628
	case 1:
1629
		rdev->config.si.tile_config |= (0 << 0);
1630
		break;
1631
	case 2:
1632
		rdev->config.si.tile_config |= (1 << 0);
1633
		break;
1634
	case 4:
1635
		rdev->config.si.tile_config |= (2 << 0);
1636
		break;
1637
	case 8:
1638
	default:
1639
		/* XXX what about 12? */
1640
		rdev->config.si.tile_config |= (3 << 0);
1641
		break;
1642
	}
1643
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1644
	case 0: /* four banks */
1645
		rdev->config.si.tile_config |= 0 << 4;
1646
		break;
1647
	case 1: /* eight banks */
1648
		rdev->config.si.tile_config |= 1 << 4;
1649
		break;
1650
	case 2: /* sixteen banks */
1651
	default:
1652
		rdev->config.si.tile_config |= 2 << 4;
1653
		break;
1654
	}
1655
	rdev->config.si.tile_config |=
1656
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1657
	rdev->config.si.tile_config |=
1658
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1659
 
1660
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1661
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1662
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3192 Serge 1663
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1664
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
2997 Serge 1665
 
1666
	si_tiling_mode_table_init(rdev);
1667
 
1668
	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
1669
		    rdev->config.si.max_sh_per_se,
1670
		    rdev->config.si.max_backends_per_se);
1671
 
1672
	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
1673
		     rdev->config.si.max_sh_per_se,
1674
		     rdev->config.si.max_cu_per_sh);
1675
 
1676
 
1677
	/* set HW defaults for 3D engine */
1678
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1679
				     ROQ_IB2_START(0x2b)));
1680
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1681
 
1682
	sx_debug_1 = RREG32(SX_DEBUG_1);
1683
	WREG32(SX_DEBUG_1, sx_debug_1);
1684
 
1685
	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1686
 
1687
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1688
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1689
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1690
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1691
 
1692
	WREG32(VGT_NUM_INSTANCES, 1);
1693
 
1694
	WREG32(CP_PERFMON_CNTL, 0);
1695
 
1696
	WREG32(SQ_CONFIG, 0);
1697
 
1698
	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1699
					  FORCE_EOV_MAX_REZ_CNT(255)));
1700
 
1701
	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1702
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1703
 
1704
	WREG32(VGT_GS_VERTEX_REUSE, 16);
1705
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1706
 
1707
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1708
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1709
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1710
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1711
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1712
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1713
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1714
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1715
 
1716
	tmp = RREG32(HDP_MISC_CNTL);
1717
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1718
	WREG32(HDP_MISC_CNTL, tmp);
1719
 
1720
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1721
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1722
 
1723
	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1724
 
1725
	udelay(50);
1726
}
1727
 
1728
/*
1729
 * GPU scratch registers helpers function.
1730
 */
1731
static void si_scratch_init(struct radeon_device *rdev)
1732
{
1733
	int i;
1734
 
1735
	rdev->scratch.num_reg = 7;
1736
	rdev->scratch.reg_base = SCRATCH_REG0;
1737
	for (i = 0; i < rdev->scratch.num_reg; i++) {
1738
		rdev->scratch.free[i] = true;
1739
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1740
	}
1741
}
1742
 
1743
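/*
 * Emit a fence on the gfx ring: flush the read caches over the GART,
 * then use EVENT_WRITE_EOP to write the fence sequence number to the
 * fence address and raise an interrupt.
 */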
void si_fence_ring_emit(struct radeon_device *rdev,
1744
			struct radeon_fence *fence)
1745
{
1746
	struct radeon_ring *ring = &rdev->ring[fence->ring];
1747
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1748
 
1749
	/* flush read cache over gart */
1750
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1751
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1752
	radeon_ring_write(ring, 0);
1753
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1754
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1755
			  PACKET3_TC_ACTION_ENA |
1756
			  PACKET3_SH_KCACHE_ACTION_ENA |
1757
			  PACKET3_SH_ICACHE_ACTION_ENA);
1758
	radeon_ring_write(ring, 0xFFFFFFFF);
1759
	radeon_ring_write(ring, 0);
1760
	radeon_ring_write(ring, 10); /* poll interval */
1761
	/* EVENT_WRITE_EOP - flush caches, send int */
1762
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1763
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
1764
	radeon_ring_write(ring, addr & 0xffffffff);
1765
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
1766
	radeon_ring_write(ring, fence->seq);
1767
	radeon_ring_write(ring, 0);
1768
}
1769
 
1770
/*
1771
 * IB stuff
1772
 */
1773
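/*
 * Schedule an indirect buffer on @ib->ring.  Const IBs are preceded by
 * a SWITCH_BUFFER packet; normal IBs first record the expected rptr
 * (via the rptr save register or the writeback page) and flush the
 * read caches for the IB's VM id once the IB packet is emitted.
 */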
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1774
{
1775
	struct radeon_ring *ring = &rdev->ring[ib->ring];
1776
	u32 header;
1777
 
1778
	if (ib->is_const_ib) {
1779
		/* set switch buffer packet before const IB */
1780
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1781
		radeon_ring_write(ring, 0);
1782
 
1783
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1784
	} else {
1785
		u32 next_rptr;
1786
		if (ring->rptr_save_reg) {
1787
			next_rptr = ring->wptr + 3 + 4 + 8;
1788
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1789
			radeon_ring_write(ring, ((ring->rptr_save_reg -
1790
						  PACKET3_SET_CONFIG_REG_START) >> 2));
1791
			radeon_ring_write(ring, next_rptr);
1792
		} else if (rdev->wb.enabled) {
1793
			next_rptr = ring->wptr + 5 + 4 + 8;
1794
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1795
			radeon_ring_write(ring, (1 << 8));
1796
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1797
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1798
			radeon_ring_write(ring, next_rptr);
1799
		}
1800
 
1801
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1802
	}
1803
 
1804
	radeon_ring_write(ring, header);
1805
	radeon_ring_write(ring,
1806
#ifdef __BIG_ENDIAN
1807
			  (2 << 0) |
1808
#endif
1809
			  (ib->gpu_addr & 0xFFFFFFFC));
1810
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1811
	radeon_ring_write(ring, ib->length_dw |
1812
			  (ib->vm ? (ib->vm->id << 24) : 0));
1813
 
1814
	if (!ib->is_const_ib) {
1815
		/* flush read cache over gart for this vmid */
1816
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1817
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1818
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
1819
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1820
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1821
				  PACKET3_TC_ACTION_ENA |
1822
				  PACKET3_SH_KCACHE_ACTION_ENA |
1823
				  PACKET3_SH_ICACHE_ACTION_ENA);
1824
		radeon_ring_write(ring, 0xFFFFFFFF);
1825
		radeon_ring_write(ring, 0);
1826
		radeon_ring_write(ring, 10); /* poll interval */
1827
	}
1828
}
1829
 
1830
/*
1831
 * CP.
1832
 */
1833
static void si_cp_enable(struct radeon_device *rdev, bool enable)
1834
{
1835
	if (enable)
1836
		WREG32(CP_ME_CNTL, 0);
1837
	else {
3192 Serge 1838
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2997 Serge 1839
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1840
		WREG32(SCRATCH_UMSK, 0);
3192 Serge 1841
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1842
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
1843
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2997 Serge 1844
	}
1845
	udelay(50);
1846
}
1847
 
1848
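/*
 * Halt the CP and load the PFP, CE and ME microcode images into their
 * respective ucode RAMs.
 */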
static int si_cp_load_microcode(struct radeon_device *rdev)
1849
{
1850
	const __be32 *fw_data;
1851
	int i;
1852
 
1853
	if (!rdev->me_fw || !rdev->pfp_fw)
1854
		return -EINVAL;
1855
 
1856
	si_cp_enable(rdev, false);
1857
 
1858
	/* PFP */
1859
	fw_data = (const __be32 *)rdev->pfp_fw->data;
1860
	WREG32(CP_PFP_UCODE_ADDR, 0);
1861
	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
1862
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1863
	WREG32(CP_PFP_UCODE_ADDR, 0);
1864
 
1865
	/* CE */
1866
	fw_data = (const __be32 *)rdev->ce_fw->data;
1867
	WREG32(CP_CE_UCODE_ADDR, 0);
1868
	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
1869
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1870
	WREG32(CP_CE_UCODE_ADDR, 0);
1871
 
1872
	/* ME */
1873
	fw_data = (const __be32 *)rdev->me_fw->data;
1874
	WREG32(CP_ME_RAM_WADDR, 0);
1875
	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
1876
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1877
	WREG32(CP_ME_RAM_WADDR, 0);
1878
 
1879
	WREG32(CP_PFP_UCODE_ADDR, 0);
1880
	WREG32(CP_CE_UCODE_ADDR, 0);
1881
	WREG32(CP_ME_RAM_WADDR, 0);
1882
	WREG32(CP_ME_RAM_RADDR, 0);
1883
	return 0;
1884
}
1885
 
1886
static int si_cp_start(struct radeon_device *rdev)
1887
{
1888
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1889
	int r, i;
1890
 
1891
	r = radeon_ring_lock(rdev, ring, 7 + 4);
1892
	if (r) {
1893
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1894
		return r;
1895
	}
1896
	/* init the CP */
1897
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
1898
	radeon_ring_write(ring, 0x1);
1899
	radeon_ring_write(ring, 0x0);
1900
	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
1901
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
1902
	radeon_ring_write(ring, 0);
1903
	radeon_ring_write(ring, 0);
1904
 
1905
	/* init the CE partitions */
1906
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1907
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1908
	radeon_ring_write(ring, 0xc000);
1909
	radeon_ring_write(ring, 0xe000);
1910
	radeon_ring_unlock_commit(rdev, ring);
1911
 
1912
	si_cp_enable(rdev, true);
1913
 
1914
	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
1915
	if (r) {
1916
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1917
		return r;
1918
	}
1919
 
1920
	/* setup clear context state */
1921
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1922
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1923
 
1924
	for (i = 0; i < si_default_size; i++)
1925
		radeon_ring_write(ring, si_default_state[i]);
1926
 
1927
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1928
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1929
 
1930
	/* set clear context state */
1931
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1932
	radeon_ring_write(ring, 0);
1933
 
1934
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1935
	radeon_ring_write(ring, 0x00000316);
1936
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1937
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1938
 
1939
	radeon_ring_unlock_commit(rdev, ring);
1940
 
1941
	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
1942
		ring = &rdev->ring[i];
1943
		r = radeon_ring_lock(rdev, ring, 2);
1944
 
1945
		/* clear the compute context state */
1946
		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
1947
		radeon_ring_write(ring, 0);
1948
 
1949
		radeon_ring_unlock_commit(rdev, ring);
1950
	}
1951
 
1952
	return 0;
1953
}
1954
 
1955
static void si_cp_fini(struct radeon_device *rdev)
1956
{
1957
	struct radeon_ring *ring;
1958
	si_cp_enable(rdev, false);
1959
 
1960
//   ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1961
//   radeon_ring_fini(rdev, ring);
1962
//   radeon_scratch_free(rdev, ring->rptr_save_reg);
1963
 
1964
//   ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
1965
//   radeon_ring_fini(rdev, ring);
1966
//   radeon_scratch_free(rdev, ring->rptr_save_reg);
1967
 
1968
//   ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
1969
//   radeon_ring_fini(rdev, ring);
1970
//   radeon_scratch_free(rdev, ring->rptr_save_reg);
1971
}
1972
 
1973
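/*
 * Bring the CP back up: soft-reset it, program the gfx ring and both
 * compute rings (buffer size, read/write pointers, writeback
 * addresses), start them via si_cp_start() and ring-test each one.
 */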
static int si_cp_resume(struct radeon_device *rdev)
1974
{
1975
	struct radeon_ring *ring;
1976
	u32 tmp;
1977
	u32 rb_bufsz;
1978
	int r;
1979
 
1980
	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
1981
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
1982
				 SOFT_RESET_PA |
1983
				 SOFT_RESET_VGT |
1984
				 SOFT_RESET_SPI |
1985
				 SOFT_RESET_SX));
1986
	RREG32(GRBM_SOFT_RESET);
1987
	mdelay(15);
1988
	WREG32(GRBM_SOFT_RESET, 0);
1989
	RREG32(GRBM_SOFT_RESET);
1990
 
1991
	WREG32(CP_SEM_WAIT_TIMER, 0x0);
1992
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1993
 
1994
	/* Set the write pointer delay */
1995
	WREG32(CP_RB_WPTR_DELAY, 0);
1996
 
1997
	WREG32(CP_DEBUG, 0);
1998
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1999
 
2000
	/* ring 0 - compute and gfx */
2001
	/* Set ring buffer size */
2002
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2003
	rb_bufsz = drm_order(ring->ring_size / 8);
2004
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2005
#ifdef __BIG_ENDIAN
2006
	tmp |= BUF_SWAP_32BIT;
2007
#endif
2008
	WREG32(CP_RB0_CNTL, tmp);
2009
 
2010
	/* Initialize the ring buffer's read and write pointers */
2011
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2012
	ring->wptr = 0;
2013
	WREG32(CP_RB0_WPTR, ring->wptr);
2014
 
3120 serge 2015
	/* set the wb address whether it's enabled or not */
2997 Serge 2016
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2017
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2018
 
2019
	if (rdev->wb.enabled)
2020
		WREG32(SCRATCH_UMSK, 0xff);
2021
	else {
2022
		tmp |= RB_NO_UPDATE;
2023
		WREG32(SCRATCH_UMSK, 0);
2024
	}
2025
 
2026
	mdelay(1);
2027
	WREG32(CP_RB0_CNTL, tmp);
2028
 
2029
	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2030
 
2031
	ring->rptr = RREG32(CP_RB0_RPTR);
2032
 
2033
	/* ring1  - compute only */
2034
	/* Set ring buffer size */
2035
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2036
	rb_bufsz = drm_order(ring->ring_size / 8);
2037
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2038
#ifdef __BIG_ENDIAN
2039
	tmp |= BUF_SWAP_32BIT;
2040
#endif
2041
	WREG32(CP_RB1_CNTL, tmp);
2042
 
2043
	/* Initialize the ring buffer's read and write pointers */
2044
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2045
	ring->wptr = 0;
2046
	WREG32(CP_RB1_WPTR, ring->wptr);
2047
 
3120 serge 2048
	/* set the wb address whether it's enabled or not */
2997 Serge 2049
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2050
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2051
 
2052
	mdelay(1);
2053
	WREG32(CP_RB1_CNTL, tmp);
2054
 
2055
	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2056
 
2057
	ring->rptr = RREG32(CP_RB1_RPTR);
2058
 
2059
	/* ring2 - compute only */
2060
	/* Set ring buffer size */
2061
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2062
	rb_bufsz = drm_order(ring->ring_size / 8);
2063
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2064
#ifdef __BIG_ENDIAN
2065
	tmp |= BUF_SWAP_32BIT;
2066
#endif
2067
	WREG32(CP_RB2_CNTL, tmp);
2068
 
2069
	/* Initialize the ring buffer's read and write pointers */
2070
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2071
	ring->wptr = 0;
2072
	WREG32(CP_RB2_WPTR, ring->wptr);
2073
 
3120 serge 2074
	/* set the wb address whether it's enabled or not */
2997 Serge 2075
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2076
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2077
 
2078
	mdelay(1);
2079
	WREG32(CP_RB2_CNTL, tmp);
2080
 
2081
	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2082
 
2083
	ring->rptr = RREG32(CP_RB2_RPTR);
2084
 
2085
	/* start the rings */
2086
	si_cp_start(rdev);
2087
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2088
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2089
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2090
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2091
	if (r) {
2092
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2093
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2094
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2095
		return r;
2096
	}
2097
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2098
	if (r) {
2099
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2100
	}
2101
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
2102
	if (r) {
2103
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2104
	}
2105
 
2106
	return 0;
2107
}
2108
 
2109
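/*
 * Check whether @ring is locked up: if the GRBM reports the GUI as
 * idle the lockup tracking is simply refreshed, otherwise CP activity
 * is forced and the standard lockup test is run.
 */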
bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2110
{
2111
	u32 srbm_status;
2112
	u32 grbm_status, grbm_status2;
2113
	u32 grbm_status_se0, grbm_status_se1;
2114
 
2115
	srbm_status = RREG32(SRBM_STATUS);
2116
	grbm_status = RREG32(GRBM_STATUS);
2117
	grbm_status2 = RREG32(GRBM_STATUS2);
2118
	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2119
	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2120
	if (!(grbm_status & GUI_ACTIVE)) {
2121
		radeon_ring_lockup_update(ring);
2122
		return false;
2123
	}
2124
	/* force CP activities */
2125
	radeon_ring_force_activity(rdev, ring);
2126
	return radeon_ring_test_lockup(rdev, ring);
2127
}
2128
 
3192 Serge 2129
static void si_gpu_soft_reset_gfx(struct radeon_device *rdev)
2997 Serge 2130
{
2131
	u32 grbm_reset = 0;
2132
 
2133
	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
3192 Serge 2134
		return;
2997 Serge 2135
 
2136
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2137
		RREG32(GRBM_STATUS));
2138
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2139
		RREG32(GRBM_STATUS2));
2140
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2141
		RREG32(GRBM_STATUS_SE0));
2142
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2143
		RREG32(GRBM_STATUS_SE1));
2144
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2145
		RREG32(SRBM_STATUS));
3192 Serge 2146
 
2997 Serge 2147
	/* Disable CP parsing/prefetching */
2148
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2149
 
2150
	/* reset all the gfx blocks */
2151
	grbm_reset = (SOFT_RESET_CP |
2152
		      SOFT_RESET_CB |
2153
		      SOFT_RESET_DB |
2154
		      SOFT_RESET_GDS |
2155
		      SOFT_RESET_PA |
2156
		      SOFT_RESET_SC |
2157
		      SOFT_RESET_BCI |
2158
		      SOFT_RESET_SPI |
2159
		      SOFT_RESET_SX |
2160
		      SOFT_RESET_TC |
2161
		      SOFT_RESET_TA |
2162
		      SOFT_RESET_VGT |
2163
		      SOFT_RESET_IA);
2164
 
2165
	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2166
	WREG32(GRBM_SOFT_RESET, grbm_reset);
2167
	(void)RREG32(GRBM_SOFT_RESET);
2168
	udelay(50);
2169
	WREG32(GRBM_SOFT_RESET, 0);
2170
	(void)RREG32(GRBM_SOFT_RESET);
3192 Serge 2171
 
2997 Serge 2172
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2173
		RREG32(GRBM_STATUS));
2174
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2175
		RREG32(GRBM_STATUS2));
2176
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2177
		RREG32(GRBM_STATUS_SE0));
2178
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2179
		RREG32(GRBM_STATUS_SE1));
2180
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2181
		RREG32(SRBM_STATUS));
3192 Serge 2182
}
2183
 
2184
static void si_gpu_soft_reset_dma(struct radeon_device *rdev)
2185
{
2186
	u32 tmp;
2187
 
2188
	if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
2189
		return;
2190
 
2191
	dev_info(rdev->dev, "  DMA_STATUS_REG   = 0x%08X\n",
2192
		RREG32(DMA_STATUS_REG));
2193
 
2194
	/* dma0 */
2195
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
2196
	tmp &= ~DMA_RB_ENABLE;
2197
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
2198
 
2199
	/* dma1 */
2200
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
2201
	tmp &= ~DMA_RB_ENABLE;
2202
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
2203
 
2204
	/* Reset dma */
2205
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
2206
	RREG32(SRBM_SOFT_RESET);
2207
	udelay(50);
2208
	WREG32(SRBM_SOFT_RESET, 0);
2209
 
2210
	dev_info(rdev->dev, "  DMA_STATUS_REG   = 0x%08X\n",
2211
		RREG32(DMA_STATUS_REG));
2212
}
2213
 
2214
static int si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
2215
{
2216
	struct evergreen_mc_save save;
2217
 
2218
	if (reset_mask == 0)
2219
		return 0;
2220
 
2221
	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2222
 
2223
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
2224
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2225
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2226
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2227
 
2228
	evergreen_mc_stop(rdev, &save);
2229
	if (radeon_mc_wait_for_idle(rdev)) {
2230
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2231
	}
2232
 
2233
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
2234
		si_gpu_soft_reset_gfx(rdev);
2235
 
2236
	if (reset_mask & RADEON_RESET_DMA)
2237
		si_gpu_soft_reset_dma(rdev);
2238
 
2239
	/* Wait a little for things to settle down */
2240
	udelay(50);
2241
 
2997 Serge 2242
	evergreen_mc_resume(rdev, &save);
2243
	return 0;
2244
}
2245
 
2246
int si_asic_reset(struct radeon_device *rdev)
2247
{
3192 Serge 2248
	return si_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
2249
					RADEON_RESET_COMPUTE |
2250
					RADEON_RESET_DMA));
2997 Serge 2251
}
2252
 
2253
/* MC */
2254
static void si_mc_program(struct radeon_device *rdev)
2255
{
2256
	struct evergreen_mc_save save;
2257
	u32 tmp;
2258
	int i, j;
2259
 
2260
	/* Initialize HDP */
2261
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2262
		WREG32((0x2c14 + j), 0x00000000);
2263
		WREG32((0x2c18 + j), 0x00000000);
2264
		WREG32((0x2c1c + j), 0x00000000);
2265
		WREG32((0x2c20 + j), 0x00000000);
2266
		WREG32((0x2c24 + j), 0x00000000);
2267
	}
2268
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2269
 
2270
	evergreen_mc_stop(rdev, &save);
2271
	if (radeon_mc_wait_for_idle(rdev)) {
2272
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2273
	}
2274
	/* Lockout access through VGA aperture*/
2275
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2276
	/* Update configuration */
2277
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2278
	       rdev->mc.vram_start >> 12);
2279
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2280
	       rdev->mc.vram_end >> 12);
2281
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2282
	       rdev->vram_scratch.gpu_addr >> 12);
2283
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2284
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2285
	WREG32(MC_VM_FB_LOCATION, tmp);
2286
	/* XXX double check these! */
2287
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2288
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2289
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2290
	WREG32(MC_VM_AGP_BASE, 0);
2291
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2292
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2293
	if (radeon_mc_wait_for_idle(rdev)) {
2294
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2295
	}
2296
	evergreen_mc_resume(rdev, &save);
2297
	/* we need to own VRAM, so turn off the VGA renderer here
2298
	 * to stop it overwriting our objects */
2299
	rv515_vga_render_disable(rdev);
2300
}
2301
 
2302
/* SI MC address space is 40 bits */
2303
static void si_vram_location(struct radeon_device *rdev,
2304
			     struct radeon_mc *mc, u64 base)
2305
{
2306
	mc->vram_start = base;
2307
	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
2308
		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
2309
		mc->real_vram_size = mc->aper_size;
2310
		mc->mc_vram_size = mc->aper_size;
2311
	}
2312
	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2313
	dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
2314
			mc->mc_vram_size >> 20, mc->vram_start,
2315
			mc->vram_end, mc->real_vram_size >> 20);
2316
}
2317
 
2318
static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
2319
{
2320
	u64 size_af, size_bf;
2321
 
2322
	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
2323
	size_bf = mc->vram_start & ~mc->gtt_base_align;
2324
	if (size_bf > size_af) {
2325
		if (mc->gtt_size > size_bf) {
2326
			dev_warn(rdev->dev, "limiting GTT\n");
2327
			mc->gtt_size = size_bf;
2328
		}
2329
		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
2330
	} else {
2331
		if (mc->gtt_size > size_af) {
2332
			dev_warn(rdev->dev, "limiting GTT\n");
2333
			mc->gtt_size = size_af;
2334
		}
2335
		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
2336
	}
2337
	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
2338
	dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
2339
			mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
2340
}
2341
 
2342
static void si_vram_gtt_location(struct radeon_device *rdev,
2343
				 struct radeon_mc *mc)
2344
{
2345
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
2346
		/* leave room for at least 1024M GTT */
2347
		dev_warn(rdev->dev, "limiting VRAM\n");
2348
		mc->real_vram_size = 0xFFC0000000ULL;
2349
		mc->mc_vram_size = 0xFFC0000000ULL;
2350
	}
2351
	si_vram_location(rdev, &rdev->mc, 0);
2352
	rdev->mc.gtt_base_align = 0;
2353
	si_gtt_location(rdev, mc);
2354
}
2355
 
2356
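/*
 * Read out the VRAM configuration (channel size and count, PCI
 * aperture, total and visible sizes) and place the VRAM and GTT
 * apertures within the 40-bit MC address space.
 */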
static int si_mc_init(struct radeon_device *rdev)
2357
{
2358
	u32 tmp;
2359
	int chansize, numchan;
2360
 
2361
	/* Get VRAM information */
2362
	rdev->mc.vram_is_ddr = true;
2363
	tmp = RREG32(MC_ARB_RAMCFG);
2364
	if (tmp & CHANSIZE_OVERRIDE) {
2365
		chansize = 16;
2366
	} else if (tmp & CHANSIZE_MASK) {
2367
		chansize = 64;
2368
	} else {
2369
		chansize = 32;
2370
	}
2371
	tmp = RREG32(MC_SHARED_CHMAP);
2372
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2373
	case 0:
2374
	default:
2375
		numchan = 1;
2376
		break;
2377
	case 1:
2378
		numchan = 2;
2379
		break;
2380
	case 2:
2381
		numchan = 4;
2382
		break;
2383
	case 3:
2384
		numchan = 8;
2385
		break;
2386
	case 4:
2387
		numchan = 3;
2388
		break;
2389
	case 5:
2390
		numchan = 6;
2391
		break;
2392
	case 6:
2393
		numchan = 10;
2394
		break;
2395
	case 7:
2396
		numchan = 12;
2397
		break;
2398
	case 8:
2399
		numchan = 16;
2400
		break;
2401
	}
2402
	rdev->mc.vram_width = numchan * chansize;
2403
	/* Could aper size report 0 ? */
2404
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2405
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2406
	/* size in MB on si */
2407
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2408
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2409
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
2410
	si_vram_gtt_location(rdev, &rdev->mc);
2411
	radeon_update_bandwidth_info(rdev);
2412
 
2413
	return 0;
2414
}
2415
 
2416
/*
2417
 * GART
2418
 */
2419
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
2420
{
2421
	/* flush hdp cache */
2422
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2423
 
2424
	/* bits 0-15 are the VM contexts0-15 */
2425
	WREG32(VM_INVALIDATE_REQUEST, 1);
2426
}
2427
 
2428
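/*
 * Enable the PCIE GART: pin the page table, program the L1 TLB and VM
 * L2 cache, point VM context 0 at the GART range, enable contexts 1-15
 * for per-process VM page tables and flush the TLB.
 */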
static int si_pcie_gart_enable(struct radeon_device *rdev)
2429
{
2430
	int r, i;
2431
 
2432
	if (rdev->gart.robj == NULL) {
2433
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2434
		return -EINVAL;
2435
	}
2436
	r = radeon_gart_table_vram_pin(rdev);
2437
	if (r)
2438
		return r;
2439
	radeon_gart_restore(rdev);
2440
	/* Setup TLB control */
2441
	WREG32(MC_VM_MX_L1_TLB_CNTL,
2442
	       (0xA << 7) |
2443
	       ENABLE_L1_TLB |
2444
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2445
	       ENABLE_ADVANCED_DRIVER_MODEL |
2446
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2447
	/* Setup L2 cache */
2448
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2449
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2450
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2451
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2452
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2453
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2454
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2455
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2456
	/* setup context0 */
2457
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2458
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2459
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2460
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2461
			(u32)(rdev->dummy_page.addr >> 12));
2462
	WREG32(VM_CONTEXT0_CNTL2, 0);
2463
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2464
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2465
 
2466
	WREG32(0x15D4, 0);
2467
	WREG32(0x15D8, 0);
2468
	WREG32(0x15DC, 0);
2469
 
2470
	/* empty context1-15 */
2471
	/* set vm size, must be a multiple of 4 */
2472
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2473
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
2474
	/* Assign the pt base to something valid for now; the pts used for
2475
	 * the VMs are determined by the application and setup and assigned
2476
	 * on the fly in the vm part of radeon_gart.c
2477
	 */
2478
	for (i = 1; i < 16; i++) {
2479
		if (i < 8)
2480
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2481
			       rdev->gart.table_addr >> 12);
2482
		else
2483
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2484
			       rdev->gart.table_addr >> 12);
2485
	}
2486
 
2487
	/* enable context1-15 */
2488
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2489
	       (u32)(rdev->dummy_page.addr >> 12));
3192 Serge 2490
	WREG32(VM_CONTEXT1_CNTL2, 4);
2997 Serge 2491
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3192 Serge 2492
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2493
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2494
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2495
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2496
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
2497
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
2498
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
2499
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
2500
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
2501
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
2502
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2503
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
2997 Serge 2504
 
2505
	si_pcie_gart_tlb_flush(rdev);
2506
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2507
		 (unsigned)(rdev->mc.gtt_size >> 20),
2508
		 (unsigned long long)rdev->gart.table_addr);
2509
	rdev->gart.ready = true;
2510
	return 0;
2511
}
2512
 
2513
static void si_pcie_gart_disable(struct radeon_device *rdev)
2514
{
2515
	/* Disable all tables */
2516
	WREG32(VM_CONTEXT0_CNTL, 0);
2517
	WREG32(VM_CONTEXT1_CNTL, 0);
2518
	/* Setup TLB control */
2519
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2520
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2521
	/* Setup L2 cache */
2522
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2523
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2524
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2525
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2526
	WREG32(VM_L2_CNTL2, 0);
2527
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2528
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2529
	radeon_gart_table_vram_unpin(rdev);
2530
}
2531
 
2532
static void si_pcie_gart_fini(struct radeon_device *rdev)
2533
{
2534
	si_pcie_gart_disable(rdev);
2535
	radeon_gart_table_vram_free(rdev);
2536
//   radeon_gart_fini(rdev);
2537
}
2538
 
2539
/* vm parser */
2540
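/*
 * Registers a VM (userspace) IB is allowed to touch: anything in the
 * context register range plus a short whitelist of config registers.
 */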
static bool si_vm_reg_valid(u32 reg)
2541
{
2542
	/* context regs are fine */
2543
	if (reg >= 0x28000)
2544
		return true;
2545
 
2546
	/* check config regs */
2547
	switch (reg) {
2548
	case GRBM_GFX_INDEX:
3031 serge 2549
	case CP_STRMOUT_CNTL:
2997 Serge 2550
	case VGT_VTX_VECT_EJECT_REG:
2551
	case VGT_CACHE_INVALIDATION:
2552
	case VGT_ESGS_RING_SIZE:
2553
	case VGT_GSVS_RING_SIZE:
2554
	case VGT_GS_VERTEX_REUSE:
2555
	case VGT_PRIMITIVE_TYPE:
2556
	case VGT_INDEX_TYPE:
2557
	case VGT_NUM_INDICES:
2558
	case VGT_NUM_INSTANCES:
2559
	case VGT_TF_RING_SIZE:
2560
	case VGT_HS_OFFCHIP_PARAM:
2561
	case VGT_TF_MEMORY_BASE:
2562
	case PA_CL_ENHANCE:
2563
	case PA_SU_LINE_STIPPLE_VALUE:
2564
	case PA_SC_LINE_STIPPLE_STATE:
2565
	case PA_SC_ENHANCE:
2566
	case SQC_CACHES:
2567
	case SPI_STATIC_THREAD_MGMT_1:
2568
	case SPI_STATIC_THREAD_MGMT_2:
2569
	case SPI_STATIC_THREAD_MGMT_3:
2570
	case SPI_PS_MAX_WAVE_ID:
2571
	case SPI_CONFIG_CNTL:
2572
	case SPI_CONFIG_CNTL_1:
2573
	case TA_CNTL_AUX:
2574
		return true;
2575
	default:
2576
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2577
		return false;
2578
	}
2579
}
2580
 
2581
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2582
				  u32 *ib, struct radeon_cs_packet *pkt)
2583
{
2584
	switch (pkt->opcode) {
2585
	case PACKET3_NOP:
2586
	case PACKET3_SET_BASE:
2587
	case PACKET3_SET_CE_DE_COUNTERS:
2588
	case PACKET3_LOAD_CONST_RAM:
2589
	case PACKET3_WRITE_CONST_RAM:
2590
	case PACKET3_WRITE_CONST_RAM_OFFSET:
2591
	case PACKET3_DUMP_CONST_RAM:
2592
	case PACKET3_INCREMENT_CE_COUNTER:
2593
	case PACKET3_WAIT_ON_DE_COUNTER:
2594
	case PACKET3_CE_WRITE:
2595
		break;
2596
	default:
2597
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2598
		return -EINVAL;
2599
	}
2600
	return 0;
2601
}
2602
 
2603
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2604
				   u32 *ib, struct radeon_cs_packet *pkt)
2605
{
2606
	u32 idx = pkt->idx + 1;
2607
	u32 idx_value = ib[idx];
2608
	u32 start_reg, end_reg, reg, i;
3192 Serge 2609
	u32 command, info;
2997 Serge 2610
 
2611
	switch (pkt->opcode) {
2612
	case PACKET3_NOP:
2613
	case PACKET3_SET_BASE:
2614
	case PACKET3_CLEAR_STATE:
2615
	case PACKET3_INDEX_BUFFER_SIZE:
2616
	case PACKET3_DISPATCH_DIRECT:
2617
	case PACKET3_DISPATCH_INDIRECT:
2618
	case PACKET3_ALLOC_GDS:
2619
	case PACKET3_WRITE_GDS_RAM:
2620
	case PACKET3_ATOMIC_GDS:
2621
	case PACKET3_ATOMIC:
2622
	case PACKET3_OCCLUSION_QUERY:
2623
	case PACKET3_SET_PREDICATION:
2624
	case PACKET3_COND_EXEC:
2625
	case PACKET3_PRED_EXEC:
2626
	case PACKET3_DRAW_INDIRECT:
2627
	case PACKET3_DRAW_INDEX_INDIRECT:
2628
	case PACKET3_INDEX_BASE:
2629
	case PACKET3_DRAW_INDEX_2:
2630
	case PACKET3_CONTEXT_CONTROL:
2631
	case PACKET3_INDEX_TYPE:
2632
	case PACKET3_DRAW_INDIRECT_MULTI:
2633
	case PACKET3_DRAW_INDEX_AUTO:
2634
	case PACKET3_DRAW_INDEX_IMMD:
2635
	case PACKET3_NUM_INSTANCES:
2636
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
2637
	case PACKET3_STRMOUT_BUFFER_UPDATE:
2638
	case PACKET3_DRAW_INDEX_OFFSET_2:
2639
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
2640
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
2641
	case PACKET3_MPEG_INDEX:
2642
	case PACKET3_WAIT_REG_MEM:
2643
	case PACKET3_MEM_WRITE:
2644
	case PACKET3_PFP_SYNC_ME:
2645
	case PACKET3_SURFACE_SYNC:
2646
	case PACKET3_EVENT_WRITE:
2647
	case PACKET3_EVENT_WRITE_EOP:
2648
	case PACKET3_EVENT_WRITE_EOS:
2649
	case PACKET3_SET_CONTEXT_REG:
2650
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2651
	case PACKET3_SET_SH_REG:
2652
	case PACKET3_SET_SH_REG_OFFSET:
2653
	case PACKET3_INCREMENT_DE_COUNTER:
2654
	case PACKET3_WAIT_ON_CE_COUNTER:
2655
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2656
	case PACKET3_ME_WRITE:
2657
		break;
2658
	case PACKET3_COPY_DATA:
2659
		if ((idx_value & 0xf00) == 0) {
2660
			reg = ib[idx + 3] * 4;
2661
			if (!si_vm_reg_valid(reg))
2662
				return -EINVAL;
2663
		}
2664
		break;
2665
	case PACKET3_WRITE_DATA:
2666
		if ((idx_value & 0xf00) == 0) {
2667
			start_reg = ib[idx + 1] * 4;
2668
			if (idx_value & 0x10000) {
2669
				if (!si_vm_reg_valid(start_reg))
2670
					return -EINVAL;
2671
			} else {
2672
				for (i = 0; i < (pkt->count - 2); i++) {
2673
					reg = start_reg + (4 * i);
2674
					if (!si_vm_reg_valid(reg))
2675
						return -EINVAL;
2676
				}
2677
			}
2678
		}
2679
		break;
2680
	case PACKET3_COND_WRITE:
2681
		if (idx_value & 0x100) {
2682
			reg = ib[idx + 5] * 4;
2683
			if (!si_vm_reg_valid(reg))
2684
				return -EINVAL;
2685
		}
2686
		break;
2687
	case PACKET3_COPY_DW:
2688
		if (idx_value & 0x2) {
2689
			reg = ib[idx + 3] * 4;
2690
			if (!si_vm_reg_valid(reg))
2691
				return -EINVAL;
2692
		}
2693
		break;
2694
	case PACKET3_SET_CONFIG_REG:
2695
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2696
		end_reg = 4 * pkt->count + start_reg - 4;
2697
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2698
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2699
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2700
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2701
			return -EINVAL;
2702
		}
2703
		for (i = 0; i < pkt->count; i++) {
2704
			reg = start_reg + (4 * i);
2705
			if (!si_vm_reg_valid(reg))
2706
				return -EINVAL;
2707
		}
2708
		break;
3192 Serge 2709
	case PACKET3_CP_DMA:
2710
		command = ib[idx + 4];
2711
		info = ib[idx + 1];
2712
		if (command & PACKET3_CP_DMA_CMD_SAS) {
2713
			/* src address space is register */
2714
			if (((info & 0x60000000) >> 29) == 0) {
2715
				start_reg = idx_value << 2;
2716
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
2717
					reg = start_reg;
2718
					if (!si_vm_reg_valid(reg)) {
2719
						DRM_ERROR("CP DMA Bad SRC register\n");
2720
						return -EINVAL;
2721
					}
2722
				} else {
2723
					for (i = 0; i < (command & 0x1fffff); i++) {
2724
						reg = start_reg + (4 * i);
2725
						if (!si_vm_reg_valid(reg)) {
2726
							DRM_ERROR("CP DMA Bad SRC register\n");
2727
							return -EINVAL;
2728
						}
2729
					}
2730
				}
2731
			}
2732
		}
2733
		if (command & PACKET3_CP_DMA_CMD_DAS) {
2734
			/* dst address space is register */
2735
			if (((info & 0x00300000) >> 20) == 0) {
2736
				start_reg = ib[idx + 2];
2737
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
2738
					reg = start_reg;
2739
					if (!si_vm_reg_valid(reg)) {
2740
						DRM_ERROR("CP DMA Bad DST register\n");
2741
						return -EINVAL;
2742
					}
2743
				} else {
2744
					for (i = 0; i < (command & 0x1fffff); i++) {
2745
						reg = start_reg + (4 * i);
2746
						if (!si_vm_reg_valid(reg)) {
2747
							DRM_ERROR("CP DMA Bad DST register\n");
2748
							return -EINVAL;
2749
						}
2750
					}
2751
				}
2752
			}
2753
		}
2754
		break;
2997 Serge 2755
	default:
2756
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2757
		return -EINVAL;
2758
	}
2759
	return 0;
2760
}
2761
 
2762
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2763
				       u32 *ib, struct radeon_cs_packet *pkt)
2764
{
2765
	u32 idx = pkt->idx + 1;
2766
	u32 idx_value = ib[idx];
2767
	u32 start_reg, reg, i;
2768
 
2769
	switch (pkt->opcode) {
2770
	case PACKET3_NOP:
2771
	case PACKET3_SET_BASE:
2772
	case PACKET3_CLEAR_STATE:
2773
	case PACKET3_DISPATCH_DIRECT:
2774
	case PACKET3_DISPATCH_INDIRECT:
2775
	case PACKET3_ALLOC_GDS:
2776
	case PACKET3_WRITE_GDS_RAM:
2777
	case PACKET3_ATOMIC_GDS:
2778
	case PACKET3_ATOMIC:
2779
	case PACKET3_OCCLUSION_QUERY:
2780
	case PACKET3_SET_PREDICATION:
2781
	case PACKET3_COND_EXEC:
2782
	case PACKET3_PRED_EXEC:
2783
	case PACKET3_CONTEXT_CONTROL:
2784
	case PACKET3_STRMOUT_BUFFER_UPDATE:
2785
	case PACKET3_WAIT_REG_MEM:
2786
	case PACKET3_MEM_WRITE:
2787
	case PACKET3_PFP_SYNC_ME:
2788
	case PACKET3_SURFACE_SYNC:
2789
	case PACKET3_EVENT_WRITE:
2790
	case PACKET3_EVENT_WRITE_EOP:
2791
	case PACKET3_EVENT_WRITE_EOS:
2792
	case PACKET3_SET_CONTEXT_REG:
2793
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2794
	case PACKET3_SET_SH_REG:
2795
	case PACKET3_SET_SH_REG_OFFSET:
2796
	case PACKET3_INCREMENT_DE_COUNTER:
2797
	case PACKET3_WAIT_ON_CE_COUNTER:
2798
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2799
	case PACKET3_ME_WRITE:
2800
		break;
2801
	case PACKET3_COPY_DATA:
2802
		if ((idx_value & 0xf00) == 0) {
2803
			reg = ib[idx + 3] * 4;
2804
			if (!si_vm_reg_valid(reg))
2805
				return -EINVAL;
2806
		}
2807
		break;
2808
	case PACKET3_WRITE_DATA:
2809
		if ((idx_value & 0xf00) == 0) {
2810
			start_reg = ib[idx + 1] * 4;
2811
			if (idx_value & 0x10000) {
2812
				if (!si_vm_reg_valid(start_reg))
2813
					return -EINVAL;
2814
			} else {
2815
				for (i = 0; i < (pkt->count - 2); i++) {
2816
					reg = start_reg + (4 * i);
2817
					if (!si_vm_reg_valid(reg))
2818
						return -EINVAL;
2819
				}
2820
			}
2821
		}
2822
		break;
2823
	case PACKET3_COND_WRITE:
2824
		if (idx_value & 0x100) {
2825
			reg = ib[idx + 5] * 4;
2826
			if (!si_vm_reg_valid(reg))
2827
				return -EINVAL;
2828
		}
2829
		break;
2830
	case PACKET3_COPY_DW:
2831
		if (idx_value & 0x2) {
2832
			reg = ib[idx + 3] * 4;
2833
			if (!si_vm_reg_valid(reg))
2834
				return -EINVAL;
2835
		}
2836
		break;
2837
	default:
2838
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2839
		return -EINVAL;
2840
	}
2841
	return 0;
2842
}
2843
 
2844
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2845
{
2846
	int ret = 0;
2847
	u32 idx = 0;
2848
	struct radeon_cs_packet pkt;
2849
 
2850
	do {
2851
		pkt.idx = idx;
2852
		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2853
		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2854
		pkt.one_reg_wr = 0;
2855
		switch (pkt.type) {
2856
		case PACKET_TYPE0:
2857
			dev_err(rdev->dev, "Packet0 not allowed!\n");
2858
			ret = -EINVAL;
2859
			break;
2860
		case PACKET_TYPE2:
2861
			idx += 1;
2862
			break;
2863
		case PACKET_TYPE3:
2864
			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2865
			if (ib->is_const_ib)
2866
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2867
			else {
2868
				switch (ib->ring) {
2869
				case RADEON_RING_TYPE_GFX_INDEX:
2870
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2871
					break;
2872
				case CAYMAN_RING_TYPE_CP1_INDEX:
2873
				case CAYMAN_RING_TYPE_CP2_INDEX:
2874
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2875
					break;
2876
				default:
2877
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
2878
					ret = -EINVAL;
2879
					break;
2880
				}
2881
			}
2882
			idx += pkt.count + 2;
2883
			break;
2884
		default:
2885
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
2886
			ret = -EINVAL;
2887
			break;
2888
		}
2889
		if (ret)
2890
			break;
2891
	} while (idx < ib->length_dw);
2892
 
2893
	return ret;
2894
}
2895
 
2896
/*
2897
 * vm
2898
 */
2899
int si_vm_init(struct radeon_device *rdev)
2900
{
2901
	/* number of VMs */
2902
	rdev->vm_manager.nvm = 16;
2903
	/* base offset of vram pages */
2904
	rdev->vm_manager.vram_base_offset = 0;
2905
 
2906
	return 0;
2907
}
2908
 
2909
void si_vm_fini(struct radeon_device *rdev)
2910
{
2911
}
2912
 
2913
/**
2914
 * si_vm_set_page - update the page tables using the CP
2915
 *
2916
 * @rdev: radeon_device pointer
2917
 * @pe: addr of the page entry
2918
 * @addr: dst addr to write into pe
2919
 * @count: number of page entries to update
2920
 * @incr: increase next addr by incr bytes
2921
 * @flags: access flags
2922
 *
2923
 * Update the page tables using the CP (cayman-si).
2924
 */
2925
void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
2926
		    uint64_t addr, unsigned count,
2927
		    uint32_t incr, uint32_t flags)
2928
{
2929
	struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
2930
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3192 Serge 2931
	uint64_t value;
2932
	unsigned ndw;
2997 Serge 2933
 
3192 Serge 2934
	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2997 Serge 2935
		while (count) {
3192 Serge 2936
			ndw = 2 + count * 2;
2997 Serge 2937
			if (ndw > 0x3FFE)
2938
				ndw = 0x3FFE;
2939
 
2940
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
2941
			radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2942
						 WRITE_DATA_DST_SEL(1)));
2943
			radeon_ring_write(ring, pe);
2944
			radeon_ring_write(ring, upper_32_bits(pe));
2945
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
2946
				if (flags & RADEON_VM_PAGE_SYSTEM) {
2947
					value = radeon_vm_map_gart(rdev, addr);
2948
					value &= 0xFFFFFFFFFFFFF000ULL;
3192 Serge 2949
				} else if (flags & RADEON_VM_PAGE_VALID) {
2997 Serge 2950
					value = addr;
3192 Serge 2951
				} else {
2997 Serge 2952
					value = 0;
3192 Serge 2953
				}
2997 Serge 2954
				addr += incr;
2955
				value |= r600_flags;
2956
				radeon_ring_write(ring, value);
2957
				radeon_ring_write(ring, upper_32_bits(value));
2958
			}
2959
		}
3192 Serge 2960
	} else {
2961
		/* DMA */
2962
		if (flags & RADEON_VM_PAGE_SYSTEM) {
2963
			while (count) {
2964
				ndw = count * 2;
2965
				if (ndw > 0xFFFFE)
2966
					ndw = 0xFFFFE;
2967
 
2968
				/* for non-physically contiguous pages (system) */
2969
				radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
2970
				radeon_ring_write(ring, pe);
2971
				radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
2972
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2973
					if (flags & RADEON_VM_PAGE_SYSTEM) {
2974
						value = radeon_vm_map_gart(rdev, addr);
2975
						value &= 0xFFFFFFFFFFFFF000ULL;
2976
					} else if (flags & RADEON_VM_PAGE_VALID) {
2977
						value = addr;
2978
					} else {
2979
						value = 0;
2980
					}
2981
					addr += incr;
2982
					value |= r600_flags;
2983
					radeon_ring_write(ring, value);
2984
					radeon_ring_write(ring, upper_32_bits(value));
2985
				}
2986
			}
2987
		} else {
2988
			while (count) {
2989
				ndw = count * 2;
2990
				if (ndw > 0xFFFFE)
2991
					ndw = 0xFFFFE;
2992
 
2993
				if (flags & RADEON_VM_PAGE_VALID)
2994
					value = addr;
2995
				else
2996
					value = 0;
2997
				/* for physically contiguous pages (vram) */
2998
				radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
2999
				radeon_ring_write(ring, pe); /* dst addr */
3000
				radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
3001
				radeon_ring_write(ring, r600_flags); /* mask */
3002
				radeon_ring_write(ring, 0);
3003
				radeon_ring_write(ring, value); /* value */
3004
				radeon_ring_write(ring, upper_32_bits(value));
3005
				radeon_ring_write(ring, incr); /* increment size */
3006
				radeon_ring_write(ring, 0);
3007
				pe += ndw * 4;
3008
				addr += (ndw / 2) * incr;
3009
				count -= ndw / 2;
3010
			}
3011
		}
3012
	}
2997 Serge 3013
}
3014
 
3015
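/*
 * Emit the packets that switch the gfx/compute ring to @vm's page
 * table: write the new page directory base, flush the HDP cache,
 * invalidate the TLB for this VM id and sync the PFP to the ME.
 */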
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3016
{
3017
	struct radeon_ring *ring = &rdev->ring[ridx];
3018
 
3019
	if (vm == NULL)
3020
		return;
3021
 
3022
	/* write new base address */
3023
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3024
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3025
				 WRITE_DATA_DST_SEL(0)));
3026
 
3027
	if (vm->id < 8) {
3028
		radeon_ring_write(ring,
3029
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3030
	} else {
3031
		radeon_ring_write(ring,
3032
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3033
	}
3034
	radeon_ring_write(ring, 0);
3035
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3036
 
3037
	/* flush hdp cache */
3038
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3039
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3040
				 WRITE_DATA_DST_SEL(0)));
3041
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3042
	radeon_ring_write(ring, 0);
3043
	radeon_ring_write(ring, 0x1);
3044
 
3045
	/* bits 0-15 are the VM contexts0-15 */
3046
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3047
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3048
				 WRITE_DATA_DST_SEL(0)));
3049
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3050
	radeon_ring_write(ring, 0);
3051
	radeon_ring_write(ring, 1 << vm->id);
3052
 
3053
	/* sync PFP to ME, otherwise we might get invalid PFP reads */
3054
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3055
	radeon_ring_write(ring, 0x0);
3056
}
3057
 
3192 Serge 3058
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3059
{
3060
	struct radeon_ring *ring = &rdev->ring[ridx];
3061
 
3062
	if (vm == NULL)
3063
		return;
3064
 
3065
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3066
	if (vm->id < 8) {
3067
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
3068
	} else {
3069
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
3070
	}
3071
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3072
 
3073
	/* flush hdp cache */
3074
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3075
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
3076
	radeon_ring_write(ring, 1);
3077
 
3078
	/* bits 0-7 are the VM contexts0-7 */
3079
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3080
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
3081
	radeon_ring_write(ring, 1 << vm->id);
3082
}
3083
 
2997 Serge 3084
/*
3085
 * RLC
3086
 */
3087
void si_rlc_fini(struct radeon_device *rdev)
3088
{
3089
	int r;
3090
 
3091
	/* save restore block */
3092
	if (rdev->rlc.save_restore_obj) {
3093
		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
3094
		if (unlikely(r != 0))
3095
			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
3096
		radeon_bo_unpin(rdev->rlc.save_restore_obj);
3097
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
3098
 
3099
		radeon_bo_unref(&rdev->rlc.save_restore_obj);
3100
		rdev->rlc.save_restore_obj = NULL;
3101
	}
3102
 
3103
	/* clear state block */
3104
	if (rdev->rlc.clear_state_obj) {
3105
		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
3106
		if (unlikely(r != 0))
3107
			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
3108
		radeon_bo_unpin(rdev->rlc.clear_state_obj);
3109
		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
3110
 
3111
		radeon_bo_unref(&rdev->rlc.clear_state_obj);
3112
		rdev->rlc.clear_state_obj = NULL;
3113
	}
3114
}
3115
 
3116
int si_rlc_init(struct radeon_device *rdev)
3117
{
3118
	int r;
3119
 
3120
	/* save restore block */
3121
	if (rdev->rlc.save_restore_obj == NULL) {
3122
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
3123
				     RADEON_GEM_DOMAIN_VRAM, NULL,
3124
				     &rdev->rlc.save_restore_obj);
3125
		if (r) {
3126
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
3127
			return r;
3128
		}
3129
	}
3130
 
3131
	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
3132
	if (unlikely(r != 0)) {
3133
		si_rlc_fini(rdev);
3134
		return r;
3135
	}
3136
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
3137
			  &rdev->rlc.save_restore_gpu_addr);
3138
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
3139
	if (r) {
3140
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
3141
		si_rlc_fini(rdev);
3142
		return r;
3143
	}
3144
 
3145
	/* clear state block */
3146
	if (rdev->rlc.clear_state_obj == NULL) {
3147
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
3148
				     RADEON_GEM_DOMAIN_VRAM, NULL,
3149
				     &rdev->rlc.clear_state_obj);
3150
		if (r) {
3151
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
3152
			si_rlc_fini(rdev);
3153
			return r;
3154
		}
3155
	}
3156
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
3157
	if (unlikely(r != 0)) {
3158
		si_rlc_fini(rdev);
3159
		return r;
3160
	}
3161
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
3162
			  &rdev->rlc.clear_state_gpu_addr);
3163
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
3164
	if (r) {
3165
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
3166
		si_rlc_fini(rdev);
3167
		return r;
3168
	}
3169
 
3170
	return 0;
3171
}
3172
 
3173
static void si_rlc_stop(struct radeon_device *rdev)
3174
{
3175
	WREG32(RLC_CNTL, 0);
3176
}
3177
 
3178
static void si_rlc_start(struct radeon_device *rdev)
3179
{
3180
	WREG32(RLC_CNTL, RLC_ENABLE);
3181
}
3182
 
3183
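/*
 * (Re)start the RLC: stop it, program the save/restore and clear-state
 * buffer addresses, upload the RLC microcode and enable it again.
 */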
static int si_rlc_resume(struct radeon_device *rdev)
3184
{
3185
	u32 i;
3186
	const __be32 *fw_data;
3187
 
3188
	if (!rdev->rlc_fw)
3189
		return -EINVAL;
3190
 
3191
	si_rlc_stop(rdev);
3192
 
3193
	WREG32(RLC_RL_BASE, 0);
3194
	WREG32(RLC_RL_SIZE, 0);
3195
	WREG32(RLC_LB_CNTL, 0);
3196
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
3197
	WREG32(RLC_LB_CNTR_INIT, 0);
3198
 
3199
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
3200
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
3201
 
3202
	WREG32(RLC_MC_CNTL, 0);
3203
	WREG32(RLC_UCODE_CNTL, 0);
3204
 
3205
	fw_data = (const __be32 *)rdev->rlc_fw->data;
3206
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
3207
		WREG32(RLC_UCODE_ADDR, i);
3208
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
3209
	}
3210
	WREG32(RLC_UCODE_ADDR, 0);
3211
 
3212
	si_rlc_start(rdev);
3213
 
3214
	return 0;
3215
}
3216
 
3217
static void si_enable_interrupts(struct radeon_device *rdev)
3218
{
3219
	u32 ih_cntl = RREG32(IH_CNTL);
3220
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3221
 
3222
	ih_cntl |= ENABLE_INTR;
3223
	ih_rb_cntl |= IH_RB_ENABLE;
3224
	WREG32(IH_CNTL, ih_cntl);
3225
	WREG32(IH_RB_CNTL, ih_rb_cntl);
3226
	rdev->ih.enabled = true;
3227
}
3228
 
3229
static void si_disable_interrupts(struct radeon_device *rdev)
3230
{
3231
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3232
	u32 ih_cntl = RREG32(IH_CNTL);
3233
 
3234
	ih_rb_cntl &= ~IH_RB_ENABLE;
3235
	ih_cntl &= ~ENABLE_INTR;
3236
	WREG32(IH_RB_CNTL, ih_rb_cntl);
3237
	WREG32(IH_CNTL, ih_cntl);
3238
	/* set rptr, wptr to 0 */
3239
	WREG32(IH_RB_RPTR, 0);
3240
	WREG32(IH_RB_WPTR, 0);
3241
	rdev->ih.enabled = false;
3242
	rdev->ih.rptr = 0;
3243
}
3244
 
3245
static void si_disable_interrupt_state(struct radeon_device *rdev)
3246
{
3247
	u32 tmp;
3248
 
3249
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3250
	WREG32(CP_INT_CNTL_RING1, 0);
3251
	WREG32(CP_INT_CNTL_RING2, 0);
3192 Serge 3252
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3253
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
3254
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3255
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
2997 Serge 3256
	WREG32(GRBM_INT_CNTL, 0);
3257
	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3258
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3259
	if (rdev->num_crtc >= 4) {
3260
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3261
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3262
	}
3263
	if (rdev->num_crtc >= 6) {
3264
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3265
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3266
	}
3267
 
3268
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3269
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3270
	if (rdev->num_crtc >= 4) {
3271
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3272
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3273
	}
3274
	if (rdev->num_crtc >= 6) {
3275
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3276
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3277
	}
3278
 
3279
	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
3280
 
3281
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3282
	WREG32(DC_HPD1_INT_CONTROL, tmp);
3283
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3284
	WREG32(DC_HPD2_INT_CONTROL, tmp);
3285
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3286
	WREG32(DC_HPD3_INT_CONTROL, tmp);
3287
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3288
	WREG32(DC_HPD4_INT_CONTROL, tmp);
3289
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3290
	WREG32(DC_HPD5_INT_CONTROL, tmp);
3291
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3292
	WREG32(DC_HPD6_INT_CONTROL, tmp);
3293
 
3294
}
3295
 
3296
static int si_irq_init(struct radeon_device *rdev)
3297
{
3298
	int ret = 0;
3299
	int rb_bufsz;
3300
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3301
 
3302
	/* allocate ring */
3303
	ret = r600_ih_ring_alloc(rdev);
3304
	if (ret)
3305
		return ret;
3306
 
3307
	/* disable irqs */
3308
	si_disable_interrupts(rdev);
3309
 
3310
	/* init rlc */
3311
	ret = si_rlc_resume(rdev);
3312
	if (ret) {
3313
		r600_ih_ring_fini(rdev);
3314
		return ret;
3315
	}
3316
 
3317
	/* setup interrupt control */
3318
	/* set dummy read address to ring address */
3319
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3320
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
3321
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3322
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3323
	 */
3324
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3325
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3326
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3327
	WREG32(INTERRUPT_CNTL, interrupt_cntl);
3328
 
3329
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3330
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3331
 
3332
	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3333
		      IH_WPTR_OVERFLOW_CLEAR |
3334
		      (rb_bufsz << 1));
3335
 
3336
	if (rdev->wb.enabled)
3337
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3338
 
3339
	/* set the writeback address whether it's enabled or not */
3340
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3341
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3342
 
3343
	WREG32(IH_RB_CNTL, ih_rb_cntl);
3344
 
3345
	/* set rptr, wptr to 0 */
3346
	WREG32(IH_RB_RPTR, 0);
3347
	WREG32(IH_RB_WPTR, 0);
3348
 
3349
	/* Default settings for IH_CNTL (disabled at first) */
3350
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3351
	/* RPTR_REARM only works if msi's are enabled */
3352
	if (rdev->msi_enabled)
3353
		ih_cntl |= RPTR_REARM;
3354
	WREG32(IH_CNTL, ih_cntl);
3355
 
3356
	/* force the active interrupt state to all disabled */
3357
	si_disable_interrupt_state(rdev);
3358
 
3359
	pci_set_master(rdev->pdev);
3360
 
3361
	/* enable irqs */
3362
	si_enable_interrupts(rdev);
3363
 
3364
	return ret;
3365
}
3366
 
3367
int si_irq_set(struct radeon_device *rdev)
3368
{
3369
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
3370
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
3371
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3372
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3373
	u32 grbm_int_cntl = 0;
3374
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3192 Serge 3375
	u32 dma_cntl, dma_cntl1;
2997 Serge 3376
 
3377
	if (!rdev->irq.installed) {
3378
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
3379
		return -EINVAL;
3380
	}
3381
	/* don't enable anything if the ih is disabled */
3382
	if (!rdev->ih.enabled) {
3383
		si_disable_interrupts(rdev);
3384
		/* force the active interrupt state to all disabled */
3385
		si_disable_interrupt_state(rdev);
3386
		return 0;
3387
	}
3388
 
3389
	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3390
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3391
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3392
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3393
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3394
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3395
 
3192 Serge 3396
	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3397
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3398
 
2997 Serge 3399
	/* enable CP interrupts on all rings */
3400
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3401
		DRM_DEBUG("si_irq_set: sw int gfx\n");
3402
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3403
	}
3404
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
3405
		DRM_DEBUG("si_irq_set: sw int cp1\n");
3406
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
3407
	}
3408
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
3409
		DRM_DEBUG("si_irq_set: sw int cp2\n");
3410
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3411
	}
3192 Serge 3412
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
3413
		DRM_DEBUG("si_irq_set: sw int dma\n");
3414
		dma_cntl |= TRAP_ENABLE;
3415
	}
3416
 
3417
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
3418
		DRM_DEBUG("si_irq_set: sw int dma1\n");
3419
		dma_cntl1 |= TRAP_ENABLE;
3420
	}
2997 Serge 3421
	if (rdev->irq.crtc_vblank_int[0] ||
3422
	    atomic_read(&rdev->irq.pflip[0])) {
3423
		DRM_DEBUG("si_irq_set: vblank 0\n");
3424
		crtc1 |= VBLANK_INT_MASK;
3425
	}
3426
	if (rdev->irq.crtc_vblank_int[1] ||
3427
	    atomic_read(&rdev->irq.pflip[1])) {
3428
		DRM_DEBUG("si_irq_set: vblank 1\n");
3429
		crtc2 |= VBLANK_INT_MASK;
3430
	}
3431
	if (rdev->irq.crtc_vblank_int[2] ||
3432
	    atomic_read(&rdev->irq.pflip[2])) {
3433
		DRM_DEBUG("si_irq_set: vblank 2\n");
3434
		crtc3 |= VBLANK_INT_MASK;
3435
	}
3436
	if (rdev->irq.crtc_vblank_int[3] ||
3437
	    atomic_read(&rdev->irq.pflip[3])) {
3438
		DRM_DEBUG("si_irq_set: vblank 3\n");
3439
		crtc4 |= VBLANK_INT_MASK;
3440
	}
3441
	if (rdev->irq.crtc_vblank_int[4] ||
3442
	    atomic_read(&rdev->irq.pflip[4])) {
3443
		DRM_DEBUG("si_irq_set: vblank 4\n");
3444
		crtc5 |= VBLANK_INT_MASK;
3445
	}
3446
	if (rdev->irq.crtc_vblank_int[5] ||
3447
	    atomic_read(&rdev->irq.pflip[5])) {
3448
		DRM_DEBUG("si_irq_set: vblank 5\n");
3449
		crtc6 |= VBLANK_INT_MASK;
3450
	}
3451
	if (rdev->irq.hpd[0]) {
3452
		DRM_DEBUG("si_irq_set: hpd 1\n");
3453
		hpd1 |= DC_HPDx_INT_EN;
3454
	}
3455
	if (rdev->irq.hpd[1]) {
3456
		DRM_DEBUG("si_irq_set: hpd 2\n");
3457
		hpd2 |= DC_HPDx_INT_EN;
3458
	}
3459
	if (rdev->irq.hpd[2]) {
3460
		DRM_DEBUG("si_irq_set: hpd 3\n");
3461
		hpd3 |= DC_HPDx_INT_EN;
3462
	}
3463
	if (rdev->irq.hpd[3]) {
3464
		DRM_DEBUG("si_irq_set: hpd 4\n");
3465
		hpd4 |= DC_HPDx_INT_EN;
3466
	}
3467
	if (rdev->irq.hpd[4]) {
3468
		DRM_DEBUG("si_irq_set: hpd 5\n");
3469
		hpd5 |= DC_HPDx_INT_EN;
3470
	}
3471
	if (rdev->irq.hpd[5]) {
3472
		DRM_DEBUG("si_irq_set: hpd 6\n");
3473
		hpd6 |= DC_HPDx_INT_EN;
3474
	}
3475
 
3476
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3477
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3478
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3479
 
3192 Serge 3480
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
3481
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
3482
 
2997 Serge 3483
	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3484
 
3485
	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
3486
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
3487
	if (rdev->num_crtc >= 4) {
3488
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
3489
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
3490
	}
3491
	if (rdev->num_crtc >= 6) {
3492
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
3493
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
3494
	}
3495
 
3496
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
3497
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
3498
	if (rdev->num_crtc >= 4) {
3499
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
3500
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
3501
	}
3502
	if (rdev->num_crtc >= 6) {
3503
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
3504
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
3505
	}
3506
 
3507
	WREG32(DC_HPD1_INT_CONTROL, hpd1);
3508
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
3509
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
3510
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
3511
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
3512
	WREG32(DC_HPD6_INT_CONTROL, hpd6);
3513
 
3514
	return 0;
3515
}
3516
 
3517
static inline void si_irq_ack(struct radeon_device *rdev)
3518
{
3519
	u32 tmp;
3520
 
3521
	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3522
	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3523
	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3524
	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3525
	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3526
	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3527
	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
3528
	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
3529
	if (rdev->num_crtc >= 4) {
3530
		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
3531
		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
3532
	}
3533
	if (rdev->num_crtc >= 6) {
3534
		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
3535
		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
3536
	}
3537
 
3538
	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
3539
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3540
	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
3541
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3542
	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
3543
		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3544
	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
3545
		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3546
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3547
		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3548
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3549
		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3550
 
3551
	if (rdev->num_crtc >= 4) {
3552
		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
3553
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3554
		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
3555
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3556
		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3557
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3558
		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3559
			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3560
		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3561
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3562
		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3563
			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3564
	}
3565
 
3566
	if (rdev->num_crtc >= 6) {
3567
		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
3568
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3569
		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
3570
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3571
		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3572
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3573
		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3574
			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3575
		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3576
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3577
		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3578
			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
3579
	}
3580
 
3581
	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3582
		tmp = RREG32(DC_HPD1_INT_CONTROL);
3583
		tmp |= DC_HPDx_INT_ACK;
3584
		WREG32(DC_HPD1_INT_CONTROL, tmp);
3585
	}
3586
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3587
		tmp = RREG32(DC_HPD2_INT_CONTROL);
3588
		tmp |= DC_HPDx_INT_ACK;
3589
		WREG32(DC_HPD2_INT_CONTROL, tmp);
3590
	}
3591
	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3592
		tmp = RREG32(DC_HPD3_INT_CONTROL);
3593
		tmp |= DC_HPDx_INT_ACK;
3594
		WREG32(DC_HPD3_INT_CONTROL, tmp);
3595
	}
3596
	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3597
		tmp = RREG32(DC_HPD4_INT_CONTROL);
3598
		tmp |= DC_HPDx_INT_ACK;
3599
		WREG32(DC_HPD4_INT_CONTROL, tmp);
3600
	}
3601
	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3602
		tmp = RREG32(DC_HPD5_INT_CONTROL);
3603
		tmp |= DC_HPDx_INT_ACK;
3604
		WREG32(DC_HPD5_INT_CONTROL, tmp);
3605
	}
3606
	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3607
		tmp = RREG32(DC_HPD6_INT_CONTROL);
3608
		tmp |= DC_HPDx_INT_ACK;
3609
		WREG32(DC_HPD6_INT_CONTROL, tmp);
3610
	}
3611
}
3612
 
3613
static void si_irq_disable(struct radeon_device *rdev)
3614
{
3615
	si_disable_interrupts(rdev);
3616
	/* Wait and acknowledge irq */
3617
	mdelay(1);
3618
	si_irq_ack(rdev);
3619
	si_disable_interrupt_state(rdev);
3620
}
3621
 
3622
static void si_irq_suspend(struct radeon_device *rdev)
3623
{
3624
	si_irq_disable(rdev);
3625
	si_rlc_stop(rdev);
3626
}
3627
 
3628
static void si_irq_fini(struct radeon_device *rdev)
3629
{
3630
	si_irq_suspend(rdev);
3631
	r600_ih_ring_fini(rdev);
3632
}
3633
 
3634
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
3635
{
3636
	u32 wptr, tmp;
3637
 
3638
	if (rdev->wb.enabled)
3639
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3640
	else
3641
		wptr = RREG32(IH_RB_WPTR);
3642
 
3643
	if (wptr & RB_OVERFLOW) {
3644
		/* When a ring buffer overflow happens, start parsing interrupts
3645
		 * from the last non-overwritten vector (wptr + 16). Hopefully
3646
		 * this should allow us to catch up.
3647
		 */
3648
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3649
			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
3650
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3651
		tmp = RREG32(IH_RB_CNTL);
3652
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
3653
		WREG32(IH_RB_CNTL, tmp);
3654
	}
3655
	return (wptr & rdev->ih.ptr_mask);
3656
}
3657
 
3658
/*        SI IV Ring
3659
 * Each IV ring entry is 128 bits:
3660
 * [7:0]    - interrupt source id
3661
 * [31:8]   - reserved
3662
 * [59:32]  - interrupt source data
3663
 * [63:60]  - reserved
3664
 * [71:64]  - RINGID
3665
 * [79:72]  - VMID
3666
 * [127:80] - reserved
3667
 */
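 
/* Illustrative sketch only (added for clarity, not part of the original
 * driver): how the first three dwords of one IV ring entry map onto the
 * fields listed above.  si_irq_process() below performs the same decoding
 * inline; the helper name here is hypothetical.
 */
static inline void si_iv_entry_decode_example(u32 dw0, u32 dw1, u32 dw2,
					      u32 *src_id, u32 *src_data,
					      u32 *ring_id)
{
	*src_id   = dw0 & 0xff;      /* [7:0]   interrupt source id   */
	*src_data = dw1 & 0xfffffff; /* [59:32] interrupt source data */
	*ring_id  = dw2 & 0xff;      /* [71:64] RINGID                */
}
 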
3668
int si_irq_process(struct radeon_device *rdev)
3669
{
3670
	u32 wptr;
3671
	u32 rptr;
3672
	u32 src_id, src_data, ring_id;
3673
	u32 ring_index;
3674
	bool queue_hotplug = false;
3675
 
3676
	if (!rdev->ih.enabled || rdev->shutdown)
3677
		return IRQ_NONE;
3678
 
3679
	wptr = si_get_ih_wptr(rdev);
3680
 
3681
restart_ih:
3682
	/* is somebody else already processing irqs? */
3683
	if (atomic_xchg(&rdev->ih.lock, 1))
3684
		return IRQ_NONE;
3685
 
3686
	rptr = rdev->ih.rptr;
3687
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3688
 
3689
	/* Order reading of wptr vs. reading of IH ring data */
3690
	rmb();
3691
 
3692
	/* display interrupts */
3693
	si_irq_ack(rdev);
3694
 
3695
	while (rptr != wptr) {
3696
		/* wptr/rptr are in bytes! */
3697
		ring_index = rptr / 4;
3698
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3699
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3700
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3701
 
3702
		switch (src_id) {
3703
		case 1: /* D1 vblank/vline */
3704
			switch (src_data) {
3705
			case 0: /* D1 vblank */
3706
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
3707
					if (rdev->irq.crtc_vblank_int[0]) {
3708
//                       drm_handle_vblank(rdev->ddev, 0);
3709
						rdev->pm.vblank_sync = true;
3710
//                       wake_up(&rdev->irq.vblank_queue);
3711
					}
3712
//                   if (atomic_read(&rdev->irq.pflip[0]))
3713
//                       radeon_crtc_handle_flip(rdev, 0);
3714
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3715
					DRM_DEBUG("IH: D1 vblank\n");
3716
				}
3717
				break;
3718
			case 1: /* D1 vline */
3719
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
3720
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3721
					DRM_DEBUG("IH: D1 vline\n");
3722
				}
3723
				break;
3724
			default:
3725
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3726
				break;
3727
			}
3728
			break;
3729
		case 2: /* D2 vblank/vline */
3730
			switch (src_data) {
3731
			case 0: /* D2 vblank */
3732
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3733
					if (rdev->irq.crtc_vblank_int[1]) {
3734
//                       drm_handle_vblank(rdev->ddev, 1);
3735
						rdev->pm.vblank_sync = true;
3736
//                       wake_up(&rdev->irq.vblank_queue);
3737
					}
3738
//                   if (atomic_read(&rdev->irq.pflip[1]))
3739
//                       radeon_crtc_handle_flip(rdev, 1);
3740
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3741
					DRM_DEBUG("IH: D2 vblank\n");
3742
				}
3743
				break;
3744
			case 1: /* D2 vline */
3745
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3746
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3747
					DRM_DEBUG("IH: D2 vline\n");
3748
				}
3749
				break;
3750
			default:
3751
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3752
				break;
3753
			}
3754
			break;
3755
		case 3: /* D3 vblank/vline */
3756
			switch (src_data) {
3757
			case 0: /* D3 vblank */
3758
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3759
					if (rdev->irq.crtc_vblank_int[2]) {
3760
//                       drm_handle_vblank(rdev->ddev, 2);
3761
						rdev->pm.vblank_sync = true;
3762
//                       wake_up(&rdev->irq.vblank_queue);
3763
					}
3764
//                   if (atomic_read(&rdev->irq.pflip[2]))
3765
//                       radeon_crtc_handle_flip(rdev, 2);
3766
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3767
					DRM_DEBUG("IH: D3 vblank\n");
3768
				}
3769
				break;
3770
			case 1: /* D3 vline */
3771
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3772
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3773
					DRM_DEBUG("IH: D3 vline\n");
3774
				}
3775
				break;
3776
			default:
3777
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3778
				break;
3779
			}
3780
			break;
3781
		case 4: /* D4 vblank/vline */
3782
			switch (src_data) {
3783
			case 0: /* D4 vblank */
3784
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3785
					if (rdev->irq.crtc_vblank_int[3]) {
3786
//                       drm_handle_vblank(rdev->ddev, 3);
3787
						rdev->pm.vblank_sync = true;
3788
//                       wake_up(&rdev->irq.vblank_queue);
3789
					}
3790
//                   if (atomic_read(&rdev->irq.pflip[3]))
3791
//                       radeon_crtc_handle_flip(rdev, 3);
3792
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3793
					DRM_DEBUG("IH: D4 vblank\n");
3794
				}
3795
				break;
3796
			case 1: /* D4 vline */
3797
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3798
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3799
					DRM_DEBUG("IH: D4 vline\n");
3800
				}
3801
				break;
3802
			default:
3803
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3804
				break;
3805
			}
3806
			break;
3807
		case 5: /* D5 vblank/vline */
3808
			switch (src_data) {
3809
			case 0: /* D5 vblank */
3810
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3811
					if (rdev->irq.crtc_vblank_int[4]) {
3812
//                       drm_handle_vblank(rdev->ddev, 4);
3813
						rdev->pm.vblank_sync = true;
3814
//                       wake_up(&rdev->irq.vblank_queue);
3815
					}
3816
//                   if (atomic_read(&rdev->irq.pflip[4]))
3817
//                       radeon_crtc_handle_flip(rdev, 4);
3818
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3819
					DRM_DEBUG("IH: D5 vblank\n");
3820
				}
3821
				break;
3822
			case 1: /* D5 vline */
3823
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3824
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3825
					DRM_DEBUG("IH: D5 vline\n");
3826
				}
3827
				break;
3828
			default:
3829
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3830
				break;
3831
			}
3832
			break;
3833
		case 6: /* D6 vblank/vline */
3834
			switch (src_data) {
3835
			case 0: /* D6 vblank */
3836
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3837
					if (rdev->irq.crtc_vblank_int[5]) {
3838
//                       drm_handle_vblank(rdev->ddev, 5);
3839
						rdev->pm.vblank_sync = true;
3840
//                       wake_up(&rdev->irq.vblank_queue);
3841
					}
3842
//                   if (atomic_read(&rdev->irq.pflip[5]))
3843
//                       radeon_crtc_handle_flip(rdev, 5);
3844
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3845
					DRM_DEBUG("IH: D6 vblank\n");
3846
				}
3847
				break;
3848
			case 1: /* D6 vline */
3849
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3850
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3851
					DRM_DEBUG("IH: D6 vline\n");
3852
				}
3853
				break;
3854
			default:
3855
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3856
				break;
3857
			}
3858
			break;
3859
		case 42: /* HPD hotplug */
3860
			switch (src_data) {
3861
			case 0:
3862
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3863
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
3864
					queue_hotplug = true;
3865
					DRM_DEBUG("IH: HPD1\n");
3866
				}
3867
				break;
3868
			case 1:
3869
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3870
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3871
					queue_hotplug = true;
3872
					DRM_DEBUG("IH: HPD2\n");
3873
				}
3874
				break;
3875
			case 2:
3876
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3877
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3878
					queue_hotplug = true;
3879
					DRM_DEBUG("IH: HPD3\n");
3880
				}
3881
				break;
3882
			case 3:
3883
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3884
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3885
					queue_hotplug = true;
3886
					DRM_DEBUG("IH: HPD4\n");
3887
				}
3888
				break;
3889
			case 4:
3890
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3891
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3892
					queue_hotplug = true;
3893
					DRM_DEBUG("IH: HPD5\n");
3894
				}
3895
				break;
3896
			case 5:
3897
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3898
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3899
					queue_hotplug = true;
3900
					DRM_DEBUG("IH: HPD6\n");
3901
				}
3902
				break;
3903
			default:
3904
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3905
				break;
3906
			}
3907
			break;
3192 Serge 3908
		case 146:
3909
		case 147:
3910
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
3911
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3912
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3913
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3914
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3915
			/* reset addr and status */
3916
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
3917
			break;
2997 Serge 3918
		case 176: /* RINGID0 CP_INT */
3919
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3920
			break;
3921
		case 177: /* RINGID1 CP_INT */
3922
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3923
			break;
3924
		case 178: /* RINGID2 CP_INT */
3925
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3926
			break;
3927
		case 181: /* CP EOP event */
3928
			DRM_DEBUG("IH: CP EOP\n");
3929
			switch (ring_id) {
3930
			case 0:
3931
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3932
				break;
3933
			case 1:
3934
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3935
				break;
3936
			case 2:
3937
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3938
				break;
3939
			}
3940
			break;
3192 Serge 3941
		case 224: /* DMA trap event */
3942
			DRM_DEBUG("IH: DMA trap\n");
3943
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
3944
			break;
2997 Serge 3945
		case 233: /* GUI IDLE */
3946
			DRM_DEBUG("IH: GUI idle\n");
3947
			break;
3192 Serge 3948
		case 244: /* DMA trap event */
3949
			DRM_DEBUG("IH: DMA1 trap\n");
3950
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
3951
			break;
2997 Serge 3952
		default:
3953
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3954
			break;
3955
		}
3956
 
3957
		/* wptr/rptr are in bytes! */
3958
		rptr += 16;
3959
		rptr &= rdev->ih.ptr_mask;
3960
	}
3961
//   if (queue_hotplug)
3962
//       schedule_work(&rdev->hotplug_work);
3963
	rdev->ih.rptr = rptr;
3964
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
3965
	atomic_set(&rdev->ih.lock, 0);
3966
 
3967
	/* make sure wptr hasn't changed while processing */
3968
	wptr = si_get_ih_wptr(rdev);
3969
	if (wptr != rptr)
3970
		goto restart_ih;
3971
 
3972
	return IRQ_HANDLED;
3973
}
3974
 
3192 Serge 3975
/**
3976
 * si_copy_dma - copy pages using the DMA engine
3977
 *
3978
 * @rdev: radeon_device pointer
3979
 * @src_offset: src GPU address
3980
 * @dst_offset: dst GPU address
3981
 * @num_gpu_pages: number of GPU pages to xfer
3982
 * @fence: radeon fence object
3983
 *
3984
 * Copy GPU pages using the DMA engine (SI).
3985
 * Used by the radeon ttm implementation to move pages if
3986
 * registered as the asic copy callback.
3987
 */
3988
int si_copy_dma(struct radeon_device *rdev,
3989
		uint64_t src_offset, uint64_t dst_offset,
3990
		unsigned num_gpu_pages,
3991
		struct radeon_fence **fence)
3992
{
3993
	struct radeon_semaphore *sem = NULL;
3994
	int ring_index = rdev->asic->copy.dma_ring_index;
3995
	struct radeon_ring *ring = &rdev->ring[ring_index];
3996
	u32 size_in_bytes, cur_size_in_bytes;
3997
	int i, num_loops;
3998
	int r = 0;
3999
 
4000
	r = radeon_semaphore_create(rdev, &sem);
4001
	if (r) {
4002
		DRM_ERROR("radeon: moving bo (%d).\n", r);
4003
		return r;
4004
	}
4005
 
4006
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4007
	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
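	/* a single DMA_PACKET_COPY moves at most 0xfffff bytes, so the
	 * transfer is split into num_loops packets of up to that size */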
4008
	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
4009
	if (r) {
4010
		DRM_ERROR("radeon: moving bo (%d).\n", r);
4011
		radeon_semaphore_free(rdev, &sem, NULL);
4012
		return r;
4013
	}
4014
 
4015
	if (radeon_fence_need_sync(*fence, ring->idx)) {
4016
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4017
					    ring->idx);
4018
		radeon_fence_note_sync(*fence, ring->idx);
4019
	} else {
4020
		radeon_semaphore_free(rdev, &sem, NULL);
4021
	}
4022
 
4023
	for (i = 0; i < num_loops; i++) {
4024
		cur_size_in_bytes = size_in_bytes;
4025
		if (cur_size_in_bytes > 0xFFFFF)
4026
			cur_size_in_bytes = 0xFFFFF;
4027
		size_in_bytes -= cur_size_in_bytes;
4028
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
4029
		radeon_ring_write(ring, dst_offset & 0xffffffff);
4030
		radeon_ring_write(ring, src_offset & 0xffffffff);
4031
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
4032
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
4033
		src_offset += cur_size_in_bytes;
4034
		dst_offset += cur_size_in_bytes;
4035
	}
4036
 
4037
	r = radeon_fence_emit(rdev, fence, ring->idx);
4038
	if (r) {
4039
		radeon_ring_unlock_undo(rdev, ring);
4040
		return r;
4041
	}
4042
 
4043
	radeon_ring_unlock_commit(rdev, ring);
4044
	radeon_semaphore_free(rdev, &sem, *fence);
4045
 
4046
	return r;
4047
}
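 
/* Illustrative sketch only (added for clarity, not part of the original
 * source): a hypothetical caller using si_copy_dma() to copy a range of GPU
 * pages and then blocking on the returned fence.  The function name and the
 * choice of a blocking wait are assumptions, not existing driver code.
 */
static int si_copy_dma_usage_example(struct radeon_device *rdev,
				     uint64_t src_gpu_addr,
				     uint64_t dst_gpu_addr,
				     unsigned num_gpu_pages)
{
	struct radeon_fence *fence = NULL;
	int r;

	r = si_copy_dma(rdev, src_gpu_addr, dst_gpu_addr, num_gpu_pages, &fence);
	if (r)
		return r;

	/* wait (non-interruptibly) for the DMA engine to finish the copy */
	r = radeon_fence_wait(fence, false);
	radeon_fence_unref(&fence);
	return r;
}
 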
4048
 
2997 Serge 4049
/*
4050
 * startup/shutdown callbacks
4051
 */
4052
static int si_startup(struct radeon_device *rdev)
4053
{
4054
	struct radeon_ring *ring;
4055
	int r;
4056
 
4057
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4058
	    !rdev->rlc_fw || !rdev->mc_fw) {
4059
		r = si_init_microcode(rdev);
4060
		if (r) {
4061
			DRM_ERROR("Failed to load firmware!\n");
4062
			return r;
4063
		}
4064
	}
4065
 
4066
	r = si_mc_load_microcode(rdev);
4067
	if (r) {
4068
		DRM_ERROR("Failed to load MC firmware!\n");
4069
		return r;
4070
	}
4071
 
4072
	r = r600_vram_scratch_init(rdev);
4073
	if (r)
4074
		return r;
4075
 
4076
	si_mc_program(rdev);
4077
	r = si_pcie_gart_enable(rdev);
4078
	if (r)
4079
		return r;
4080
	si_gpu_init(rdev);
4081
 
4082
#if 0
4083
	r = evergreen_blit_init(rdev);
4084
	if (r) {
4085
		r600_blit_fini(rdev);
4086
		rdev->asic->copy = NULL;
4087
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
4088
	}
4089
#endif
4090
	/* allocate rlc buffers */
4091
	r = si_rlc_init(rdev);
4092
	if (r) {
4093
		DRM_ERROR("Failed to init rlc BOs!\n");
4094
		return r;
4095
	}
4096
 
4097
	/* allocate wb buffer */
4098
	r = radeon_wb_init(rdev);
4099
	if (r)
4100
		return r;
4101
 
4102
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4103
	if (r) {
4104
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4105
		return r;
4106
	}
4107
 
4108
	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4109
	if (r) {
4110
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4111
		return r;
4112
	}
4113
 
4114
	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4115
	if (r) {
4116
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4117
		return r;
4118
	}
4119
 
3192 Serge 4120
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4121
	if (r) {
4122
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4123
		return r;
4124
	}
4125
 
4126
	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4127
	if (r) {
4128
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4129
		return r;
4130
	}
4131
 
2997 Serge 4132
	/* Enable IRQ */
4133
	r = si_irq_init(rdev);
4134
	if (r) {
4135
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
4136
		radeon_irq_kms_fini(rdev);
4137
		return r;
4138
	}
4139
	si_irq_set(rdev);
4140
 
4141
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4142
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4143
			     CP_RB0_RPTR, CP_RB0_WPTR,
4144
			     0, 0xfffff, RADEON_CP_PACKET2);
4145
	if (r)
4146
		return r;
4147
 
4148
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
4149
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
4150
			     CP_RB1_RPTR, CP_RB1_WPTR,
4151
			     0, 0xfffff, RADEON_CP_PACKET2);
4152
	if (r)
4153
		return r;
4154
 
4155
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
4156
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
4157
			     CP_RB2_RPTR, CP_RB2_WPTR,
4158
			     0, 0xfffff, RADEON_CP_PACKET2);
4159
	if (r)
4160
		return r;
4161
 
3192 Serge 4162
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4163
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4164
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
4165
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
4166
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4167
	if (r)
4168
		return r;
4169
 
4170
	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4171
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4172
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
4173
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
4174
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4175
	if (r)
4176
		return r;
4177
 
2997 Serge 4178
	r = si_cp_load_microcode(rdev);
4179
	if (r)
4180
		return r;
4181
	r = si_cp_resume(rdev);
4182
	if (r)
4183
		return r;
4184
 
3192 Serge 4185
	r = cayman_dma_resume(rdev);
4186
	if (r)
4187
		return r;
4188
 
3120 serge 4189
	r = radeon_ib_pool_init(rdev);
4190
	if (r) {
4191
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
4192
		return r;
4193
	}
2997 Serge 4194
 
4195
//   r = radeon_vm_manager_init(rdev);
4196
//   if (r) {
4197
//       dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
4198
//       return r;
4199
//   }
4200
 
4201
	return 0;
4202
}
4203
 
4204
 
3120 serge 4205
 
4206
 
2997 Serge 4207
/* The plan is to move initialization into this function and use
4208
 * helper functions so that radeon_device_init does little
4209
 * more than call asic-specific functions. This
4210
 * should also allow us to remove a bunch of callback functions
4211
 * like vram_info.
4212
 */
4213
int si_init(struct radeon_device *rdev)
4214
{
4215
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4216
	int r;
4217
 
4218
    ENTER();
4219
 
4220
	/* Read BIOS */
4221
	if (!radeon_get_bios(rdev)) {
4222
		if (ASIC_IS_AVIVO(rdev))
4223
			return -EINVAL;
4224
	}
4225
	/* Must be an ATOMBIOS */
4226
	if (!rdev->is_atom_bios) {
4227
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
4228
		return -EINVAL;
4229
	}
4230
	r = radeon_atombios_init(rdev);
4231
	if (r)
4232
		return r;
4233
 
4234
	/* Post card if necessary */
4235
	if (!radeon_card_posted(rdev)) {
4236
		if (!rdev->bios) {
4237
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
4238
			return -EINVAL;
4239
		}
4240
		DRM_INFO("GPU not posted. posting now...\n");
4241
		atom_asic_init(rdev->mode_info.atom_context);
4242
	}
4243
	/* Initialize scratch registers */
4244
	si_scratch_init(rdev);
4245
	/* Initialize surface registers */
4246
	radeon_surface_init(rdev);
4247
	/* Initialize clocks */
4248
	radeon_get_clock_info(rdev->ddev);
4249
 
4250
	/* Fence driver */
4251
	r = radeon_fence_driver_init(rdev);
4252
	if (r)
4253
		return r;
4254
 
4255
	/* initialize memory controller */
4256
	r = si_mc_init(rdev);
4257
	if (r)
4258
		return r;
4259
	/* Memory manager */
4260
	r = radeon_bo_init(rdev);
4261
	if (r)
4262
		return r;
4263
 
4264
	r = radeon_irq_kms_init(rdev);
4265
	if (r)
4266
		return r;
4267
 
4268
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4269
	ring->ring_obj = NULL;
4270
	r600_ring_init(rdev, ring, 1024 * 1024);
4271
 
4272
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
4273
	ring->ring_obj = NULL;
4274
	r600_ring_init(rdev, ring, 1024 * 1024);
4275
 
4276
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
4277
	ring->ring_obj = NULL;
4278
	r600_ring_init(rdev, ring, 1024 * 1024);
4279
 
3192 Serge 4280
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4281
	ring->ring_obj = NULL;
4282
	r600_ring_init(rdev, ring, 64 * 1024);
4283
 
4284
	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4285
	ring->ring_obj = NULL;
4286
	r600_ring_init(rdev, ring, 64 * 1024);
4287
 
2997 Serge 4288
	rdev->ih.ring_obj = NULL;
4289
	r600_ih_ring_init(rdev, 64 * 1024);
4290
 
4291
    r = r600_pcie_gart_init(rdev);
4292
    if (r)
4293
       return r;
4294
 
4295
	rdev->accel_working = true;
4296
    r = si_startup(rdev);
4297
	if (r) {
4298
		dev_err(rdev->dev, "disabling GPU acceleration\n");
4299
//       si_cp_fini(rdev);
4300
//       si_irq_fini(rdev);
4301
//       si_rlc_fini(rdev);
4302
//       radeon_wb_fini(rdev);
4303
//       radeon_ib_pool_fini(rdev);
4304
//       radeon_vm_manager_fini(rdev);
4305
//       radeon_irq_kms_fini(rdev);
4306
//       si_pcie_gart_fini(rdev);
4307
		rdev->accel_working = false;
4308
	}
4309
 
4310
	/* Don't start up if the MC ucode is missing.
4311
	 * The default clocks and voltages before the MC ucode
4312
	 * is loaded are not sufficient for advanced operations.
4313
	 */
4314
	if (!rdev->mc_fw) {
4315
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
4316
		return -EINVAL;
4317
	}
4318
    LEAVE();
4319
 
4320
	return 0;
4321
}
4322
 
4323
/**
4324
 * si_get_gpu_clock - return GPU clock counter snapshot
4325
 *
4326
 * @rdev: radeon_device pointer
4327
 *
4328
 * Fetches a GPU clock counter snapshot (SI).
4329
 * Returns the 64 bit clock counter snapshot.
4330
 */
4331
uint64_t si_get_gpu_clock(struct radeon_device *rdev)
4332
{
4333
	uint64_t clock;
4334
 
4335
	mutex_lock(&rdev->gpu_clock_mutex);
4336
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4337
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
4338
	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4339
	mutex_unlock(&rdev->gpu_clock_mutex);
4340
	return clock;
4341
}
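 
/* Illustrative sketch only (added for clarity, not part of the original
 * source): using si_get_gpu_clock() to measure how many GPU clock ticks
 * elapse across a piece of work.  The helper name is hypothetical.
 */
static uint64_t si_gpu_clock_delta_example(struct radeon_device *rdev)
{
	uint64_t start, end;

	start = si_get_gpu_clock(rdev);
	/* ... submit and wait for some GPU work here ... */
	end = si_get_gpu_clock(rdev);

	return end - start;
}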