/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
//#include 
#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_microcode.h"
#include "radeon_reg.h"
#include "radeon.h"

/* This file gathers functions specific to:
 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
 *
 * Some of these functions might be used by newer ASICs.
 */
void r100_hdp_reset(struct radeon_device *rdev);
void r100_gpu_init(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_mc_wait_for_idle(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
int r100_debugfs_mc_info_init(struct radeon_device *rdev);

/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here? */
	/* The hw seems to cache only one entry, so we should discard this
	 * entry; otherwise, if the first GPU GART read hits it, the access
	 * could end up at the wrong address. */
}

int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	if (rdev->gart.table.ram.ptr == NULL) {
		rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
		r = radeon_gart_table_ram_alloc(rdev);
		if (r) {
			return r;
		}
	}
	/* discard memory requests outside of the configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translation */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
	WREG32(RADEON_AIC_HI_ADDR, tmp);
	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	rdev->gart.ready = true;
	return 0;
}

void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;

	/* discard memory requests outside of the configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}

int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	rdev->gart.table.ram.ptr[i] = cpu_to_le32((uint32_t)addr);
	return 0;
}
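
/* Note on the table layout used above (my reading of this code, not of any
 * register documentation): each GART entry is a single little-endian 32-bit
 * word holding the bus address of one GPU page, which is why
 * r100_pci_gart_enable() sizes the table as num_gpu_pages * 4 bytes, and
 * why valid page indices run from 0 to num_gpu_pages - 1. Mapping page 2
 * to bus address 0x12345000, for example, just stores
 * cpu_to_le32(0x12345000) into table.ram.ptr[2]; with the hardware caching
 * only one entry (see r100_pci_gart_tlb_flush() above), no explicit TLB
 * invalidation is done per update.
 */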

int r100_gart_enable(struct radeon_device *rdev)
{
	if (rdev->flags & RADEON_IS_AGP) {
		r100_pci_gart_disable(rdev);
		return 0;
	}
	return r100_pci_gart_enable(rdev);
}

/*
 * MC
 */
void r100_mc_disable_clients(struct radeon_device *rdev)
{
	uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;

	/* FIXME: is this function correct for rs100, rs200, rs300? */
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	/* stop display and memory access */
	ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
	WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
	crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
	WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);

	r100_gpu_wait_for_vsync(rdev);

	WREG32(RADEON_CRTC_GEN_CNTL,
	       (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
	       RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);

	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);

		r100_gpu_wait_for_vsync2(rdev);
		WREG32(RADEON_CRTC2_GEN_CNTL,
		       (crtc2_gen_cntl &
		        ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
		       RADEON_CRTC2_DISP_REQ_EN_B);
	}

	udelay(500);
}

void r100_mc_setup(struct radeon_device *rdev)
{
	uint32_t tmp;
	int r;

	r = r100_debugfs_mc_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
	}
	/* Write VRAM size in case we are limiting it */
	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
	tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
	tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
	tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
	WREG32(RADEON_MC_FB_LOCATION, tmp);

	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);

	if (rdev->flags & RADEON_IS_AGP) {
		tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
		tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
		tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
		WREG32(RADEON_MC_AGP_LOCATION, tmp);
		WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
	} else {
		WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(RADEON_AGP_BASE, 0);
	}

	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}
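
/* A worked example of the MC_FB_LOCATION packing above (inferred from the
 * REG_SET() usage, not from chip documentation): start and top addresses
 * are stored in 64 KiB units, so with vram_location = 0 and
 * vram_size = 64 MiB the register ends up with FB_START = 0x0000 and
 * FB_TOP = 0x03FF (0x03FFFFFF >> 16), i.e. the top address is inclusive.
 * MC_AGP_LOCATION is packed the same way for the GTT aperture.
 */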

int r100_mc_init(struct radeon_device *rdev)
{
	int r;

	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
	}

	r100_gpu_init(rdev);
	/* Disable the GART, which also disables out-of-GART access */
	r100_pci_gart_disable(rdev);

	/* Setup GPU memory space */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	r100_mc_disable_clients(rdev);
	if (r100_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	r100_mc_setup(rdev);
	return 0;
}

void r100_mc_fini(struct radeon_device *rdev)
{
	r100_pci_gart_disable(rdev);
//   radeon_gart_table_ram_free(rdev);
//   radeon_gart_fini(rdev);
}

/*
 * Fence emission
 */
void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Whoever calls radeon_fence_emit should call ring_lock and ask
	 * for enough space (today the callers are IB scheduling and
	 * buffer moves) */
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}
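
/* Summary of the emission sequence above (describing this code, not chip
 * docs): the ring first waits for the engines to report IDLE & CLEAN, then
 * a PACKET0 write stores the fence sequence number into the scratch
 * register reserved by the fence driver, and a final write to
 * GEN_INT_STATUS fires a software interrupt. The interrupt handler can
 * then read the scratch register and signal every fence whose seq is at
 * or below the stored value.
 */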

#if 0
/*
 * Writeback
 */
int r100_wb_init(struct radeon_device *rdev)
{
	int r;

	if (rdev->wb.wb_obj == NULL) {
		r = radeon_object_create(rdev, NULL, 4096,
					 true,
					 RADEON_GEM_DOMAIN_GTT,
					 false, &rdev->wb.wb_obj);
		if (r) {
			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_pin(rdev->wb.wb_obj,
				      RADEON_GEM_DOMAIN_GTT,
				      &rdev->wb.gpu_addr);
		if (r) {
			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
		if (r) {
			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
			return r;
		}
	}
	WREG32(0x774, rdev->wb.gpu_addr);
	WREG32(0x70C, rdev->wb.gpu_addr + 1024);
	WREG32(0x770, 0xff);
	return 0;
}

void r100_wb_fini(struct radeon_device *rdev)
{
	if (rdev->wb.wb_obj) {
//       radeon_object_kunmap(rdev->wb.wb_obj);
//       radeon_object_unpin(rdev->wb.wb_obj);
//       radeon_object_unref(&rdev->wb.wb_obj);
		rdev->wb.wb = NULL;
		rdev->wb.wb_obj = NULL;
	}
}

int r100_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset,
		   uint64_t dst_offset,
		   unsigned num_pages,
		   struct radeon_fence *fence)
{
	uint32_t cur_pages;
	uint32_t stride_bytes = PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;

	/* radeon limited to 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is /64 */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
	num_loops = DIV_ROUND_UP(num_pages, 8191);

	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return -EINVAL;
	}
	while (num_pages > 0) {
		cur_pages = num_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_pages -= cur_pages;

		/* pages are in Y direction - height
		   page width in X direction - width */
		radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(rdev,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, 0);
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence);
	}
	radeon_ring_unlock_commit(rdev);
	return r;
}

#endif
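
/* Reading of the (currently disabled) blit path above: BITBLT_MULTI takes
 * the pitch in 64-byte units packed into the high bits of the pitch/offset
 * dwords and the surface base in 1 KiB units (offset >> 10), which is why
 * src and dst offsets must be 1 KiB aligned for this to be exact. With
 * PAGE_SIZE = 4096 that yields pitch = 64 and stride_pixels = 1024, and
 * each loop iteration copies up to 8191 page-sized "rows", matching the
 * 8191 clamp and the DIV_ROUND_UP(num_pages, 8191) loop count.
 */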

/*
 * CP
 */
void r100_ring_start(struct radeon_device *rdev)
{
	int r;

	r = radeon_ring_lock(rdev, 2);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev);
}

static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	WREG32(RADEON_CP_ME_RAM_ADDR, 0);
	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		DRM_INFO("Loading R100 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R100_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R100_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R200) ||
		   (rdev->family == CHIP_RV250) ||
		   (rdev->family == CHIP_RV280) ||
		   (rdev->family == CHIP_RS300)) {
		DRM_INFO("Loading R200 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R200_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R200_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R300) ||
		   (rdev->family == CHIP_R350) ||
		   (rdev->family == CHIP_RV350) ||
		   (rdev->family == CHIP_RV380) ||
		   (rdev->family == CHIP_RS400) ||
		   (rdev->family == CHIP_RS480)) {
		DRM_INFO("Loading R300 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R300_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R300_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R420) ||
		   (rdev->family == CHIP_R423) ||
		   (rdev->family == CHIP_RV410)) {
		DRM_INFO("Loading R400 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R420_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R420_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_RS690) ||
		   (rdev->family == CHIP_RS740)) {
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, RS690_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, RS690_cp_microcode[i][0]);
		}
	} else if (rdev->family == CHIP_RS600) {
		DRM_INFO("Loading RS600 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, RS600_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, RS600_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_RV515) ||
		   (rdev->family == CHIP_R520) ||
		   (rdev->family == CHIP_RV530) ||
		   (rdev->family == CHIP_R580) ||
		   (rdev->family == CHIP_RV560) ||
		   (rdev->family == CHIP_RV570)) {
		DRM_INFO("Loading R500 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R520_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R520_cp_microcode[i][0]);
		}
	}
}
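
/* Upload pattern used above: CP_ME_RAM_ADDR is programmed once with 0 and
 * then 256 DATAH/DATAL pairs are streamed in, so the ME RAM address
 * presumably auto-increments after each pair (inferred from the code -
 * the address is never rewritten - not from register documentation). Each
 * table entry is one 64-bit microcode word split into its high and low
 * 32-bit halves.
 */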

int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;

	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP !\n");
	}
	/* Reset CP */
	tmp = RREG32(RADEON_CP_CSQ_STAT);
	if ((tmp & (1 << 31))) {
		DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		tmp = RREG32(RADEON_CP_CSQ_STAT);
		if ((tmp & (1 << 31))) {
			DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
		}
	} else {
		DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
	}
	/* Align ring size */
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring_size);
	if (r) {
		return r;
	}
	/* Each time the CP reads 1024 bytes (16 dword/quadword), update
	 * the rptr copy in system RAM */
	rb_blksz = 9;
	/* CP will read 128 bytes at a time (4 dwords) */
	max_fetch = 1;
	rdev->cp.align_mask = 16 - 1;
	/* Writes to CP_RB_WPTR will be delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force the CP_RB_WPTR write if it is written more than once before
	 * the delay expires
	 */
	pre_write_limit = 0;
	/* Set up the CP cache like this (cache size is 96 dwords):
	 *	RING		0  to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So the ring cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 *    the indirect1 cache size is 64 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 *    the indirect2 cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)).
	 * The idea being that most of the GPU commands go through the
	 * indirect1 buffer, so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	WREG32(RADEON_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
	       RADEON_BUF_SWAP_32BIT |
#endif
	       REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch) |
	       RADEON_RB_NO_UPDATE);
	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
	WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
	/* Force read & write ptr to 0 */
	tmp = RREG32(RADEON_CP_RB_CNTL);
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	WREG32(RADEON_CP_RB_WPTR, 0);
	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
	rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
	/* Set CP mode to bus mastering & enable CP */
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(0x718, 0);
	WREG32(0x744, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
	radeon_ring_start(rdev);
	r = radeon_ring_test(rdev);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	rdev->cp.ready = true;
	return 0;
}
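
/* Worked example of the ring size alignment above (assuming drm_order()
 * returns the ceiling of log2, as in the DRM helpers): for a requested
 * ring_size of 1 MiB, drm_order(1048576 / 8) = 17 and the size is
 * recomputed as (1 << 18) * 4 = 1 MiB, i.e. the ring is forced to a
 * power-of-two size that the RB_BUFSZ field of CP_RB_CNTL can express.
 */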

void r100_cp_fini(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	radeon_ring_fini(rdev);
	DRM_INFO("radeon: cp finalized\n");
}

void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
}

int r100_cp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	dbgprintf("%s\n", __FUNCTION__);

	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 16))) {
			DRM_INFO("CP reset succeed (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}

#if 0
/*
 * CS functions
 */
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	/* Check that the register falls into the register range
	 * determined by the number of entries (n) in the
	 * safe register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}

void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
	}
}

/**
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser:	parser structure holding parsing context.
 * @pkt:	where to store packet information
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * is unknown.
 **/
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header = ib_chunk->kdata[idx];

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}
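
/* CP packet header layout assumed by the accessors above (inferred from
 * how they are used here, not restated from hardware documentation): the
 * top two bits of the header select the packet type, a count field holds
 * the payload length in dwords minus one, and for type-0 packets the low
 * bits carry the base register together with a one-reg-wr flag that makes
 * every payload dword target the same register instead of consecutive
 * ones. A header of 0x00000000, for instance, decodes as a type-0 packet
 * writing a single dword to register 0.
 */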

/**
 * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3
 * @parser:		parser structure holding parsing context.
 * @data:		pointer to relocation data
 * @offset_start:	starting offset
 * @offset_mask:	offset mask (to align start offset on)
 * @reloc:		reloc information
 *
 * Check that the next packet is a relocation packet3, do BO validation
 * and compute the GPU offset using the provided start.
 **/
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = ib_chunk->kdata[p3reloc.idx + 1];
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}

static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	volatile uint32_t *ib;
	uint32_t tmp;
	unsigned reg;
	unsigned i;
	unsigned idx;
	bool onereg;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	reg = pkt->reg;
	onereg = false;
	if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) {
		onereg = true;
	}
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		switch (reg) {
		/* FIXME: only allow PACKET3 blit? easier to check for out of
		 * range access */
		case RADEON_DST_PITCH_OFFSET:
		case RADEON_SRC_PITCH_OFFSET:
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			tmp = ib_chunk->kdata[idx] & 0x003fffff;
			tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
			ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
			break;
		case RADEON_RB3D_DEPTHOFFSET:
		case RADEON_RB3D_COLOROFFSET:
		case R300_RB3D_COLOROFFSET0:
		case R300_ZB_DEPTHOFFSET:
		case R200_PP_TXOFFSET_0:
		case R200_PP_TXOFFSET_1:
		case R200_PP_TXOFFSET_2:
		case R200_PP_TXOFFSET_3:
		case R200_PP_TXOFFSET_4:
		case R200_PP_TXOFFSET_5:
		case RADEON_PP_TXOFFSET_0:
		case RADEON_PP_TXOFFSET_1:
		case RADEON_PP_TXOFFSET_2:
		case R300_TX_OFFSET_0:
		case R300_TX_OFFSET_0+4:
		case R300_TX_OFFSET_0+8:
		case R300_TX_OFFSET_0+12:
		case R300_TX_OFFSET_0+16:
		case R300_TX_OFFSET_0+20:
		case R300_TX_OFFSET_0+24:
		case R300_TX_OFFSET_0+28:
		case R300_TX_OFFSET_0+32:
		case R300_TX_OFFSET_0+36:
		case R300_TX_OFFSET_0+40:
		case R300_TX_OFFSET_0+44:
		case R300_TX_OFFSET_0+48:
		case R300_TX_OFFSET_0+52:
		case R300_TX_OFFSET_0+56:
		case R300_TX_OFFSET_0+60:
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
			break;
		default:
			/* FIXME: we don't want to allow any other packets */
			break;
		}
		if (onereg) {
			/* FIXME: forbid onereg write to register on relocate */
			break;
		}
	}
	return 0;
}

int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj)
{
	struct radeon_cs_chunk *ib_chunk;
	unsigned idx;

	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) {
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
			  "(need %u have %lu) !\n",
			  ib_chunk->kdata[idx+2] + 1,
			  radeon_object_size(robj));
		return -EINVAL;
	}
	return 0;
}

static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	unsigned idx;
	unsigned i, c;
	volatile uint32_t *ib;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		c = ib_chunk->kdata[idx++];
		for (i = 0; i < (c - 1); i += 2, idx += 3) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
		}
		if (c & 1) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		}
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* FIXME: cleanup */
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case PACKET3_3D_DRAW_IMMD:
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_VBUF_2:
		/* triggers drawing of vertex buffers set up elsewhere */
	case PACKET3_3D_DRAW_INDX_2:
		/* triggers drawing using indices to a vertex buffer */
	case PACKET3_3D_DRAW_VBUF:
		/* triggers drawing of vertex buffers set up elsewhere */
	case PACKET3_3D_DRAW_INDX:
		/* triggers drawing using indices to a vertex buffer */
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r;

	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
			case PACKET_TYPE0:
				r = r100_packet0_check(p, &pkt);
				break;
			case PACKET_TYPE2:
				break;
			case PACKET_TYPE3:
				r = r100_packet3_check(p, &pkt);
				break;
			default:
				DRM_ERROR("Unknown packet type %d !\n",
					  pkt.type);
				return -EINVAL;
		}
		if (r) {
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}

#endif

/*
 * Global GPU functions
 */
void r100_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;

	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
	}

	if (rdev->family == CHIP_RV100 ||
	    rdev->family == CHIP_RS100 ||
	    rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
	}
}

/* Wait for vertical sync on primary CRTC */
void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
{
	uint32_t crtc_gen_cntl, tmp;
	int i;

	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
	if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
	    !(crtc_gen_cntl & RADEON_CRTC_EN)) {
		return;
	}
	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC_STATUS);
		if (tmp & RADEON_CRTC_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}

/* Wait for vertical sync on secondary CRTC */
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
{
	uint32_t crtc2_gen_cntl, tmp;
	int i;

	crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
	if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
	    !(crtc2_gen_cntl & RADEON_CRTC2_EN))
		return;

	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC2_STATUS);
		if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}

int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
		if (tmp >= n) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_gui_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
		printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
		       " Bad things might happen.\n");
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 31))) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}

int r100_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;

	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(0x0150);
		if (tmp & (1 << 2)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}
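
/* The wait helpers above all share one idiom: poll a status bit once per
 * microsecond for up to rdev->usec_timeout iterations, returning 0 on
 * success and -1 on timeout. Note that the callers in this file treat a
 * timeout as non-fatal and merely warn before continuing, typically
 * falling back to a soft reset path (an observation about how this code
 * behaves, not a documented hardware guarantee).
 */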

void r100_gpu_init(struct radeon_device *rdev)
{
	/* TODO: anything to do here? pipes? */
	r100_hdp_reset(rdev);
}

void r100_hdp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;

	dbgprintf("%s\n", __FUNCTION__);

	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}

int r100_rb2d_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	int i;

	dbgprintf("%s\n", __FUNCTION__);

	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 26))) {
			DRM_INFO("RB2D reset succeed (RBBM_STATUS=0x%08X)\n",
				 tmp);
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}

int r100_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matters */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* TODO: reset 3D engine */
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}

/*
 * VRAM info
 */
static void r100_vram_get_type(struct radeon_device *rdev)
{
	uint32_t tmp;

	rdev->mc.vram_is_ddr = false;
	if (rdev->flags & RADEON_IS_IGP)
		rdev->mc.vram_is_ddr = true;
	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
		rdev->mc.vram_is_ddr = true;
	if ((rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RV100_HALF_MODE) {
			rdev->mc.vram_width = 32;
		} else {
			rdev->mc.vram_width = 64;
		}
		if (rdev->flags & RADEON_SINGLE_CRTC) {
			rdev->mc.vram_width /= 4;
			rdev->mc.vram_is_ddr = true;
		}
	} else if (rdev->family <= CHIP_RV280) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
			rdev->mc.vram_width = 128;
		} else {
			rdev->mc.vram_width = 64;
		}
	} else {
		/* newer IGPs */
		rdev->mc.vram_width = 128;
	}
}

void r100_vram_info(struct radeon_device *rdev)
{
	r100_vram_get_type(rdev);

	if (rdev->flags & RADEON_IS_IGP) {
		uint32_t tom;
		/* read NB_TOM to get the amount of ram stolen for the GPU */
		tom = RREG32(RADEON_NB_TOM);
		rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
	} else {
		rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
		/* Some production boards of m6 will report 0
		 * if it's 8 MB
		 */
		if (rdev->mc.vram_size == 0) {
			rdev->mc.vram_size = 8192 * 1024;
			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
		}
	}

	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
}
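
/* The NB_TOM arithmetic above, worked through (field layout inferred from
 * the expression itself, not quoted from IGP documentation): the register
 * appears to pack the stolen-memory range in 64 KiB units, bottom in bits
 * 15:0 and inclusive top in bits 31:16. For tom = 0x03FF0000 that gives
 * (0x03FF - 0x0000 + 1) << 16 = 0x04000000, i.e. 64 MiB of system RAM
 * reserved for the GPU.
 */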

/*
 * Indirect registers accessor
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
	if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
		return;
	}
	(void)RREG32(RADEON_CLOCK_CNTL_DATA);
	(void)RREG32(RADEON_CRTC_GEN_CNTL);
}

static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
	/* This workaround is necessary on RV100, RS100 and RS200 chips,
	 * or the chip could hang on a subsequent access
	 */
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
		udelay(5000);
	}

	/* This function is required to work around a hardware bug in some
	 * (all?) revisions of the R300.  This workaround should be called
	 * after every CLOCK_CNTL_INDEX register access.  If not, register
	 * reads afterward may not be correct.
	 */
	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
		uint32_t save, tmp;

		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
	}
}

uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t data;

	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
	r100_pll_errata_after_index(rdev);
	data = RREG32(RADEON_CLOCK_CNTL_DATA);
	r100_pll_errata_after_data(rdev);
	return data;
}

void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
	r100_pll_errata_after_index(rdev);
	WREG32(RADEON_CLOCK_CNTL_DATA, v);
	r100_pll_errata_after_data(rdev);
}
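
/* PLL access pattern used above: the 6-bit PLL register index is written
 * to CLOCK_CNTL_INDEX (with PLL_WR_EN set for writes) and the payload then
 * moves through CLOCK_CNTL_DATA, with the errata hooks wrapping each step.
 * Reading PLL register 0x0d, for example, is WREG8(INDEX, 0x0d) followed
 * by RREG32(DATA). No locking is done here, so callers are expected to
 * serialize access themselves (an observation about this code, not a
 * statement about the rest of the driver).
 */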

uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
{
	if (reg < 0x10000)
		return readl(((void __iomem *)rdev->rmmio) + reg);
	else {
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	}
}

void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	if (reg < 0x10000)
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
	else {
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	}
}
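
/* Access strategy above: register offsets below 64 KiB fall inside the
 * mapped MMIO aperture and are read/written directly, while anything
 * beyond it goes through the MM_INDEX/MM_DATA pair (write the target
 * offset to MM_INDEX, then transfer the value via MM_DATA). The indirect
 * path costs two bus accesses instead of one, which is presumably why the
 * frequently used registers live in the low range.
 */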

int r100_init(struct radeon_device *rdev)
{
	return 0;
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}

static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & rdev->cp.ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
	}
	return 0;
}

static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t csq_stat, csq2_stat, tmp;
	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
	unsigned i;

	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
	r_rptr = (csq_stat >> 0) & 0x3ff;
	r_wptr = (csq_stat >> 10) & 0x3ff;
	ib1_rptr = (csq_stat >> 20) & 0x3ff;
	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
	seq_printf(m, "Ring rptr %u\n", r_rptr);
	seq_printf(m, "Ring wptr %u\n", r_wptr);
	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
	/* FIXME: 0, 128, 640 depend on the fifo setup, see cp_init_kms;
	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
	seq_printf(m, "Ring fifo:\n");
	for (i = 0; i < 256; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect1 fifo:\n");
	for (i = 256; i <= 512; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect2 fifo:\n");
	for (i = 640; i < ib1_wptr; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
	}
	return 0;
}

static int r100_debugfs_mc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_FB_LOCATION);
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_BUS_CNTL);
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AGP_BASE);
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
	tmp = RREG32(0x01D0);
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_LO_ADDR);
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_HI_ADDR);
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
	tmp = RREG32(0x01E4);
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
#endif

int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}

int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}

int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}