Subversion Repositories Kolibri OS

Rev

Rev 3192 | Rev 5078 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1117 serge 1
/*
2
 * Copyright 2008 Advanced Micro Devices, Inc.
3
 * Copyright 2008 Red Hat Inc.
4
 * Copyright 2009 Jerome Glisse.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included in
14
 * all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
 * OTHER DEALINGS IN THE SOFTWARE.
23
 *
24
 * Authors: Dave Airlie
25
 *          Alex Deucher
26
 *          Jerome Glisse
27
 */
1179 serge 28
#include 
1963 serge 29
#include 
2997 Serge 30
#include 
31
#include 
1117 serge 32
#include "radeon_reg.h"
33
#include "radeon.h"
1963 serge 34
#include "radeon_asic.h"
1179 serge 35
#include "r100d.h"
1221 serge 36
#include "rs100d.h"
37
#include "rv200d.h"
38
#include "rv250d.h"
1963 serge 39
#include "atom.h"
1117 serge 40
 
1221 serge 41
#include 
2997 Serge 42
#include 
1221 serge 43
 
1179 serge 44
#include "r100_reg_safe.h"
45
#include "rn50_reg_safe.h"
1221 serge 46
 
47
/* Firmware Names */
48
#define FIRMWARE_R100		"radeon/R100_cp.bin"
49
#define FIRMWARE_R200		"radeon/R200_cp.bin"
50
#define FIRMWARE_R300		"radeon/R300_cp.bin"
51
#define FIRMWARE_R420		"radeon/R420_cp.bin"
52
#define FIRMWARE_RS690		"radeon/RS690_cp.bin"
53
#define FIRMWARE_RS600		"radeon/RS600_cp.bin"
54
#define FIRMWARE_R520		"radeon/R520_cp.bin"
55
 
56
MODULE_FIRMWARE(FIRMWARE_R100);
57
MODULE_FIRMWARE(FIRMWARE_R200);
58
MODULE_FIRMWARE(FIRMWARE_R300);
59
MODULE_FIRMWARE(FIRMWARE_R420);
60
MODULE_FIRMWARE(FIRMWARE_RS690);
61
MODULE_FIRMWARE(FIRMWARE_RS600);
62
MODULE_FIRMWARE(FIRMWARE_R520);
63
 
64
 
1117 serge 65
/* This files gather functions specifics to:
66
 * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
2997 Serge 67
 * and others in some cases.
1117 serge 68
 */
69
 
3764 Serge 70
static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
71
{
72
	if (crtc == 0) {
73
		if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
74
			return true;
75
		else
76
			return false;
77
	} else {
78
		if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
79
			return true;
80
		else
81
			return false;
82
	}
83
}
84
 
85
static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
86
{
87
	u32 vline1, vline2;
88
 
89
	if (crtc == 0) {
90
		vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
91
		vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
92
	} else {
93
		vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
94
		vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
95
	}
96
	if (vline1 != vline2)
97
		return true;
98
	else
99
		return false;
100
}
101
 
2997 Serge 102
/**
103
 * r100_wait_for_vblank - vblank wait asic callback.
104
 *
105
 * @rdev: radeon_device pointer
106
 * @crtc: crtc to wait for vblank on
107
 *
108
 * Wait for vblank on the requested crtc (r1xx-r4xx).
109
 */
110
void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
111
{
3764 Serge 112
	unsigned i = 0;
2997 Serge 113
 
114
	if (crtc >= rdev->num_crtc)
115
		return;
116
 
117
	if (crtc == 0) {
3764 Serge 118
		if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
119
			return;
120
	} else {
121
		if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
122
			return;
123
	}
124
 
125
	/* depending on when we hit vblank, we may be close to active; if so,
126
	 * wait for another frame.
127
	 */
128
	while (r100_is_in_vblank(rdev, crtc)) {
129
		if (i++ % 100 == 0) {
130
			if (!r100_is_counter_moving(rdev, crtc))
2997 Serge 131
					break;
132
			}
133
		}
3764 Serge 134
 
135
	while (!r100_is_in_vblank(rdev, crtc)) {
136
		if (i++ % 100 == 0) {
137
			if (!r100_is_counter_moving(rdev, crtc))
2997 Serge 138
					break;
139
		}
140
	}
141
}
1963 serge 142
/* Program a page flip on @crtc_id to scan out from @crtc_base.
 * Returns the current update_pending status (non-zero while the flip
 * has not yet latched). */
u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
{
	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
	u32 offset_reg = RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset;
	u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
	int i;

	/* Lock the graphics update lock and update the scanout address. */
	WREG32(offset_reg, tmp);

	/* Wait for update_pending to go high. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(offset_reg) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
			break;
		udelay(1);
	}
	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");

	/* Unlock the lock, so double-buffering can take place inside vblank */
	tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
	WREG32(offset_reg, tmp);

	/* Return current update_pending status: */
	return RREG32(offset_reg) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
}
167
bool r100_gui_idle(struct radeon_device *rdev)
168
{
169
	if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
170
		return false;
171
	else
172
		return true;
173
}
174
 
1321 serge 175
/* hpd for digital panel detect/disconnect */
2997 Serge 176
/**
177
 * r100_hpd_sense - hpd sense callback.
178
 *
179
 * @rdev: radeon_device pointer
180
 * @hpd: hpd (hotplug detect) pin
181
 *
182
 * Checks if a digital monitor is connected (r1xx-r4xx).
183
 * Returns true if connected, false if not connected.
184
 */
1321 serge 185
bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
186
{
187
	bool connected = false;
188
 
189
	switch (hpd) {
190
	case RADEON_HPD_1:
191
		if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
192
			connected = true;
193
		break;
194
	case RADEON_HPD_2:
195
		if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
196
			connected = true;
197
		break;
198
	default:
199
		break;
200
	}
201
	return connected;
202
}
203
 
2997 Serge 204
/**
205
 * r100_hpd_set_polarity - hpd set polarity callback.
206
 *
207
 * @rdev: radeon_device pointer
208
 * @hpd: hpd (hotplug detect) pin
209
 *
210
 * Set the polarity of the hpd pin (r1xx-r4xx).
211
 */
1321 serge 212
void r100_hpd_set_polarity(struct radeon_device *rdev,
213
			   enum radeon_hpd_id hpd)
214
{
215
	u32 tmp;
216
	bool connected = r100_hpd_sense(rdev, hpd);
217
 
218
	switch (hpd) {
219
	case RADEON_HPD_1:
220
		tmp = RREG32(RADEON_FP_GEN_CNTL);
221
		if (connected)
222
			tmp &= ~RADEON_FP_DETECT_INT_POL;
223
		else
224
			tmp |= RADEON_FP_DETECT_INT_POL;
225
		WREG32(RADEON_FP_GEN_CNTL, tmp);
226
		break;
227
	case RADEON_HPD_2:
228
		tmp = RREG32(RADEON_FP2_GEN_CNTL);
229
		if (connected)
230
			tmp &= ~RADEON_FP2_DETECT_INT_POL;
231
		else
232
			tmp |= RADEON_FP2_DETECT_INT_POL;
233
		WREG32(RADEON_FP2_GEN_CNTL, tmp);
234
		break;
235
	default:
236
		break;
237
	}
238
}
239
 
2997 Serge 240
/**
241
 * r100_hpd_init - hpd setup callback.
242
 *
243
 * @rdev: radeon_device pointer
244
 *
245
 * Setup the hpd pins used by the card (r1xx-r4xx).
246
 * Set the polarity, and enable the hpd interrupts.
247
 */
1321 serge 248
void r100_hpd_init(struct radeon_device *rdev)
249
{
250
	struct drm_device *dev = rdev->ddev;
251
	struct drm_connector *connector;
2997 Serge 252
	unsigned enable = 0;
1321 serge 253
 
254
	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
255
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
2997 Serge 256
		enable |= 1 << radeon_connector->hpd.hpd;
257
		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
1321 serge 258
	}
2997 Serge 259
//	radeon_irq_kms_enable_hpd(rdev, enable);
1321 serge 260
}
261
 
2997 Serge 262
/**
263
 * r100_hpd_fini - hpd tear down callback.
264
 *
265
 * @rdev: radeon_device pointer
266
 *
267
 * Tear down the hpd pins used by the card (r1xx-r4xx).
268
 * Disable the hpd interrupts.
269
 */
1321 serge 270
void r100_hpd_fini(struct radeon_device *rdev)
271
{
272
	struct drm_device *dev = rdev->ddev;
273
	struct drm_connector *connector;
2997 Serge 274
	unsigned disable = 0;
1321 serge 275
 
276
	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
277
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
2997 Serge 278
		disable |= 1 << radeon_connector->hpd.hpd;
1321 serge 279
	}
2997 Serge 280
//	radeon_irq_kms_disable_hpd(rdev, disable);
1321 serge 281
}
282
 
1117 serge 283
/*
284
 * PCI GART
285
 */
286
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* Intentionally empty.  TODO: can we do something here?
	 * The hardware appears to cache only a single GART entry, so that
	 * entry ought to be discarded; otherwise the first GPU GART read
	 * that hits it could resolve to a stale address. */
}
293
 
1179 serge 294
int r100_pci_gart_init(struct radeon_device *rdev)
1117 serge 295
{
296
	int r;
297
 
2997 Serge 298
	if (rdev->gart.ptr) {
1963 serge 299
		WARN(1, "R100 PCI GART already initialized\n");
1179 serge 300
		return 0;
301
	}
1117 serge 302
	/* Initialize common gart structure */
303
	r = radeon_gart_init(rdev);
1179 serge 304
	if (r)
1117 serge 305
		return r;
1268 serge 306
    rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
2997 Serge 307
	rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
308
	rdev->asic->gart.set_page = &r100_pci_gart_set_page;
1179 serge 309
	return radeon_gart_table_ram_alloc(rdev);
310
}
311
 
312
int r100_pci_gart_enable(struct radeon_device *rdev)
313
{
314
	uint32_t tmp;
315
 
1430 serge 316
	radeon_gart_restore(rdev);
1117 serge 317
	/* discard memory request outside of configured range */
318
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
319
	WREG32(RADEON_AIC_CNTL, tmp);
320
	/* set address range for PCI address translate */
1430 serge 321
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
322
	WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
1117 serge 323
	/* set PCI GART page-table base address */
324
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
325
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
326
	WREG32(RADEON_AIC_CNTL, tmp);
327
	r100_pci_gart_tlb_flush(rdev);
2997 Serge 328
	DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
329
		 (unsigned)(rdev->mc.gtt_size >> 20),
330
		 (unsigned long long)rdev->gart.table_addr);
1117 serge 331
	rdev->gart.ready = true;
332
	return 0;
333
}
334
 
335
void r100_pci_gart_disable(struct radeon_device *rdev)
336
{
337
	uint32_t tmp;
338
 
339
	/* discard memory request outside of configured range */
340
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
341
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
342
	WREG32(RADEON_AIC_LO_ADDR, 0);
343
	WREG32(RADEON_AIC_HI_ADDR, 0);
344
}
345
 
346
/* Write GART entry @i to point at DMA address @addr.
 * Returns 0 on success, -EINVAL if @i is out of range.
 *
 * Bug fix: the bounds check previously used 'i > num_gpu_pages', which
 * accepted i == num_gpu_pages and allowed a one-entry out-of-bounds
 * write past the end of the GART table (valid indices are
 * 0 .. num_gpu_pages - 1). */
int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	u32 *gtt = rdev->gart.ptr;

	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	/* Entries are little-endian 32-bit DMA addresses. */
	gtt[i] = cpu_to_le32(lower_32_bits(addr));
	return 0;
}
356
 
1179 serge 357
/* Tear down the PCI GART: common teardown, disable translation,
 * then release the table memory. */
void r100_pci_gart_fini(struct radeon_device *rdev)
{
	radeon_gart_fini(rdev);
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
}
363
 
2005 serge 364
int r100_irq_set(struct radeon_device *rdev)
365
{
366
	uint32_t tmp = 0;
1117 serge 367
 
2005 serge 368
	if (!rdev->irq.installed) {
369
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
370
		WREG32(R_000040_GEN_INT_CNTL, 0);
371
		return -EINVAL;
372
	}
2997 Serge 373
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
2005 serge 374
		tmp |= RADEON_SW_INT_ENABLE;
375
	}
376
	if (rdev->irq.crtc_vblank_int[0] ||
2997 Serge 377
	    atomic_read(&rdev->irq.pflip[0])) {
2005 serge 378
		tmp |= RADEON_CRTC_VBLANK_MASK;
379
	}
380
	if (rdev->irq.crtc_vblank_int[1] ||
2997 Serge 381
	    atomic_read(&rdev->irq.pflip[1])) {
2005 serge 382
		tmp |= RADEON_CRTC2_VBLANK_MASK;
383
	}
384
	if (rdev->irq.hpd[0]) {
385
		tmp |= RADEON_FP_DETECT_MASK;
386
	}
387
	if (rdev->irq.hpd[1]) {
388
		tmp |= RADEON_FP2_DETECT_MASK;
389
	}
390
	WREG32(RADEON_GEN_INT_CNTL, tmp);
391
	return 0;
392
}
393
 
1221 serge 394
void r100_irq_disable(struct radeon_device *rdev)
1117 serge 395
{
1221 serge 396
	u32 tmp;
1117 serge 397
 
1221 serge 398
	WREG32(R_000040_GEN_INT_CNTL, 0);
399
	/* Wait and acknowledge irq */
400
	mdelay(1);
401
	tmp = RREG32(R_000044_GEN_INT_STATUS);
402
	WREG32(R_000044_GEN_INT_STATUS, tmp);
1117 serge 403
}
404
 
2997 Serge 405
static uint32_t r100_irq_ack(struct radeon_device *rdev)
1117 serge 406
{
1221 serge 407
	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
1321 serge 408
	uint32_t irq_mask = RADEON_SW_INT_TEST |
409
		RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
410
		RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
1117 serge 411
 
1221 serge 412
	if (irqs) {
413
		WREG32(RADEON_GEN_INT_STATUS, irqs);
1129 serge 414
	}
1221 serge 415
	return irqs & irq_mask;
1117 serge 416
}
417
 
2005 serge 418
int r100_irq_process(struct radeon_device *rdev)
419
{
420
	uint32_t status, msi_rearm;
421
	bool queue_hotplug = false;
1117 serge 422
 
2005 serge 423
	status = r100_irq_ack(rdev);
424
	if (!status) {
425
		return IRQ_NONE;
426
	}
427
	if (rdev->shutdown) {
428
		return IRQ_NONE;
429
	}
430
	while (status) {
431
		/* SW interrupt */
432
		if (status & RADEON_SW_INT_TEST) {
2997 Serge 433
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
2005 serge 434
		}
435
		/* Vertical blank interrupts */
436
		if (status & RADEON_CRTC_VBLANK_STAT) {
437
			if (rdev->irq.crtc_vblank_int[0]) {
438
//				drm_handle_vblank(rdev->ddev, 0);
439
				rdev->pm.vblank_sync = true;
440
//				wake_up(&rdev->irq.vblank_queue);
441
			}
442
//			if (rdev->irq.pflip[0])
443
//				radeon_crtc_handle_flip(rdev, 0);
444
		}
445
		if (status & RADEON_CRTC2_VBLANK_STAT) {
446
			if (rdev->irq.crtc_vblank_int[1]) {
447
//				drm_handle_vblank(rdev->ddev, 1);
448
				rdev->pm.vblank_sync = true;
449
//				wake_up(&rdev->irq.vblank_queue);
450
			}
451
//			if (rdev->irq.pflip[1])
452
//				radeon_crtc_handle_flip(rdev, 1);
453
		}
454
		if (status & RADEON_FP_DETECT_STAT) {
455
			queue_hotplug = true;
456
			DRM_DEBUG("HPD1\n");
457
		}
458
		if (status & RADEON_FP2_DETECT_STAT) {
459
			queue_hotplug = true;
460
			DRM_DEBUG("HPD2\n");
461
		}
462
		status = r100_irq_ack(rdev);
463
	}
464
//	if (queue_hotplug)
465
//		schedule_work(&rdev->hotplug_work);
466
	if (rdev->msi_enabled) {
467
		switch (rdev->family) {
468
		case CHIP_RS400:
469
		case CHIP_RS480:
470
			msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
471
			WREG32(RADEON_AIC_CNTL, msi_rearm);
472
			WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
473
			break;
474
		default:
2997 Serge 475
			WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
2005 serge 476
			break;
477
		}
478
	}
479
	return IRQ_HANDLED;
480
}
481
 
1403 serge 482
u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
483
{
484
	if (crtc == 0)
485
		return RREG32(RADEON_CRTC_CRNT_FRAME);
486
	else
487
		return RREG32(RADEON_CRTC2_CRNT_FRAME);
488
}
1117 serge 489
 
1404 serge 490
/* Who ever call radeon_fence_emit should call ring_lock and ask
491
 * for enough space (today caller are ib schedule and buffer move) */
1117 serge 492
void r100_fence_ring_emit(struct radeon_device *rdev,
493
			  struct radeon_fence *fence)
494
{
2997 Serge 495
	struct radeon_ring *ring = &rdev->ring[fence->ring];
496
 
1404 serge 497
	/* We have to make sure that caches are flushed before
498
	 * CPU might read something from VRAM. */
2997 Serge 499
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
500
	radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
501
	radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
502
	radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
1117 serge 503
	/* Wait until IDLE & CLEAN */
2997 Serge 504
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
505
	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
506
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
507
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
1403 serge 508
				RADEON_HDP_READ_BUFFER_INVALIDATE);
2997 Serge 509
	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
510
	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
1117 serge 511
	/* Emit fence sequence & fire IRQ */
2997 Serge 512
	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
513
	radeon_ring_write(ring, fence->seq);
514
	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
515
	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
1117 serge 516
}
517
 
2997 Serge 518
void r100_semaphore_ring_emit(struct radeon_device *rdev,
519
			      struct radeon_ring *ring,
520
			      struct radeon_semaphore *semaphore,
521
			      bool emit_wait)
522
{
523
	/* Unused on older asics, since we don't have semaphores or multiple rings */
524
	BUG();
525
}
526
 
1117 serge 527
int r100_copy_blit(struct radeon_device *rdev,
528
		   uint64_t src_offset,
529
		   uint64_t dst_offset,
2997 Serge 530
		   unsigned num_gpu_pages,
531
		   struct radeon_fence **fence)
1117 serge 532
{
2997 Serge 533
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1117 serge 534
	uint32_t cur_pages;
2997 Serge 535
	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
1117 serge 536
	uint32_t pitch;
537
	uint32_t stride_pixels;
538
	unsigned ndw;
539
	int num_loops;
540
	int r = 0;
541
 
542
	/* radeon limited to 16k stride */
543
	stride_bytes &= 0x3fff;
544
	/* radeon pitch is /64 */
545
	pitch = stride_bytes / 64;
546
	stride_pixels = stride_bytes / 4;
2997 Serge 547
	num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
1117 serge 548
 
549
	/* Ask for enough room for blit + flush + fence */
550
	ndw = 64 + (10 * num_loops);
2997 Serge 551
	r = radeon_ring_lock(rdev, ring, ndw);
1117 serge 552
	if (r) {
553
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
554
		return -EINVAL;
555
	}
2997 Serge 556
	while (num_gpu_pages > 0) {
557
		cur_pages = num_gpu_pages;
1117 serge 558
		if (cur_pages > 8191) {
559
			cur_pages = 8191;
560
		}
2997 Serge 561
		num_gpu_pages -= cur_pages;
1117 serge 562
 
563
		/* pages are in Y direction - height
564
		   page width in X direction - width */
2997 Serge 565
		radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
566
		radeon_ring_write(ring,
1117 serge 567
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
568
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
569
				  RADEON_GMC_SRC_CLIPPING |
570
				  RADEON_GMC_DST_CLIPPING |
571
				  RADEON_GMC_BRUSH_NONE |
572
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
573
				  RADEON_GMC_SRC_DATATYPE_COLOR |
574
				  RADEON_ROP3_S |
575
				  RADEON_DP_SRC_SOURCE_MEMORY |
576
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
577
				  RADEON_GMC_WR_MSK_DIS);
2997 Serge 578
		radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
579
		radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
580
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
581
		radeon_ring_write(ring, 0);
582
		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
583
		radeon_ring_write(ring, num_gpu_pages);
584
		radeon_ring_write(ring, num_gpu_pages);
585
		radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
1117 serge 586
	}
2997 Serge 587
	radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
588
	radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
589
	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
590
	radeon_ring_write(ring,
1117 serge 591
			  RADEON_WAIT_2D_IDLECLEAN |
592
			  RADEON_WAIT_HOST_IDLECLEAN |
593
			  RADEON_WAIT_DMA_GUI_IDLE);
594
	if (fence) {
2997 Serge 595
		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
1117 serge 596
	}
2997 Serge 597
	radeon_ring_unlock_commit(rdev, ring);
1117 serge 598
	return r;
599
}
600
 
1179 serge 601
static int r100_cp_wait_for_idle(struct radeon_device *rdev)
602
{
603
	unsigned i;
604
	u32 tmp;
605
 
606
	for (i = 0; i < rdev->usec_timeout; i++) {
607
		tmp = RREG32(R_000E40_RBBM_STATUS);
608
		if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
609
			return 0;
610
		}
611
		udelay(1);
612
	}
613
	return -1;
614
}
615
 
2997 Serge 616
void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
1117 serge 617
{
618
	int r;
619
 
2997 Serge 620
	r = radeon_ring_lock(rdev, ring, 2);
1117 serge 621
	if (r) {
622
		return;
623
	}
2997 Serge 624
	radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
625
	radeon_ring_write(ring,
1117 serge 626
			  RADEON_ISYNC_ANY2D_IDLE3D |
627
			  RADEON_ISYNC_ANY3D_IDLE2D |
628
			  RADEON_ISYNC_WAIT_IDLEGUI |
629
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
2997 Serge 630
	radeon_ring_unlock_commit(rdev, ring);
1117 serge 631
}
632
 
1221 serge 633
 
634
/* Load the microcode for the CP */
635
static int r100_cp_init_microcode(struct radeon_device *rdev)
1117 serge 636
{
1221 serge 637
	struct platform_device *pdev;
638
	const char *fw_name = NULL;
639
	int err;
1117 serge 640
 
1963 serge 641
	DRM_DEBUG_KMS("\n");
1117 serge 642
 
1412 serge 643
    pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
644
    err = IS_ERR(pdev);
645
    if (err) {
646
        printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
647
        return -EINVAL;
648
    }
1117 serge 649
	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
650
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
651
	    (rdev->family == CHIP_RS200)) {
652
		DRM_INFO("Loading R100 Microcode\n");
1221 serge 653
		fw_name = FIRMWARE_R100;
1117 serge 654
	} else if ((rdev->family == CHIP_R200) ||
655
		   (rdev->family == CHIP_RV250) ||
656
		   (rdev->family == CHIP_RV280) ||
657
		   (rdev->family == CHIP_RS300)) {
658
		DRM_INFO("Loading R200 Microcode\n");
1221 serge 659
		fw_name = FIRMWARE_R200;
1117 serge 660
	} else if ((rdev->family == CHIP_R300) ||
661
		   (rdev->family == CHIP_R350) ||
662
		   (rdev->family == CHIP_RV350) ||
663
		   (rdev->family == CHIP_RV380) ||
664
		   (rdev->family == CHIP_RS400) ||
665
		   (rdev->family == CHIP_RS480)) {
666
		DRM_INFO("Loading R300 Microcode\n");
1221 serge 667
		fw_name = FIRMWARE_R300;
1117 serge 668
	} else if ((rdev->family == CHIP_R420) ||
669
		   (rdev->family == CHIP_R423) ||
670
		   (rdev->family == CHIP_RV410)) {
671
		DRM_INFO("Loading R400 Microcode\n");
1221 serge 672
		fw_name = FIRMWARE_R420;
1117 serge 673
	} else if ((rdev->family == CHIP_RS690) ||
674
		   (rdev->family == CHIP_RS740)) {
675
		DRM_INFO("Loading RS690/RS740 Microcode\n");
1221 serge 676
		fw_name = FIRMWARE_RS690;
1117 serge 677
	} else if (rdev->family == CHIP_RS600) {
678
		DRM_INFO("Loading RS600 Microcode\n");
1221 serge 679
		fw_name = FIRMWARE_RS600;
1117 serge 680
	} else if ((rdev->family == CHIP_RV515) ||
681
		   (rdev->family == CHIP_R520) ||
682
		   (rdev->family == CHIP_RV530) ||
683
		   (rdev->family == CHIP_R580) ||
684
		   (rdev->family == CHIP_RV560) ||
685
		   (rdev->family == CHIP_RV570)) {
686
		DRM_INFO("Loading R500 Microcode\n");
1221 serge 687
		fw_name = FIRMWARE_R520;
1117 serge 688
		}
1221 serge 689
 
1412 serge 690
   err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
691
   platform_device_unregister(pdev);
1221 serge 692
   if (err) {
693
       printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
694
              fw_name);
695
	} else if (rdev->me_fw->size % 8) {
696
		printk(KERN_ERR
697
		       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
698
		       rdev->me_fw->size, fw_name);
699
		err = -EINVAL;
700
		release_firmware(rdev->me_fw);
701
		rdev->me_fw = NULL;
1117 serge 702
	}
1221 serge 703
	return err;
1117 serge 704
}
705
 
1221 serge 706
static void r100_cp_load_microcode(struct radeon_device *rdev)
707
{
708
	const __be32 *fw_data;
709
	int i, size;
710
 
711
	if (r100_gui_wait_for_idle(rdev)) {
712
		printk(KERN_WARNING "Failed to wait GUI idle while "
713
		       "programming pipes. Bad things might happen.\n");
714
	}
715
 
716
	if (rdev->me_fw) {
717
		size = rdev->me_fw->size / 4;
718
		fw_data = (const __be32 *)&rdev->me_fw->data[0];
719
		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
720
		for (i = 0; i < size; i += 2) {
721
			WREG32(RADEON_CP_ME_RAM_DATAH,
722
			       be32_to_cpup(&fw_data[i]));
723
			WREG32(RADEON_CP_ME_RAM_DATAL,
724
			       be32_to_cpup(&fw_data[i + 1]));
725
		}
726
	}
727
}
728
 
1117 serge 729
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
730
{
2997 Serge 731
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1117 serge 732
	unsigned rb_bufsz;
733
	unsigned rb_blksz;
734
	unsigned max_fetch;
735
	unsigned pre_write_timer;
736
	unsigned pre_write_limit;
737
	unsigned indirect2_start;
738
	unsigned indirect1_start;
739
	uint32_t tmp;
740
	int r;
741
 
1129 serge 742
	if (r100_debugfs_cp_init(rdev)) {
743
		DRM_ERROR("Failed to register debugfs file for CP !\n");
744
	}
1179 serge 745
	if (!rdev->me_fw) {
746
		r = r100_cp_init_microcode(rdev);
747
		if (r) {
748
			DRM_ERROR("Failed to load firmware!\n");
749
			return r;
750
		}
751
	}
752
 
1117 serge 753
	/* Align ring size */
754
	rb_bufsz = drm_order(ring_size / 8);
755
	ring_size = (1 << (rb_bufsz + 1)) * 4;
756
	r100_cp_load_microcode(rdev);
2997 Serge 757
	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
758
			     RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR,
759
			     0, 0x7fffff, RADEON_CP_PACKET2);
1117 serge 760
	if (r) {
761
		return r;
762
	}
763
	/* Each time the cp read 1024 bytes (16 dword/quadword) update
764
	 * the rptr copy in system ram */
765
	rb_blksz = 9;
766
	/* cp will read 128bytes at a time (4 dwords) */
767
	max_fetch = 1;
2997 Serge 768
	ring->align_mask = 16 - 1;
1117 serge 769
	/* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
770
	pre_write_timer = 64;
771
	/* Force CP_RB_WPTR write if written more than one time before the
772
	 * delay expire
773
	 */
774
	pre_write_limit = 0;
775
	/* Setup the cp cache like this (cache size is 96 dwords) :
776
	 *	RING		0  to 15
777
	 *	INDIRECT1	16 to 79
778
	 *	INDIRECT2	80 to 95
779
	 * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
780
	 *    indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
781
	 *    indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
782
	 * Idea being that most of the gpu cmd will be through indirect1 buffer
783
	 * so it gets the bigger cache.
784
	 */
785
	indirect2_start = 80;
786
	indirect1_start = 16;
787
	/* cp setup */
788
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
1268 serge 789
	tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
1117 serge 790
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
1963 serge 791
	       REG_SET(RADEON_MAX_FETCH, max_fetch));
1268 serge 792
#ifdef __BIG_ENDIAN
793
	tmp |= RADEON_BUF_SWAP_32BIT;
794
#endif
1963 serge 795
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);
1268 serge 796
 
1117 serge 797
	/* Set ring address */
2997 Serge 798
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
799
	WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
1117 serge 800
	/* Force read & write ptr to 0 */
1963 serge 801
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
1117 serge 802
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
2997 Serge 803
	ring->wptr = 0;
804
	WREG32(RADEON_CP_RB_WPTR, ring->wptr);
1963 serge 805
 
806
	/* set the wb address whether it's enabled or not */
807
	WREG32(R_00070C_CP_RB_RPTR_ADDR,
808
		S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
809
	WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);
810
 
811
	if (rdev->wb.enabled)
812
		WREG32(R_000770_SCRATCH_UMSK, 0xff);
813
	else {
814
		tmp |= RADEON_RB_NO_UPDATE;
815
		WREG32(R_000770_SCRATCH_UMSK, 0);
816
	}
817
 
1117 serge 818
	WREG32(RADEON_CP_RB_CNTL, tmp);
819
	udelay(10);
2997 Serge 820
	ring->rptr = RREG32(RADEON_CP_RB_RPTR);
1117 serge 821
	/* Set cp mode to bus mastering & enable cp*/
822
	WREG32(RADEON_CP_CSQ_MODE,
823
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
824
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
1963 serge 825
	WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
826
	WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
1117 serge 827
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
2997 Serge 828
	radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
829
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
1117 serge 830
	if (r) {
831
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
832
		return r;
833
	}
2997 Serge 834
	ring->ready = true;
3192 Serge 835
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
836
 
837
	if (!ring->rptr_save_reg /* not resuming from suspend */
838
	    && radeon_ring_supports_scratch_reg(rdev, ring)) {
839
		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
840
		if (r) {
841
			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
842
			ring->rptr_save_reg = 0;
843
		}
844
	}
1117 serge 845
	return 0;
846
}
847
 
848
void r100_cp_fini(struct radeon_device *rdev)
849
{
1179 serge 850
	if (r100_cp_wait_for_idle(rdev)) {
851
		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
852
	}
1117 serge 853
	/* Disable ring */
1179 serge 854
	r100_cp_disable(rdev);
3192 Serge 855
	radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
2997 Serge 856
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1117 serge 857
	DRM_INFO("radeon: cp finalized\n");
858
}
859
 
860
void r100_cp_disable(struct radeon_device *rdev)
861
{
862
	/* Disable ring */
3192 Serge 863
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2997 Serge 864
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1117 serge 865
	WREG32(RADEON_CP_CSQ_MODE, 0);
866
	WREG32(RADEON_CP_CSQ_CNTL, 0);
1963 serge 867
	WREG32(R_000770_SCRATCH_UMSK, 0);
1117 serge 868
	if (r100_gui_wait_for_idle(rdev)) {
869
		printk(KERN_WARNING "Failed to wait GUI idle while "
870
		       "programming pipes. Bad things might happen.\n");
871
	}
872
}
873
 
2997 Serge 874
#if 0
875
/*
876
 * CS functions
877
 */
878
/**
 * r100_reloc_pitch_offset - relocate a *_PITCH_OFFSET register write.
 * @p:   parser context.
 * @pkt: packet the register write belongs to.
 * @idx: index of the value dword inside the IB.
 * @reg: register being written (RADEON_DST_PITCH_OFFSET or
 *       RADEON_SRC_PITCH_OFFSET).
 *
 * Patches the low 22 offset bits with the relocated GPU address (both are
 * in 1024-byte units) and, unless userspace asked to keep its own tiling
 * flags, folds the BO tiling state into the destination tile bits.
 *
 * Return: 0 on success, negative error code on missing reloc or when a
 * source blit from a microtiled surface is attempted.
 */
int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
			    struct radeon_cs_packet *pkt,
			    unsigned idx,
			    unsigned reg)
{
	int r;
	u32 tile_flags = 0;
	u32 tmp;
	struct radeon_cs_reloc *reloc;
	u32 value;

	r = radeon_cs_packet_next_reloc(p, &reloc, 0);
	if (r) {
		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
			  idx, reg);
		radeon_cs_dump_packet(p, pkt);
		return r;
	}

	/* low 22 bits hold the surface offset in 1KB units; add the
	 * relocated GPU offset, scaled by >> 10 to the same units */
	value = radeon_get_ib_value(p, idx);
	tmp = value & 0x003fffff;
	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);

	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
			tile_flags |= RADEON_DST_TILE_MACRO;
		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
			if (reg == RADEON_SRC_PITCH_OFFSET) {
				DRM_ERROR("Cannot src blit from microtiled surface\n");
				radeon_cs_dump_packet(p, pkt);
				return -EINVAL;
			}
			tile_flags |= RADEON_DST_TILE_MICRO;
		}

		/* keep bits 29:22 (presumably the pitch field) from
		 * userspace, replace offset and tile bits */
		tmp |= tile_flags;
		p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
	} else
		/* keep userspace bits 31:22 (pitch + tiling) untouched */
		p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
	return 0;
}
919
 
2997 Serge 920
/**
 * r100_packet3_load_vbpntr - check/relocate a 3D_LOAD_VBPNTR packet.
 * @p:   parser context.
 * @pkt: the PACKET3 being processed.
 * @idx: index of the first dword of the packet body in the IB.
 *
 * The packet declares up to 16 vertex arrays.  Arrays are encoded two at
 * a time (one dword carrying both element sizes followed by two offset
 * dwords), with a trailing single entry when the count is odd.  Each
 * offset dword gets a relocation applied and the array geometry is
 * recorded in the CS track state for later bound checking.
 *
 * Return: 0 on success, negative error code on bad count / missing reloc.
 */
int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
			     struct radeon_cs_packet *pkt,
			     int idx)
{
	unsigned c, i;
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	int r = 0;
	volatile uint32_t *ib;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;
	/* low 5 bits of the first dword = number of vertex arrays */
	c = radeon_get_ib_value(p, idx++) & 0x1F;
	if (c > 16) {
	    /* NOTE(review): message prints pkt->opcode, not the offending
	     * count c — looks like a copy/paste slip; verify upstream. */
	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
		      pkt->opcode);
	    radeon_cs_dump_packet(p, pkt);
	    return -EINVAL;
	}
	track->num_arrays = c;
	/* NOTE(review): c == 0 makes (c - 1) wrap to UINT_MAX (c is
	 * unsigned); the loop then only terminates once relocs run out —
	 * consider whether c == 0 should be rejected up front. */
	for (i = 0; i < (c - 1); i+=2, idx+=3) {
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);

		/* element size of the even array lives in bits 14:8 */
		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize &= 0x7F;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
				  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
		track->arrays[i + 1].robj = reloc->robj;
		/* element size of the odd array lives in bits 30:24 */
		track->arrays[i + 1].esize = idx_value >> 24;
		track->arrays[i + 1].esize &= 0x7F;
	}
	if (c & 1) {
		/* odd count: one final array with a single offset dword */
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		idx_value = radeon_get_ib_value(p, idx);
		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
		track->arrays[i + 0].robj = reloc->robj;
		track->arrays[i + 0].esize = idx_value >> 8;
		track->arrays[i + 0].esize &= 0x7F;
	}
	return r;
}
983
 
1117 serge 984
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
985
			  struct radeon_cs_packet *pkt,
986
			  const unsigned *auth, unsigned n,
987
			  radeon_packet0_check_t check)
988
{
989
	unsigned reg;
990
	unsigned i, j, m;
991
	unsigned idx;
992
	int r;
993
 
994
	idx = pkt->idx + 1;
995
	reg = pkt->reg;
996
	/* Check that register fall into register range
997
	 * determined by the number of entry (n) in the
998
	 * safe register bitmap.
999
	 */
1000
	if (pkt->one_reg_wr) {
1001
		if ((reg >> 7) > n) {
1002
			return -EINVAL;
1003
		}
1004
	} else {
1005
		if (((reg + (pkt->count << 2)) >> 7) > n) {
1006
			return -EINVAL;
1007
		}
1008
	}
1009
	for (i = 0; i <= pkt->count; i++, idx++) {
1010
		j = (reg >> 7);
1011
		m = 1 << ((reg >> 2) & 31);
1012
		if (auth[j] & m) {
1013
			r = check(p, pkt, idx, reg);
1014
			if (r) {
1015
				return r;
1016
			}
1017
		}
1018
		if (pkt->one_reg_wr) {
1019
			if (!(auth[j] & m)) {
1020
				break;
1021
			}
1022
		} else {
1023
			reg += 4;
1024
		}
1025
	}
1026
	return 0;
1027
}
1028
 
1029
void r100_cs_dump_packet(struct radeon_cs_parser *p,
1030
			 struct radeon_cs_packet *pkt)
1031
{
1032
	volatile uint32_t *ib;
1033
	unsigned i;
1034
	unsigned idx;
1035
 
2997 Serge 1036
	ib = p->ib.ptr;
1117 serge 1037
	idx = pkt->idx;
1038
	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
1039
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
1040
	}
1041
}
1042
 
1043
/**
1044
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
1045
 * @parser:	parser structure holding parsing context.
1046
 * @pkt:	where to store packet informations
1047
 *
1048
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
1049
 * if packet is bigger than remaining ib size. or if packets is unknown.
1050
 **/
1051
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	/* the header dword must itself lie inside the IB chunk */
	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	switch (pkt->type) {
	case PACKET_TYPE0:
		/* register-write packet: base register + one-reg-write flag */
		pkt->reg = CP_PACKET0_GET_REG(header);
		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		/* filler packet: header only, no payload */
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	/* whole packet (header + count+1 payload dwords) must fit the IB */
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}
1089
 
1090
/**
1179 serge 1091
 * r100_cs_packet_next_vline() - parse userspace VLINE packet
1092
 * @parser:		parser structure holding parsing context.
1093
 *
1094
 * Userspace sends a special sequence for VLINE waits.
1095
 * PACKET0 - VLINE_START_END + value
1096
 * PACKET0 - WAIT_UNTIL +_value
1097
 * RELOC (P3) - crtc_id in reloc.
1098
 *
1099
 * This function parses this and relocates the VLINE START END
1100
 * and WAIT UNTIL packets to the correct crtc.
1101
 * It also detects a switched off crtc and nulls out the
1102
 * wait in that case.
1103
 */
1104
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the wait until */
	r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;

	/* check its a wait until and only 1 count */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		return -EINVAL;
	}

	/* payload of the WAIT_UNTIL write must be the VLINE wait condition */
	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
	if (r)
		return r;

	/* h_idx: header of the VLINE_START_END PACKET0 userspace emitted
	 * right before the wait (assumes the sequence documented above) */
	h_idx = p->idx - 2;
	/* consume the wait packet and the NOP/reloc packet */
	p->idx += waitreloc.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	/* crtc_id rides in the reloc packet, 5 dwords past the header */
	crtc_id = radeon_get_ib_value(p, h_idx + 5);
	reg = R100_CP_PACKET0_GET_REG(header);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -EINVAL;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		/* retarget the register write and the wait to CRTC2 */
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= ~R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
		ib[h_idx] = header;
		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}

	return 0;
}
1179
 
1180
/*
 * Compute the per-vertex dword count implied by a SE_VTX_FMT value.
 * Each format bit contributes a fixed number of dwords on top of the
 * base size of 2; the order of summation is irrelevant.
 */
static int r100_get_vtx_size(uint32_t vtx_fmt)
{
	static const struct {
		uint32_t bit;
		int dwords;
	} fmt_sizes[] = {
		/* ordered according to bits in spec */
		{ RADEON_SE_VTX_FMT_W0,      1 },
		{ RADEON_SE_VTX_FMT_FPCOLOR, 3 },
		{ RADEON_SE_VTX_FMT_FPALPHA, 1 },
		{ RADEON_SE_VTX_FMT_PKCOLOR, 1 },
		{ RADEON_SE_VTX_FMT_FPSPEC,  3 },
		{ RADEON_SE_VTX_FMT_FPFOG,   1 },
		{ RADEON_SE_VTX_FMT_PKSPEC,  1 },
		{ RADEON_SE_VTX_FMT_ST0,     2 },
		{ RADEON_SE_VTX_FMT_ST1,     2 },
		{ RADEON_SE_VTX_FMT_Q1,      1 },
		{ RADEON_SE_VTX_FMT_ST2,     2 },
		{ RADEON_SE_VTX_FMT_Q2,      1 },
		{ RADEON_SE_VTX_FMT_ST3,     2 },
		{ RADEON_SE_VTX_FMT_Q3,      1 },
		{ RADEON_SE_VTX_FMT_Q0,      1 },
		{ RADEON_SE_VTX_FMT_N0,      3 },
		{ RADEON_SE_VTX_FMT_XY1,     2 },
		{ RADEON_SE_VTX_FMT_Z1,      1 },
		{ RADEON_SE_VTX_FMT_W1,      1 },
		{ RADEON_SE_VTX_FMT_N1,      1 },
		{ RADEON_SE_VTX_FMT_Z,       1 },
	};
	unsigned i;
	int vtx_size = 2;

	for (i = 0; i < sizeof(fmt_sizes) / sizeof(fmt_sizes[0]); i++) {
		if (vtx_fmt & fmt_sizes[i].bit)
			vtx_size += fmt_sizes[i].dwords;
	}

	/* blend weight: bits 17:15 hold the weight count directly
	 * (adding it unconditionally is equivalent — the field adds 0
	 * when it is clear) */
	vtx_size += (vtx_fmt >> 15) & 0x7;

	return vtx_size;
}
1232
 
1117 serge 1233
/*
 * Validate one PACKET0 register write on r100-class hardware.
 *
 * Relocates buffer-object addresses written to the hardware and mirrors
 * render-state registers into the CS track structure so the draw-time
 * checker can verify buffer bounds.  @idx is the IB index of the value
 * dword, @reg the register being written.  Returns 0 or a negative errno.
 */
static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx, unsigned reg)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	volatile uint32_t *ib;
	uint32_t tmp;
	int r;
	int i, face;
	u32 tile_flags = 0;
	u32 idx_value;

	ib = p->ib.ptr;
	track = (struct r100_cs_track *)p->track;

	idx_value = radeon_get_ib_value(p, idx);

	switch (reg) {
	case RADEON_CRTC_GUI_TRIG_VLINE:
		r = r100_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		break;
	/* FIXME: only allow PACKET3 blit? easier to check for out of
	 * range access */
	case RADEON_DST_PITCH_OFFSET:
	case RADEON_SRC_PITCH_OFFSET:
		r = r100_reloc_pitch_offset(p, pkt, idx, reg);
		if (r)
			return r;
		break;
	case RADEON_RB3D_DEPTHOFFSET:
		/* depth buffer base: relocate and remember for bound checks */
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->zb.robj = reloc->robj;
		track->zb.offset = idx_value;
		track->zb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_RB3D_COLOROFFSET:
		/* color buffer base: relocate and remember for bound checks */
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->cb[0].robj = reloc->robj;
		track->cb[0].offset = idx_value;
		track->cb_dirty = true;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_TXOFFSET_0:
	case RADEON_PP_TXOFFSET_1:
	case RADEON_PP_TXOFFSET_2:
		/* texture base address; per-unit registers are 24 bytes apart */
		i = (reg - RADEON_PP_TXOFFSET_0) / 24;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_TXO_MACRO_TILE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_TXO_MICRO_TILE_X2;

			tmp = idx_value & ~(0x7 << 2);	/* clear tile bits 4:2 */
			tmp |= tile_flags;
			ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset);
		} else
			ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T0_0:
	case RADEON_PP_CUBIC_OFFSET_T0_1:
	case RADEON_PP_CUBIC_OFFSET_T0_2:
	case RADEON_PP_CUBIC_OFFSET_T0_3:
	case RADEON_PP_CUBIC_OFFSET_T0_4:
		/* cube map face offsets for texture unit 0 */
		i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[0].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[0].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T1_0:
	case RADEON_PP_CUBIC_OFFSET_T1_1:
	case RADEON_PP_CUBIC_OFFSET_T1_2:
	case RADEON_PP_CUBIC_OFFSET_T1_3:
	case RADEON_PP_CUBIC_OFFSET_T1_4:
		/* cube map face offsets for texture unit 1 */
		i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[1].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[1].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_OFFSET_T2_0:
	case RADEON_PP_CUBIC_OFFSET_T2_1:
	case RADEON_PP_CUBIC_OFFSET_T2_2:
	case RADEON_PP_CUBIC_OFFSET_T2_3:
	case RADEON_PP_CUBIC_OFFSET_T2_4:
		/* cube map face offsets for texture unit 2 */
		i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		track->textures[2].cube_info[i].offset = idx_value;
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		track->textures[2].cube_info[i].robj = reloc->robj;
		track->tex_dirty = true;
		break;
	case RADEON_RE_WIDTH_HEIGHT:
		/* scissor height feeds both color and depth bound checks */
		track->maxy = ((idx_value >> 16) & 0x7FF);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_COLORPITCH:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_COLOR_TILE_ENABLE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

			tmp = idx_value & ~(0x7 << 16);	/* clear tile bits 18:16 */
			tmp |= tile_flags;
			ib[idx] = tmp;
		} else
			ib[idx] = idx_value;

		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
		track->cb_dirty = true;
		break;
	case RADEON_RB3D_DEPTHPITCH:
		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_CNTL:
		/* derive color buffer bytes-per-pixel from the format field */
		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
		case 7:
		case 8:
		case 9:
		case 11:
		case 12:
			track->cb[0].cpp = 1;
			break;
		case 3:
		case 4:
		case 15:
			track->cb[0].cpp = 2;
			break;
		case 6:
			track->cb[0].cpp = 4;
			break;
		default:
			DRM_ERROR("Invalid color buffer format (%d) !\n",
				  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
			return -EINVAL;
		}
		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
		track->cb_dirty = true;
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZSTENCILCNTL:
		/* derive depth buffer bytes-per-pixel from the z format */
		switch (idx_value & 0xf) {
		case 0:
			track->zb.cpp = 2;
			break;
		case 2:
		case 3:
		case 4:
		case 5:
		case 9:
		case 11:
			track->zb.cpp = 4;
			break;
		default:
			break;
		}
		track->zb_dirty = true;
		break;
	case RADEON_RB3D_ZPASS_ADDR:
		/* zpass counter writeback address needs a relocation */
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
		break;
	case RADEON_PP_CNTL:
		{
			/* texture enable bits start at bit 4, one per unit */
			uint32_t temp = idx_value >> 4;
			for (i = 0; i < track->num_texture; i++)
				track->textures[i].enabled = !!(temp & (1 << i));
			track->tex_dirty = true;
		}
		break;
	case RADEON_SE_VF_CNTL:
		track->vap_vf_cntl = idx_value;
		break;
	case RADEON_SE_VTX_FMT:
		track->vtx_size = r100_get_vtx_size(idx_value);
		break;
	case RADEON_PP_TEX_SIZE_0:
	case RADEON_PP_TEX_SIZE_1:
	case RADEON_PP_TEX_SIZE_2:
		/* explicit texture size registers, 8 bytes apart per unit */
		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TEX_PITCH_0:
	case RADEON_PP_TEX_PITCH_1:
	case RADEON_PP_TEX_PITCH_2:
		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
		/* hardware pitch value is (bytes - 32) */
		track->textures[i].pitch = idx_value + 32;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFILTER_0:
	case RADEON_PP_TXFILTER_1:
	case RADEON_PP_TXFILTER_2:
		i = (reg - RADEON_PP_TXFILTER_0) / 24;
		track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
						 >> RADEON_MAX_MIP_LEVEL_SHIFT);
		/* filter modes 2 and 6 disable power-of-two rounding of the
		 * width (bits 25:23) / height (bits 29:27) */
		tmp = (idx_value >> 23) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_w = false;
		tmp = (idx_value >> 27) & 0x7;
		if (tmp == 2 || tmp == 6)
			track->textures[i].roundup_h = false;
		track->tex_dirty = true;
		break;
	case RADEON_PP_TXFORMAT_0:
	case RADEON_PP_TXFORMAT_1:
	case RADEON_PP_TXFORMAT_2:
		i = (reg - RADEON_PP_TXFORMAT_0) / 24;
		if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
			track->textures[i].use_pitch = 1;
		} else {
			track->textures[i].use_pitch = 0;
			track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
			track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
		}
		if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
			track->textures[i].tex_coord_type = 2;
		/* NOTE(review): formats not listed below leave cpp and
		 * compress_format at their previous values (no default). */
		switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
		case RADEON_TXFORMAT_I8:
		case RADEON_TXFORMAT_RGB332:
		case RADEON_TXFORMAT_Y8:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_AI88:
		case RADEON_TXFORMAT_ARGB1555:
		case RADEON_TXFORMAT_RGB565:
		case RADEON_TXFORMAT_ARGB4444:
		case RADEON_TXFORMAT_VYUY422:
		case RADEON_TXFORMAT_YVYU422:
		case RADEON_TXFORMAT_SHADOW16:
		case RADEON_TXFORMAT_LDUDV655:
		case RADEON_TXFORMAT_DUDV88:
			track->textures[i].cpp = 2;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_ARGB8888:
		case RADEON_TXFORMAT_RGBA8888:
		case RADEON_TXFORMAT_SHADOW32:
		case RADEON_TXFORMAT_LDUDUV8888:
			track->textures[i].cpp = 4;
			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
			break;
		case RADEON_TXFORMAT_DXT1:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
			break;
		case RADEON_TXFORMAT_DXT23:
		case RADEON_TXFORMAT_DXT45:
			track->textures[i].cpp = 1;
			track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
			break;
		}
		/* cube face 4 dimensions are encoded in this register too */
		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
		track->tex_dirty = true;
		break;
	case RADEON_PP_CUBIC_FACES_0:
	case RADEON_PP_CUBIC_FACES_1:
	case RADEON_PP_CUBIC_FACES_2:
		/* 4 faces, 8 bits each: low nibble log2(width), high nibble
		 * log2(height) */
		tmp = idx_value;
		i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
		for (face = 0; face < 4; face++) {
			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
		}
		track->tex_dirty = true;
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}
1574
 
1575
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
1576
					 struct radeon_cs_packet *pkt,
1321 serge 1577
					 struct radeon_bo *robj)
1117 serge 1578
{
1579
	unsigned idx;
1221 serge 1580
	u32 value;
1117 serge 1581
	idx = pkt->idx + 1;
1221 serge 1582
	value = radeon_get_ib_value(p, idx + 2);
1321 serge 1583
	if ((value + 1) > radeon_bo_size(robj)) {
1117 serge 1584
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1585
			  "(need %u have %lu) !\n",
1221 serge 1586
			  value + 1,
1321 serge 1587
			  radeon_bo_size(robj));
1117 serge 1588
		return -EINVAL;
1589
	}
1590
	return 0;
1591
}
1592
 
1593
/*
 * Validate one PACKET3 command on r100-class hardware.
 *
 * Applies relocations to buffer addresses embedded in the packet and,
 * for draw commands, records the draw parameters in the track state and
 * runs r100_cs_track_check() to verify all referenced buffers are large
 * enough.  Returns 0 or a negative errno.
 */
static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_reloc *reloc;
	struct r100_cs_track *track;
	unsigned idx;
	volatile uint32_t *ib;
	int r;

	ib = p->ib.ptr;
	idx = pkt->idx + 1;
	track = (struct r100_cs_track *)p->track;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		r = r100_packet3_load_vbpntr(p, pkt, idx);
		if (r)
			return r;
		break;
	case PACKET3_INDX_BUFFER:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		/* relocate the index buffer address, then bound-check it */
		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			radeon_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset);
		track->num_arrays = 1;
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));

		track->arrays[0].robj = reloc->robj;
		track->arrays[0].esize = track->vtx_size;

		track->max_indx = radeon_get_ib_value(p, idx+1);

		track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD:
		/* PRIM_WALK field (bits 5:4 of the VF control dword) */
		if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		track->immd_dwords = pkt->count - 1;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		track->immd_dwords = pkt->count;
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
		r = r100_cs_track_check(p->rdev, track);
		if (r)
			return r;
		break;
	case PACKET3_3D_CLEAR_HIZ:
	case PACKET3_3D_CLEAR_ZMASK:
		/* hyper-z clears are reserved for the file that owns hyperz */
		if (p->rdev->hyperz_filp != p->filp)
			return -EINVAL;
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
1713
 
1714
/**
 * r100_cs_parse - top level command stream checker for r100 class GPUs.
 * @p: parser context (chunks, IB and relocs already set up by the caller).
 *
 * Allocates the per-CS track state, then walks the IB packet by packet,
 * dispatching PACKET0 register writes to the family-specific checker and
 * PACKET3 commands to r100_packet3_check() until the IB is consumed.
 *
 * Return: 0 on success, negative error code on a malformed/forbidden CS.
 */
int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct r100_cs_track *track;
	int r;

	track = kzalloc(sizeof(*track), GFP_KERNEL);
	if (!track)
		return -ENOMEM;
	r100_cs_track_clear(p->rdev, track);
	/* publish track state; the packet checkers reach it via p->track */
	p->track = track;
	do {
		r = radeon_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			/* NOTE(review): track was published via p->track
			 * above; these error returns rely on the caller's
			 * parser teardown to free it — verify, otherwise
			 * they leak the allocation. */
			return r;
		}
		/* advance past header + payload */
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* r200 shares this walker but has its own register
			 * checker callback */
			if (p->rdev->family >= CHIP_R200)
				r = r100_cs_parse_packet0(p, &pkt,
							  p->rdev->config.r100.reg_safe_bm,
							  p->rdev->config.r100.reg_safe_bm_size,
							  &r200_packet0_check);
			else
				r = r100_cs_parse_packet0(p, &pkt,
							  p->rdev->config.r100.reg_safe_bm,
							  p->rdev->config.r100.reg_safe_bm_size,
							  &r100_packet0_check);
			break;
		case RADEON_PACKET_TYPE2:
			/* padding packet: nothing to check */
			break;
		case RADEON_PACKET_TYPE3:
			r = r100_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n",
				  pkt.type);
			return -EINVAL;
		}
		if (r)
			return r;
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}
1759
 
2997 Serge 1760
/*
 * Dump the complete state of one tracked texture unit to the kernel log.
 * Called when a texture fails CS validation so the offending setup can
 * be diagnosed from dmesg.
 */
static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("width_11                   %d\n", t->width_11);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("height_11                  %d\n", t->height_11);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
	DRM_ERROR("compress format            %d\n", t->compress_format);
}
1117 serge 1776
 
2997 Serge 1777
static int r100_track_compress_size(int compress_format, int w, int h)
1117 serge 1778
{
2997 Serge 1779
	int block_width, block_height, block_bytes;
1780
	int wblocks, hblocks;
1781
	int min_wblocks;
1782
	int sz;
1117 serge 1783
 
2997 Serge 1784
	block_width = 4;
1785
	block_height = 4;
1786
 
1787
	switch (compress_format) {
1788
	case R100_TRACK_COMP_DXT1:
1789
		block_bytes = 8;
1790
		min_wblocks = 4;
1791
		break;
1792
	default:
1793
	case R100_TRACK_COMP_DXT35:
1794
		block_bytes = 16;
1795
		min_wblocks = 2;
1796
		break;
1117 serge 1797
	}
1798
 
2997 Serge 1799
	hblocks = (h + block_height - 1) / block_height;
1800
	wblocks = (w + block_width - 1) / block_width;
1801
	if (wblocks < min_wblocks)
1802
		wblocks = min_wblocks;
1803
	sz = wblocks * hblocks * block_bytes;
1804
	return sz;
1805
}
1806
 
1807
/*
 * Validate the per-face buffers of a cube texture for unit @idx: for each
 * tracked face, compute the size of the face image (compressed or linear),
 * add the face's offset and verify it fits inside the bound BO.
 * Returns 0 on success, -1 if any face overruns its buffer object.
 *
 * NOTE(review): only faces 0..4 of cube_info[] are checked here — the
 * remaining face is presumably validated via the main texture path in
 * r100_cs_track_texture_check(); confirm against the tracker layout.
 */
static int r100_cs_track_cube(struct radeon_device *rdev,
			      struct r100_cs_track *track, unsigned idx)
{
	unsigned face, w, h;
	struct radeon_bo *cube_robj;
	unsigned long size;
	unsigned compress_format = track->textures[idx].compress_format;

	for (face = 0; face < 5; face++) {
		cube_robj = track->textures[idx].cube_info[face].robj;
		w = track->textures[idx].cube_info[face].width;
		h = track->textures[idx].cube_info[face].height;

		if (compress_format) {
			size = r100_track_compress_size(compress_format, w, h);
		} else
			size = w * h;
		size *= track->textures[idx].cpp;

		size += track->textures[idx].cube_info[face].offset;

		if (size > radeon_bo_size(cube_robj)) {
			DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
				  size, radeon_bo_size(cube_robj));
			r100_cs_track_texture_print(&track->textures[idx]);
			return -1;
		}
	}
	return 0;
}
1837
 
2997 Serge 1838
/*
 * Validate every enabled texture unit against its bound buffer object.
 * For each unit the full mip chain (levels 0..num_levels) is sized —
 * width/height derived either from the programmed pitch or from the
 * width/height fields (with the extra 11th bit on RV515+), optionally
 * rounded up to powers of two — and the total, times bytes-per-texel,
 * must fit in the BO. Cube-coordinate textures (type 2) are either
 * delegated to r100_cs_track_cube() or multiplied by 6 faces.
 * Returns 0 if all units validate, -EINVAL otherwise.
 */
static int r100_cs_track_texture_check(struct radeon_device *rdev,
				       struct r100_cs_track *track)
{
	struct radeon_bo *robj;
	unsigned long size;
	unsigned u, i, w, h, d;
	int ret;

	for (u = 0; u < track->num_texture; u++) {
		if (!track->textures[u].enabled)
			continue;
		if (track->textures[u].lookup_disable)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				/* pre-R300 pitch is in bytes, later in texels */
				if (rdev->family < CHIP_R300)
					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
				else
					w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width;
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				w = w / (1 << i);
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height;
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			h = h / (1 << i);
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			if (track->textures[u].tex_coord_type == 1) {
				/* 3D texture: depth also shrinks per level, min 1 */
				d = (1 << track->textures[u].txdepth) / (1 << i);
				if (!d)
					d = 1;
			} else {
				d = 1;
			}
			if (track->textures[u].compress_format) {

				size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
				/* compressed textures are block based */
			} else
				size += w * h * d;
		}
		size *= track->textures[u].cpp;

		switch (track->textures[u].tex_coord_type) {
		case 0:
		case 1:
			break;
		case 2:
			if (track->separate_cube) {
				ret = r100_cs_track_cube(rdev, track, u);
				if (ret)
					return ret;
			} else
				size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_bo_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_bo_size(robj));
			r100_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}
1919
 
1920
/*
 * Validate all state accumulated by the CS tracker before a draw:
 * color buffers, depth buffer, AA resolve buffer, vertex arrays (sized
 * per the VAP_VF_CNTL primitive-walk mode) and finally the textures.
 * Dirty flags are cleared as each group passes so unchanged state is
 * not re-validated on the next draw. Returns 0 or -EINVAL.
 */
int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
{
	unsigned i;
	unsigned long size;
	unsigned prim_walk;
	unsigned nverts;
	unsigned num_cb = track->cb_dirty ? track->num_cb : 0;

	/* if no pixel can reach the color buffer, skip CB validation */
	if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
	    !track->blend_read_enable)
		num_cb = 0;

	for (i = 0; i < num_cb; i++) {
		if (track->cb[i].robj == NULL) {
			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
			return -EINVAL;
		}
		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
		size += track->cb[i].offset;
		if (size > radeon_bo_size(track->cb[i].robj)) {
			DRM_ERROR("[drm] Buffer too small for color buffer %d "
				  "(need %lu have %lu) !\n", i, size,
				  radeon_bo_size(track->cb[i].robj));
			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
				  i, track->cb[i].pitch, track->cb[i].cpp,
				  track->cb[i].offset, track->maxy);
			return -EINVAL;
		}
	}
	track->cb_dirty = false;

	if (track->zb_dirty && track->z_enabled) {
		if (track->zb.robj == NULL) {
			DRM_ERROR("[drm] No buffer for z buffer !\n");
			return -EINVAL;
		}
		size = track->zb.pitch * track->zb.cpp * track->maxy;
		size += track->zb.offset;
		if (size > radeon_bo_size(track->zb.robj)) {
			DRM_ERROR("[drm] Buffer too small for z buffer "
				  "(need %lu have %lu) !\n", size,
				  radeon_bo_size(track->zb.robj));
			DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
				  track->zb.pitch, track->zb.cpp,
				  track->zb.offset, track->maxy);
			return -EINVAL;
		}
	}
	track->zb_dirty = false;

	if (track->aa_dirty && track->aaresolve) {
		/* NOTE(review): the %d printed here is `i`, left over from the
		 * color-buffer loop above — it is not an AA buffer index. */
		if (track->aa.robj == NULL) {
			DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
			return -EINVAL;
		}
		/* I believe the format comes from colorbuffer0. */
		size = track->aa.pitch * track->cb[0].cpp * track->maxy;
		size += track->aa.offset;
		if (size > radeon_bo_size(track->aa.robj)) {
			DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
				  "(need %lu have %lu) !\n", i, size,
				  radeon_bo_size(track->aa.robj));
			DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
				  i, track->aa.pitch, track->cb[0].cpp,
				  track->aa.offset, track->maxy);
			return -EINVAL;
		}
	}
	track->aa_dirty = false;

	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
	if (track->vap_vf_cntl & (1 << 14)) {
		nverts = track->vap_alt_nverts;
	} else {
		nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
	}
	switch (prim_walk) {
	case 1:
		/* indexed draw: arrays must cover the maximum index seen */
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * track->max_indx * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_bo_size(track->arrays[i].robj)) {
				dev_err(rdev->dev, "(PW %u) Vertex array %u "
					"need %lu dwords have %lu dwords\n",
					prim_walk, i, size >> 2,
					radeon_bo_size(track->arrays[i].robj)
					>> 2);
				DRM_ERROR("Max indices %u\n", track->max_indx);
				return -EINVAL;
			}
		}
		break;
	case 2:
		/* sequential draw: arrays must cover nverts vertices */
		for (i = 0; i < track->num_arrays; i++) {
			size = track->arrays[i].esize * (nverts - 1) * 4;
			if (track->arrays[i].robj == NULL) {
				DRM_ERROR("(PW %u) Vertex array %u no buffer "
					  "bound\n", prim_walk, i);
				return -EINVAL;
			}
			if (size > radeon_bo_size(track->arrays[i].robj)) {
				dev_err(rdev->dev, "(PW %u) Vertex array %u "
					"need %lu dwords have %lu dwords\n",
					prim_walk, i, size >> 2,
					radeon_bo_size(track->arrays[i].robj)
					>> 2);
				return -EINVAL;
			}
		}
		break;
	case 3:
		/* immediate-mode draw: vertex data is inline in the IB */
		size = track->vtx_size * nverts;
		if (size != track->immd_dwords) {
			DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
				  track->immd_dwords, size);
			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
				  nverts, track->vtx_size);
			return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
			  prim_walk);
		return -EINVAL;
	}

	if (track->tex_dirty) {
		track->tex_dirty = false;
		return r100_cs_track_texture_check(rdev, track);
	}
	return 0;
}
2056
 
2997 Serge 2057
/*
 * Reset the CS tracker to worst-case defaults for the given chip family.
 * Limits (number of color buffers, texture units, max Y) differ between
 * pre-R300 and R300+ parts; all sizes default to maxima so that anything
 * not explicitly programmed by the command stream still validates
 * conservatively against the bound buffer objects.
 */
void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
{
	unsigned i, face;

	/* force full re-validation on the next check */
	track->cb_dirty = true;
	track->zb_dirty = true;
	track->tex_dirty = true;
	track->aa_dirty = true;

	if (rdev->family < CHIP_R300) {
		track->num_cb = 1;
		if (rdev->family <= CHIP_RS200)
			track->num_texture = 3;
		else
			track->num_texture = 6;
		track->maxy = 2048;
		track->separate_cube = 1;
	} else {
		track->num_cb = 4;
		track->num_texture = 16;
		track->maxy = 4096;
		track->separate_cube = 0;
		track->aaresolve = false;
		track->aa.robj = NULL;
	}

	for (i = 0; i < track->num_cb; i++) {
		track->cb[i].robj = NULL;
		track->cb[i].pitch = 8192;
		track->cb[i].cpp = 16;
		track->cb[i].offset = 0;
	}
	track->z_enabled = true;
	track->zb.robj = NULL;
	track->zb.pitch = 8192;
	track->zb.cpp = 4;
	track->zb.offset = 0;
	track->vtx_size = 0x7F;
	track->immd_dwords = 0xFFFFFFFFUL;
	track->num_arrays = 11;
	track->max_indx = 0x00FFFFFFUL;
	for (i = 0; i < track->num_arrays; i++) {
		track->arrays[i].robj = NULL;
		track->arrays[i].esize = 0x7F;
	}
	for (i = 0; i < track->num_texture; i++) {
		track->textures[i].compress_format = R100_TRACK_COMP_NONE;
		/* NOTE(review): 16536 looks like a typo for 16384, but it is
		 * the historical upstream value — do not "fix" silently. */
		track->textures[i].pitch = 16536;
		track->textures[i].width = 16536;
		track->textures[i].height = 16536;
		track->textures[i].width_11 = 1 << 11;
		track->textures[i].height_11 = 1 << 11;
		track->textures[i].num_levels = 12;
		if (rdev->family <= CHIP_RS200) {
			track->textures[i].tex_coord_type = 0;
			track->textures[i].txdepth = 0;
		} else {
			track->textures[i].txdepth = 16;
			track->textures[i].tex_coord_type = 1;
		}
		track->textures[i].cpp = 64;
		track->textures[i].robj = NULL;
		/* CS IB emission code makes sure texture unit are disabled */
		track->textures[i].enabled = false;
		track->textures[i].lookup_disable = false;
		track->textures[i].roundup_w = true;
		track->textures[i].roundup_h = true;
		if (track->separate_cube)
			for (face = 0; face < 5; face++) {
				track->textures[i].cube_info[face].robj = NULL;
				track->textures[i].cube_info[face].width = 16536;
				track->textures[i].cube_info[face].height = 16536;
				track->textures[i].cube_info[face].offset = 0;
			}
	}
}
2997 Serge 2133
#endif
1117 serge 2134
 
2997 Serge 2135
/*
2136
 * Global GPU functions
2137
 */
2138
static void r100_errata(struct radeon_device *rdev)
1117 serge 2139
{
2997 Serge 2140
	rdev->pll_errata = 0;
2141
 
2142
	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2143
		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
2144
	}
2145
 
2146
	if (rdev->family == CHIP_RV100 ||
2147
	    rdev->family == CHIP_RS100 ||
2148
	    rdev->family == CHIP_RS200) {
2149
		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
2150
	}
2151
}
2152
 
2153
static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2154
{
1117 serge 2155
	unsigned i;
2156
	uint32_t tmp;
2157
 
2158
	for (i = 0; i < rdev->usec_timeout; i++) {
2159
		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
2160
		if (tmp >= n) {
2161
			return 0;
2162
		}
2163
		DRM_UDELAY(1);
2164
	}
2165
	return -1;
2166
}
2167
 
2168
int r100_gui_wait_for_idle(struct radeon_device *rdev)
2169
{
2170
	unsigned i;
2171
	uint32_t tmp;
2172
 
2173
	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2174
		printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
2175
		       " Bad things might happen.\n");
2176
	}
2177
	for (i = 0; i < rdev->usec_timeout; i++) {
2178
		tmp = RREG32(RADEON_RBBM_STATUS);
1430 serge 2179
		if (!(tmp & RADEON_RBBM_ACTIVE)) {
1117 serge 2180
			return 0;
2181
		}
2182
		DRM_UDELAY(1);
2183
	}
2184
	return -1;
2185
}
2186
 
2187
int r100_mc_wait_for_idle(struct radeon_device *rdev)
2188
{
2189
	unsigned i;
2190
	uint32_t tmp;
2191
 
2192
	for (i = 0; i < rdev->usec_timeout; i++) {
2193
		/* read MC_STATUS */
1430 serge 2194
		tmp = RREG32(RADEON_MC_STATUS);
2195
		if (tmp & RADEON_MC_IDLE) {
1117 serge 2196
			return 0;
2197
		}
2198
		DRM_UDELAY(1);
2199
	}
2200
	return -1;
2201
}
2202
 
2997 Serge 2203
/*
 * Report whether the GPU appears locked up. If the GUI engine is idle
 * the lockup tracker is refreshed and false is returned; otherwise CP
 * activity is forced and the ring's lockup detector decides.
 */
bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 rbbm_status;

	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
		radeon_ring_lockup_update(ring);
		return false;
		}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
2216
 
2997 Serge 2217
/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
2218
void r100_enable_bm(struct radeon_device *rdev)
2219
{
2220
	uint32_t tmp;
2221
	/* Enable bus mastering */
2222
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
2223
	WREG32(RADEON_BUS_CNTL, tmp);
2224
}
2225
 
1963 serge 2226
/*
 * Disable GPU bus mastering via a staged write sequence to BUS_CNTL
 * (0x44 -> 0x42 -> 0x40 in the low bits, with settling delays between),
 * then clear bus mastering on the PCI side as well.
 * NOTE(review): the `tmp & 0xFFFFFFFF` mask is a no-op on a u32 —
 * kept as-is to match the historical register sequence exactly.
 */
void r100_bm_disable(struct radeon_device *rdev)
{
	u32 tmp;

	/* disable bus mastering */
	tmp = RREG32(R_000030_BUS_CNTL);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
	mdelay(1);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
	mdelay(1);
	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
	/* read back to flush/settle, value intentionally discarded */
	tmp = RREG32(RADEON_BUS_CNTL);
	mdelay(1);
	pci_clear_master(rdev->pdev);
	mdelay(1);
}
2242
 
2243
/*
 * Full ASIC soft reset for r1xx-class chips. Skipped entirely if the GUI
 * engine is already idle. Otherwise: stop the MC (saving state), halt the
 * CP and zero its ring pointers, disable bus mastering, soft-reset the
 * geometry/raster/pixel/RB blocks, then the CP, re-enable bus mastering
 * and restore the MC. Returns 0 on success, -1 if engine-busy bits are
 * still set after the reset.
 */
int r100_asic_reset(struct radeon_device *rdev)
{
	struct r100_mc_save save;
	u32 status, tmp;
	int ret = 0;

	status = RREG32(R_000E40_RBBM_STATUS);
	if (!G_000E40_GUI_ACTIVE(status)) {
		/* nothing hung — no reset needed */
		return 0;
	}
	r100_mc_stop(rdev, &save);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* stop CP */
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	tmp = RREG32(RADEON_CP_RB_CNTL);
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	WREG32(RADEON_CP_RB_WPTR, 0);
	WREG32(RADEON_CP_RB_CNTL, tmp);
	/* save PCI state */
//   pci_save_state(rdev->pdev);
	/* disable bus mastering */
	r100_bm_disable(rdev);
	/* soft-reset the fixed-function pipeline blocks */
	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
					S_0000F0_SOFT_RESET_RE(1) |
					S_0000F0_SOFT_RESET_PP(1) |
					S_0000F0_SOFT_RESET_RB(1));
	RREG32(R_0000F0_RBBM_SOFT_RESET);
	mdelay(500);
	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
	mdelay(1);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* reset CP */
	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
	RREG32(R_0000F0_RBBM_SOFT_RESET);
	mdelay(500);
	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
	mdelay(1);
	status = RREG32(R_000E40_RBBM_STATUS);
	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
	/* restore PCI & busmastering */
//   pci_restore_state(rdev->pdev);
	r100_enable_bm(rdev);
	/* Check if GPU is idle */
	if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
		G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
		dev_err(rdev->dev, "failed to reset GPU\n");
		ret = -1;
	} else
		dev_info(rdev->dev, "GPU reset succeed\n");
	r100_mc_resume(rdev, &save);
	return ret;
}
2298
 
1321 serge 2299
/*
 * Program baseline register state shared by all r1xx-family setups:
 * neutralize overlay/capture/I2C blocks, force DAC2 on for boards known
 * to wire a VGA port to it without declaring it in the BIOS connector
 * table (RN50 and certain DELL RV100 servers), and switch the power
 * management block to ACPI mode.
 */
void r100_set_common_regs(struct radeon_device *rdev)
{
	struct drm_device *dev = rdev->ddev;
	bool force_dac2 = false;
	u32 tmp;

	/* set these so they don't interfere with anything */
	WREG32(RADEON_OV0_SCALE_CNTL, 0);
	WREG32(RADEON_SUBPIC_CNTL, 0);
	WREG32(RADEON_VIPH_CONTROL, 0);
	WREG32(RADEON_I2C_CNTL_1, 0);
	WREG32(RADEON_DVI_I2C_CNTL_1, 0);
	WREG32(RADEON_CAP0_TRIG_CNTL, 0);
	WREG32(RADEON_CAP1_TRIG_CNTL, 0);

	/* always set up dac2 on rn50 and some rv100 as lots
	 * of servers seem to wire it up to a VGA port but
	 * don't report it in the bios connector
	 * table.
	 */
	switch (dev->pdev->device) {
		/* RN50 */
	case 0x515e:
	case 0x5969:
		force_dac2 = true;
		break;
		/* RV100*/
	case 0x5159:
	case 0x515a:
		/* DELL triple head servers */
		if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
		    ((dev->pdev->subsystem_device == 0x016c) ||
		     (dev->pdev->subsystem_device == 0x016d) ||
		     (dev->pdev->subsystem_device == 0x016e) ||
		     (dev->pdev->subsystem_device == 0x016f) ||
		     (dev->pdev->subsystem_device == 0x0170) ||
		     (dev->pdev->subsystem_device == 0x017d) ||
		     (dev->pdev->subsystem_device == 0x017e) ||
		     (dev->pdev->subsystem_device == 0x0183) ||
		     (dev->pdev->subsystem_device == 0x018a) ||
		     (dev->pdev->subsystem_device == 0x019a)))
			force_dac2 = true;
		break;
	}

	if (force_dac2) {
		u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
		u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
		u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);

		/* For CRT on DAC2, don't turn it on if BIOS didn't
		   enable it, even it's detected.
		*/

		/* force it to crtc0 */
		dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
		dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
		disp_hw_debug |= RADEON_CRT2_DISP1_SEL;

		/* set up the TV DAC */
		tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
				 RADEON_TV_DAC_STD_MASK |
				 RADEON_TV_DAC_RDACPD |
				 RADEON_TV_DAC_GDACPD |
				 RADEON_TV_DAC_BDACPD |
				 RADEON_TV_DAC_BGADJ_MASK |
				 RADEON_TV_DAC_DACADJ_MASK);
		tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
				RADEON_TV_DAC_NHOLD |
				RADEON_TV_DAC_STD_PS2 |
				(0x58 << 16));

		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
		WREG32(RADEON_DAC_CNTL2, dac2_cntl);
	}

	/* switch PM block to ACPI mode */
	tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
	tmp &= ~RADEON_PM_MODE_SEL;
	WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);

}
1117 serge 2382
 
2383
/*
2384
 * VRAM info
2385
 */
2386
/*
 * Detect VRAM type (DDR vs SDR) and memory bus width for this chip.
 * IGPs always use DDR; otherwise MEM_SDRAM_MODE_REG decides. Bus width
 * comes from MEM_CNTL and depends on the family: RV100/RS100/RS200 are
 * 32/64-bit (quartered in single-CRTC mode), up to RV280 are 64/128-bit,
 * and newer IGPs are fixed at 128-bit.
 */
static void r100_vram_get_type(struct radeon_device *rdev)
{
	uint32_t tmp;

	rdev->mc.vram_is_ddr = false;
	if (rdev->flags & RADEON_IS_IGP)
		rdev->mc.vram_is_ddr = true;
	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
		rdev->mc.vram_is_ddr = true;
	if ((rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RV100_HALF_MODE) {
			rdev->mc.vram_width = 32;
		} else {
			rdev->mc.vram_width = 64;
		}
		if (rdev->flags & RADEON_SINGLE_CRTC) {
			rdev->mc.vram_width /= 4;
			rdev->mc.vram_is_ddr = true;
		}
	} else if (rdev->family <= CHIP_RV280) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
			rdev->mc.vram_width = 128;
		} else {
			rdev->mc.vram_width = 64;
		}
	} else {
		/* newer IGPs */
		rdev->mc.vram_width = 128;
	}
}
2420
 
1179 serge 2421
/*
 * Return how much VRAM the CPU can access through the PCI aperture(s).
 * RV280 and RV350+ have a sane gen-2 PCI interface and can use both
 * apertures (2x CONFIG_APER_SIZE); older cards fall back to what the
 * BIOS configured in HOST_PATH_CNTL.
 * NOTE(review): `byte` is unused here because the multifunction-card
 * PCI config probe is commented out in this port.
 */
static u32 r100_get_accessible_vram(struct radeon_device *rdev)
{
	u32 aper_size;
	u8 byte;

	aper_size = RREG32(RADEON_CONFIG_APER_SIZE);

	/* Set HDP_APER_CNTL only on cards that are known not to be broken,
	 * that is has the 2nd generation multifunction PCI interface
	 */
	if (rdev->family == CHIP_RV280 ||
	    rdev->family >= CHIP_RV350) {
		WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
		       ~RADEON_HDP_APER_CNTL);
		DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
		return aper_size * 2;
	}

	/* Older cards have all sorts of funny issues to deal with. First
	 * check if it's a multifunction card by reading the PCI config
	 * header type... Limit those to one aperture size
	 */
//   pci_read_config_byte(rdev->pdev, 0xe, &byte);
//   if (byte & 0x80) {
//       DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
//       DRM_INFO("Limiting VRAM to one aperture\n");
//       return aper_size;
//   }

	/* Single function older card. We read HDP_APER_CNTL to see how the BIOS
	 * have set it up. We don't write this as it's broken on some ASICs but
	 * we expect the BIOS to have done the right thing (might be too optimistic...)
	 */
	if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
		return aper_size * 2;
	return aper_size;
}
2458
 
2459
/*
 * Populate rdev->mc size fields: PCI aperture base/size, CPU-visible
 * VRAM, and real/total VRAM. IGPs read the stolen-memory range from
 * NB_TOM; discrete cards read CONFIG_MEMSIZE with workarounds for
 * boards that misreport (M6 reporting 0 for 8MB, RN50/M6/M7 aperture
 * oddities from Novell bug 204882).
 */
void r100_vram_init_sizes(struct radeon_device *rdev)
{
	u64 config_aper_size;

	/* work out accessible VRAM */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
	/* FIXME we don't use the second aperture yet when we could use it */
	if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
		rdev->mc.visible_vram_size = rdev->mc.aper_size;
	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
	if (rdev->flags & RADEON_IS_IGP) {
		uint32_t tom;
		/* read NB_TOM to get the amount of ram stolen for the GPU */
		tom = RREG32(RADEON_NB_TOM);
		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	} else {
		rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
		/* Some production boards of m6 will report 0
		 * if it's 8 MB
		 */
		if (rdev->mc.real_vram_size == 0) {
			rdev->mc.real_vram_size = 8192 * 1024;
			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		}
		 /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
		 * Novell bug 204882 + along with lots of ubuntu ones
		 */
		if (rdev->mc.aper_size > config_aper_size)
			config_aper_size = rdev->mc.aper_size;

		if (config_aper_size > rdev->mc.real_vram_size)
			rdev->mc.mc_vram_size = config_aper_size;
		else
			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	}
}
2499
 
1179 serge 2500
/*
 * Enable or disable legacy VGA decoding. Disabling turns off VGA RAM
 * decoding and blocks VGA I/O; enabling simply re-allows VGA I/O.
 */
void r100_vga_set_state(struct radeon_device *rdev, bool state)
{
	uint32_t cfg = RREG32(RADEON_CONFIG_CNTL);

	if (state) {
		/* re-enable VGA I/O decoding */
		cfg &= ~RADEON_CFG_VGA_IO_DIS;
	} else {
		/* shut off VGA RAM decoding and VGA I/O */
		cfg &= ~RADEON_CFG_VGA_RAM_EN;
		cfg |= RADEON_CFG_VGA_IO_DIS;
	}

	WREG32(RADEON_CONFIG_CNTL, cfg);
}
2513
 
2997 Serge 2514
/*
 * Initialize memory-controller bookkeeping: detect VRAM type and sizes,
 * then place VRAM (at the aperture base, or at the NB_TOM base for IGPs)
 * and, for non-AGP cards, the GTT in the GPU address space; finally
 * refresh the bandwidth info derived from the new layout.
 */
static void r100_mc_init(struct radeon_device *rdev)
{
	u64 base;

	r100_vram_get_type(rdev);
	r100_vram_init_sizes(rdev);
	base = rdev->mc.aper_base;
	if (rdev->flags & RADEON_IS_IGP)
		/* IGP VRAM lives at the bottom of the stolen range */
		base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
	radeon_vram_location(rdev, &rdev->mc, base);
	rdev->mc.gtt_base_align = 0;
	if (!(rdev->flags & RADEON_IS_AGP))
		radeon_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);
}
2529
 
2530
 
1117 serge 2531
/*
2532
 * Indirect registers accessor
2533
 */
2534
/*
 * Errata workaround run after writing CLOCK_CNTL_INDEX: chips flagged
 * with CHIP_ERRATA_PLL_DUMMYREADS (RV200/RS200, see r100_errata())
 * need two dummy register reads before the PLL data access is safe.
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
	(void)RREG32(RADEON_CLOCK_CNTL_DATA);
	(void)RREG32(RADEON_CRTC_GEN_CNTL);
	}
}
2541
 
2542
/*
 * Errata workaround run after a PLL data access: a fixed delay for
 * RV100/RS100/RS200, and for R300-class clock-gating errata a
 * select/read/restore cycle on CLOCK_CNTL_INDEX to flush the access.
 */
static void r100_pll_errata_after_data(struct radeon_device *rdev)
{
	/* This workarounds is necessary on RV100, RS100 and RS200 chips
	 * or the chip could hang on a subsequent access
	 */
	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
		mdelay(5);
	}

	/* This function is required to workaround a hardware bug in some (all?)
	 * revisions of the R300.  This workaround should be called after every
	 * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
	 * may not be correct.
	 */
	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
		uint32_t save, tmp;

		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
	}
}
2566
 
2567
/*
 * Read an indirect PLL register: select the register through
 * CLOCK_CNTL_INDEX (low 6 bits), then read CLOCK_CNTL_DATA, applying
 * the per-chip errata hooks after each step. The index/errata/data
 * order is mandatory — do not reorder.
 */
uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
{
	uint32_t data;

	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
	r100_pll_errata_after_index(rdev);
	data = RREG32(RADEON_CLOCK_CNTL_DATA);
	r100_pll_errata_after_data(rdev);
	return data;
}
2577
 
2578
/*
 * Write an indirect PLL register: select the register with the write
 * enable bit set, then store the value through CLOCK_CNTL_DATA, with
 * the per-chip errata hooks applied after each step (order mandatory).
 */
void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
	r100_pll_errata_after_index(rdev);
	WREG32(RADEON_CLOCK_CNTL_DATA, v);
	r100_pll_errata_after_data(rdev);
}
2585
 
2997 Serge 2586
static void r100_set_safe_registers(struct radeon_device *rdev)
1117 serge 2587
{
1179 serge 2588
	if (ASIC_IS_RN50(rdev)) {
2589
		rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2590
		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2591
	} else if (rdev->family < CHIP_R200) {
2592
		rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2593
		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2594
	} else {
1221 serge 2595
		r200_set_safe_registers(rdev);
1117 serge 2596
	}
2597
}
2598
 
1129 serge 2599
/*
2600
 * Debugfs info
2601
 */
2602
#if defined(CONFIG_DEBUG_FS)
2603
/*
 * debugfs: dump RBBM/CP status and the 64-entry RBBM command FIFO.
 * For each FIFO slot, reading with bit 0x100 set yields the register
 * address (decoded as (data-1)>>2) and a plain read yields the value.
 */
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}
2623
 
2624
/*
 * debugfs: dump the GFX ring state — CP status, read/write pointers,
 * free-dword count, and the pending dwords between RPTR and WPTR
 * (indices wrapped with the ring's pointer mask).
 */
static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev, ring);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
	/* dwords the CP has not yet consumed, modulo ring size */
	count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & ring->ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
	}
	return 0;
}
2648
 
2649
 
2650
static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
2651
{
2652
	struct drm_info_node *node = (struct drm_info_node *) m->private;
2653
	struct drm_device *dev = node->minor->dev;
2654
	struct radeon_device *rdev = dev->dev_private;
2655
	uint32_t csq_stat, csq2_stat, tmp;
2656
	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
2657
	unsigned i;
2658
 
2659
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2660
	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
2661
	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
2662
	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
2663
	r_rptr = (csq_stat >> 0) & 0x3ff;
2664
	r_wptr = (csq_stat >> 10) & 0x3ff;
2665
	ib1_rptr = (csq_stat >> 20) & 0x3ff;
2666
	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
2667
	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
2668
	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
2669
	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
2670
	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
2671
	seq_printf(m, "Ring rptr %u\n", r_rptr);
2672
	seq_printf(m, "Ring wptr %u\n", r_wptr);
2673
	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
2674
	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
2675
	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
2676
	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
2677
	/* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
2678
	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
2679
	seq_printf(m, "Ring fifo:\n");
2680
	for (i = 0; i < 256; i++) {
2681
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2682
		tmp = RREG32(RADEON_CP_CSQ_DATA);
2683
		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
2684
	}
2685
	seq_printf(m, "Indirect1 fifo:\n");
2686
	for (i = 256; i <= 512; i++) {
2687
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2688
		tmp = RREG32(RADEON_CP_CSQ_DATA);
2689
		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
2690
	}
2691
	seq_printf(m, "Indirect2 fifo:\n");
2692
	for (i = 640; i < ib1_wptr; i++) {
2693
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
2694
		tmp = RREG32(RADEON_CP_CSQ_DATA);
2695
		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
2696
	}
2697
	return 0;
2698
}
2699
 
2700
static int r100_debugfs_mc_info(struct seq_file *m, void *data)
2701
{
2702
	struct drm_info_node *node = (struct drm_info_node *) m->private;
2703
	struct drm_device *dev = node->minor->dev;
2704
	struct radeon_device *rdev = dev->dev_private;
2705
	uint32_t tmp;
2706
 
2707
	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
2708
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
2709
	tmp = RREG32(RADEON_MC_FB_LOCATION);
2710
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
2711
	tmp = RREG32(RADEON_BUS_CNTL);
2712
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
2713
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
2714
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
2715
	tmp = RREG32(RADEON_AGP_BASE);
2716
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
2717
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
2718
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
2719
	tmp = RREG32(0x01D0);
2720
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
2721
	tmp = RREG32(RADEON_AIC_LO_ADDR);
2722
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
2723
	tmp = RREG32(RADEON_AIC_HI_ADDR);
2724
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
2725
	tmp = RREG32(0x01E4);
2726
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
2727
	return 0;
2728
}
2729
 
2730
/* debugfs entry tables: {file name, show callback, driver_features, data} */
static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
2742
#endif
2743
 
2744
/* Register the r100_rbbm_info debugfs file; no-op without CONFIG_DEBUG_FS. */
int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}
2752
 
2753
/* Register the two CP debugfs files (ring info, CSQ fifo); no-op without
 * CONFIG_DEBUG_FS. */
int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}
2761
 
2762
/* Register the r100_mc_info debugfs file; no-op without CONFIG_DEBUG_FS. */
int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}
1179 serge 2770
 
2771
/*
 * Program one of the hardware surface registers: translate generic tiling
 * flags into per-family SURFACE_INFO bits, fold in the (scaled) pitch and
 * write the surface bounds.  Always returns 0.
 */
int r100_set_surface_reg(struct radeon_device *rdev, int reg,
			 uint32_t tiling_flags, uint32_t pitch,
			 uint32_t offset, uint32_t obj_size)
{
	int surf_index = reg * 16;
	int surf_flags = 0;

	/* tiling bits differ per ASIC generation */
	if (rdev->family <= CHIP_RS200) {
		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
			surf_flags |= RADEON_SURF_TILE_COLOR_BOTH;
		if (tiling_flags & RADEON_TILING_MACRO)
			surf_flags |= RADEON_SURF_TILE_COLOR_MACRO;
	} else if (rdev->family <= CHIP_RV280) {
		if (tiling_flags & RADEON_TILING_MACRO)
			surf_flags |= R200_SURF_TILE_COLOR_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			surf_flags |= R200_SURF_TILE_COLOR_MICRO;
	} else {
		if (tiling_flags & RADEON_TILING_MACRO)
			surf_flags |= R300_SURF_TILE_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			surf_flags |= R300_SURF_TILE_MICRO;
	}

	if (tiling_flags & RADEON_TILING_SWAP_16BIT)
		surf_flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
	if (tiling_flags & RADEON_TILING_SWAP_32BIT)
		surf_flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;

	/* non-tiled byte-swapped surfaces on RN50 need the pitch divided
	 * down further - tested on power5 + rn50 server */
	if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) {
		if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO)))
			if (ASIC_IS_RN50(rdev))
				pitch /= 16;
	}

	/* r100/r200 divide by 16 */
	if (rdev->family < CHIP_R300)
		surf_flags |= pitch / 16;
	else
		surf_flags |= pitch / 8;

	DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, surf_flags, offset, offset+obj_size-1);
	WREG32(RADEON_SURFACE0_INFO + surf_index, surf_flags);
	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
	return 0;
}
2821
 
2822
void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
2823
{
2824
	int surf_index = reg * 16;
2825
	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
2826
}
2827
 
2828
/*
 * r100_bandwidth_update - program display watermarks for the active CRTCs.
 *
 * Computes, in 20.12 fixed point, the peak display bandwidth demanded by
 * the enabled mode(s), the memory-controller latencies derived from the
 * DRAM timing registers (tRCD/tRP/tRAS/tCAS), and from those the
 * "critical point" watermarks written into GRPH_BUFFER_CNTL and
 * GRPH2_BUFFER_CNTL.  Covers the whole pre-AVIVO family range
 * (r1xx-r4xx, RS1xx-RS4xx); family-specific tables and branches select
 * the correct register decodes.  No return value; warns via DRM_ERROR
 * when the mode exceeds available memory bandwidth.
 */
void r100_bandwidth_update(struct radeon_device *rdev)
{
	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
	/* CAS-latency lookup tables, indexed by the 3-bit field from
	 * MEM_SDRAM_MODE_REG; NOTE(review): memtcas_ff lists only 7 of 8
	 * initializers (last slot is zero-initialized) - presumably
	 * intentional, confirm against the register spec. */
	fixed20_12 memtcas_ff[8] = {
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(0),
		dfixed_init_half(1),
		dfixed_init_half(2),
		dfixed_init(0),
	};
	fixed20_12 memtcas_rs480_ff[8] = {
		dfixed_init(0),
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(0),
		dfixed_init_half(1),
		dfixed_init_half(2),
		dfixed_init_half(3),
	};
	fixed20_12 memtcas2_ff[8] = {
		dfixed_init(0),
		dfixed_init(1),
		dfixed_init(2),
		dfixed_init(3),
		dfixed_init(4),
		dfixed_init(5),
		dfixed_init(6),
		dfixed_init(7),
	};
	fixed20_12 memtrbs[8] = {
		dfixed_init(1),
		dfixed_init_half(1),
		dfixed_init(2),
		dfixed_init_half(2),
		dfixed_init(3),
		dfixed_init_half(3),
		dfixed_init(4),
		dfixed_init_half(4)
	};
	fixed20_12 memtrbs_r4xx[8] = {
		dfixed_init(4),
		dfixed_init(5),
		dfixed_init(6),
		dfixed_init(7),
		dfixed_init(8),
		dfixed_init(9),
		dfixed_init(10),
		dfixed_init(11)
	};
	fixed20_12 min_mem_eff;
	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
	fixed20_12 cur_latency_mclk, cur_latency_sclk;
	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
		disp_drain_rate2, read_return_rate;
	fixed20_12 time_disp1_drop_priority;
	int c;
	int cur_size = 16;       /* in octawords */
	int critical_point = 0, critical_point2;
/* 	uint32_t read_return_rate, time_disp1_drop_priority; */
	int stop_req, max_stop_req;
	struct drm_display_mode *mode1 = NULL;
	struct drm_display_mode *mode2 = NULL;
	uint32_t pixel_bytes1 = 0;
	uint32_t pixel_bytes2 = 0;

	radeon_update_display_priority(rdev);

	/* collect active modes and their pixel sizes */
	if (rdev->mode_info.crtcs[0]->base.enabled) {
		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
		pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
	}
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
	if (rdev->mode_info.crtcs[1]->base.enabled) {
		mode2 = &rdev->mode_info.crtcs[1]->base.mode;
		pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
	}
	}

	min_mem_eff.full = dfixed_const_8(0);
	/* get modes */
	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
		/* check crtc enables */
		if (mode2)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
		if (mode1)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
	}

	/*
	 * determine is there is enough bw for current mode
	 */
	sclk_ff = rdev->pm.sclk;
	mclk_ff = rdev->pm.mclk;

	/* theoretical memory bandwidth = bus-bytes * (2 for DDR) * mclk */
	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
	temp_ff.full = dfixed_const(temp);
	mem_bw.full = dfixed_mul(mclk_ff, temp_ff);

	pix_clk.full = 0;
	pix_clk2.full = 0;
	peak_disp_bw.full = 0;
	if (mode1) {
		temp_ff.full = dfixed_const(1000);
		pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
		pix_clk.full = dfixed_div(pix_clk, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes1);
		peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
	}
	if (mode2) {
		temp_ff.full = dfixed_const(1000);
		pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
		pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
		temp_ff.full = dfixed_const(pixel_bytes2);
		peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
	}

	mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
	if (peak_disp_bw.full >= mem_bw.full) {
		DRM_ERROR("You may not have enough display bandwidth for current mode\n"
			  "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
	}

	/*  Get values from the EXT_MEM_CNTL register...converting its contents. */
	temp = RREG32(RADEON_MEM_TIMING_CNTL);
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
		mem_trcd = ((temp >> 2) & 0x3) + 1;
		mem_trp  = ((temp & 0x3)) + 1;
		mem_tras = ((temp & 0x70) >> 4) + 1;
	} else if (rdev->family == CHIP_R300 ||
		   rdev->family == CHIP_R350) { /* r300, r350 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 11) & 0xf) + 4;
	} else if (rdev->family == CHIP_RV350 ||
		   rdev->family <= CHIP_RV380) {
		/* rv3x0 */
		mem_trcd = (temp & 0x7) + 3;
		mem_trp = ((temp >> 8) & 0x7) + 3;
		mem_tras = ((temp >> 11) & 0xf) + 6;
	} else if (rdev->family == CHIP_R420 ||
		   rdev->family == CHIP_R423 ||
		   rdev->family == CHIP_RV410) {
		/* r4xx */
		mem_trcd = (temp & 0xf) + 3;
		if (mem_trcd > 15)
			mem_trcd = 15;
		mem_trp = ((temp >> 8) & 0xf) + 3;
		if (mem_trp > 15)
			mem_trp = 15;
		mem_tras = ((temp >> 12) & 0x1f) + 6;
		if (mem_tras > 31)
			mem_tras = 31;
	} else { /* RV200, R200 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 12) & 0xf) + 4;
	}
	/* convert to FF */
	trcd_ff.full = dfixed_const(mem_trcd);
	trp_ff.full = dfixed_const(mem_trp);
	tras_ff.full = dfixed_const(mem_tras);

	/* Get values from the MEM_SDRAM_MODE_REG register...converting its */
	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
	data = (temp & (7 << 20)) >> 20;
	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_RS480) /* don't think rs400 */
			tcas_ff = memtcas_rs480_ff[data];
		else
			tcas_ff = memtcas_ff[data];
	} else
		tcas_ff = memtcas2_ff[data];

	if (rdev->family == CHIP_RS400 ||
	    rdev->family == CHIP_RS480) {
		/* extra cas latency stored in bits 23-25 0-4 clocks */
		data = (temp >> 23) & 0x7;
		if (data < 5)
			tcas_ff.full += dfixed_const(data);
	}

	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
		/* on the R300, Tcas is included in Trbs.
		 */
		temp = RREG32(RADEON_MEM_CNTL);
		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
		if (data == 1) {
			if (R300_MEM_USE_CD_CH_ONLY & temp) {
				temp = RREG32(R300_MC_IND_INDEX);
				temp &= ~R300_MC_IND_ADDR_MASK;
				temp |= R300_MC_READ_CNTL_CD_mcind;
				WREG32(R300_MC_IND_INDEX, temp);
				temp = RREG32(R300_MC_IND_DATA);
				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
			} else {
				temp = RREG32(R300_MC_READ_CNTL_AB);
				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
			}
		} else {
			temp = RREG32(R300_MC_READ_CNTL_AB);
			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
		}
		if (rdev->family == CHIP_RV410 ||
		    rdev->family == CHIP_R420 ||
		    rdev->family == CHIP_R423)
			trbs_ff = memtrbs_r4xx[data];
		else
			trbs_ff = memtrbs[data];
		tcas_ff.full += trbs_ff.full;
	}

	sclk_eff_ff.full = sclk_ff.full;

	if (rdev->flags & RADEON_IS_AGP) {
		fixed20_12 agpmode_ff;
		agpmode_ff.full = dfixed_const(radeon_agpmode);
		temp_ff.full = dfixed_const_666(16);
		sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
	}
	/* TODO PCIE lanes may affect this - agpmode == 16?? */

	if (ASIC_IS_R300(rdev)) {
		sclk_delay_ff.full = dfixed_const(250);
	} else {
		if ((rdev->family == CHIP_RV100) ||
		    rdev->flags & RADEON_IS_IGP) {
			if (rdev->mc.vram_is_ddr)
				sclk_delay_ff.full = dfixed_const(41);
			else
				sclk_delay_ff.full = dfixed_const(33);
		} else {
			if (rdev->mc.vram_width == 128)
				sclk_delay_ff.full = dfixed_const(57);
			else
				sclk_delay_ff.full = dfixed_const(41);
		}
	}

	mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);

	if (rdev->mc.vram_is_ddr) {
		if (rdev->mc.vram_width == 32) {
			k1.full = dfixed_const(40);
			c  = 3;
		} else {
			k1.full = dfixed_const(20);
			c  = 1;
		}
	} else {
		k1.full = dfixed_const(40);
		c  = 3;
	}

	/* mc_latency_mclk = (2*tRCD + c*tCAS + 4*tRAS + 4*tRP + k1) / mclk */
	temp_ff.full = dfixed_const(2);
	mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
	temp_ff.full = dfixed_const(c);
	mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
	temp_ff.full = dfixed_const(4);
	mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
	mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
	mc_latency_mclk.full += k1.full;

	mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
	mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);

	/*
	  HW cursor time assuming worst case of full size colour cursor.
	*/
	temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
	temp_ff.full += trcd_ff.full;
	if (temp_ff.full < tras_ff.full)
		temp_ff.full = tras_ff.full;
	cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);

	temp_ff.full = dfixed_const(cur_size);
	cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
	/*
	  Find the total latency for the display data.
	*/
	disp_latency_overhead.full = dfixed_const(8);
	disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

	if (mc_latency_mclk.full > mc_latency_sclk.full)
		disp_latency.full = mc_latency_mclk.full;
	else
		disp_latency.full = mc_latency_sclk.full;

	/* setup Max GRPH_STOP_REQ default value */
	if (ASIC_IS_RV100(rdev))
		max_stop_req = 0x5c;
	else
		max_stop_req = 0x7c;

	if (mode1) {
		/*  CRTC1
		    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
		    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
		*/
		stop_req = mode1->hdisplay * pixel_bytes1 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		  Find the drain rate of the display buffer.
		*/
		temp_ff.full = dfixed_const((16/pixel_bytes1));
		disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);

		/*
		  Find the critical point of the display buffer.
		*/
		crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
		crit_point_ff.full += dfixed_const_half(0);

		critical_point = dfixed_trunc(crit_point_ff);

		if (rdev->disp_priority == 2) {
			critical_point = 0;
		}

		/*
		  The critical point should never be above max_stop_req-4.  Setting
		  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
		*/
		if (max_stop_req - critical_point < 4)
			critical_point = 0;

		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
			critical_point = 0x10;
		}

		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		temp &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		temp |= RADEON_GRPH_BUFFER_SIZE;
		temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);
		/*
		  Write the result into the register.
		*/
		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
			/* attempt to program RS400 disp regs correctly ??? */
			temp = RREG32(RS400_DISP1_REG_CNTL);
			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DMIF_MEM_CNTL1);
			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
		}
#endif

		DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
			  /* 	  (unsigned int)info->SavedReg->grph_buffer_cntl, */
			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
	}

	if (mode2) {
		u32 grph2_cntl;
		stop_req = mode2->hdisplay * pixel_bytes2 / 16;

		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		  Find the drain rate of the display buffer.
		*/
		temp_ff.full = dfixed_const((16/pixel_bytes2));
		disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);

		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);

		if ((rdev->family == CHIP_RS100) ||
		    (rdev->family == CHIP_RS200))
			critical_point2 = 0;
		else {
			temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
			temp_ff.full = dfixed_const(temp);
			temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
			if (sclk_ff.full < temp_ff.full)
				temp_ff.full = sclk_ff.full;

			read_return_rate.full = temp_ff.full;

			if (mode1) {
				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
				time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
			} else {
				time_disp1_drop_priority.full = 0;
			}
			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
			crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
			crit_point_ff.full += dfixed_const_half(0);

			critical_point2 = dfixed_trunc(crit_point_ff);

			if (rdev->disp_priority == 2) {
				critical_point2 = 0;
			}

			if (max_stop_req - critical_point2 < 4)
				critical_point2 = 0;

		}

		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0 */
			critical_point2 = 0x10;
		}

		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
#if 0
			/* attempt to program RS400 disp2 regs correctly ??? */
			temp = RREG32(RS400_DISP2_REQ_CNTL1);
			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DISP2_REQ_CNTL2);
			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
			/* magic values empirically known to work on RS4xx */
			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
			WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
		}

		DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
	}
}
3311
 
2997 Serge 3312
int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1963 serge 3313
{
1412 serge 3314
	uint32_t scratch;
3315
	uint32_t tmp = 0;
3316
	unsigned i;
3317
	int r;
1179 serge 3318
 
1412 serge 3319
	r = radeon_scratch_get(rdev, &scratch);
3320
	if (r) {
3321
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3322
		return r;
3323
	}
3324
	WREG32(scratch, 0xCAFEDEAD);
2997 Serge 3325
	r = radeon_ring_lock(rdev, ring, 2);
1412 serge 3326
	if (r) {
3327
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3328
		radeon_scratch_free(rdev, scratch);
3329
		return r;
3330
	}
2997 Serge 3331
	radeon_ring_write(ring, PACKET0(scratch, 0));
3332
	radeon_ring_write(ring, 0xDEADBEEF);
3333
	radeon_ring_unlock_commit(rdev, ring);
1412 serge 3334
	for (i = 0; i < rdev->usec_timeout; i++) {
3335
		tmp = RREG32(scratch);
3336
		if (tmp == 0xDEADBEEF) {
3337
			break;
3338
		}
3339
		DRM_UDELAY(1);
3340
	}
3341
	if (i < rdev->usec_timeout) {
3342
		DRM_INFO("ring test succeeded in %d usecs\n", i);
3343
	} else {
1963 serge 3344
		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
1412 serge 3345
			  scratch, tmp);
3346
		r = -EINVAL;
3347
	}
3348
	radeon_scratch_free(rdev, scratch);
3349
	return r;
3350
}
3351
 
1963 serge 3352
void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3353
{
2997 Serge 3354
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3355
 
3356
	if (ring->rptr_save_reg) {
3357
		u32 next_rptr = ring->wptr + 2 + 3;
3358
		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
3359
		radeon_ring_write(ring, next_rptr);
3360
	}
3361
 
3362
	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
3363
	radeon_ring_write(ring, ib->gpu_addr);
3364
	radeon_ring_write(ring, ib->length_dw);
1963 serge 3365
}
3366
 
2997 Serge 3367
int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1963 serge 3368
{
2997 Serge 3369
	struct radeon_ib ib;
1963 serge 3370
	uint32_t scratch;
3371
	uint32_t tmp = 0;
3372
	unsigned i;
3373
	int r;
3374
 
3375
	r = radeon_scratch_get(rdev, &scratch);
3376
	if (r) {
3377
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3378
		return r;
3379
	}
3380
	WREG32(scratch, 0xCAFEDEAD);
2997 Serge 3381
	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
1963 serge 3382
	if (r) {
2997 Serge 3383
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3384
		goto free_scratch;
1963 serge 3385
	}
2997 Serge 3386
	ib.ptr[0] = PACKET0(scratch, 0);
3387
	ib.ptr[1] = 0xDEADBEEF;
3388
	ib.ptr[2] = PACKET2(0);
3389
	ib.ptr[3] = PACKET2(0);
3390
	ib.ptr[4] = PACKET2(0);
3391
	ib.ptr[5] = PACKET2(0);
3392
	ib.ptr[6] = PACKET2(0);
3393
	ib.ptr[7] = PACKET2(0);
3394
	ib.length_dw = 8;
3395
	r = radeon_ib_schedule(rdev, &ib, NULL);
1963 serge 3396
	if (r) {
2997 Serge 3397
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3398
		goto free_ib;
1963 serge 3399
	}
2997 Serge 3400
	r = radeon_fence_wait(ib.fence, false);
1963 serge 3401
	if (r) {
2997 Serge 3402
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3403
		goto free_ib;
1963 serge 3404
	}
3405
	for (i = 0; i < rdev->usec_timeout; i++) {
3406
		tmp = RREG32(scratch);
3407
		if (tmp == 0xDEADBEEF) {
3408
			break;
3409
		}
3410
		DRM_UDELAY(1);
3411
	}
3412
	if (i < rdev->usec_timeout) {
3413
		DRM_INFO("ib test succeeded in %u usecs\n", i);
3414
	} else {
3415
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3416
			  scratch, tmp);
3417
		r = -EINVAL;
3418
	}
2997 Serge 3419
free_ib:
3420
	radeon_ib_free(rdev, &ib);
3421
free_scratch:
1963 serge 3422
	radeon_scratch_free(rdev, scratch);
3423
	return r;
3424
}
3425
 
1179 serge 3426
/*
 * r100_mc_stop - quiesce all memory-controller clients before MC reprogram.
 * @rdev: radeon device
 * @save: receives the CRTC/VGA register state clobbered here
 *
 * Stops the CP, saves the CRTC/cursor/VGA registers into @save, then
 * disables VGA aperture access, cursors, overlay and CRTC display
 * requests so nothing touches VRAM while the MC is reprogrammed.
 * Paired with r100_mc_resume() which restores @save.
 */
void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Shutdown CP we shouldn't need to do that but better be safe than
	 * sorry
	 */
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	WREG32(R_000740_CP_CSQ_CNTL, 0);

	/* Save few CRTC registers */
	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
	/* second CRTC exists only on dual-CRTC parts */
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
	}

	/* Disable VGA aperture access */
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
	/* Disable cursor, overlay, crtc */
	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
					S_000054_CRTC_DISPLAY_DIS(1));
	WREG32(R_000050_CRTC_GEN_CNTL,
			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
			S_000050_CRTC_DISP_REQ_EN_B(1));
	WREG32(R_000420_OV0_SCALE_CNTL,
		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
						S_000360_CUR2_LOCK(1));
		WREG32(R_0003F8_CRTC2_GEN_CNTL,
			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
			S_0003F8_CRTC2_DISPLAY_DIS(1) |
			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
		WREG32(R_000360_CUR2_OFFSET,
			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
	}
}
3467
 
3468
/*
 * r100_mc_resume - restore display state after MC reprogramming.
 * @rdev: radeon device
 * @save: register state captured by r100_mc_stop()
 *
 * Repoints both CRTC scanout bases at the (possibly relocated) start of
 * VRAM, then restores the VGA/CRTC registers saved in @save.
 */
void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
{
	/* Update base address for crtc */
	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
	}
	/* Restore CRTC registers */
	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
	}
}
3483
 
1221 serge 3484
void r100_vga_render_disable(struct radeon_device *rdev)
3485
{
3486
	u32 tmp;
3487
 
3488
	tmp = RREG8(R_0003C2_GENMO_WT);
3489
	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
3490
}
3491
 
3492
static void r100_debugfs(struct radeon_device *rdev)
3493
{
3494
	int r;
3495
 
3496
	r = r100_debugfs_mc_info_init(rdev);
3497
	if (r)
3498
		dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
3499
}
3500
 
3501
/* Program the memory controller's FB and AGP apertures.  Display/MC clients
 * are halted first so the aperture move cannot race with active scanout,
 * and resumed afterwards with the new addresses in place.  Statement order
 * here is hardware-mandated; do not reorder. */
static void r100_mc_program(struct radeon_device *rdev)
{
	struct r100_mc_save save;

	/* Stops all mc clients */
	r100_mc_stop(rdev, &save);
	if (rdev->flags & RADEON_IS_AGP) {
		WREG32(R_00014C_MC_AGP_LOCATION,
			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
		/* AGP_BASE_2 (upper address bits) only exists after RV200 */
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2,
				upper_32_bits(rdev->mc.agp_base) & 0xff);
	} else {
		/* No AGP: park the AGP aperture at a harmless location */
		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(R_000170_AGP_BASE, 0);
		if (rdev->family > CHIP_RV200)
			WREG32(R_00015C_AGP_BASE_2, 0);
	}
	/* Wait for mc idle */
	if (r100_mc_wait_for_idle(rdev))
		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
	/* Program MC, should be a 32bits limited address space */
	WREG32(R_000148_MC_FB_LOCATION,
		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
	r100_mc_resume(rdev, &save);
}
3530
 
2997 Serge 3531
static void r100_clock_startup(struct radeon_device *rdev)
1221 serge 3532
{
3533
	u32 tmp;
3534
 
3535
	if (radeon_dynclks != -1 && radeon_dynclks)
3536
		radeon_legacy_set_clock_gating(rdev, 1);
3537
	/* We need to force on some of the block */
3538
	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
3539
	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
3540
	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
3541
		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
3542
	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
3543
}
3544
 
3545
static int r100_startup(struct radeon_device *rdev)
3546
{
3547
	int r;
3548
 
1321 serge 3549
	/* set common regs */
3550
	r100_set_common_regs(rdev);
3551
	/* program mc */
1221 serge 3552
	r100_mc_program(rdev);
3553
	/* Resume clock */
3554
	r100_clock_startup(rdev);
3555
	/* Initialize GART (initialize after TTM so we can allocate
3556
	 * memory through TTM but finalize after TTM) */
1321 serge 3557
	r100_enable_bm(rdev);
1221 serge 3558
	if (rdev->flags & RADEON_IS_PCI) {
3559
		r = r100_pci_gart_enable(rdev);
3560
		if (r)
3561
			return r;
3562
	}
2005 serge 3563
 
3564
	/* allocate wb buffer */
3565
	r = radeon_wb_init(rdev);
3566
	if (r)
3567
		return r;
3568
 
3120 serge 3569
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3570
	if (r) {
3571
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3572
		return r;
3573
	}
3574
 
1221 serge 3575
	/* Enable IRQ */
3764 Serge 3576
	if (!rdev->irq.installed) {
3577
		r = radeon_irq_kms_init(rdev);
3578
		if (r)
3579
			return r;
3580
	}
3581
 
2005 serge 3582
	r100_irq_set(rdev);
1404 serge 3583
	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
1221 serge 3584
	/* 1M ring buffer */
1412 serge 3585
   r = r100_cp_init(rdev, 1024 * 1024);
3586
   if (r) {
1963 serge 3587
		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
1412 serge 3588
       return r;
3589
   }
2997 Serge 3590
 
3591
	r = radeon_ib_pool_init(rdev);
2005 serge 3592
	if (r) {
2997 Serge 3593
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
2005 serge 3594
		return r;
3595
	}
3120 serge 3596
 
1221 serge 3597
	return 0;
3598
}
3599
 
1963 serge 3600
/*
3601
 * Due to how kexec works, it can leave the hw fully initialised when it
3602
 * boots the new kernel. However doing our init sequence with the CP and
3603
 * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
3604
 * do some quick sanity checks and restore sane values to avoid this
3605
 * problem.
3606
 */
3607
void r100_restore_sanity(struct radeon_device *rdev)
3608
{
3609
	u32 tmp;
1221 serge 3610
 
1963 serge 3611
	tmp = RREG32(RADEON_CP_CSQ_CNTL);
3612
	if (tmp) {
3613
		WREG32(RADEON_CP_CSQ_CNTL, 0);
3614
	}
3615
	tmp = RREG32(RADEON_CP_RB_CNTL);
3616
	if (tmp) {
3617
		WREG32(RADEON_CP_RB_CNTL, 0);
3618
	}
3619
	tmp = RREG32(RADEON_SCRATCH_UMSK);
3620
	if (tmp) {
3621
		WREG32(RADEON_SCRATCH_UMSK, 0);
3622
	}
3623
}
1221 serge 3624
 
3625
/* One-time driver initialization for r100-class ASICs: BIOS probe, GPU
 * reset/post check, clock and VRAM setup, memory manager, GART and finally
 * acceleration startup.  The sequence is order-critical; a failed
 * acceleration bring-up is non-fatal (the device stays usable unaccelerated).
 * Returns 0 on success or a negative error code. */
int r100_init(struct radeon_device *rdev)
{
	int r;

	/* Register debugfs file specific to this group of asics */
	r100_debugfs(rdev);
	/* Disable VGA */
	r100_vga_render_disable(rdev);
	/* Initialize scratch registers */
	radeon_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* sanity check some register to avoid hangs like after kexec */
	r100_restore_sanity(rdev);
	/* TODO: disable VGA need to use VGA request */
	/* BIOS: a missing BIOS is only fatal on AVIVO parts */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* r100-class hardware must carry a combios, never an atombios */
	if (rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
		return -EINVAL;
	} else {
		r = radeon_combios_init(rdev);
		if (r)
			return r;
	}
	/* Reset gpu before posting otherwise ATOM will enter infinite loop */
	if (radeon_asic_reset(rdev)) {
		dev_warn(rdev->dev,
			"GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
			RREG32(R_000E40_RBBM_STATUS),
			RREG32(R_0007C0_CP_STAT));
	}
	/* check if cards are posted or not */
	if (radeon_boot_test_post_card(rdev) == false)
		return -EINVAL;
	/* Set asic errata */
	r100_errata(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* initialize AGP; on failure fall back to non-AGP operation */
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			radeon_agp_disable(rdev);
		}
	}
	/* initialize VRAM */
	r100_mc_init(rdev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;
	if (rdev->flags & RADEON_IS_PCI) {
		r = r100_pci_gart_init(rdev);
		if (r)
			return r;
	}
	r100_set_safe_registers(rdev);

	rdev->accel_working = true;
	r = r100_startup(rdev);
	if (r) {
		/* Something went wrong with the accel init, so stop accel */
		dev_err(rdev->dev, "Disabling GPU acceleration\n");
		if (rdev->flags & RADEON_IS_PCI)
			r100_pci_gart_fini(rdev);
		rdev->accel_working = false;
	}
	return 0;
}
2997 Serge 3702
 
3192 Serge 3703
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
3704
		      bool always_indirect)
2997 Serge 3705
{
3192 Serge 3706
	if (reg < rdev->rmmio_size && !always_indirect)
2997 Serge 3707
		return readl(((void __iomem *)rdev->rmmio) + reg);
3708
	else {
3192 Serge 3709
		unsigned long flags;
3710
		uint32_t ret;
3711
 
3712
		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
2997 Serge 3713
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
3192 Serge 3714
		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
3715
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
3716
 
3717
		return ret;
2997 Serge 3718
	}
3719
}
3720
 
3192 Serge 3721
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
3722
		  bool always_indirect)
2997 Serge 3723
{
3192 Serge 3724
	if (reg < rdev->rmmio_size && !always_indirect)
2997 Serge 3725
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
3726
	else {
3192 Serge 3727
		unsigned long flags;
3728
 
3729
		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
2997 Serge 3730
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
3731
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
3192 Serge 3732
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
2997 Serge 3733
	}
3734
}
3735
 
3736
/* Read a register through the PIO aperture; registers beyond the aperture
 * are reached indirectly via the MM_INDEX/MM_DATA pair.
 * NOTE(review): unlike r100_mm_rreg, the indirect path here takes no lock —
 * presumably callers serialize PIO access; confirm against call sites. */
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{
	if (reg >= rdev->rio_mem_size) {
		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
	}
	return ioread32(rdev->rio_mem + reg);
}
3745
 
3746
/* Write a register through the PIO aperture; registers beyond the aperture
 * are reached indirectly via the MM_INDEX/MM_DATA pair (unlocked, matching
 * r100_io_rreg). */
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	if (reg >= rdev->rio_mem_size) {
		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
		return;
	}
	iowrite32(v, rdev->rio_mem + reg);
}