WebSVN – Kolibri OS – Blame – /drivers/video/drm/radeon/ni_dma.c

Rev	Author	Line No.	Line
5078	serge	1	/*
		2	* Copyright 2010 Advanced Micro Devices, Inc.
		3	*
		4	* Permission is hereby granted, free of charge, to any person obtaining a
		5	* copy of this software and associated documentation files (the "Software"),
		6	* to deal in the Software without restriction, including without limitation
		7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		8	* and/or sell copies of the Software, and to permit persons to whom the
		9	* Software is furnished to do so, subject to the following conditions:
		10	*
		11	* The above copyright notice and this permission notice shall be included in
		12	* all copies or substantial portions of the Software.
		13	*
		14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		17	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
		18	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
		19	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
		20	* OTHER DEALINGS IN THE SOFTWARE.
		21	*
		22	* Authors: Alex Deucher
		23	*/
		24	#include <drm/drmP.h>
		25	#include "radeon.h"
		26	#include "radeon_asic.h"
		27	#include "radeon_trace.h"
		28	#include "nid.h"
		29
		30	u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);
		31
		32	/*
		33	* DMA
		34	* Starting with R600, the GPU has an asynchronous
		35	* DMA engine. The programming model is very similar
		36	* to the 3D engine (ring buffer, IBs, etc.), but the
		37	* DMA controller has its own packet format that is
		38	* different from the PM4 format used by the 3D engine.
		39	* It supports copying data, writing embedded data,
		40	* solid fills, and a number of other things. It also
		41	* has support for tiling/detiling of buffers.
		42	* Cayman and newer support two asynchronous DMA engines.
		43	*/
		44
		45	/**
		46	* cayman_dma_get_rptr - get the current read pointer
		47	*
		48	* @rdev: radeon_device pointer
		49	* @ring: radeon ring pointer
		50	*
		51	* Get the current rptr from the hardware (cayman+).
		52	*/
		53	uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
		54	struct radeon_ring *ring)
		55	{
		56	u32 rptr, reg;
		57
		58	if (rdev->wb.enabled) {
		59	rptr = rdev->wb.wb[ring->rptr_offs/4];
		60	} else {
		61	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		62	reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
		63	else
		64	reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;
		65
		66	rptr = RREG32(reg);
		67	}
		68
		69	return (rptr & 0x3fffc) >> 2;
		70	}
		71
		72	/**
		73	* cayman_dma_get_wptr - get the current write pointer
		74	*
		75	* @rdev: radeon_device pointer
		76	* @ring: radeon ring pointer
		77	*
		78	* Get the current wptr from the hardware (cayman+).
		79	*/
		80	uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
		81	struct radeon_ring *ring)
		82	{
		83	u32 reg;
		84
		85	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		86	reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
		87	else
		88	reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
		89
		90	return (RREG32(reg) & 0x3fffc) >> 2;
		91	}
		92
		93	/**
		94	* cayman_dma_set_wptr - commit the write pointer
		95	*
		96	* @rdev: radeon_device pointer
		97	* @ring: radeon ring pointer
		98	*
		99	* Write the wptr back to the hardware (cayman+).
		100	*/
		101	void cayman_dma_set_wptr(struct radeon_device *rdev,
		102	struct radeon_ring *ring)
		103	{
		104	u32 reg;
		105
		106	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		107	reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
		108	else
		109	reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
		110
		111	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
		112	}
		113
		114	/**
		115	* cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
		116	*
		117	* @rdev: radeon_device pointer
		118	* @ib: IB object to schedule
		119	*
		120	* Schedule an IB in the DMA ring (cayman-SI).
		121	*/
		122	void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
		123	struct radeon_ib *ib)
		124	{
		125	struct radeon_ring *ring = &rdev->ring[ib->ring];
5271	serge	126	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
5078	serge	127
		128	if (rdev->wb.enabled) {
		129	u32 next_rptr = ring->wptr + 4;
		130	while ((next_rptr & 7) != 5)
		131	next_rptr++;
		132	next_rptr += 3;
		133	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		134	radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		135	radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		136	radeon_ring_write(ring, next_rptr);
		137	}
		138
		139	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
		140	* Pad as necessary with NOPs.
		141	*/
		142	while ((ring->wptr & 7) != 5)
		143	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
5271	serge	144	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
5078	serge	145	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
		146	radeon_ring_write(ring, (ib->length_dw << 12) \| (upper_32_bits(ib->gpu_addr) & 0xFF));
		147
		148	}
		149
		150	/**
		151	* cayman_dma_stop - stop the async dma engines
		152	*
		153	* @rdev: radeon_device pointer
		154	*
		155	* Stop the async dma engines (cayman-SI).
		156	*/
		157	void cayman_dma_stop(struct radeon_device *rdev)
		158	{
		159	u32 rb_cntl;
		160
		161	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) \|\|
		162	(rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		163	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		164
		165	/* dma0 */
		166	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		167	rb_cntl &= ~DMA_RB_ENABLE;
		168	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
		169
		170	/* dma1 */
		171	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		172	rb_cntl &= ~DMA_RB_ENABLE;
		173	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
		174
		175	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
		176	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
		177	}
		178
		179	/**
		180	* cayman_dma_resume - setup and start the async dma engines
		181	*
		182	* @rdev: radeon_device pointer
		183	*
		184	* Set up the DMA ring buffers and enable them. (cayman-SI).
		185	* Returns 0 for success, error for failure.
		186	*/
		187	int cayman_dma_resume(struct radeon_device *rdev)
		188	{
		189	struct radeon_ring *ring;
		190	u32 rb_cntl, dma_cntl, ib_cntl;
		191	u32 rb_bufsz;
		192	u32 reg_offset, wb_offset;
		193	int i, r;
		194
		195	for (i = 0; i < 2; i++) {
		196	if (i == 0) {
		197	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
		198	reg_offset = DMA0_REGISTER_OFFSET;
		199	wb_offset = R600_WB_DMA_RPTR_OFFSET;
		200	} else {
		201	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
		202	reg_offset = DMA1_REGISTER_OFFSET;
		203	wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		204	}
		205
		206	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		207	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
		208
		209	/* Set ring buffer size in dwords */
		210	rb_bufsz = order_base_2(ring->ring_size / 4);
		211	rb_cntl = rb_bufsz << 1;
		212	#ifdef __BIG_ENDIAN
		213	rb_cntl \|= DMA_RB_SWAP_ENABLE \| DMA_RPTR_WRITEBACK_SWAP_ENABLE;
		214	#endif
		215	WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
		216
		217	/* Initialize the ring buffer's read and write pointers */
		218	WREG32(DMA_RB_RPTR + reg_offset, 0);
		219	WREG32(DMA_RB_WPTR + reg_offset, 0);
		220
		221	/* set the wb address whether it's enabled or not */
		222	WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		223	upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		224	WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		225	((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
		226
		227	if (rdev->wb.enabled)
		228	rb_cntl \|= DMA_RPTR_WRITEBACK_ENABLE;
		229
		230	WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		231
		232	/* enable DMA IBs */
		233	ib_cntl = DMA_IB_ENABLE \| CMD_VMID_FORCE;
		234	#ifdef __BIG_ENDIAN
		235	ib_cntl \|= DMA_IB_SWAP_ENABLE;
		236	#endif
		237	WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
		238
		239	dma_cntl = RREG32(DMA_CNTL + reg_offset);
		240	dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		241	WREG32(DMA_CNTL + reg_offset, dma_cntl);
		242
		243	ring->wptr = 0;
		244	WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
		245
		246	WREG32(DMA_RB_CNTL + reg_offset, rb_cntl \| DMA_RB_ENABLE);
		247
		248	ring->ready = true;
		249
		250	r = radeon_ring_test(rdev, ring->idx, ring);
		251	if (r) {
		252	ring->ready = false;
		253	return r;
		254	}
		255	}
		256
		257	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) \|\|
		258	(rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		259	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
		260
		261	return 0;
		262	}
		263
		264	/**
		265	* cayman_dma_fini - tear down the async dma engines
		266	*
		267	* @rdev: radeon_device pointer
		268	*
		269	* Stop the async dma engines and free the rings (cayman-SI).
		270	*/
		271	void cayman_dma_fini(struct radeon_device *rdev)
		272	{
		273	cayman_dma_stop(rdev);
		274	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
		275	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
		276	}
		277
		278	/**
		279	* cayman_dma_is_lockup - Check if the DMA engine is locked up
		280	*
		281	* @rdev: radeon_device pointer
		282	* @ring: radeon_ring structure holding ring information
		283	*
		284	* Check if the async DMA engine is locked up.
		285	* Returns true if the engine appears to be locked up, false if not.
		286	*/
		287	bool cayman_dma_is_lockup(struct radeon_device rdev, struct radeon_ring ring)
		288	{
		289	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
		290	u32 mask;
		291
		292	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		293	mask = RADEON_RESET_DMA;
		294	else
		295	mask = RADEON_RESET_DMA1;
		296
		297	if (!(reset_mask & mask)) {
		298	radeon_ring_lockup_update(rdev, ring);
		299	return false;
		300	}
		301	return radeon_ring_test_lockup(rdev, ring);
		302	}
		303
		304	/**
		305	* cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
		306	*
		307	* @rdev: radeon_device pointer
		308	* @ib: indirect buffer to fill with commands
		309	* @pe: addr of the page entry
		310	* @src: src addr where to copy from
		311	* @count: number of page entries to update
		312	*
		313	* Update PTEs by copying them from the GART using the DMA (cayman/TN).
		314	*/
		315	void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
		316	struct radeon_ib *ib,
		317	uint64_t pe, uint64_t src,
		318	unsigned count)
		319	{
		320	unsigned ndw;
		321
		322	while (count) {
		323	ndw = count * 2;
		324	if (ndw > 0xFFFFE)
		325	ndw = 0xFFFFE;
		326
		327	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
		328	0, 0, ndw);
		329	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		330	ib->ptr[ib->length_dw++] = lower_32_bits(src);
		331	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		332	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
		333
		334	pe += ndw * 4;
		335	src += ndw * 4;
		336	count -= ndw / 2;
		337	}
		338	}
		339
		340	/**
		341	* cayman_dma_vm_write_pages - update PTEs by writing them manually
		342	*
		343	* @rdev: radeon_device pointer
		344	* @ib: indirect buffer to fill with commands
		345	* @pe: addr of the page entry
		346	* @addr: dst addr to write into pe
		347	* @count: number of page entries to update
		348	* @incr: increase next addr by incr bytes
6104	serge	349	* @flags: hw access flags
5078	serge	350	*
		351	* Update PTEs by writing them manually using the DMA (cayman/TN).
		352	*/
		353	void cayman_dma_vm_write_pages(struct radeon_device *rdev,
6104	serge	354	struct radeon_ib *ib,
		355	uint64_t pe,
		356	uint64_t addr, unsigned count,
		357	uint32_t incr, uint32_t flags)
5078	serge	358	{
		359	uint64_t value;
		360	unsigned ndw;
		361
6104	serge	362	while (count) {
		363	ndw = count * 2;
		364	if (ndw > 0xFFFFE)
		365	ndw = 0xFFFFE;
5078	serge	366
6104	serge	367	/* for non-physically contiguous pages (system) */
5078	serge	368	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
		369	0, 0, ndw);
6104	serge	370	ib->ptr[ib->length_dw++] = pe;
		371	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		372	for (; ndw > 0; ndw -= 2, --count, pe += 8) {
		373	if (flags & R600_PTE_SYSTEM) {
		374	value = radeon_vm_map_gart(rdev, addr);
		375	} else if (flags & R600_PTE_VALID) {
		376	value = addr;
		377	} else {
		378	value = 0;
5078	serge	379	}
6104	serge	380	addr += incr;
		381	value \|= flags;
		382	ib->ptr[ib->length_dw++] = value;
		383	ib->ptr[ib->length_dw++] = upper_32_bits(value);
5078	serge	384	}
6104	serge	385	}
5078	serge	386	}
		387
		388	/**
		389	* cayman_dma_vm_set_pages - update the page tables using the DMA
		390	*
		391	* @rdev: radeon_device pointer
		392	* @ib: indirect buffer to fill with commands
		393	* @pe: addr of the page entry
		394	* @addr: dst addr to write into pe
		395	* @count: number of page entries to update
		396	* @incr: increase next addr by incr bytes
		397	* @flags: hw access flags
		398	*
		399	* Update the page tables using the DMA (cayman/TN).
		400	*/
		401	void cayman_dma_vm_set_pages(struct radeon_device *rdev,
		402	struct radeon_ib *ib,
		403	uint64_t pe,
		404	uint64_t addr, unsigned count,
		405	uint32_t incr, uint32_t flags)
		406	{
		407	uint64_t value;
		408	unsigned ndw;
		409
6104	serge	410	while (count) {
		411	ndw = count * 2;
		412	if (ndw > 0xFFFFE)
		413	ndw = 0xFFFFE;
5078	serge	414
6104	serge	415	if (flags & R600_PTE_VALID)
		416	value = addr;
		417	else
		418	value = 0;
5078	serge	419
6104	serge	420	/* for physically contiguous pages (vram) */
		421	ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		422	ib->ptr[ib->length_dw++] = pe; /* dst addr */
		423	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		424	ib->ptr[ib->length_dw++] = flags; /* mask */
		425	ib->ptr[ib->length_dw++] = 0;
		426	ib->ptr[ib->length_dw++] = value; /* value */
		427	ib->ptr[ib->length_dw++] = upper_32_bits(value);
		428	ib->ptr[ib->length_dw++] = incr; /* increment size */
		429	ib->ptr[ib->length_dw++] = 0;
5078	serge	430
6104	serge	431	pe += ndw * 4;
		432	addr += (ndw / 2) * incr;
		433	count -= ndw / 2;
		434	}
5078	serge	435	}
		436
		437	/**
		438	* cayman_dma_vm_pad_ib - pad the IB to the required number of dw
		439	*
		440	* @ib: indirect buffer to fill with padding
		441	*
		442	*/
		443	void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
		444	{
		445	while (ib->length_dw & 0x7)
		446	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
		447	}
		448
5271	serge	449	void cayman_dma_vm_flush(struct radeon_device rdev, struct radeon_ring ring,
		450	unsigned vm_id, uint64_t pd_addr)
5078	serge	451	{
		452	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
5271	serge	453	radeon_ring_write(ring, (0xf << 16) \| ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
		454	radeon_ring_write(ring, pd_addr >> 12);
5078	serge	455
		456	/* flush hdp cache */
		457	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
		458	radeon_ring_write(ring, (0xf << 16) \| (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
		459	radeon_ring_write(ring, 1);
		460
		461	/* bits 0-7 are the VM contexts0-7 */
		462	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
		463	radeon_ring_write(ring, (0xf << 16) \| (VM_INVALIDATE_REQUEST >> 2));
5271	serge	464	radeon_ring_write(ring, 1 << vm_id);
6104	serge	465
		466	/* wait for invalidate to complete */
		467	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
		468	radeon_ring_write(ring, (0xff << 20) \| (VM_INVALIDATE_REQUEST >> 2));
		469	radeon_ring_write(ring, 0); /* mask */
		470	radeon_ring_write(ring, 0); /* value */
5078	serge	471	}
		472

Subversion Repositories Kolibri OS

(root)/drivers/video/drm/radeon/ni_dma.c – Rev 6104