WebSVN – Kolibri OS – Diff – /drivers/video/drm/radeon/si_dma.c


/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "sid.h"
 
u32 si_gpu_check_soft_reset(struct radeon_device *rdev);
 
/**
 * si_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Queries the soft-reset mask via si_gpu_check_soft_reset() and tests the
 * bit belonging to this ring's DMA engine.  If the bit is clear, the ring's
 * lockup tracking is refreshed and false is returned; otherwise the result
 * of radeon_ring_test_lockup() is returned.
 */
bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	const u32 pending = si_gpu_check_soft_reset(rdev);
	/* first DMA ring maps to RADEON_RESET_DMA, any other to DMA1 */
	const u32 engine_bit = (ring->idx == R600_RING_TYPE_DMA_INDEX) ?
			       RADEON_RESET_DMA : RADEON_RESET_DMA1;

	if (pending & engine_bit)
		return radeon_ring_test_lockup(rdev, ring);

	/* no reset requested for this engine: record progress, not hung */
	radeon_ring_lockup_update(rdev, ring);
	return false;
}
 
/**
 * si_dma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr where to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (SI).
 * Appends one 5-dword copy packet to @ib for every chunk of at most
 * 0xFFFF8 bytes (8 bytes per entry) until all @count entries are covered.
 */
void si_dma_vm_copy_pages(struct radeon_device *rdev,
			  struct radeon_ib *ib,
			  uint64_t pe, uint64_t src,
			  unsigned count)
{
	/* largest number of 8-byte entries a single copy packet may carry */
	const unsigned max_entries = 0xFFFF8 / 8;

	while (count) {
		/*
		 * Clamp in units of entries before scaling to bytes.  Scaling
		 * first (count * 8) wraps in 32-bit arithmetic for very large
		 * counts and can produce a zero-length chunk, after which the
		 * loop would never make progress.
		 */
		unsigned entries = count > max_entries ? max_entries : count;
		unsigned bytes = entries * 8;

		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
						      1, 0, 0, bytes);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;

		pe += bytes;
		src += bytes;
		count -= entries;
	}
}
 
/**
 * si_dma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update PTEs by writing them manually using the DMA (SI).
 * Each write packet carries at most 0xFFFFE dwords (two per entry); the
 * entry values are emitted inline after the 3-dword packet header.
 */
void si_dma_vm_write_pages(struct radeon_device *rdev,
			   struct radeon_ib *ib,
			   uint64_t pe,
			   uint64_t addr, unsigned count,
			   uint32_t incr, uint32_t flags)
{
	/* most entries (two dwords each) a single write packet may carry */
	const unsigned max_entries = 0xFFFFE / 2;
	uint64_t value;
	unsigned ndw;

	while (count) {
		/*
		 * Clamp in units of entries before converting to dwords.
		 * Doubling first (count * 2) wraps in 32-bit arithmetic for
		 * very large counts and can yield ndw == 0, which would emit
		 * packet headers forever without consuming any entry.
		 */
		ndw = (count > max_entries ? max_entries : count) * 2;

		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		/* one iteration per entry: two dwords out, pe advances by 8 */
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
				/* system pages are translated through the GART */
				value = radeon_vm_map_gart(rdev, addr);
			} else if (flags & R600_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}
 
/**
 * si_dma_vm_set_pages - update the page tables using the DMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the DMA (SI).
 * Emits one 9-dword PTE/PDE packet per chunk of at most 0xFFFFE dwords
 * (two per entry).  Each packet carries the starting value, the flags mask
 * and the increment rather than the individual entry values.
 */
void si_dma_vm_set_pages(struct radeon_device *rdev,
			 struct radeon_ib *ib,
			 uint64_t pe,
			 uint64_t addr, unsigned count,
			 uint32_t incr, uint32_t flags)
{
	/* most entries (two dwords each) a single PTE/PDE packet may cover */
	const unsigned max_entries = 0xFFFFE / 2;
	uint64_t value;
	unsigned entries;
	unsigned ndw;

	while (count) {
		/*
		 * Clamp in units of entries before converting to dwords.
		 * Doubling first (count * 2) wraps in 32-bit arithmetic for
		 * very large counts and can yield a zero-sized chunk that
		 * never reduces count.
		 */
		entries = count > max_entries ? max_entries : count;
		ndw = entries * 2;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		pe += ndw * 4;
		/*
		 * Widen before multiplying: entries * incr evaluated in 32 bits
		 * would be truncated before being added to the 64-bit address.
		 */
		addr += (uint64_t)entries * incr;
		count -= entries;
	}
}
 
/**
 * si_dma_vm_flush - emit a VM flush sequence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VM context id selecting the page table base register and the
 *         invalidate bit
 * @pd_addr: address written, shifted right by 12, as the page table base
 *
 * Writes four packets to @ring: an SRBM write of the page table base for
 * @vm_id, an SRBM write of 1 to HDP_MEM_COHERENCY_FLUSH_CNTL, an SRBM
 * write of (1 << @vm_id) to VM_INVALIDATE_REQUEST, and a POLL_REG_MEM
 * packet on VM_INVALIDATE_REQUEST (SI).
 */
void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		     unsigned vm_id, uint64_t pd_addr)
{
	const uint32_t srbm_write = DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0);
	const uint32_t vm_bit = 1 << vm_id;
	uint32_t pt_base_reg;

	/* contexts 0-7 and 8+ use two separate register banks */
	if (vm_id < 8)
		pt_base_reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2);
	else
		pt_base_reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2);

	/* program the page table base for this context */
	radeon_ring_write(ring, srbm_write);
	radeon_ring_write(ring, (0xf << 16) | (pt_base_reg >> 2));
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, srbm_write);
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, srbm_write);
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, vm_bit);

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST);
	radeon_ring_write(ring, 0xff << 16); /* retry */
	radeon_ring_write(ring, vm_bit); /* mask */
	radeon_ring_write(ring, 0); /* value */
	radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}
 
/**
 * si_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the DMA engine (SI).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 *
 * The transfer is split into copy packets of at most 0xFFFFF bytes each.
 * Returns the emitted fence on success, or ERR_PTR() of the error code if
 * locking the ring or emitting the fence fails.
 */
struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
				 uint64_t src_offset, uint64_t dst_offset,
				 unsigned num_gpu_pages,
				 struct reservation_object *resv)
{
	const int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	struct radeon_fence *fence;
	struct radeon_sync sync;
	u32 remaining, chunk;
	int loops, n;
	int r;

	radeon_sync_create(&sync);

	remaining = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	loops = DIV_ROUND_UP(remaining, 0xfffff);

	/*
	 * 5 dwords per copy packet; the extra 11 presumably cover the sync
	 * and fence commands emitted around the copies - TODO confirm.
	 */
	r = radeon_ring_lock(rdev, ring, loops * 5 + 11);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		goto err_free_sync;
	}

	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (n = loops; n > 0; n--) {
		/* take as much as one packet allows from what is left */
		chunk = (remaining > 0xFFFFF) ? 0xFFFFF : remaining;
		remaining -= chunk;

		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, chunk));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);

		src_offset += chunk;
		dst_offset += chunk;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* discard everything queued since the ring was locked */
		radeon_ring_unlock_undo(rdev, ring);
		goto err_free_sync;
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;

err_free_sync:
	radeon_sync_free(rdev, &sync, NULL);
	return ERR_PTR(r);
}

Subversion Repositories Kolibri OS

(root)/drivers/video/drm/radeon/si_dma.c – Rev 5271 → 6104

Rev 5271		Rev 6104
1	/*	1	/*
2	* Copyright 2013 Advanced Micro Devices, Inc.	2	* Copyright 2013 Advanced Micro Devices, Inc.
3	*	3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a	4	* Permission is hereby granted, free of charge, to any person obtaining a
5	* copy of this software and associated documentation files (the "Software"),	5	* copy of this software and associated documentation files (the "Software"),
6	* to deal in the Software without restriction, including without limitation	6	* to deal in the Software without restriction, including without limitation
7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,	7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8	* and/or sell copies of the Software, and to permit persons to whom the	8	* and/or sell copies of the Software, and to permit persons to whom the
9	* Software is furnished to do so, subject to the following conditions:	9	* Software is furnished to do so, subject to the following conditions:
10	*	10	*
11	* The above copyright notice and this permission notice shall be included in	11	* The above copyright notice and this permission notice shall be included in
12	* all copies or substantial portions of the Software.	12	* all copies or substantial portions of the Software.
13	*	13	*
14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR	14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,	15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL	16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR	17	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,	18	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR	19	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20	* OTHER DEALINGS IN THE SOFTWARE.	20	* OTHER DEALINGS IN THE SOFTWARE.
21	*	21	*
22	* Authors: Alex Deucher	22	* Authors: Alex Deucher
23	*/	23	*/
24	#include <drm/drmP.h>	24	#include <drm/drmP.h>
25	#include "radeon.h"	25	#include "radeon.h"
26	#include "radeon_asic.h"	26	#include "radeon_asic.h"
27	#include "radeon_trace.h"	27	#include "radeon_trace.h"
28	#include "sid.h"	28	#include "sid.h"
29		29
30	u32 si_gpu_check_soft_reset(struct radeon_device *rdev);	30	u32 si_gpu_check_soft_reset(struct radeon_device *rdev);
31		31
32	/**	32	/**
33	* si_dma_is_lockup - Check if the DMA engine is locked up	33	* si_dma_is_lockup - Check if the DMA engine is locked up
34	*	34	*
35	* @rdev: radeon_device pointer	35	* @rdev: radeon_device pointer
36	* @ring: radeon_ring structure holding ring information	36	* @ring: radeon_ring structure holding ring information
37	*	37	*
38	* Check if the async DMA engine is locked up.	38	* Check if the async DMA engine is locked up.
39	* Returns true if the engine appears to be locked up, false if not.	39	* Returns true if the engine appears to be locked up, false if not.
40	*/	40	*/
41	bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)	41	bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
42	{	42	{
43	u32 reset_mask = si_gpu_check_soft_reset(rdev);	43	u32 reset_mask = si_gpu_check_soft_reset(rdev);
44	u32 mask;	44	u32 mask;
45		45
46	if (ring->idx == R600_RING_TYPE_DMA_INDEX)	46	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
47	mask = RADEON_RESET_DMA;	47	mask = RADEON_RESET_DMA;
48	else	48	else
49	mask = RADEON_RESET_DMA1;	49	mask = RADEON_RESET_DMA1;
50		50
51	if (!(reset_mask & mask)) {	51	if (!(reset_mask & mask)) {
52	radeon_ring_lockup_update(rdev, ring);	52	radeon_ring_lockup_update(rdev, ring);
53	return false;	53	return false;
54	}	54	}
55	return radeon_ring_test_lockup(rdev, ring);	55	return radeon_ring_test_lockup(rdev, ring);
56	}	56	}
57		57
58	/**	58	/**
59	* si_dma_vm_copy_pages - update PTEs by copying them from the GART	59	* si_dma_vm_copy_pages - update PTEs by copying them from the GART
60	*	60	*
61	* @rdev: radeon_device pointer	61	* @rdev: radeon_device pointer
62	* @ib: indirect buffer to fill with commands	62	* @ib: indirect buffer to fill with commands
63	* @pe: addr of the page entry	63	* @pe: addr of the page entry
64	* @src: src addr where to copy from	64	* @src: src addr where to copy from
65	* @count: number of page entries to update	65	* @count: number of page entries to update
66	*	66	*
67	* Update PTEs by copying them from the GART using the DMA (SI).	67	* Update PTEs by copying them from the GART using the DMA (SI).
68	*/	68	*/
69	void si_dma_vm_copy_pages(struct radeon_device *rdev,	69	void si_dma_vm_copy_pages(struct radeon_device *rdev,
70	struct radeon_ib *ib,	70	struct radeon_ib *ib,
71	uint64_t pe, uint64_t src,	71	uint64_t pe, uint64_t src,
72	unsigned count)	72	unsigned count)
73	{	73	{
74	while (count) {	74	while (count) {
75	unsigned bytes = count * 8;	75	unsigned bytes = count * 8;
76	if (bytes > 0xFFFF8)	76	if (bytes > 0xFFFF8)
77	bytes = 0xFFFF8;	77	bytes = 0xFFFF8;
78		78
79	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,	79	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
80	1, 0, 0, bytes);	80	1, 0, 0, bytes);
81	ib->ptr[ib->length_dw++] = lower_32_bits(pe);	81	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
82	ib->ptr[ib->length_dw++] = lower_32_bits(src);	82	ib->ptr[ib->length_dw++] = lower_32_bits(src);
83	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;	83	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
84	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;	84	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
85		85
86	pe += bytes;	86	pe += bytes;
87	src += bytes;	87	src += bytes;
88	count -= bytes / 8;	88	count -= bytes / 8;
89	}	89	}
90	}	90	}
91		91
92	/**	92	/**
93	* si_dma_vm_write_pages - update PTEs by writing them manually	93	* si_dma_vm_write_pages - update PTEs by writing them manually
94	*	94	*
95	* @rdev: radeon_device pointer	95	* @rdev: radeon_device pointer
96	* @ib: indirect buffer to fill with commands	96	* @ib: indirect buffer to fill with commands
97	* @pe: addr of the page entry	97	* @pe: addr of the page entry
98	* @addr: dst addr to write into pe	98	* @addr: dst addr to write into pe
99	* @count: number of page entries to update	99	* @count: number of page entries to update
100	* @incr: increase next addr by incr bytes	100	* @incr: increase next addr by incr bytes
101	* @flags: access flags	101	* @flags: access flags
102	*	102	*
103	* Update PTEs by writing them manually using the DMA (SI).	103	* Update PTEs by writing them manually using the DMA (SI).
104	*/	104	*/
105	void si_dma_vm_write_pages(struct radeon_device *rdev,	105	void si_dma_vm_write_pages(struct radeon_device *rdev,
106	struct radeon_ib *ib,	106	struct radeon_ib *ib,
107	uint64_t pe,	107	uint64_t pe,
108	uint64_t addr, unsigned count,	108	uint64_t addr, unsigned count,
109	uint32_t incr, uint32_t flags)	109	uint32_t incr, uint32_t flags)
110	{	110	{
111	uint64_t value;	111	uint64_t value;
112	unsigned ndw;	112	unsigned ndw;
113		113
114	while (count) {	114	while (count) {
115	ndw = count * 2;	115	ndw = count * 2;
116	if (ndw > 0xFFFFE)	116	if (ndw > 0xFFFFE)
117	ndw = 0xFFFFE;	117	ndw = 0xFFFFE;
118		118
119	/* for non-physically contiguous pages (system) */	119	/* for non-physically contiguous pages (system) */
120	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);	120	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
121	ib->ptr[ib->length_dw++] = pe;	121	ib->ptr[ib->length_dw++] = pe;
122	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;	122	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
123	for (; ndw > 0; ndw -= 2, --count, pe += 8) {	123	for (; ndw > 0; ndw -= 2, --count, pe += 8) {
124	if (flags & R600_PTE_SYSTEM) {	124	if (flags & R600_PTE_SYSTEM) {
125	value = radeon_vm_map_gart(rdev, addr);	125	value = radeon_vm_map_gart(rdev, addr);
126	value &= 0xFFFFFFFFFFFFF000ULL;	-
127	} else if (flags & R600_PTE_VALID) {	126	} else if (flags & R600_PTE_VALID) {
128	value = addr;	127	value = addr;
129	} else {	128	} else {
130	value = 0;	129	value = 0;
131	}	130	}
132	addr += incr;	131	addr += incr;
133	value |= flags;	132	value |= flags;
134	ib->ptr[ib->length_dw++] = value;	133	ib->ptr[ib->length_dw++] = value;
135	ib->ptr[ib->length_dw++] = upper_32_bits(value);	134	ib->ptr[ib->length_dw++] = upper_32_bits(value);
136	}	135	}
137	}	136	}
138	}	137	}
139		138
140	/**	139	/**
141	* si_dma_vm_set_pages - update the page tables using the DMA	140	* si_dma_vm_set_pages - update the page tables using the DMA
142	*	141	*
143	* @rdev: radeon_device pointer	142	* @rdev: radeon_device pointer
144	* @ib: indirect buffer to fill with commands	143	* @ib: indirect buffer to fill with commands
145	* @pe: addr of the page entry	144	* @pe: addr of the page entry
146	* @addr: dst addr to write into pe	145	* @addr: dst addr to write into pe
147	* @count: number of page entries to update	146	* @count: number of page entries to update
148	* @incr: increase next addr by incr bytes	147	* @incr: increase next addr by incr bytes
149	* @flags: access flags	148	* @flags: access flags
150	*	149	*
151	* Update the page tables using the DMA (SI).	150	* Update the page tables using the DMA (SI).
152	*/	151	*/
153	void si_dma_vm_set_pages(struct radeon_device *rdev,	152	void si_dma_vm_set_pages(struct radeon_device *rdev,
154	struct radeon_ib *ib,	153	struct radeon_ib *ib,
155	uint64_t pe,	154	uint64_t pe,
156	uint64_t addr, unsigned count,	155	uint64_t addr, unsigned count,
157	uint32_t incr, uint32_t flags)	156	uint32_t incr, uint32_t flags)
158	{	157	{
159	uint64_t value;	158	uint64_t value;
160	unsigned ndw;	159	unsigned ndw;
161		160
162	while (count) {	161	while (count) {
163	ndw = count * 2;	162	ndw = count * 2;
164	if (ndw > 0xFFFFE)	163	if (ndw > 0xFFFFE)
165	ndw = 0xFFFFE;	164	ndw = 0xFFFFE;
166		165
167	if (flags & R600_PTE_VALID)	166	if (flags & R600_PTE_VALID)
168	value = addr;	167	value = addr;
169	else	168	else
170	value = 0;	169	value = 0;
171		170
172	/* for physically contiguous pages (vram) */	171	/* for physically contiguous pages (vram) */
173	ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);	172	ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
174	ib->ptr[ib->length_dw++] = pe; /* dst addr */	173	ib->ptr[ib->length_dw++] = pe; /* dst addr */
175	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;	174	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
176	ib->ptr[ib->length_dw++] = flags; /* mask */	175	ib->ptr[ib->length_dw++] = flags; /* mask */
177	ib->ptr[ib->length_dw++] = 0;	176	ib->ptr[ib->length_dw++] = 0;
178	ib->ptr[ib->length_dw++] = value; /* value */	177	ib->ptr[ib->length_dw++] = value; /* value */
179	ib->ptr[ib->length_dw++] = upper_32_bits(value);	178	ib->ptr[ib->length_dw++] = upper_32_bits(value);
180	ib->ptr[ib->length_dw++] = incr; /* increment size */	179	ib->ptr[ib->length_dw++] = incr; /* increment size */
181	ib->ptr[ib->length_dw++] = 0;	180	ib->ptr[ib->length_dw++] = 0;
182	pe += ndw * 4;	181	pe += ndw * 4;
183	addr += (ndw / 2) * incr;	182	addr += (ndw / 2) * incr;
184	count -= ndw / 2;	183	count -= ndw / 2;
185	}	184	}
186	}	185	}
187		186
188	void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,	187	void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
189	unsigned vm_id, uint64_t pd_addr)	188	unsigned vm_id, uint64_t pd_addr)
190		189
191	{	190	{
192	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));	191	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
193	if (vm_id < 8) {	192	if (vm_id < 8) {
194	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));	193	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
195	} else {	194	} else {
196	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2));	195	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2));
197	}	196	}
198	radeon_ring_write(ring, pd_addr >> 12);	197	radeon_ring_write(ring, pd_addr >> 12);
199		198
200	/* flush hdp cache */	199	/* flush hdp cache */
201	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));	200	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
202	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));	201	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
203	radeon_ring_write(ring, 1);	202	radeon_ring_write(ring, 1);
204		203
205	/* bits 0-7 are the VM contexts0-7 */	204	/* bits 0-7 are the VM contexts0-7 */
206	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));	205	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
207	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));	206	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
208	radeon_ring_write(ring, 1 << vm_id);	207	radeon_ring_write(ring, 1 << vm_id);
-		208
-		209	/* wait for invalidate to complete */
-		210	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
-		211	radeon_ring_write(ring, VM_INVALIDATE_REQUEST);
-		212	radeon_ring_write(ring, 0xff << 16); /* retry */
-		213	radeon_ring_write(ring, 1 << vm_id); /* mask */
-		214	radeon_ring_write(ring, 0); /* value */
-		215	radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
209	}	216	}
210		217
211	/**	218	/**
212	* si_copy_dma - copy pages using the DMA engine	219	* si_copy_dma - copy pages using the DMA engine
213	*	220	*
214	* @rdev: radeon_device pointer	221	* @rdev: radeon_device pointer
215	* @src_offset: src GPU address	222	* @src_offset: src GPU address
216	* @dst_offset: dst GPU address	223	* @dst_offset: dst GPU address
217	* @num_gpu_pages: number of GPU pages to xfer	224	* @num_gpu_pages: number of GPU pages to xfer
218	* @resv: reservation object to sync to	225	* @resv: reservation object to sync to
219	*	226	*
220	* Copy GPU paging using the DMA engine (SI).	227	* Copy GPU paging using the DMA engine (SI).
221	* Used by the radeon ttm implementation to move pages if	228	* Used by the radeon ttm implementation to move pages if
222	* registered as the asic copy callback.	229	* registered as the asic copy callback.
223	*/	230	*/
224	struct radeon_fence *si_copy_dma(struct radeon_device *rdev,	231	struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
225	uint64_t src_offset, uint64_t dst_offset,	232	uint64_t src_offset, uint64_t dst_offset,
226	unsigned num_gpu_pages,	233	unsigned num_gpu_pages,
227	struct reservation_object *resv)	234	struct reservation_object *resv)
228	{	235	{
229	struct radeon_fence *fence;	236	struct radeon_fence *fence;
230	struct radeon_sync sync;	237	struct radeon_sync sync;
231	int ring_index = rdev->asic->copy.dma_ring_index;	238	int ring_index = rdev->asic->copy.dma_ring_index;
232	struct radeon_ring *ring = &rdev->ring[ring_index];	239	struct radeon_ring *ring = &rdev->ring[ring_index];
233	u32 size_in_bytes, cur_size_in_bytes;	240	u32 size_in_bytes, cur_size_in_bytes;
234	int i, num_loops;	241	int i, num_loops;
235	int r = 0;	242	int r = 0;
236		243
237	radeon_sync_create(&sync);	244	radeon_sync_create(&sync);
238		245
239	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);	246	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
240	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);	247	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
241	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);	248	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
242	if (r) {	249	if (r) {
243	DRM_ERROR("radeon: moving bo (%d).\n", r);	250	DRM_ERROR("radeon: moving bo (%d).\n", r);
244	radeon_sync_free(rdev, &sync, NULL);	251	radeon_sync_free(rdev, &sync, NULL);
245	return ERR_PTR(r);	252	return ERR_PTR(r);
246	}	253	}
247		254
248	radeon_sync_resv(rdev, &sync, resv, false);	255	radeon_sync_resv(rdev, &sync, resv, false);
249	radeon_sync_rings(rdev, &sync, ring->idx);	256	radeon_sync_rings(rdev, &sync, ring->idx);
250		257
251	for (i = 0; i < num_loops; i++) {	258	for (i = 0; i < num_loops; i++) {
252	cur_size_in_bytes = size_in_bytes;	259	cur_size_in_bytes = size_in_bytes;
253	if (cur_size_in_bytes > 0xFFFFF)	260	if (cur_size_in_bytes > 0xFFFFF)
254	cur_size_in_bytes = 0xFFFFF;	261	cur_size_in_bytes = 0xFFFFF;
255	size_in_bytes -= cur_size_in_bytes;	262	size_in_bytes -= cur_size_in_bytes;
256	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));	263	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
257	radeon_ring_write(ring, lower_32_bits(dst_offset));	264	radeon_ring_write(ring, lower_32_bits(dst_offset));
258	radeon_ring_write(ring, lower_32_bits(src_offset));	265	radeon_ring_write(ring, lower_32_bits(src_offset));
259	radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);	266	radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
260	radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);	267	radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
261	src_offset += cur_size_in_bytes;	268	src_offset += cur_size_in_bytes;
262	dst_offset += cur_size_in_bytes;	269	dst_offset += cur_size_in_bytes;
263	}	270	}
264		271
265	r = radeon_fence_emit(rdev, &fence, ring->idx);	272	r = radeon_fence_emit(rdev, &fence, ring->idx);
266	if (r) {	273	if (r) {
267	radeon_ring_unlock_undo(rdev, ring);	274	radeon_ring_unlock_undo(rdev, ring);
268	radeon_sync_free(rdev, &sync, NULL);	275	radeon_sync_free(rdev, &sync, NULL);
269	return ERR_PTR(r);	276	return ERR_PTR(r);
270	}	277	}
271		278
272	radeon_ring_unlock_commit(rdev, ring, false);	279	radeon_ring_unlock_commit(rdev, ring, false);
273	radeon_sync_free(rdev, &sync, fence);	280	radeon_sync_free(rdev, &sync, fence);
274		281
275	return fence;	282	return fence;
276	}	283	}