/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "r600d.h"

u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 */

/**
 * r600_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
			   struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled)
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	else
		rptr = RREG32(DMA_RB_RPTR);

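	/* the hardware rptr is a byte offset; mask off the ring bits and convert to a dword index */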
	return (rptr & 0x3fffc) >> 2;
}

/**
 * r600_dma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (r6xx+).
 */
uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
			   struct radeon_ring *ring)
{
	return (RREG32(DMA_RB_WPTR) & 0x3fffc) >> 2;
}

/**
 * r600_dma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (r6xx+).
 */
void r600_dma_set_wptr(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
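	/* the driver tracks wptr in dwords; the register takes a byte offset */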
	WREG32(DMA_RB_WPTR, (ring->wptr << 2) & 0x3fffc);
}

/**
 * r600_dma_stop - stop the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine (r6xx-evergreen).
 */
void r600_dma_stop(struct radeon_device *rdev)
{
	u32 rb_cntl = RREG32(DMA_RB_CNTL);

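	/* the copy engine is going down; shrink TTM's active VRAM to the CPU-visible aperture */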
	if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	rb_cntl &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL, rb_cntl);

	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
}

/**
 * r600_dma_resume - setup and start the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffer and enable it (r6xx-evergreen).
 * Returns 0 for success, error for failure.
 */
int r600_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	int r;

	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);

	/* Set ring buffer size in dwords */
	rb_bufsz = order_base_2(ring->ring_size / 4);
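	/* the size field of DMA_RB_CNTL is log2 of the ring size in dwords, starting at bit 1 */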
	rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
	rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
	WREG32(DMA_RB_CNTL, rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(DMA_RB_RPTR, 0);
	WREG32(DMA_RB_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(DMA_RB_RPTR_ADDR_HI,
	       upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
	WREG32(DMA_RB_RPTR_ADDR_LO,
	       ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));

	if (rdev->wb.enabled)
		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

	WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);

	/* enable DMA IBs */
	ib_cntl = DMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
	ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
	WREG32(DMA_IB_CNTL, ib_cntl);

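	/* disable the DMA context-empty interrupt */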
	dma_cntl = RREG32(DMA_CNTL);
	dma_cntl &= ~CTXEMPTY_INT_ENABLE;
	WREG32(DMA_CNTL, dma_cntl);

	if (rdev->family >= CHIP_RV770)
		WREG32(DMA_MODE, 1);

	ring->wptr = 0;
	WREG32(DMA_RB_WPTR, ring->wptr << 2);

	WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);

	ring->ready = true;

	r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * r600_dma_fini - tear down the async dma engine
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engine and free the ring (r6xx-evergreen).
 */
void r600_dma_fini(struct radeon_device *rdev)
{
	r600_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
}

/**
 * r600_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = r600_gpu_check_soft_reset(rdev);

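	/* if the soft-reset status doesn't flag the DMA block, the engine is still making progress */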
	if (!(reset_mask & RADEON_RESET_DMA)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * r600_dma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (r6xx-SI).
 * Returns 0 for success, error for failure.
 */
int r600_dma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	unsigned index;
	u32 tmp;
	u64 gpu_addr;

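	/* this test also serves the second DMA ring on cayman+, which has its own writeback slot */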
	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		index = R600_WB_DMA_RING_TEST_OFFSET;
	else
		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;

	gpu_addr = rdev->wb.gpu_addr + index;

	tmp = 0xCAFEDEAD;
	rdev->wb.wb[index/4] = cpu_to_le32(tmp);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
	radeon_ring_write(ring, lower_32_bits(gpu_addr));
	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

/**
 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write the fence seq number,
 * and a DMA trap packet to generate an interrupt if needed (r6xx-r7xx).
 */
void r600_dma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* write the fence */
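	/* addresses are 40 bits: a dword-aligned low 32 bits plus 8 high bits */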
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
	radeon_ring_write(ring, lower_32_bits(fence->seq));
	/* generate an interrupt */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
}

/**
 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (r6xx-SI).
 */
bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 s = emit_wait ? 0 : 1;
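	/* the s bit of the packet header selects signal (1) vs. wait (0) */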

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);

	return true;
}

/**
 * r600_dma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (r6xx-SI).
 * Returns 0 on success, error on failure.
 */
int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

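	/* unlike the ring test, the IB test still goes through the VRAM scratch page */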
	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}

/**
 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (r6xx-r7xx).
 */
void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

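	/* if writeback is enabled, publish the rptr expected after this submission
	 * (the 4-dword write packet, NOP padding, then the 3-dword IB packet)
	 * to the ring's next_rptr slot
	 */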
	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * r600_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the DMA engine (r6xx).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int r600_copy_dma(struct radeon_device *rdev,
		  uint64_t src_offset, uint64_t dst_offset,
		  unsigned num_gpu_pages,
		  struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_dw, cur_size_in_dw;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

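	/* a single DMA copy packet can move at most 0xFFFE dwords, so split larger copies */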
	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_semaphore_sync_to(sem, *fence);
	radeon_semaphore_sync_rings(rdev, sem, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_dw = size_in_dw;
		if (cur_size_in_dw > 0xFFFE)
			cur_size_in_dw = 0xFFFE;
		size_in_dw -= cur_size_in_dw;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, src_offset & 0xfffffffc);
		radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
					 (upper_32_bits(src_offset) & 0xff)));
		src_offset += cur_size_in_dw * 4;
		dst_offset += cur_size_in_dw * 4;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}