Rev 2007 | Rev 3031 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 2007 | Rev 2997 | ||
---|---|---|---|
Line 24... | Line 24... | ||
24 | #include |
24 | #include |
25 | #include |
25 | #include |
26 | #include "radeon_reg.h" |
26 | #include "radeon_reg.h" |
27 | #include "radeon.h" |
27 | #include "radeon.h" |
Line -... | Line 28... | ||
- | 28 | ||
- | 29 | #define RADEON_BENCHMARK_COPY_BLIT 1 |
|
- | 30 | #define RADEON_BENCHMARK_COPY_DMA 0 |
|
- | 31 | ||
- | 32 | #define RADEON_BENCHMARK_ITERATIONS 1024 |
|
- | 33 | #define RADEON_BENCHMARK_COMMON_MODES_N 17 |
|
28 | 34 | ||
- | 35 | static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, |
|
- | 36 | uint64_t saddr, uint64_t daddr, |
|
29 | unsigned int inline jiffies_to_msecs(const unsigned long j) |
37 | int flag, int n) |
- | 38 | { |
|
30 | { |
39 | unsigned long start_jiffies; |
- | 40 | unsigned long end_jiffies; |
|
31 | return (10 * j); |
41 | struct radeon_fence *fence = NULL; |
Line -... | Line 42... | ||
- | 42 | int i, r; |
|
- | 43 | ||
- | 44 | start_jiffies = jiffies; |
|
- | 45 | for (i = 0; i < n; i++) { |
|
- | 46 | switch (flag) { |
|
- | 47 | case RADEON_BENCHMARK_COPY_DMA: |
|
- | 48 | r = radeon_copy_dma(rdev, saddr, daddr, |
|
- | 49 | size / RADEON_GPU_PAGE_SIZE, |
|
- | 50 | &fence); |
|
- | 51 | break; |
|
- | 52 | case RADEON_BENCHMARK_COPY_BLIT: |
|
- | 53 | r = radeon_copy_blit(rdev, saddr, daddr, |
|
- | 54 | size / RADEON_GPU_PAGE_SIZE, |
|
- | 55 | &fence); |
|
- | 56 | break; |
|
- | 57 | default: |
|
- | 58 | DRM_ERROR("Unknown copy method\n"); |
|
- | 59 | r = -EINVAL; |
|
- | 60 | } |
|
- | 61 | if (r) |
|
- | 62 | goto exit_do_move; |
|
- | 63 | r = radeon_fence_wait(fence, false); |
|
- | 64 | if (r) |
|
- | 65 | goto exit_do_move; |
|
- | 66 | radeon_fence_unref(&fence); |
|
- | 67 | } |
|
- | 68 | end_jiffies = GetTimerTicks(); |
|
- | 69 | r = jiffies_to_msecs(end_jiffies - start_jiffies); |
|
- | 70 | ||
- | 71 | exit_do_move: |
|
- | 72 | if (fence) |
|
- | 73 | radeon_fence_unref(&fence); |
|
Line -... | Line 74... | ||
- | 74 | return r; |
|
- | 75 | } |
|
- | 76 | ||
- | 77 | ||
- | 78 | static void radeon_benchmark_log_results(int n, unsigned size, |
|
- | 79 | unsigned int time, |
|
- | 80 | unsigned sdomain, unsigned ddomain, |
|
- | 81 | char *kind) |
|
- | 82 | { |
|
- | 83 | unsigned int throughput = (n * (size >> 10)) / time; |
|
- | 84 | DRM_INFO("radeon: %s %u bo moves of %u kB from" |
|
- | 85 | " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n", |
|
- | 86 | kind, n, size >> 10, sdomain, ddomain, time, |
|
32 | }; |
87 | throughput * 8, throughput); |
33 | 88 | } |
|
34 | 89 | ||
35 | void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize, |
90 | static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, |
36 | unsigned sdomain, unsigned ddomain) |
91 | unsigned sdomain, unsigned ddomain) |
37 | { |
- | |
38 | struct radeon_bo *dobj = NULL; |
92 | { |
39 | struct radeon_bo *sobj = NULL; |
- | |
40 | struct radeon_fence *fence = NULL; |
- | |
41 | uint64_t saddr, daddr; |
- | |
42 | unsigned long start_jiffies; |
93 | struct radeon_bo *dobj = NULL; |
43 | unsigned long end_jiffies; |
94 | struct radeon_bo *sobj = NULL; |
44 | unsigned long time; |
- | |
45 | unsigned i, n, size; |
- | |
Line 46... | Line -... | ||
46 | int r; |
- | |
47 | - | ||
Line 48... | Line 95... | ||
48 | ENTER(); |
95 | uint64_t saddr, daddr; |
Line -... | Line 96... | ||
- | 96 | int r, n; |
|
49 | 97 | int time; |
|
50 | size = bsize; |
98 | |
51 | n = 4; //1024; |
99 | |
52 | 100 | ENTER(); |
|
53 | dbgprintf("source domain %x\n", sdomain); |
101 | |
54 | 102 | n = RADEON_BENCHMARK_ITERATIONS; |
|
Line 62... | Line 110... | ||
62 | r = radeon_bo_pin(sobj, sdomain, &saddr); |
110 | r = radeon_bo_pin(sobj, sdomain, &saddr); |
63 | // radeon_bo_unreserve(sobj); |
111 | // radeon_bo_unreserve(sobj); |
64 | if (r) { |
112 | if (r) { |
65 | goto out_cleanup; |
113 | goto out_cleanup; |
66 | } |
114 | } |
67 | - | ||
68 | dbgprintf("destination domain %x\n", ddomain); |
- | |
69 | - | ||
70 | r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, &dobj); |
115 | r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, NULL, &dobj); |
71 | if (r) { |
116 | if (r) { |
72 | goto out_cleanup; |
117 | goto out_cleanup; |
73 | } |
118 | } |
74 | r = radeon_bo_reserve(dobj, false); |
119 | r = radeon_bo_reserve(dobj, false); |
75 | if (unlikely(r != 0)) |
120 | if (unlikely(r != 0)) |
Line 80... | Line 125... | ||
80 | goto out_cleanup; |
125 | goto out_cleanup; |
81 | } |
126 | } |
82 | dbgprintf("done\n"); |
127 | dbgprintf("done\n"); |
Line 83... | Line 128... | ||
83 | 128 | ||
- | 129 | /* r100 doesn't have dma engine so skip the test */ |
|
- | 130 | /* also, VRAM-to-VRAM test doesn't make much sense for DMA */ |
|
84 | /* r100 doesn't have dma engine so skip the test */ |
131 | /* skip it as well if domains are the same */ |
- | 132 | if ((rdev->asic->copy.dma) && (sdomain != ddomain)) { |
|
- | 133 | time = radeon_benchmark_do_move(rdev, size, saddr, daddr, |
|
- | 134 | RADEON_BENCHMARK_COPY_DMA, n); |
|
- | 135 | if (time < 0) |
|
- | 136 | goto out_cleanup; |
|
- | 137 | if (time > 0) |
|
- | 138 | radeon_benchmark_log_results(n, size, time, |
|
- | 139 | sdomain, ddomain, "dma"); |
|
85 | if (rdev->asic->copy_dma) { |
140 | } |
- | 141 | ||
- | 142 | time = radeon_benchmark_do_move(rdev, size, saddr, daddr, |
|
- | 143 | RADEON_BENCHMARK_COPY_BLIT, n); |
|
- | 144 | if (time < 0) |
|
- | 145 | goto out_cleanup; |
|
- | 146 | if (time > 0) |
|
86 | 147 | radeon_benchmark_log_results(n, size, time, |
|
Line 87... | Line -... | ||
87 | dbgprintf("copy dma\n"); |
- | |
88 | - | ||
89 | start_jiffies = GetTimerTicks(); |
- | |
90 | for (i = 0; i < n; i++) { |
- | |
91 | r = radeon_fence_create(rdev, &fence); |
- | |
92 | if (r) { |
- | |
93 | goto out_cleanup; |
- | |
94 | } |
- | |
95 | - | ||
96 | r = radeon_copy_dma(rdev, saddr, daddr, |
- | |
97 | size / RADEON_GPU_PAGE_SIZE, fence); |
- | |
98 | - | ||
99 | if (r) { |
- | |
100 | goto out_cleanup; |
- | |
101 | } |
- | |
102 | } |
- | |
103 | - | ||
104 | r = radeon_fence_wait(fence, false); |
- | |
105 | if (r) { |
- | |
106 | goto out_cleanup; |
- | |
107 | } |
- | |
108 | radeon_fence_unref(&fence); |
- | |
109 | - | ||
110 | end_jiffies = GetTimerTicks(); |
- | |
111 | time = end_jiffies - start_jiffies; |
- | |
112 | time = jiffies_to_msecs(time); |
- | |
113 | if (time > 0) { |
- | |
114 | i = ((n * size) >> 10) / time; |
- | |
115 | printk(KERN_INFO "radeon: dma %u bo moves of %ukb from" |
- | |
116 | " %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n", |
- | |
117 | n, size >> 10, |
- | |
118 | sdomain, ddomain, time, |
- | |
119 | i, i * 1000, (i * 1000) / 1024); |
- | |
120 | } |
- | |
121 | } |
- | |
122 | - | ||
123 | start_jiffies = GetTimerTicks(); |
- | |
124 | for (i = 0; i < n; i++) { |
- | |
125 | r = radeon_fence_create(rdev, &fence); |
- | |
126 | if (r) { |
- | |
127 | goto out_cleanup; |
- | |
128 | } |
- | |
129 | r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence); |
- | |
130 | if (r) { |
- | |
131 | goto out_cleanup; |
- | |
132 | } |
- | |
133 | } |
- | |
134 | - | ||
135 | r = radeon_fence_wait(fence, false); |
- | |
136 | if (r) { |
- | |
137 | goto out_cleanup; |
- | |
138 | } |
- | |
139 | radeon_fence_unref(&fence); |
- | |
140 | - | ||
141 | end_jiffies = GetTimerTicks(); |
- | |
142 | time = end_jiffies - start_jiffies; |
- | |
143 | time = jiffies_to_msecs(time); |
- | |
144 | if (time > 0) { |
- | |
145 | i = ((n * size) >> 10) / time; |
- | |
146 | printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d" |
- | |
147 | " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10, |
- | |
148 | sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024); |
148 | sdomain, ddomain, "blit"); |
149 | } |
- | |
150 | out_cleanup: |
- | |
151 | - | ||
152 | dbgprintf("cleanup\n"); |
149 | |
153 | 150 | out_cleanup: |
|
154 | if (sobj) { |
151 | if (sobj) { |
155 | r = radeon_bo_reserve(sobj, false); |
152 | r = radeon_bo_reserve(sobj, false); |
156 | if (likely(r == 0)) { |
153 | if (likely(r == 0)) { |
Line 165... | Line 162... | ||
165 | radeon_bo_unpin(dobj); |
162 | radeon_bo_unpin(dobj); |
166 | radeon_bo_unreserve(dobj); |
163 | radeon_bo_unreserve(dobj); |
167 | } |
164 | } |
168 | radeon_bo_unref(&dobj); |
165 | radeon_bo_unref(&dobj); |
169 | } |
166 | } |
170 | if (fence) { |
- | |
171 | radeon_fence_unref(&fence); |
- | |
172 | } |
167 | |
173 | if (r) { |
168 | if (r) { |
174 | printk(KERN_WARNING "Error while benchmarking BO move.\n"); |
169 | DRM_ERROR("Error while benchmarking BO move.\n"); |
175 | } |
170 | } |
Line 176... | Line 171... | ||
176 | 171 | ||
Line 177... | Line 172... | ||
177 | LEAVE(); |
172 | LEAVE(); |
Line 178... | Line 173... | ||
178 | 173 | ||
179 | } |
174 | } |
- | 175 | ||
- | 176 | void radeon_benchmark(struct radeon_device *rdev, int test_number) |
|
- | 177 | { |
|
- | 178 | int i; |
|
- | 179 | int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = { |
|
- | 180 | 640 * 480 * 4, |
|
- | 181 | 720 * 480 * 4, |
|
- | 182 | 800 * 600 * 4, |
|
- | 183 | 848 * 480 * 4, |
|
- | 184 | 1024 * 768 * 4, |
|
- | 185 | 1152 * 768 * 4, |
|
- | 186 | 1280 * 720 * 4, |
|
- | 187 | 1280 * 800 * 4, |
|
- | 188 | 1280 * 854 * 4, |
|
- | 189 | 1280 * 960 * 4, |
|
- | 190 | 1280 * 1024 * 4, |
|
- | 191 | 1440 * 900 * 4, |
|
- | 192 | 1400 * 1050 * 4, |
|
- | 193 | 1680 * 1050 * 4, |
|
- | 194 | 1600 * 1200 * 4, |
|
- | 195 | 1920 * 1080 * 4, |
|
- | 196 | 1920 * 1200 * 4 |
|
- | 197 | }; |
|
- | 198 | ||
180 | 199 | switch (test_number) { |
|
- | 200 | case 1: |
|
- | 201 | /* simple test, VRAM to GTT and GTT to VRAM */ |
|
- | 202 | radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT, |
|
- | 203 | RADEON_GEM_DOMAIN_VRAM); |
|
- | 204 | radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, |
|
- | 205 | RADEON_GEM_DOMAIN_GTT); |
|
- | 206 | break; |
|
- | 207 | case 2: |
|
- | 208 | /* simple test, VRAM to VRAM */ |
|
- | 209 | radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, |
|
- | 210 | RADEON_GEM_DOMAIN_VRAM); |
|
- | 211 | break; |
|
- | 212 | case 3: |
|
- | 213 | /* GTT to VRAM, buffer size sweep, powers of 2 */ |
|
181 | void radeon_benchmark(struct radeon_device *rdev) |
214 | for (i = 1; i <= 16384; i <<= 1) |
- | 215 | radeon_benchmark_move(rdev, i * RADEON_GPU_PAGE_SIZE, |
|
- | 216 | RADEON_GEM_DOMAIN_GTT, |
|
- | 217 | RADEON_GEM_DOMAIN_VRAM); |
|
- | 218 | break; |
|
182 | { |
219 | case 4: |
- | 220 | /* VRAM to GTT, buffer size sweep, powers of 2 */ |
|
183 | radeon_benchmark_move(rdev, 4096*4096, RADEON_GEM_DOMAIN_GTT, |
221 | for (i = 1; i <= 16384; i <<= 1) |
- | 222 | radeon_benchmark_move(rdev, i * RADEON_GPU_PAGE_SIZE, |
|
- | 223 | RADEON_GEM_DOMAIN_VRAM, |
|
- | 224 | RADEON_GEM_DOMAIN_GTT); |
|
- | 225 | break; |
|
184 | RADEON_GEM_DOMAIN_VRAM); |
226 | case 5: |
- | 227 | /* VRAM to VRAM, buffer size sweep, powers of 2 */ |
|
185 | radeon_benchmark_move(rdev, 4096*4096, RADEON_GEM_DOMAIN_VRAM, |
228 | for (i = 1; i <= 16384; i <<= 1) |
- | 229 | radeon_benchmark_move(rdev, i * RADEON_GPU_PAGE_SIZE, |
|
- | 230 | RADEON_GEM_DOMAIN_VRAM, |
|
- | 231 | RADEON_GEM_DOMAIN_VRAM); |
|
- | 232 | break; |
|
- | 233 | case 6: |
|
- | 234 | /* GTT to VRAM, buffer size sweep, common modes */ |
|
- | 235 | for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) |
|
- | 236 | radeon_benchmark_move(rdev, common_modes[i], |
|
- | 237 | RADEON_GEM_DOMAIN_GTT, |
|
- | 238 | RADEON_GEM_DOMAIN_VRAM); |
|
- | 239 | break; |
|
- | 240 | case 7: |
|
- | 241 | /* VRAM to GTT, buffer size sweep, common modes */ |
|
- | 242 | for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) |
|
- | 243 | radeon_benchmark_move(rdev, common_modes[i], |
|
- | 244 | RADEON_GEM_DOMAIN_VRAM, |
|
- | 245 | RADEON_GEM_DOMAIN_GTT); |
|
- | 246 | break; |
|
- | 247 | case 8: |
|
- | 248 | /* VRAM to VRAM, buffer size sweep, common modes */ |
|
- | 249 | for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) |
|
- | 250 | radeon_benchmark_move(rdev, common_modes[i], |
|
- | 251 | RADEON_GEM_DOMAIN_VRAM, |
|
- | 252 | RADEON_GEM_DOMAIN_VRAM); |
|
- | 253 | break; |
|
- | 254 | ||
186 | RADEON_GEM_DOMAIN_GTT); |
255 | default: |