26,33 → 26,81 |
#include "radeon_reg.h" |
#include "radeon.h" |
|
unsigned int inline jiffies_to_msecs(const unsigned long j) |
#define RADEON_BENCHMARK_COPY_BLIT 1 |
#define RADEON_BENCHMARK_COPY_DMA 0 |
|
#define RADEON_BENCHMARK_ITERATIONS 1024 |
#define RADEON_BENCHMARK_COMMON_MODES_N 17 |
|
static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, |
uint64_t saddr, uint64_t daddr, |
int flag, int n) |
{ |
return (10 * j); |
}; |
unsigned long start_jiffies; |
unsigned long end_jiffies; |
struct radeon_fence *fence = NULL; |
int i, r; |
|
start_jiffies = jiffies; |
for (i = 0; i < n; i++) { |
switch (flag) { |
case RADEON_BENCHMARK_COPY_DMA: |
r = radeon_copy_dma(rdev, saddr, daddr, |
size / RADEON_GPU_PAGE_SIZE, |
&fence); |
break; |
case RADEON_BENCHMARK_COPY_BLIT: |
r = radeon_copy_blit(rdev, saddr, daddr, |
size / RADEON_GPU_PAGE_SIZE, |
&fence); |
break; |
default: |
DRM_ERROR("Unknown copy method\n"); |
r = -EINVAL; |
} |
if (r) |
goto exit_do_move; |
r = radeon_fence_wait(fence, false); |
if (r) |
goto exit_do_move; |
radeon_fence_unref(&fence); |
} |
end_jiffies = GetTimerTicks(); |
r = jiffies_to_msecs(end_jiffies - start_jiffies); |
|
void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize, |
exit_do_move: |
if (fence) |
radeon_fence_unref(&fence); |
return r; |
} |
|
|
/*
 * radeon_benchmark_log_results - emit one benchmark result line via DRM_INFO
 * @n:       number of buffer moves that were timed
 * @size:    size of each move in bytes (reported in kB, i.e. size >> 10)
 * @time:    elapsed time of the whole run in milliseconds
 * @sdomain: source domain value, printed as a number
 * @ddomain: destination domain value, printed as a number
 * @kind:    label for the copy method, printed at the start of the line
 *
 * Throughput is computed as total kB moved divided by elapsed ms and is
 * printed as-is under the "MB/s" label and multiplied by 8 under "Mb/s".
 * A run that took 0 ms is not logged, since no rate can be derived from it.
 */
static void radeon_benchmark_log_results(int n, unsigned size,
					 unsigned int time,
					 unsigned sdomain, unsigned ddomain,
					 const char *kind)
{
	unsigned int throughput;

	/* Guard the division below; a 0 ms run carries no rate information. */
	if (time == 0)
		return;

	throughput = (n * (size >> 10)) / time;
	DRM_INFO("radeon: %s %d bo moves of %u kB from"
		 " %u to %u in %u ms, throughput: %u Mb/s or %u MB/s\n",
		 kind, n, size >> 10, sdomain, ddomain, time,
		 throughput * 8, throughput);
}
|
static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, |
unsigned sdomain, unsigned ddomain) |
{ |
struct radeon_bo *dobj = NULL; |
struct radeon_bo *sobj = NULL; |
struct radeon_fence *fence = NULL; |
uint64_t saddr, daddr; |
unsigned long start_jiffies; |
unsigned long end_jiffies; |
unsigned long time; |
unsigned i, n, size; |
int r; |
int r, n; |
int time; |
|
|
ENTER(); |
|
size = bsize; |
n = 4; //1024; |
|
dbgprintf("source domain %x\n", sdomain); |
|
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj); |
n = RADEON_BENCHMARK_ITERATIONS; |
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, NULL, &sobj); |
if (r) { |
goto out_cleanup; |
} |
64,10 → 112,7 |
if (r) { |
goto out_cleanup; |
} |
|
dbgprintf("destination domain %x\n", ddomain); |
|
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, &dobj); |
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, NULL, &dobj); |
if (r) { |
goto out_cleanup; |
} |
82,75 → 127,27 |
dbgprintf("done\n"); |
|
/* r100 doesn't have dma engine so skip the test */ |
if (rdev->asic->copy_dma) { |
|
dbgprintf("copy dma\n"); |
|
start_jiffies = GetTimerTicks(); |
for (i = 0; i < n; i++) { |
r = radeon_fence_create(rdev, &fence); |
if (r) { |
/* also, VRAM-to-VRAM test doesn't make much sense for DMA */ |
/* skip it as well if domains are the same */ |
if ((rdev->asic->copy.dma) && (sdomain != ddomain)) { |
time = radeon_benchmark_do_move(rdev, size, saddr, daddr, |
RADEON_BENCHMARK_COPY_DMA, n); |
if (time < 0) |
goto out_cleanup; |
if (time > 0) |
radeon_benchmark_log_results(n, size, time, |
sdomain, ddomain, "dma"); |
} |
|
r = radeon_copy_dma(rdev, saddr, daddr, |
size / RADEON_GPU_PAGE_SIZE, fence); |
|
if (r) { |
time = radeon_benchmark_do_move(rdev, size, saddr, daddr, |
RADEON_BENCHMARK_COPY_BLIT, n); |
if (time < 0) |
goto out_cleanup; |
} |
} |
if (time > 0) |
radeon_benchmark_log_results(n, size, time, |
sdomain, ddomain, "blit"); |
|
r = radeon_fence_wait(fence, false); |
if (r) { |
goto out_cleanup; |
} |
radeon_fence_unref(&fence); |
|
end_jiffies = GetTimerTicks(); |
time = end_jiffies - start_jiffies; |
time = jiffies_to_msecs(time); |
if (time > 0) { |
i = ((n * size) >> 10) / time; |
printk(KERN_INFO "radeon: dma %u bo moves of %ukb from" |
" %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n", |
n, size >> 10, |
sdomain, ddomain, time, |
i, i * 1000, (i * 1000) / 1024); |
} |
} |
|
start_jiffies = GetTimerTicks(); |
for (i = 0; i < n; i++) { |
r = radeon_fence_create(rdev, &fence); |
if (r) { |
goto out_cleanup; |
} |
r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence); |
if (r) { |
goto out_cleanup; |
} |
} |
|
r = radeon_fence_wait(fence, false); |
if (r) { |
goto out_cleanup; |
} |
radeon_fence_unref(&fence); |
|
end_jiffies = GetTimerTicks(); |
time = end_jiffies - start_jiffies; |
time = jiffies_to_msecs(time); |
if (time > 0) { |
i = ((n * size) >> 10) / time; |
printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d" |
" in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10, |
sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024); |
} |
out_cleanup: |
|
dbgprintf("cleanup\n"); |
|
if (sobj) { |
r = radeon_bo_reserve(sobj, false); |
if (likely(r == 0)) { |
167,11 → 164,9 |
} |
radeon_bo_unref(&dobj); |
} |
if (fence) { |
radeon_fence_unref(&fence); |
} |
|
if (r) { |
printk(KERN_WARNING "Error while benchmarking BO move.\n"); |
DRM_ERROR("Error while benchmarking BO move.\n"); |
} |
|
LEAVE(); |
178,12 → 173,86 |
|
} |
|
void radeon_benchmark(struct radeon_device *rdev) |
void radeon_benchmark(struct radeon_device *rdev, int test_number) |
{ |
radeon_benchmark_move(rdev, 4096*4096, RADEON_GEM_DOMAIN_GTT, |
int i; |
int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = { |
640 * 480 * 4, |
720 * 480 * 4, |
800 * 600 * 4, |
848 * 480 * 4, |
1024 * 768 * 4, |
1152 * 768 * 4, |
1280 * 720 * 4, |
1280 * 800 * 4, |
1280 * 854 * 4, |
1280 * 960 * 4, |
1280 * 1024 * 4, |
1440 * 900 * 4, |
1400 * 1050 * 4, |
1680 * 1050 * 4, |
1600 * 1200 * 4, |
1920 * 1080 * 4, |
1920 * 1200 * 4 |
}; |
|
switch (test_number) { |
case 1: |
/* simple test, VRAM to GTT and GTT to VRAM */ |
radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT, |
RADEON_GEM_DOMAIN_VRAM); |
radeon_benchmark_move(rdev, 4096*4096, RADEON_GEM_DOMAIN_VRAM, |
radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, |
RADEON_GEM_DOMAIN_GTT); |
radeon_benchmark_move(rdev, 4096*4096, RADEON_GEM_DOMAIN_VRAM, |
break; |
case 2: |
/* simple test, VRAM to VRAM */ |
radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, |
RADEON_GEM_DOMAIN_VRAM); |
break; |
case 3: |
/* GTT to VRAM, buffer size sweep, powers of 2 */ |
for (i = 1; i <= 16384; i <<= 1) |
radeon_benchmark_move(rdev, i * RADEON_GPU_PAGE_SIZE, |
RADEON_GEM_DOMAIN_GTT, |
RADEON_GEM_DOMAIN_VRAM); |
break; |
case 4: |
/* VRAM to GTT, buffer size sweep, powers of 2 */ |
for (i = 1; i <= 16384; i <<= 1) |
radeon_benchmark_move(rdev, i * RADEON_GPU_PAGE_SIZE, |
RADEON_GEM_DOMAIN_VRAM, |
RADEON_GEM_DOMAIN_GTT); |
break; |
case 5: |
/* VRAM to VRAM, buffer size sweep, powers of 2 */ |
for (i = 1; i <= 16384; i <<= 1) |
radeon_benchmark_move(rdev, i * RADEON_GPU_PAGE_SIZE, |
RADEON_GEM_DOMAIN_VRAM, |
RADEON_GEM_DOMAIN_VRAM); |
break; |
case 6: |
/* GTT to VRAM, buffer size sweep, common modes */ |
for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) |
radeon_benchmark_move(rdev, common_modes[i], |
RADEON_GEM_DOMAIN_GTT, |
RADEON_GEM_DOMAIN_VRAM); |
break; |
case 7: |
/* VRAM to GTT, buffer size sweep, common modes */ |
for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) |
radeon_benchmark_move(rdev, common_modes[i], |
RADEON_GEM_DOMAIN_VRAM, |
RADEON_GEM_DOMAIN_GTT); |
break; |
case 8: |
/* VRAM to VRAM, buffer size sweep, common modes */ |
for (i = 0; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) |
radeon_benchmark_move(rdev, common_modes[i], |
RADEON_GEM_DOMAIN_VRAM, |
RADEON_GEM_DOMAIN_VRAM); |
break; |
|
default: |
DRM_ERROR("Unknown benchmark\n"); |
} |
} |