0,0 → 1,559 |
/* |
* Copyright (c) 2011 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
|
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
|
#include "sna.h" |
#include "sna_reg.h" |
|
#define DBG_NO_HW 0 |
#define DBG_NO_TILING 1 |
#define DBG_NO_CACHE 0 |
#define DBG_NO_CACHE_LEVEL 0 |
#define DBG_NO_CPU 0 |
#define DBG_NO_USERPTR 0 |
#define DBG_NO_LLC 0 |
#define DBG_NO_SEMAPHORES 0 |
#define DBG_NO_MADV 0 |
#define DBG_NO_UPLOAD_CACHE 0 |
#define DBG_NO_UPLOAD_ACTIVE 0 |
#define DBG_NO_MAP_UPLOAD 0 |
#define DBG_NO_RELAXED_FENCING 0 |
#define DBG_NO_SECURE_BATCHES 0 |
#define DBG_NO_PINNED_BATCHES 0 |
#define DBG_NO_FAST_RELOC 0 |
#define DBG_NO_HANDLE_LUT 0 |
#define DBG_DUMP 0 |
|
#define MAX_GTT_VMA_CACHE 512 |
#define MAX_CPU_VMA_CACHE INT16_MAX |
#define MAP_PRESERVE_TIME 10 |
|
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3)) |
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) |
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3)) |
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2) |
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3) |
|
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring))) |
|
#define LOCAL_I915_PARAM_HAS_BLT 11 |
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12 |
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15 |
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 |
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 |
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 |
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25 |
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 |
|
|
|
static int gem_param(struct kgem *kgem, int name) |
{ |
ioctl_t io; |
|
drm_i915_getparam_t gp; |
int v = -1; /* No param uses the sign bit, reserve it for errors */ |
|
VG_CLEAR(gp); |
gp.param = name; |
gp.value = &v; |
|
io.handle = kgem->fd; |
io.io_code = SRV_GET_PARAM; |
io.input = &gp; |
io.inp_size = sizeof(gp); |
io.output = NULL; |
io.out_size = 0; |
|
if (call_service(&io)!=0) |
return -1; |
|
VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v))); |
return v; |
} |
|
static bool test_has_no_reloc(struct kgem *kgem) |
{ |
if (DBG_NO_FAST_RELOC) |
return false; |
|
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; |
} |
|
static bool test_has_handle_lut(struct kgem *kgem) |
{ |
if (DBG_NO_HANDLE_LUT) |
return false; |
|
return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; |
} |
|
static bool test_has_semaphores_enabled(struct kgem *kgem) |
{ |
FILE *file; |
bool detected = false; |
int ret; |
|
if (DBG_NO_SEMAPHORES) |
return false; |
|
ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES); |
if (ret != -1) |
return ret > 0; |
|
return detected; |
} |
|
|
static bool test_has_relaxed_fencing(struct kgem *kgem) |
{ |
if (kgem->gen < 040) { |
if (DBG_NO_RELAXED_FENCING) |
return false; |
|
return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0; |
} else |
return true; |
} |
|
static bool test_has_llc(struct kgem *kgem) |
{ |
int has_llc = -1; |
|
if (DBG_NO_LLC) |
return false; |
|
#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */ |
has_llc = gem_param(kgem, I915_PARAM_HAS_LLC); |
#endif |
if (has_llc == -1) { |
DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__)); |
has_llc = kgem->gen >= 060; |
} |
|
return has_llc; |
} |
|
static bool test_has_cacheing(struct kgem *kgem) |
{ |
uint32_t handle; |
bool ret = false; |
|
if (DBG_NO_CACHE_LEVEL) |
return false; |
|
/* Incoherent blt and sampler hangs the GPU */ |
if (kgem->gen == 040) |
return false; |
|
// handle = gem_create(kgem->fd, 1); |
// if (handle == 0) |
// return false; |
|
// ret = gem_set_cacheing(kgem->fd, handle, UNCACHED); |
// gem_close(kgem->fd, handle); |
return ret; |
} |
|
static bool test_has_userptr(struct kgem *kgem) |
{ |
#if defined(USE_USERPTR) |
uint32_t handle; |
void *ptr; |
|
if (DBG_NO_USERPTR) |
return false; |
|
/* Incoherent blt and sampler hangs the GPU */ |
if (kgem->gen == 040) |
return false; |
|
ptr = malloc(PAGE_SIZE); |
handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); |
gem_close(kgem->fd, handle); |
free(ptr); |
|
return handle != 0; |
#else |
return false; |
#endif |
} |
|
static bool test_has_secure_batches(struct kgem *kgem) |
{ |
if (DBG_NO_SECURE_BATCHES) |
return false; |
|
return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; |
} |
|
static bool test_has_pinned_batches(struct kgem *kgem) |
{ |
if (DBG_NO_PINNED_BATCHES) |
return false; |
|
return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; |
} |
|
|
|
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) |
{ |
struct drm_i915_gem_get_aperture aperture; |
size_t totalram; |
unsigned half_gpu_max; |
unsigned int i, j; |
|
DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen)); |
|
memset(kgem, 0, sizeof(*kgem)); |
|
kgem->fd = fd; |
kgem->gen = gen; |
|
list_init(&kgem->requests[0]); |
list_init(&kgem->requests[1]); |
list_init(&kgem->batch_buffers); |
list_init(&kgem->active_buffers); |
list_init(&kgem->flushing); |
list_init(&kgem->large); |
list_init(&kgem->large_inactive); |
list_init(&kgem->snoop); |
list_init(&kgem->scanout); |
for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++) |
list_init(&kgem->pinned_batches[i]); |
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) |
list_init(&kgem->inactive[i]); |
for (i = 0; i < ARRAY_SIZE(kgem->active); i++) { |
for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++) |
list_init(&kgem->active[i][j]); |
} |
for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) { |
for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++) |
list_init(&kgem->vma[i].inactive[j]); |
} |
|
kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE; |
kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE; |
|
kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0; |
DBG(("%s: has BLT ring? %d\n", __FUNCTION__, |
kgem->has_blt)); |
|
kgem->has_relaxed_delta = |
gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0; |
DBG(("%s: has relaxed delta? %d\n", __FUNCTION__, |
kgem->has_relaxed_delta)); |
|
|
kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem); |
DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__, |
kgem->has_relaxed_fencing)); |
|
kgem->has_llc = test_has_llc(kgem); |
DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__, |
kgem->has_llc)); |
|
kgem->has_cacheing = test_has_cacheing(kgem); |
DBG(("%s: has set-cache-level? %d\n", __FUNCTION__, |
kgem->has_cacheing)); |
|
kgem->has_userptr = test_has_userptr(kgem); |
DBG(("%s: has userptr? %d\n", __FUNCTION__, |
kgem->has_userptr)); |
|
kgem->has_no_reloc = test_has_no_reloc(kgem); |
DBG(("%s: has no-reloc? %d\n", __FUNCTION__, |
kgem->has_no_reloc)); |
|
kgem->has_handle_lut = test_has_handle_lut(kgem); |
DBG(("%s: has handle-lut? %d\n", __FUNCTION__, |
kgem->has_handle_lut)); |
|
kgem->has_semaphores = false; |
if (kgem->has_blt && test_has_semaphores_enabled(kgem)) |
kgem->has_semaphores = true; |
DBG(("%s: semaphores enabled? %d\n", __FUNCTION__, |
kgem->has_semaphores)); |
|
kgem->can_blt_cpu = gen >= 030; |
DBG(("%s: can blt to cpu? %d\n", __FUNCTION__, |
kgem->can_blt_cpu)); |
|
kgem->has_secure_batches = test_has_secure_batches(kgem); |
DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__, |
kgem->has_secure_batches)); |
|
kgem->has_pinned_batches = test_has_pinned_batches(kgem); |
DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__, |
kgem->has_pinned_batches)); |
|
#if 0 |
|
if (!is_hw_supported(kgem, dev)) { |
xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, |
"Detected unsupported/dysfunctional hardware, disabling acceleration.\n"); |
kgem->wedged = 1; |
} else if (__kgem_throttle(kgem)) { |
xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, |
"Detected a hung GPU, disabling acceleration.\n"); |
kgem->wedged = 1; |
} |
|
kgem->batch_size = ARRAY_SIZE(kgem->batch); |
if (gen == 020 && !kgem->has_pinned_batches) |
/* Limited to what we can pin */ |
kgem->batch_size = 4*1024; |
if (gen == 022) |
/* 865g cannot handle a batch spanning multiple pages */ |
kgem->batch_size = PAGE_SIZE / sizeof(uint32_t); |
if ((gen >> 3) == 7) |
kgem->batch_size = 16*1024; |
if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024) |
kgem->batch_size = 4*1024; |
|
if (!kgem_init_pinned_batches(kgem) && gen == 020) { |
xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING, |
"Unable to reserve memory for GPU, disabling acceleration.\n"); |
kgem->wedged = 1; |
} |
|
DBG(("%s: maximum batch size? %d\n", __FUNCTION__, |
kgem->batch_size)); |
|
kgem->min_alignment = 4; |
if (gen < 040) |
kgem->min_alignment = 64; |
|
kgem->half_cpu_cache_pages = cpu_cache_size() >> 13; |
DBG(("%s: half cpu cache %d pages\n", __FUNCTION__, |
kgem->half_cpu_cache_pages)); |
|
kgem->next_request = __kgem_request_alloc(kgem); |
|
DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__, |
!DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing), |
kgem->has_llc, kgem->has_cacheing, kgem->has_userptr)); |
|
VG_CLEAR(aperture); |
aperture.aper_size = 0; |
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); |
if (aperture.aper_size == 0) |
aperture.aper_size = 64*1024*1024; |
|
DBG(("%s: aperture size %lld, available now %lld\n", |
__FUNCTION__, |
(long long)aperture.aper_size, |
(long long)aperture.aper_available_size)); |
|
kgem->aperture_total = aperture.aper_size; |
kgem->aperture_high = aperture.aper_size * 3/4; |
kgem->aperture_low = aperture.aper_size * 1/3; |
if (gen < 033) { |
/* Severe alignment penalties */ |
kgem->aperture_high /= 2; |
kgem->aperture_low /= 2; |
} |
DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__, |
kgem->aperture_low, kgem->aperture_low / (1024*1024), |
kgem->aperture_high, kgem->aperture_high / (1024*1024))); |
|
kgem->aperture_mappable = agp_aperture_size(dev, gen); |
if (kgem->aperture_mappable == 0 || |
kgem->aperture_mappable > aperture.aper_size) |
kgem->aperture_mappable = aperture.aper_size; |
DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__, |
kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024))); |
|
kgem->buffer_size = 64 * 1024; |
while (kgem->buffer_size < kgem->aperture_mappable >> 10) |
kgem->buffer_size *= 2; |
if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages) |
kgem->buffer_size = kgem->half_cpu_cache_pages << 12; |
DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__, |
kgem->buffer_size, kgem->buffer_size / 1024)); |
|
kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10; |
kgem->max_gpu_size = kgem->max_object_size; |
if (!kgem->has_llc) |
kgem->max_gpu_size = MAX_CACHE_SIZE; |
|
totalram = total_ram_size(); |
if (totalram == 0) { |
DBG(("%s: total ram size unknown, assuming maximum of total aperture\n", |
__FUNCTION__)); |
totalram = kgem->aperture_total; |
} |
DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram)); |
if (kgem->max_object_size > totalram / 2) |
kgem->max_object_size = totalram / 2; |
if (kgem->max_gpu_size > totalram / 4) |
kgem->max_gpu_size = totalram / 4; |
|
kgem->max_cpu_size = kgem->max_object_size; |
|
half_gpu_max = kgem->max_gpu_size / 2; |
kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2; |
if (kgem->max_copy_tile_size > half_gpu_max) |
kgem->max_copy_tile_size = half_gpu_max; |
|
if (kgem->has_llc) |
kgem->max_upload_tile_size = kgem->max_copy_tile_size; |
else |
kgem->max_upload_tile_size = kgem->aperture_mappable / 4; |
if (kgem->max_upload_tile_size > half_gpu_max) |
kgem->max_upload_tile_size = half_gpu_max; |
|
kgem->large_object_size = MAX_CACHE_SIZE; |
if (kgem->large_object_size > kgem->max_gpu_size) |
kgem->large_object_size = kgem->max_gpu_size; |
|
if (kgem->has_llc | kgem->has_cacheing | kgem->has_userptr) { |
if (kgem->large_object_size > kgem->max_cpu_size) |
kgem->large_object_size = kgem->max_cpu_size; |
} else |
kgem->max_cpu_size = 0; |
if (DBG_NO_CPU) |
kgem->max_cpu_size = 0; |
|
DBG(("%s: maximum object size=%d\n", |
__FUNCTION__, kgem->max_object_size)); |
DBG(("%s: large object thresold=%d\n", |
__FUNCTION__, kgem->large_object_size)); |
DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n", |
__FUNCTION__, |
kgem->max_gpu_size, kgem->max_cpu_size, |
kgem->max_upload_tile_size, kgem->max_copy_tile_size)); |
|
/* Convert the aperture thresholds to pages */ |
kgem->aperture_low /= PAGE_SIZE; |
kgem->aperture_high /= PAGE_SIZE; |
|
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2; |
if ((int)kgem->fence_max < 0) |
kgem->fence_max = 5; /* minimum safe value for all hw */ |
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max)); |
|
kgem->batch_flags_base = 0; |
if (kgem->has_no_reloc) |
kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC; |
if (kgem->has_handle_lut) |
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT; |
if (kgem->has_pinned_batches) |
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED; |
|
#endif |
|
} |
|
|
|
void kgem_clear_dirty(struct kgem *kgem) |
{ |
struct list * const buffers = &kgem->next_request->buffers; |
struct kgem_bo *bo; |
|
list_for_each_entry(bo, buffers, request) { |
if (!bo->dirty) |
break; |
|
bo->dirty = false; |
} |
} |
|
|
|
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) |
{ |
struct kgem_bo_binding *b; |
|
for (b = &bo->binding; b && b->offset; b = b->next) |
if (format == b->format) |
return b->offset; |
|
return 0; |
} |
|
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset) |
{ |
struct kgem_bo_binding *b; |
|
for (b = &bo->binding; b; b = b->next) { |
if (b->offset) |
continue; |
|
b->offset = offset; |
b->format = format; |
|
if (b->next) |
b->next->offset = 0; |
|
return; |
} |
|
b = malloc(sizeof(*b)); |
if (b) { |
b->next = bo->binding.next; |
b->format = format; |
b->offset = offset; |
bo->binding.next = b; |
} |
} |
|
uint32_t kgem_add_reloc(struct kgem *kgem, |
uint32_t pos, |
struct kgem_bo *bo, |
uint32_t read_write_domain, |
uint32_t delta) |
{ |
return 0; |
} |
|
void kgem_reset(struct kgem *kgem) |
{ |
|
}; |
|
void _kgem_submit(struct kgem *kgem) |
{ |
}; |
|
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) |
{ |
struct kgem_bo *bo = NULL; |
|
return bo; |
}; |
|
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) |
{ |
|
|
}; |