/drivers/video/Intel-2D/gen6_render.c |
---|
1149,6 → 1149,9 |
int id = 1 << GEN6_VERTEX(op->u.gen6.flags); |
int ndwords; |
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) |
return true; |
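/* If we had to wait for another user of the vbo and a primitive is already open, the state has been emitted already. */ |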
ndwords = op->need_magic_ca_pass ? 60 : 6; |
if ((sna->render.vb_id & id) == 0) |
ndwords += 5; |
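/* The extra 5 dwords account for rebinding the vertex buffer (one 3DSTATE_VERTEX_BUFFERS packet on gen6) when it is not yet referenced by this batch. */ |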
1165,6 → 1168,12 |
static int gen6_get_rectangles__flush(struct sna *sna, |
const struct sna_composite_op *op) |
{ |
/* Prevent discarding the new vbo after lock contention */ |
if (sna_vertex_wait__locked(&sna->render)) { |
int rem = vertex_space(sna); |
if (rem > op->floats_per_rect) |
return rem; |
} |
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5)) |
return 0; |
1218,7 → 1227,7 |
gen4_vertex_flush(sna); |
gen6_magic_ca_pass(sna, op); |
} |
// sna_vertex_wait__locked(&sna->render); |
sna_vertex_wait__locked(&sna->render); |
_kgem_submit(&sna->kgem); |
emit_state(sna, op); |
goto start; |
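/* Out of batch space: flush the pending vertices (and CA pass), submit, then re-emit state and retry from the top. */ |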
2014,8 → 2023,18 |
} |
tmp->done = gen6_render_composite_done; |
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); |
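/* Common fallback pattern: if the bos will not fit alongside the current batch, submit it and retest against an empty batch before giving up. */ |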
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, |
NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, |
NULL)) |
goto cleanup_mask; |
_kgem_set_mode(&sna->kgem, KGEM_RENDER); |
} |
gen6_emit_composite_state(sna, tmp); |
gen6_align_vertex(sna, tmp); |
return true; |
2654,6 → 2673,13 |
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); |
assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); |
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); |
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) |
goto fallback; |
_kgem_set_mode(&sna->kgem, KGEM_RENDER); |
} |
gen6_emit_copy_state(sna, &op->base); |
gen6_align_vertex(sna, &op->base); |
3149,8 → 3175,14 |
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); |
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); |
if (!kgem_check_bo(&sna->kgem, bo, NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, bo, NULL)) { |
kgem_bo_destroy(&sna->kgem, tmp.src.bo); |
return false; |
} |
} |
gen6_emit_fill_state(sna, &tmp); |
gen6_align_vertex(sna, &tmp); |
3173,6 → 3205,7 |
return true; |
} |
#endif |
static void gen6_render_flush(struct sna *sna) |
{ |
3182,8 → 3215,18 |
assert(sna->render.vertex_offset == 0); |
} |
#endif |
static void |
gen6_render_context_switch(struct kgem *kgem, |
int new_mode) |
{ |
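/* A batch targets a single ring, so submit any work in flight before recording the new destination ring. */ |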
if (kgem->nbatch) { |
DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); |
_kgem_submit(kgem); |
} |
kgem->ring = new_mode; |
} |
static void |
gen6_render_retire(struct kgem *kgem) |
{ |
3200,7 → 3243,24 |
} |
} |
static void |
gen6_render_expire(struct kgem *kgem) |
{ |
struct sna *sna; |
sna = container_of(kgem, struct sna, kgem); |
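/* Cache-expiry hook: release a vbo that holds no vertices and fall back to the embedded vertex_data array. */ |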
if (sna->render.vbo && !sna->render.vertex_used) { |
DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); |
kgem_bo_destroy(kgem, sna->render.vbo); |
assert(!sna->render.active); |
sna->render.vbo = NULL; |
sna->render.vertices = sna->render.vertex_data; |
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
sna->render.vertex_used = 0; |
sna->render.vertex_index = 0; |
} |
} |
static void gen6_render_reset(struct sna *sna) |
{ |
sna->render_state.gen6.needs_invariant = true; |
3319,8 → 3379,9 |
if (!gen6_render_setup(sna)) |
return false; |
// sna->kgem.context_switch = gen6_render_context_switch; |
sna->kgem.context_switch = gen6_render_context_switch; |
sna->kgem.retire = gen6_render_retire; |
sna->kgem.expire = gen6_render_expire; |
// sna->render.composite = gen6_render_composite; |
// sna->render.video = gen6_render_video; |
3445,3 → 3506,90 |
return sna->render.vertex_size - sna->render.vertex_used; |
} |
void gen4_vertex_close(struct sna *sna) |
{ |
struct kgem_bo *bo, *free_bo = NULL; |
unsigned int i, delta = 0; |
assert(sna->render.vertex_offset == 0); |
if (!sna->render.vb_id) |
return; |
DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n", |
__FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0, |
sna->render.vb_id, sna->render.nvertex_reloc)); |
assert(!sna->render.active); |
bo = sna->render.vbo; |
if (bo) { |
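/* An existing vbo is kept for reuse unless it is nearly full; on non-LLC hardware a CPU mapping is first exchanged for a GTT mapping so the GPU reads coherent data. */ |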
if (sna->render.vertex_size - sna->render.vertex_used < 64) { |
DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); |
sna->render.vbo = NULL; |
sna->render.vertices = sna->render.vertex_data; |
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
free_bo = bo; |
} else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) { |
DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); |
sna->render.vertices = |
kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); |
if (sna->render.vertices == NULL) { |
sna->render.vbo = NULL; |
sna->render.vertices = sna->render.vertex_data; |
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
free_bo = bo; |
} |
} |
} else { |
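/* Only the embedded vertex array was used: either copy the vertices into spare space at the tail of the batch or upload them into a fresh linear bo. */ |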
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { |
DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, |
sna->render.vertex_used, sna->kgem.nbatch)); |
memcpy(sna->kgem.batch + sna->kgem.nbatch, |
sna->render.vertex_data, |
sna->render.vertex_used * 4); |
delta = sna->kgem.nbatch * 4; |
bo = NULL; |
sna->kgem.nbatch += sna->render.vertex_used; |
} else { |
bo = kgem_create_linear(&sna->kgem, |
4*sna->render.vertex_used, |
CREATE_NO_THROTTLE); |
if (bo && !kgem_bo_write(&sna->kgem, bo, |
sna->render.vertex_data, |
4*sna->render.vertex_used)) { |
kgem_bo_destroy(&sna->kgem, bo); |
bo = NULL; |
} |
DBG(("%s: new vbo: %d\n", __FUNCTION__, |
sna->render.vertex_used)); |
free_bo = bo; |
} |
} |
assert(sna->render.nvertex_reloc); |
for (i = 0; i < sna->render.nvertex_reloc; i++) { |
DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, |
i, sna->render.vertex_reloc[i])); |
sna->kgem.batch[sna->render.vertex_reloc[i]] = |
kgem_add_reloc(&sna->kgem, |
sna->render.vertex_reloc[i], bo, |
I915_GEM_DOMAIN_VERTEX << 16, |
delta); |
} |
sna->render.nvertex_reloc = 0; |
sna->render.vb_id = 0; |
if (sna->render.vbo == NULL) { |
assert(!sna->render.active); |
sna->render.vertex_used = 0; |
sna->render.vertex_index = 0; |
assert(sna->render.vertices == sna->render.vertex_data); |
assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); |
} |
if (free_bo) |
kgem_bo_destroy(&sna->kgem, free_bo); |
} |
/drivers/video/Intel-2D/i915_drm.h |
---|
218,13 → 218,13 |
#define DRM_IOCTL_I915_HWS_ADDR |
#define DRM_IOCTL_I915_GEM_INIT |
#define DRM_IOCTL_I915_GEM_EXECBUFFER |
#define DRM_IOCTL_I915_GEM_EXECBUFFER2 |
#define DRM_IOCTL_I915_GEM_EXECBUFFER2 SRV_I915_GEM_EXECBUFFER2 |
#define DRM_IOCTL_I915_GEM_PIN SRV_I915_GEM_PIN |
#define DRM_IOCTL_I915_GEM_UNPIN |
#define DRM_IOCTL_I915_GEM_BUSY SRV_I915_GEM_BUSY |
#define DRM_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHEING |
#define DRM_IOCTL_I915_GEM_GET_CACHEING |
#define DRM_IOCTL_I915_GEM_THROTTLE |
#define DRM_IOCTL_I915_GEM_THROTTLE SRV_I915_GEM_THROTTLE |
#define DRM_IOCTL_I915_GEM_ENTERVT |
#define DRM_IOCTL_I915_GEM_LEAVEVT |
#define DRM_IOCTL_I915_GEM_CREATE SRV_I915_GEM_CREATE |
/drivers/video/Intel-2D/kgem-sna.c |
---|
108,6 → 108,15 |
#define LOCAL_I915_EXEC_IS_PINNED (1<<10) |
#define LOCAL_I915_EXEC_NO_RELOC (1<<11) |
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) |
struct local_i915_gem_userptr { |
uint64_t user_ptr; |
uint32_t user_size; |
uint32_t flags; |
#define I915_USERPTR_READ_ONLY (1<<0) |
#define I915_USERPTR_UNSYNCHRONIZED (1<<31) |
uint32_t handle; |
}; |
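/* Local definition of the i915 userptr ioctl payload, presumably because the system headers do not yet provide it. */ |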
#define UNCACHED 0 |
#define SNOOPED 1 |
118,6 → 127,13 |
#define LOCAL_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHEING |
struct local_fbinfo { |
int width; |
int height; |
int pitch; |
int tiling; |
}; |
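/* Framebuffer geometry as reported by the SRV_FBINFO service call; compare struct sna_fb in sna.h. */ |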
struct kgem_buffer { |
struct kgem_bo base; |
void *mem; |
189,7 → 205,7 |
ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); |
} while (ret == -1 && (errno == EINTR || errno == EAGAIN)); |
*/ |
return ret == 0; |
return false;//ret == 0; |
} |
static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing) |
260,19 → 276,19 |
retry_mmap: |
// ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED, |
// kgem->fd, mmap_arg.offset); |
if (ptr == 0) { |
// if (ptr == 0) { |
printf("%s: failed to mmap %d, %d bytes, into GTT domain: %d\n", |
__FUNCTION__, bo->handle, bytes(bo), 0); |
if (__kgem_throttle_retire(kgem, 0)) |
goto retry_mmap; |
// if (__kgem_throttle_retire(kgem, 0)) |
// goto retry_mmap; |
if (kgem->need_expire) { |
kgem_cleanup_cache(kgem); |
goto retry_mmap; |
} |
// if (kgem->need_expire) { |
// kgem_cleanup_cache(kgem); |
// goto retry_mmap; |
// } |
ptr = NULL; |
} |
// } |
return ptr; |
} |
639,10 → 655,10 |
static bool __kgem_throttle(struct kgem *kgem) |
{ |
// if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0) |
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0) |
return false; |
// return errno == EIO; |
return errno == EIO; |
} |
static bool is_hw_supported(struct kgem *kgem, |
1073,9 → 1089,140 |
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT; |
if (kgem->has_pinned_batches) |
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED; |
} |
/* XXX hopefully a good approximation */ |
static uint32_t kgem_get_unique_id(struct kgem *kgem) |
{ |
uint32_t id; |
id = ++kgem->unique_id; |
if (id == 0) |
id = ++kgem->unique_id; |
return id; |
} |
inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags) |
{ |
if (flags & CREATE_PRIME) |
return 256; |
if (flags & CREATE_SCANOUT) |
return 64; |
return kgem->min_alignment; |
} |
static uint32_t kgem_untiled_pitch(struct kgem *kgem, |
uint32_t width, uint32_t bpp, |
unsigned flags) |
{ |
width = ALIGN(width, 2) * bpp >> 3; |
return ALIGN(width, kgem_pitch_alignment(kgem, flags)); |
} |
static uint32_t kgem_surface_size(struct kgem *kgem, |
bool relaxed_fencing, |
unsigned flags, |
uint32_t width, |
uint32_t height, |
uint32_t bpp, |
uint32_t tiling, |
uint32_t *pitch) |
{ |
uint32_t tile_width, tile_height; |
uint32_t size; |
assert(width <= MAXSHORT); |
assert(height <= MAXSHORT); |
if (kgem->gen <= 030) { |
if (tiling) { |
if (kgem->gen < 030) { |
tile_width = 128; |
tile_height = 32; |
} else { |
tile_width = 512; |
tile_height = 16; |
} |
} else { |
tile_width = 2 * bpp >> 3; |
tile_width = ALIGN(tile_width, |
kgem_pitch_alignment(kgem, flags)); |
tile_height = 2; |
} |
} else switch (tiling) { |
default: |
case I915_TILING_NONE: |
tile_width = 2 * bpp >> 3; |
tile_width = ALIGN(tile_width, |
kgem_pitch_alignment(kgem, flags)); |
tile_height = 2; |
break; |
/* XXX align to an even tile row */ |
case I915_TILING_X: |
tile_width = 512; |
tile_height = 16; |
break; |
case I915_TILING_Y: |
tile_width = 128; |
tile_height = 64; |
break; |
} |
*pitch = ALIGN(width * bpp / 8, tile_width); |
height = ALIGN(height, tile_height); |
if (kgem->gen >= 040) |
return PAGE_ALIGN(*pitch * height); |
/* If it is too wide for the blitter, don't even bother. */ |
if (tiling != I915_TILING_NONE) { |
if (*pitch > 8192) |
return 0; |
for (size = tile_width; size < *pitch; size <<= 1) |
; |
*pitch = size; |
} else { |
if (*pitch >= 32768) |
return 0; |
} |
size = *pitch * height; |
if (relaxed_fencing || tiling == I915_TILING_NONE) |
return PAGE_ALIGN(size); |
/* We need to allocate a power-of-two (pot) fence region for a tiled buffer. */ |
if (kgem->gen < 030) |
tile_width = 512 * 1024; |
else |
tile_width = 1024 * 1024; |
while (tile_width < size) |
tile_width *= 2; |
return tile_width; |
} |
static uint32_t kgem_aligned_height(struct kgem *kgem, |
uint32_t height, uint32_t tiling) |
{ |
uint32_t tile_height; |
if (kgem->gen <= 030) { |
tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1; |
} else switch (tiling) { |
/* XXX align to an even tile row */ |
default: |
case I915_TILING_NONE: |
tile_height = 1; |
break; |
case I915_TILING_X: |
tile_height = 16; |
break; |
case I915_TILING_Y: |
tile_height = 64; |
break; |
} |
return ALIGN(height, tile_height); |
} |
static struct drm_i915_gem_exec_object2 * |
kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) |
{ |
1763,12 → 1910,28 |
return retired; |
} |
bool __kgem_ring_is_idle(struct kgem *kgem, int ring) |
{ |
struct kgem_request *rq; |
assert(!list_is_empty(&kgem->requests[ring])); |
rq = list_last_entry(&kgem->requests[ring], |
struct kgem_request, list); |
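/* Requests retire in order, so if the last request on this ring is no longer busy the whole ring has gone idle. */ |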
if (__kgem_busy(kgem, rq->bo->handle)) { |
DBG(("%s: last requests handle=%d still busy\n", |
__FUNCTION__, rq->bo->handle)); |
return false; |
} |
DBG(("%s: ring=%d idle (handle=%d)\n", |
__FUNCTION__, ring, rq->bo->handle)); |
kgem_retire__requests_ring(kgem, ring); |
assert(list_is_empty(&kgem->requests[ring])); |
return true; |
} |
static void kgem_commit(struct kgem *kgem) |
{ |
struct kgem_request *rq = kgem->next_request; |
2328,15 → 2491,15 |
// ret = drmIoctl(kgem->fd, |
// DRM_IOCTL_I915_GEM_EXECBUFFER2, |
// &execbuf); |
// while (ret == -1 && errno == EBUSY && retry--) { |
// __kgem_throttle(kgem); |
// ret = drmIoctl(kgem->fd, |
// DRM_IOCTL_I915_GEM_EXECBUFFER2, |
// &execbuf); |
// } |
ret = drmIoctl(kgem->fd, |
DRM_IOCTL_I915_GEM_EXECBUFFER2, |
&execbuf); |
while (ret == -1 && errno == EBUSY && retry--) { |
__kgem_throttle(kgem); |
ret = drmIoctl(kgem->fd, |
DRM_IOCTL_I915_GEM_EXECBUFFER2, |
&execbuf); |
} |
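/* EBUSY from execbuffer means the GPU is saturated: throttle and retry the submission a few times before reporting the error. */ |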
if (DEBUG_SYNC && ret == 0) { |
struct drm_i915_gem_set_domain set_domain; |
2898,8 → 3061,6 |
return size; |
} |
#if 0 |
struct kgem_bo *kgem_create_2d(struct kgem *kgem, |
int width, |
int height, |
3379,6 → 3540,7 |
return bo; |
} |
#if 0 |
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, |
int width, |
int height, |
3497,21 → 3659,80 |
__kgem_bo_destroy(kgem, bo); |
} |
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) |
{ |
assert(bo->rq); |
assert(bo->exec == NULL); |
assert(bo->needs_flush); |
/* The kernel will emit a flush *and* update its own flushing lists. */ |
if (!__kgem_busy(kgem, bo->handle)) |
__kgem_bo_clear_busy(bo); |
DBG(("%s: handle=%d, busy?=%d\n", |
__FUNCTION__, bo->handle, bo->rq != NULL)); |
} |
inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo) |
{ |
return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring; |
} |
bool kgem_check_bo(struct kgem *kgem, ...) |
{ |
va_list ap; |
struct kgem_bo *bo; |
int num_exec = 0; |
int num_pages = 0; |
bool flush = false; |
va_start(ap, kgem); |
while ((bo = va_arg(ap, struct kgem_bo *))) { |
while (bo->proxy) |
bo = bo->proxy; |
if (bo->exec) |
continue; |
if (needs_semaphore(kgem, bo)) |
return false; |
num_pages += num_pages(bo); |
num_exec++; |
flush |= bo->flush; |
} |
va_end(ap); |
DBG(("%s: num_pages=+%d, num_exec=+%d\n", |
__FUNCTION__, num_pages, num_exec)); |
if (!num_pages) |
return true; |
if (kgem_flush(kgem, flush)) |
return false; |
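/* Two watermarks: above the low mark we flush early while the ring is idle; the high mark is a hard cap on aperture usage for one batch. */ |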
if (kgem->aperture > kgem->aperture_low && |
kgem_ring_is_idle(kgem, kgem->ring)) { |
DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n", |
__FUNCTION__, kgem->aperture, kgem->aperture_low)); |
return false; |
} |
if (num_pages + kgem->aperture > kgem->aperture_high) { |
DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n", |
__FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high)); |
return false; |
} |
if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) { |
DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__, |
kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem))); |
return false; |
} |
return true; |
} |
3532,6 → 3753,16 |
uint32_t kgem_add_reloc(struct kgem *kgem, |
uint32_t pos, |
struct kgem_bo *bo, |
3545,6 → 3776,9 |
assert((read_write_domain & 0x7fff) == 0 || bo != NULL); |
// if( bo != NULL && bo->handle == -1) |
// return 0; |
index = kgem->nreloc++; |
assert(index < ARRAY_SIZE(kgem->reloc)); |
kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); |
3851,6 → 4085,42 |
} |
} |
struct kgem_bo *kgem_create_proxy(struct kgem *kgem, |
struct kgem_bo *target, |
int offset, int length) |
{ |
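/* A proxy is a sub-range view of the target bo: it shares the target's handle, tiling and pitch, and inherits its exec/busy state. */ |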
struct kgem_bo *bo; |
DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n", |
__FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1, |
offset, length, target->io)); |
bo = __kgem_bo_alloc(target->handle, length); |
if (bo == NULL) |
return NULL; |
bo->unique_id = kgem_get_unique_id(kgem); |
bo->reusable = false; |
bo->size.bytes = length; |
bo->io = target->io && target->proxy == NULL; |
bo->dirty = target->dirty; |
bo->tiling = target->tiling; |
bo->pitch = target->pitch; |
assert(!bo->scanout); |
bo->proxy = kgem_bo_reference(target); |
bo->delta = offset; |
if (target->exec) { |
list_move_tail(&bo->request, &kgem->next_request->buffers); |
bo->exec = &_kgem_dummy_exec; |
} |
bo->rq = target->rq; |
return bo; |
} |
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) |
{ |
struct kgem_bo_binding *b; |
3889,5 → 4159,37 |
} |
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb) |
{ |
struct kgem_bo *bo; |
size_t size; |
int ret; |
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb); |
if( ret != 0 ) |
return 0; |
size = fb->pitch * fb->height / PAGE_SIZE; |
bo = __kgem_bo_alloc(-2, size); |
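/* -2 is a private pseudo-handle: the execbuffer path translates it into the scanout framebuffer via get_fb_obj() (see i915_gem_execbuffer.c). */ |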
if (!bo) { |
return 0; |
} |
bo->domain = DOMAIN_GTT; |
bo->unique_id = kgem_get_unique_id(kgem); |
bo->pitch = fb->pitch; |
bo->tiling = I915_TILING_NONE; |
bo->scanout = 1; |
fb->fb_bo = bo; |
printf("fb width %d height %d pitch %d bo %p\n", |
fb->width, fb->height, fb->pitch, fb->fb_bo); |
return 1; |
} |
/drivers/video/Intel-2D/kgem.h |
---|
28,7 → 28,7 |
#ifndef KGEM_H |
#define KGEM_H |
#define HAS_DEBUG_FULL 1 |
#define HAS_DEBUG_FULL 0 |
#include <stdint.h> |
#include <stdbool.h> |
/drivers/video/Intel-2D/sna.c |
---|
5,6 → 5,10 |
#include "sna.h" |
#include <pixlib2.h> |
static struct sna_fb sna_fb; |
typedef struct __attribute__((packed)) |
{ |
unsigned handle; |
64,7 → 68,7 |
// render->clear = no_render_clear; |
render->reset = no_render_reset; |
render->flush = no_render_flush; |
// render->flush = no_render_flush; |
// render->fini = no_render_fini; |
// sna->kgem.context_switch = no_render_context_switch; |
129,26 → 133,11 |
// return false; |
sna_device = sna; |
#if 0 |
{ |
struct kgem_bo *screen_bo; |
bitmap_t screen; |
screen.pitch = 1024*4; |
screen.gaddr = 0; |
screen.width = 1024; |
screen.height = 768; |
screen.obj = (void*)-1; |
screen_bo = create_bo(&screen); |
sna->render.clear(sna, &screen, screen_bo); |
return kgem_init_fb(&sna->kgem, &sna_fb); |
} |
#endif |
return true; |
} |
int sna_init(uint32_t service) |
{ |
ioctl_t io; |
339,37 → 328,69 |
#endif |
int sna_blit_copy(uint32_t dst_bitmap, int dst_x, int dst_y, |
int w, int h, uint32_t src_bitmap, int src_x, int src_y) |
int sna_blit_copy(bitmap_t *src_bitmap, int dst_x, int dst_y, |
int w, int h, int src_x, int src_y) |
{ |
struct sna_copy_op copy; |
struct kgem_bo src_bo, dst_bo; |
struct _Pixmap src, dst; |
struct kgem_bo *src_bo; |
memset(&src_bo, 0, sizeof(src_bo)); |
memset(&dst_bo, 0, sizeof(dst_bo)); |
memset(&src, 0, sizeof(src)); |
memset(&dst, 0, sizeof(dst)); |
// src_bo.gaddr = src_bitmap->gaddr; |
// src_bo.pitch = src_bitmap->pitch; |
// src_bo.tiling = 0; |
src.drawable.bitsPerPixel = 32; |
src.drawable.width = src_bitmap->width; |
src.drawable.height = src_bitmap->height; |
// dst_bo.gaddr = dst_bitmap->gaddr; |
// dst_bo.pitch = dst_bitmap->pitch; |
// dst_bo.tiling = 0; |
dst.drawable.bitsPerPixel = 32; |
dst.drawable.width = sna_fb.width; |
dst.drawable.height = sna_fb.height; |
memset(©, 0, sizeof(copy)); |
sna_device->render.copy(sna_device, GXcopy, NULL, &src_bo, NULL, &dst_bo, ©); |
src_bo = (struct kgem_bo*)src_bitmap->handle; |
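/* bitmap->handle stores the kgem_bo pointer itself, cast to an integer by sna_create_bitmap() below. */ |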
if( sna_device->render.copy(sna_device, GXcopy, |
&src, src_bo, |
&dst, sna_fb.fb_bo, ©) ) |
{ |
copy.blt(sna_device, ©, src_x, src_y, w, h, dst_x, dst_y); |
copy.done(sna_device, ©); |
} |
kgem_submit(&sna_device->kgem); |
// __asm__ __volatile__("int3"); |
// _kgem_submit(&sna_device->kgem, &execbuffer); |
return 0; |
} |
int sna_create_bitmap(bitmap_t *bitmap) |
{ |
struct kgem_bo *bo; |
bo = kgem_create_2d(&sna_device->kgem, bitmap->width, bitmap->height, |
32,I915_TILING_NONE, CREATE_CPU_MAP); |
if(bo == NULL) |
goto err_1; |
void *map = kgem_bo_map(&sna_device->kgem, bo); |
if(map == NULL) |
goto err_2; |
bitmap->handle = (uint32_t)bo; |
bitmap->pitch = bo->pitch; |
bitmap->data = map; |
return 0; |
err_2: |
kgem_bo_destroy(&sna_device->kgem, bo); |
err_1: |
return -1; |
} |
/* |
int sna_blit_tex(bitmap_t *dst_bitmap, int dst_x, int dst_y, |
/drivers/video/Intel-2D/sna.h |
---|
44,8 → 44,8 |
#include <stdint.h> |
#include <memory.h> |
#include <malloc.h> |
#include <errno.h> |
#include "intel_driver.h" |
#include "pciaccess.h" |
69,13 → 69,15 |
#define SRV_I915_GEM_GET_APERTURE 26 |
#define SRV_I915_GEM_PWRITE 27 |
#define SRV_I915_GEM_BUSY 28 |
#define SRV_I915_GEM_SET_DOMAIN 29 |
#define SRV_I915_GEM_MMAP 30 |
#define SRV_I915_GEM_THROTTLE 32 |
#define SRV_FBINFO 33 |
#define SRV_I915_GEM_EXECBUFFER2 34 |
#define SRV_I915_GEM_MMAP_GTT 31 |
#define DRM_IOCTL_GEM_CLOSE SRV_DRM_GEM_CLOSE |
#define PIXMAN_FORMAT(bpp,type,a,r,g,b) (((bpp) << 24) | \ |
153,7 → 155,16 |
} PixmapRec; |
struct sna_fb |
{ |
uint32_t width; |
uint32_t height; |
uint32_t pitch; |
uint32_t tiling; |
struct kgem_bo *fb_bo; |
}; |
struct pixman_box16 |
{ |
int16_t x1, y1, x2, y2; |
/drivers/video/Intel-2D/sna_render.h |
---|
471,7 → 471,6 |
struct kgem_bo *sna_static_stream_fini(struct sna *sna, |
struct sna_static_stream *stream); |
/* |
struct kgem_bo * |
sna_render_get_solid(struct sna *sna, |
uint32_t color); |
479,9 → 478,6 |
void |
sna_render_flush_solid(struct sna *sna); |
struct kgem_bo * |
sna_render_get_gradient(struct sna *sna, |
PictGradient *pattern); |
uint32_t sna_rgba_for_color(uint32_t color, int depth); |
uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); |
493,8 → 489,6 |
uint32_t format); |
bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); |
*/ |
void no_render_init(struct sna *sna); |
bool gen2_render_init(struct sna *sna); |
683,8 → 677,37 |
sna_composite_mask_is_opaque(PicturePtr mask); |
#endif |
void sna_vertex_init(struct sna *sna); |
static inline void sna_vertex_lock(struct sna_render *r) |
{ |
// pthread_mutex_lock(&r->lock); |
} |
static inline void sna_vertex_acquire__locked(struct sna_render *r) |
{ |
r->active++; |
} |
static inline void sna_vertex_unlock(struct sna_render *r) |
{ |
// pthread_mutex_unlock(&r->lock); |
} |
static inline void sna_vertex_release__locked(struct sna_render *r) |
{ |
assert(r->active > 0); |
--r->active; |
// if (--r->active == 0) |
// pthread_cond_signal(&r->wait); |
} |
static inline bool sna_vertex_wait__locked(struct sna_render *r) |
{ |
bool was_active = r->active; |
// while (r->active) |
// pthread_cond_wait(&r->wait, &r->lock); |
return was_active; |
} |
#endif /* SNA_RENDER_H */ |
/drivers/video/Intel-2D/utils.c |
---|
0,0 → 1,150 |
#include <stdint.h> |
#include <stdio.h> |
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, |
unsigned int *ecx, unsigned int *edx) |
{ |
/* ecx is often an input as well as an output. */ |
asm volatile("cpuid" |
: "=a" (*eax), |
"=b" (*ebx), |
"=c" (*ecx), |
"=d" (*edx) |
: "0" (*eax), "2" (*ecx) |
: "memory"); |
} |
/* Some CPUID calls want 'count' to be placed in ecx */ |
static inline void cpuid_count(unsigned int op, int count, |
unsigned int *eax, unsigned int *ebx, |
unsigned int *ecx, unsigned int *edx) |
{ |
*eax = op; |
*ecx = count; |
native_cpuid(eax, ebx, ecx, edx); |
} |
enum _cache_type { |
CACHE_TYPE_NULL = 0, |
CACHE_TYPE_DATA = 1, |
CACHE_TYPE_INST = 2, |
CACHE_TYPE_UNIFIED = 3 |
}; |
union _cpuid4_leaf_eax { |
struct { |
enum _cache_type type:5; |
unsigned int level:3; |
unsigned int is_self_initializing:1; |
unsigned int is_fully_associative:1; |
unsigned int reserved:4; |
unsigned int num_threads_sharing:12; |
unsigned int num_cores_on_die:6; |
} split; |
uint32_t full; |
}; |
union _cpuid4_leaf_ebx { |
struct { |
unsigned int coherency_line_size:12; |
unsigned int physical_line_partition:10; |
unsigned int ways_of_associativity:10; |
} split; |
uint32_t full; |
}; |
union _cpuid4_leaf_ecx { |
struct { |
unsigned int number_of_sets:32; |
} split; |
uint32_t full; |
}; |
struct _cpuid4_info_regs { |
union _cpuid4_leaf_eax eax; |
union _cpuid4_leaf_ebx ebx; |
union _cpuid4_leaf_ecx ecx; |
unsigned long size; |
}; |
static int |
cpuid4_cache_lookup_regs(int index, |
struct _cpuid4_info_regs *this_leaf) |
{ |
union _cpuid4_leaf_eax eax; |
union _cpuid4_leaf_ebx ebx; |
union _cpuid4_leaf_ecx ecx; |
unsigned edx; |
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
if (eax.split.type == CACHE_TYPE_NULL) |
return -1; /* better error ? */ |
this_leaf->eax = eax; |
this_leaf->ebx = ebx; |
this_leaf->ecx = ecx; |
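/* CPUID leaf 4 encodes each field as (value - 1), hence the +1s below: size = sets * line_size * partitions * ways. */ |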
this_leaf->size = (ecx.split.number_of_sets + 1) * |
(ebx.split.coherency_line_size + 1) * |
(ebx.split.physical_line_partition + 1) * |
(ebx.split.ways_of_associativity + 1); |
return 0; |
} |
static int find_num_cache_leaves(void) |
{ |
unsigned int eax, ebx, ecx, edx; |
union _cpuid4_leaf_eax cache_eax; |
int i = -1; |
do { |
++i; |
/* Query cpuid(4) repeatedly to count the cache leaves */ |
cpuid_count(4, i, &eax, &ebx, &ecx, &edx); |
cache_eax.full = eax; |
} while (cache_eax.split.type != CACHE_TYPE_NULL); |
return i; |
} |
unsigned int cpu_cache_size(void) |
{ |
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ |
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ |
unsigned int num_cache_leaves; |
num_cache_leaves = find_num_cache_leaves(); |
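/* Walk every cache leaf, recording the size per level; prefer L3 and fall back to L2 as the effective cache size. */ |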
for (i = 0; i < num_cache_leaves; i++) |
{ |
struct _cpuid4_info_regs this_leaf; |
int retval; |
retval = cpuid4_cache_lookup_regs(i, &this_leaf); |
if (retval >= 0) { |
switch (this_leaf.eax.split.level) |
{ |
case 1: |
if (this_leaf.eax.split.type == CACHE_TYPE_DATA) |
new_l1d = this_leaf.size; |
else if (this_leaf.eax.split.type == CACHE_TYPE_INST) |
new_l1i = this_leaf.size; |
break; |
case 2: |
new_l2 = this_leaf.size; |
break; |
case 3: |
new_l3 = this_leaf.size; |
break; |
default: |
break; |
} |
} |
} |
printf("l2 cache %d l3 cache %d\n", new_l2, new_l3); |
return new_l3 != 0 ? new_l3 : new_l2; |
} |
/drivers/video/drm/i915/i915_dma.c |
---|
1547,6 → 1547,7 |
return 0; |
} |
#endif |
int i915_driver_open(struct drm_device *dev, struct drm_file *file) |
{ |
1553,6 → 1554,8 |
struct drm_i915_file_private *file_priv; |
DRM_DEBUG_DRIVER("\n"); |
ENTER(); |
file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL); |
if (!file_priv) |
return -ENOMEM; |
1564,9 → 1567,11 |
idr_init(&file_priv->context_idr); |
LEAVE(); |
return 0; |
} |
#if 0 |
/** |
* i915_driver_lastclose - clean up after all DRM clients have exited |
* @dev: DRM device |
/drivers/video/drm/i915/i915_drv.c |
---|
492,7 → 492,7 |
// DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME, |
// .load = i915_driver_load, |
// .unload = i915_driver_unload, |
// .open = i915_driver_open, |
.open = i915_driver_open, |
// .lastclose = i915_driver_lastclose, |
// .preclose = i915_driver_preclose, |
// .postclose = i915_driver_postclose, |
577,6 → 577,12 |
dev->driver = &driver; |
if (dev->driver->open) { |
ret = dev->driver->open(dev, priv); |
if (ret < 0) |
goto err_g4; |
} |
ret = i915_driver_load(dev, ent->driver_data ); |
if (ret) |
/drivers/video/drm/i915/i915_gem.c |
---|
37,6 → 37,10 |
extern int x86_clflush_size; |
#define PROT_READ 0x1 /* page can be read */ |
#define PROT_WRITE 0x2 /* page can be written */ |
#define MAP_SHARED 0x01 /* Share changes */ |
#undef mb |
#undef rmb |
#undef wmb |
44,6 → 48,10 |
#define rmb() asm volatile ("lfence") |
#define wmb() asm volatile ("sfence") |
unsigned long vm_mmap(struct file *file, unsigned long addr, |
unsigned long len, unsigned long prot, |
unsigned long flag, unsigned long offset); |
static inline void clflush(volatile void *__p) |
{ |
asm volatile("clflush %0" : "+m" (*(volatile char*)__p)); |
1296,8 → 1304,8 |
if (obj == NULL) |
return -ENOENT; |
dbgprintf("%s offset %lld size %lld not supported\n", |
args->offset, args->size); |
dbgprintf("%s offset %lld size %lld\n", |
__FUNCTION__, args->offset, args->size); |
/* prime objects have no backing filp to GEM mmap |
* pages from. |
*/ |
1306,17 → 1314,16 |
return -EINVAL; |
} |
// addr = vm_mmap(obj->filp, 0, args->size, |
// PROT_READ | PROT_WRITE, MAP_SHARED, |
// args->offset); |
addr = vm_mmap(obj->filp, 0, args->size, |
PROT_READ | PROT_WRITE, MAP_SHARED, |
args->offset); |
drm_gem_object_unreference_unlocked(obj); |
// if (IS_ERR((void *)addr)) |
// return addr; |
if (IS_ERR((void *)addr)) |
return addr; |
args->addr_ptr = (uint64_t) addr; |
return -EINVAL; |
// return 0; |
return 0; |
} |
1444,8 → 1451,8 |
// i915_gem_object_free_mmap_offset(obj); |
// if (obj->base.filp == NULL) |
// return; |
if (obj->base.filp == NULL) |
return; |
/* Our goal here is to return as much of the memory as |
* is possible back to the system as we are called from OOM. |
1491,7 → 1498,7 |
page_cache_release(page); |
// page_cache_release(page); |
} |
//DRM_DEBUG_KMS("%s release %d pages\n", __FUNCTION__, page_count); |
obj->dirty = 0; |
1784,7 → 1791,17 |
list_add_tail(&request->list, &ring->request_list); |
request->file_priv = NULL; |
if (file) { |
struct drm_i915_file_private *file_priv = file->driver_priv; |
spin_lock(&file_priv->mm.lock); |
request->file_priv = file_priv; |
list_add_tail(&request->client_list, |
&file_priv->mm.request_list); |
spin_unlock(&file_priv->mm.lock); |
} |
trace_i915_gem_request_add(ring, request->seqno); |
ring->outstanding_lazy_request = 0; |
if (!dev_priv->mm.suspended) { |
1805,8 → 1822,21 |
return 0; |
} |
static inline void |
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) |
{ |
struct drm_i915_file_private *file_priv = request->file_priv; |
if (!file_priv) |
return; |
spin_lock(&file_priv->mm.lock); |
if (request->file_priv) { |
list_del(&request->client_list); |
request->file_priv = NULL; |
} |
spin_unlock(&file_priv->mm.lock); |
} |
static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, |
struct intel_ring_buffer *ring) |
1819,7 → 1849,7 |
list); |
list_del(&request->list); |
// i915_gem_request_remove_from_client(request); |
i915_gem_request_remove_from_client(request); |
kfree(request); |
} |
1887,6 → 1917,8 |
{ |
uint32_t seqno; |
ENTER(); |
if (list_empty(&ring->request_list)) |
return; |
1913,6 → 1945,7 |
ring->last_retired_head = request->tail; |
list_del(&request->list); |
i915_gem_request_remove_from_client(request); |
kfree(request); |
} |
1939,6 → 1972,7 |
} |
WARN_ON(i915_verify_lists(ring->dev)); |
LEAVE(); |
} |
void |
1961,6 → 1995,8 |
bool idle; |
int i; |
ENTER(); |
dev_priv = container_of(work, drm_i915_private_t, |
mm.retire_work.work); |
dev = dev_priv->dev; |
1990,6 → 2026,8 |
intel_mark_idle(dev); |
mutex_unlock(&dev->struct_mutex); |
LEAVE(); |
} |
/** |
2127,6 → 2165,9 |
drm_i915_private_t *dev_priv = obj->base.dev->dev_private; |
int ret = 0; |
if(obj == get_fb_obj()) |
return 0; |
if (obj->gtt_space == NULL) |
return 0; |
3105,7 → 3146,6 |
return 0; |
} |
#if 0 |
/* Throttle our rendering by waiting until the ring has completed our requests |
* emitted over 20 msec ago. |
* |
3121,7 → 3161,7 |
{ |
struct drm_i915_private *dev_priv = dev->dev_private; |
struct drm_i915_file_private *file_priv = file->driver_priv; |
unsigned long recent_enough = GetTimerTics() - msecs_to_jiffies(20); |
unsigned long recent_enough = GetTimerTicks() - msecs_to_jiffies(20); |
struct drm_i915_gem_request *request; |
struct intel_ring_buffer *ring = NULL; |
u32 seqno = 0; |
3149,7 → 3189,6 |
return ret; |
} |
#endif |
int |
i915_gem_object_pin(struct drm_i915_gem_object *obj, |
3162,7 → 3201,6 |
if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) |
return -EBUSY; |
#if 0 |
if (obj->gtt_space != NULL) { |
if ((alignment && obj->gtt_offset & (alignment - 1)) || |
(map_and_fenceable && !obj->map_and_fenceable)) { |
3178,7 → 3216,6 |
return ret; |
} |
} |
#endif |
if (obj->gtt_space == NULL) { |
struct drm_i915_private *dev_priv = obj->base.dev->dev_private; |
3342,7 → 3379,6 |
return ret; |
} |
#if 0 |
int |
i915_gem_throttle_ioctl(struct drm_device *dev, void *data, |
struct drm_file *file_priv) |
3350,6 → 3386,8 |
return i915_gem_ring_throttle(dev, file_priv); |
} |
#if 0 |
int |
i915_gem_madvise_ioctl(struct drm_device *dev, void *data, |
struct drm_file *file_priv) |
3545,7 → 3583,7 |
mutex_unlock(&dev->struct_mutex); |
/* Cancel the retire work handler, which should be idle now. */ |
// cancel_delayed_work_sync(&dev_priv->mm.retire_work); |
cancel_delayed_work_sync(&dev_priv->mm.retire_work); |
return 0; |
} |
/drivers/video/drm/i915/i915_gem_execbuffer.c |
---|
0,0 → 1,1171 |
/* |
* Copyright © 2008,2010 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
* IN THE SOFTWARE. |
* |
* Authors: |
* Eric Anholt <eric@anholt.net> |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
#define iowrite32(v, addr) writel((v), (addr)) |
#include <drm/drmP.h> |
#include <drm/i915_drm.h> |
#include "i915_drv.h" |
#include "i915_trace.h" |
#include "intel_drv.h" |
//#include <linux/dma_remapping.h> |
#define I915_EXEC_SECURE (1<<9) |
#define I915_EXEC_IS_PINNED (1<<10) |
#define wmb() asm volatile ("sfence") |
struct drm_i915_gem_object *get_fb_obj(); |
static inline __attribute__((const)) |
bool is_power_of_2(unsigned long n) |
{ |
return (n != 0 && ((n & (n - 1)) == 0)); |
} |
static unsigned long |
copy_to_user(void __user *to, const void *from, unsigned long n) |
{ |
memcpy(to, from, n); |
return 0; |
} |
static unsigned long |
copy_from_user(void *to, const void __user *from, unsigned long n) |
{ |
memcpy(to, from, n); |
return 0; |
} |
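/* In this port user and kernel share one address space, so the copy_*_user helpers reduce to memcpy and cannot fault. */ |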
struct eb_objects { |
int and; |
struct hlist_head buckets[0]; |
}; |
static struct eb_objects * |
eb_create(int size) |
{ |
struct eb_objects *eb; |
int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; |
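/* Start with half a page of hash buckets, halving until the bucket count no longer exceeds the number of objects. */ |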
BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); |
while (count > size) |
count >>= 1; |
eb = kzalloc(count*sizeof(struct hlist_head) + |
sizeof(struct eb_objects), |
GFP_KERNEL); |
if (eb == NULL) |
return eb; |
eb->and = count - 1; |
return eb; |
} |
static void |
eb_reset(struct eb_objects *eb) |
{ |
memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); |
} |
static void |
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) |
{ |
hlist_add_head(&obj->exec_node, |
&eb->buckets[obj->exec_handle & eb->and]); |
} |
static struct drm_i915_gem_object * |
eb_get_object(struct eb_objects *eb, unsigned long handle) |
{ |
struct hlist_head *head; |
struct hlist_node *node; |
struct drm_i915_gem_object *obj; |
head = &eb->buckets[handle & eb->and]; |
hlist_for_each(node, head) { |
obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); |
if (obj->exec_handle == handle) |
return obj; |
} |
return NULL; |
} |
static void |
eb_destroy(struct eb_objects *eb) |
{ |
kfree(eb); |
} |
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) |
{ |
return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || |
!obj->map_and_fenceable || |
obj->cache_level != I915_CACHE_NONE); |
} |
static int |
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, |
struct eb_objects *eb, |
struct drm_i915_gem_relocation_entry *reloc) |
{ |
struct drm_device *dev = obj->base.dev; |
struct drm_gem_object *target_obj; |
struct drm_i915_gem_object *target_i915_obj; |
uint32_t target_offset; |
int ret = -EINVAL; |
/* we already hold a reference to all valid objects */ |
target_obj = &eb_get_object(eb, reloc->target_handle)->base; |
if (unlikely(target_obj == NULL)) |
return -ENOENT; |
target_i915_obj = to_intel_bo(target_obj); |
target_offset = target_i915_obj->gtt_offset; |
/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and |
* pipe_control writes because the gpu doesn't properly redirect them |
* through the ppgtt for non_secure batchbuffers. */ |
if (unlikely(IS_GEN6(dev) && |
reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && |
!target_i915_obj->has_global_gtt_mapping)) { |
i915_gem_gtt_bind_object(target_i915_obj, |
target_i915_obj->cache_level); |
} |
/* Validate that the target is in a valid r/w GPU domain */ |
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { |
DRM_DEBUG("reloc with multiple write domains: " |
"obj %p target %d offset %d " |
"read %08x write %08x", |
obj, reloc->target_handle, |
(int) reloc->offset, |
reloc->read_domains, |
reloc->write_domain); |
return ret; |
} |
if (unlikely((reloc->write_domain | reloc->read_domains) |
& ~I915_GEM_GPU_DOMAINS)) { |
DRM_DEBUG("reloc with read/write non-GPU domains: " |
"obj %p target %d offset %d " |
"read %08x write %08x", |
obj, reloc->target_handle, |
(int) reloc->offset, |
reloc->read_domains, |
reloc->write_domain); |
return ret; |
} |
if (unlikely(reloc->write_domain && target_obj->pending_write_domain && |
reloc->write_domain != target_obj->pending_write_domain)) { |
DRM_DEBUG("Write domain conflict: " |
"obj %p target %d offset %d " |
"new %08x old %08x\n", |
obj, reloc->target_handle, |
(int) reloc->offset, |
reloc->write_domain, |
target_obj->pending_write_domain); |
return ret; |
} |
target_obj->pending_read_domains |= reloc->read_domains; |
target_obj->pending_write_domain |= reloc->write_domain; |
/* If the relocation already has the right value in it, no |
* more work needs to be done. |
*/ |
if (target_offset == reloc->presumed_offset) |
return 0; |
/* Check that the relocation address is valid... */ |
if (unlikely(reloc->offset > obj->base.size - 4)) { |
DRM_DEBUG("Relocation beyond object bounds: " |
"obj %p target %d offset %d size %d.\n", |
obj, reloc->target_handle, |
(int) reloc->offset, |
(int) obj->base.size); |
return ret; |
} |
if (unlikely(reloc->offset & 3)) { |
DRM_DEBUG("Relocation not 4-byte aligned: " |
"obj %p target %d offset %d.\n", |
obj, reloc->target_handle, |
(int) reloc->offset); |
return ret; |
} |
/* We can't wait for rendering with pagefaults disabled */ |
// if (obj->active && in_atomic()) |
// return -EFAULT; |
reloc->delta += target_offset; |
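/* Two patching paths: snooped or unmappable objects are written through a CPU mapping; otherwise the dword goes through a transient GTT mapping (MapIoMem/FreeKernelSpace standing in for the Linux atomic kmap helpers in this port). */ |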
if (use_cpu_reloc(obj)) { |
uint32_t page_offset = reloc->offset & ~PAGE_MASK; |
char *vaddr; |
ret = i915_gem_object_set_to_cpu_domain(obj, 1); |
if (ret) |
return ret; |
vaddr = (char *)MapIoMem((addr_t)i915_gem_object_get_page(obj, |
reloc->offset >> PAGE_SHIFT), 4096, 3); |
*(uint32_t *)(vaddr + page_offset) = reloc->delta; |
FreeKernelSpace(vaddr); |
} else { |
struct drm_i915_private *dev_priv = dev->dev_private; |
uint32_t __iomem *reloc_entry; |
void __iomem *reloc_page; |
ret = i915_gem_object_set_to_gtt_domain(obj, true); |
if (ret) |
return ret; |
ret = i915_gem_object_put_fence(obj); |
if (ret) |
return ret; |
/* Map the page containing the relocation we're going to perform. */ |
reloc->offset += obj->gtt_offset; |
reloc_page = (void*)MapIoMem(reloc->offset & PAGE_MASK, 4096, 3); |
reloc_entry = (uint32_t __iomem *) |
(reloc_page + (reloc->offset & ~PAGE_MASK)); |
iowrite32(reloc->delta, reloc_entry); |
FreeKernelSpace(reloc_page); |
} |
/* and update the user's relocation entry */ |
reloc->presumed_offset = target_offset; |
return 0; |
} |
static int |
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, |
struct eb_objects *eb) |
{ |
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) |
struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; |
struct drm_i915_gem_relocation_entry __user *user_relocs; |
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; |
int remain, ret; |
user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr; |
remain = entry->relocation_count; |
while (remain) { |
struct drm_i915_gem_relocation_entry *r = stack_reloc; |
int count = remain; |
if (count > ARRAY_SIZE(stack_reloc)) |
count = ARRAY_SIZE(stack_reloc); |
remain -= count; |
memcpy(r, user_relocs, count*sizeof(r[0])); |
do { |
u64 offset = r->presumed_offset; |
ret = i915_gem_execbuffer_relocate_entry(obj, eb, r); |
if (ret) |
return ret; |
memcpy(&user_relocs->presumed_offset, |
&r->presumed_offset, |
sizeof(r->presumed_offset)); |
user_relocs++; |
r++; |
} while (--count); |
} |
return 0; |
#undef N_RELOC |
} |
static int |
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, |
struct eb_objects *eb, |
struct drm_i915_gem_relocation_entry *relocs) |
{ |
const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; |
int i, ret; |
for (i = 0; i < entry->relocation_count; i++) { |
ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]); |
if (ret) |
return ret; |
} |
return 0; |
} |
static int |
i915_gem_execbuffer_relocate(struct drm_device *dev, |
struct eb_objects *eb, |
struct list_head *objects) |
{ |
struct drm_i915_gem_object *obj; |
int ret = 0; |
/* This is the fast path and we cannot handle a pagefault whilst |
* holding the struct mutex lest the user pass in the relocations |
* contained within a mmaped bo. For in such a case, the page |
* fault handler would call i915_gem_fault() and we would try to |
* acquire the struct mutex again. Obviously this is bad and so |
* lockdep complains vehemently. |
*/ |
// pagefault_disable(); |
list_for_each_entry(obj, objects, exec_list) { |
ret = i915_gem_execbuffer_relocate_object(obj, eb); |
if (ret) |
break; |
} |
// pagefault_enable(); |
return ret; |
} |
#define __EXEC_OBJECT_HAS_PIN (1<<31) |
#define __EXEC_OBJECT_HAS_FENCE (1<<30) |
static int |
need_reloc_mappable(struct drm_i915_gem_object *obj) |
{ |
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; |
return entry->relocation_count && !use_cpu_reloc(obj); |
} |
static int |
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, |
struct intel_ring_buffer *ring) |
{ |
struct drm_i915_private *dev_priv = obj->base.dev->dev_private; |
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; |
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; |
bool need_fence, need_mappable; |
int ret; |
need_fence = |
has_fenced_gpu_access && |
entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
obj->tiling_mode != I915_TILING_NONE; |
need_mappable = need_fence || need_reloc_mappable(obj); |
ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false); |
if (ret) |
return ret; |
entry->flags |= __EXEC_OBJECT_HAS_PIN; |
if (has_fenced_gpu_access) { |
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { |
ret = i915_gem_object_get_fence(obj); |
if (ret) |
return ret; |
if (i915_gem_object_pin_fence(obj)) |
entry->flags |= __EXEC_OBJECT_HAS_FENCE; |
obj->pending_fenced_gpu_access = true; |
} |
} |
/* Ensure ppgtt mapping exists if needed */ |
if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { |
i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, |
obj, obj->cache_level); |
obj->has_aliasing_ppgtt_mapping = 1; |
} |
entry->offset = obj->gtt_offset; |
return 0; |
} |
static void |
i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) |
{ |
struct drm_i915_gem_exec_object2 *entry; |
if (!obj->gtt_space) |
return; |
entry = obj->exec_entry; |
if (entry->flags & __EXEC_OBJECT_HAS_FENCE) |
i915_gem_object_unpin_fence(obj); |
if (entry->flags & __EXEC_OBJECT_HAS_PIN) |
i915_gem_object_unpin(obj); |
entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); |
} |
static int |
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, |
struct drm_file *file, |
struct list_head *objects) |
{ |
struct drm_i915_gem_object *obj; |
struct list_head ordered_objects; |
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; |
int retry; |
INIT_LIST_HEAD(&ordered_objects); |
while (!list_empty(objects)) { |
struct drm_i915_gem_exec_object2 *entry; |
bool need_fence, need_mappable; |
obj = list_first_entry(objects, |
struct drm_i915_gem_object, |
exec_list); |
entry = obj->exec_entry; |
need_fence = |
has_fenced_gpu_access && |
entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
obj->tiling_mode != I915_TILING_NONE; |
need_mappable = need_fence || need_reloc_mappable(obj); |
if (need_mappable) |
list_move(&obj->exec_list, &ordered_objects); |
else |
list_move_tail(&obj->exec_list, &ordered_objects); |
obj->base.pending_read_domains = 0; |
obj->base.pending_write_domain = 0; |
obj->pending_fenced_gpu_access = false; |
} |
list_splice(&ordered_objects, objects); |
/* Attempt to pin all of the buffers into the GTT. |
* This is done in 3 phases: |
* |
* 1a. Unbind all objects that do not match the GTT constraints for |
* the execbuffer (fenceable, mappable, alignment etc). |
* 1b. Increment pin count for already bound objects. |
* 2. Bind new objects. |
* 3. Decrement pin count. |
* |
* This avoids unnecessary unbinding of later objects in order to make |
* room for the earlier objects *unless* we need to defragment. |
*/ |
retry = 0; |
do { |
int ret = 0; |
/* Unbind any ill-fitting objects or pin. */ |
list_for_each_entry(obj, objects, exec_list) { |
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; |
bool need_fence, need_mappable; |
if (!obj->gtt_space) |
continue; |
need_fence = |
has_fenced_gpu_access && |
entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
obj->tiling_mode != I915_TILING_NONE; |
need_mappable = need_fence || need_reloc_mappable(obj); |
if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || |
(need_mappable && !obj->map_and_fenceable)) |
ret = i915_gem_object_unbind(obj); |
else |
ret = i915_gem_execbuffer_reserve_object(obj, ring); |
if (ret) |
goto err; |
} |
/* Bind fresh objects */ |
list_for_each_entry(obj, objects, exec_list) { |
if (obj->gtt_space) |
continue; |
ret = i915_gem_execbuffer_reserve_object(obj, ring); |
if (ret) |
goto err; |
} |
err: /* Decrement pin count for bound objects */ |
list_for_each_entry(obj, objects, exec_list) |
i915_gem_execbuffer_unreserve_object(obj); |
if (ret != -ENOSPC || retry++) |
return ret; |
// ret = i915_gem_evict_everything(ring->dev); |
if (ret) |
return ret; |
} while (1); |
} |
static int |
i915_gem_execbuffer_relocate_slow(struct drm_device *dev, |
struct drm_file *file, |
struct intel_ring_buffer *ring, |
struct list_head *objects, |
struct eb_objects *eb, |
struct drm_i915_gem_exec_object2 *exec, |
int count) |
{ |
struct drm_i915_gem_relocation_entry *reloc; |
struct drm_i915_gem_object *obj; |
int *reloc_offset; |
int i, total, ret; |
/* We may process another execbuffer during the unlock... */ |
while (!list_empty(objects)) { |
obj = list_first_entry(objects, |
struct drm_i915_gem_object, |
exec_list); |
list_del_init(&obj->exec_list); |
drm_gem_object_unreference(&obj->base); |
} |
mutex_unlock(&dev->struct_mutex); |
total = 0; |
for (i = 0; i < count; i++) |
total += exec[i].relocation_count; |
reloc_offset = malloc(count * sizeof(*reloc_offset)); |
reloc = malloc(total * sizeof(*reloc)); |
if (reloc == NULL || reloc_offset == NULL) { |
free(reloc); |
free(reloc_offset); |
mutex_lock(&dev->struct_mutex); |
return -ENOMEM; |
} |
total = 0; |
for (i = 0; i < count; i++) { |
struct drm_i915_gem_relocation_entry __user *user_relocs; |
u64 invalid_offset = (u64)-1; |
int j; |
user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr; |
if (copy_from_user(reloc+total, user_relocs, |
exec[i].relocation_count * sizeof(*reloc))) { |
ret = -EFAULT; |
mutex_lock(&dev->struct_mutex); |
goto err; |
} |
/* As we do not update the known relocation offsets after |
* relocating (due to the complexities in lock handling), |
* we need to mark them as invalid now so that we force the |
* relocation processing next time. Just in case the target |
* object is evicted and then rebound into its old |
* presumed_offset before the next execbuffer - if that |
* happened we would make the mistake of assuming that the |
* relocations were valid. |
*/ |
for (j = 0; j < exec[i].relocation_count; j++) { |
if (copy_to_user(&user_relocs[j].presumed_offset, |
&invalid_offset, |
sizeof(invalid_offset))) { |
ret = -EFAULT; |
mutex_lock(&dev->struct_mutex); |
goto err; |
} |
} |
reloc_offset[i] = total; |
total += exec[i].relocation_count; |
} |
ret = i915_mutex_lock_interruptible(dev); |
if (ret) { |
mutex_lock(&dev->struct_mutex); |
goto err; |
} |
/* reacquire the objects */ |
eb_reset(eb); |
for (i = 0; i < count; i++) { |
if(exec[i].handle == -2) |
obj = get_fb_obj(); |
else |
obj = to_intel_bo(drm_gem_object_lookup(dev, file, |
exec[i].handle)); |
if (&obj->base == NULL) { |
DRM_DEBUG("Invalid object handle %d at index %d\n", |
exec[i].handle, i); |
ret = -ENOENT; |
goto err; |
} |
list_add_tail(&obj->exec_list, objects); |
obj->exec_handle = exec[i].handle; |
obj->exec_entry = &exec[i]; |
eb_add_object(eb, obj); |
} |
ret = i915_gem_execbuffer_reserve(ring, file, objects); |
if (ret) |
goto err; |
list_for_each_entry(obj, objects, exec_list) { |
int offset = obj->exec_entry - exec; |
ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, |
reloc + reloc_offset[offset]); |
if (ret) |
goto err; |
} |
/* Leave the user relocations as are, this is the painfully slow path, |
* and we want to avoid the complication of dropping the lock whilst |
* having buffers reserved in the aperture and so causing spurious |
* ENOSPC for random operations. |
*/ |
err: |
free(reloc); |
free(reloc_offset); |
return ret; |
} |
static int |
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) |
{ |
u32 plane, flip_mask; |
int ret; |
/* Check for any pending flips. As we only maintain a flip queue depth |
* of 1, we can simply insert a WAIT for the next display flip prior |
* to executing the batch and avoid stalling the CPU. |
*/ |
for (plane = 0; flips >> plane; plane++) { |
if (((flips >> plane) & 1) == 0) |
continue; |
if (plane) |
flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; |
else |
flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; |
ret = intel_ring_begin(ring, 2); |
if (ret) |
return ret; |
intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); |
intel_ring_emit(ring, MI_NOOP); |
intel_ring_advance(ring); |
} |
return 0; |
} |
static int |
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, |
struct list_head *objects) |
{ |
struct drm_i915_gem_object *obj; |
uint32_t flush_domains = 0; |
uint32_t flips = 0; |
int ret; |
list_for_each_entry(obj, objects, exec_list) { |
ret = i915_gem_object_sync(obj, ring); |
if (ret) |
return ret; |
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) |
i915_gem_clflush_object(obj); |
if (obj->base.pending_write_domain) |
flips |= atomic_read(&obj->pending_flip); |
flush_domains |= obj->base.write_domain; |
} |
if (flips) { |
ret = i915_gem_execbuffer_wait_for_flips(ring, flips); |
if (ret) |
return ret; |
} |
if (flush_domains & I915_GEM_DOMAIN_CPU) |
i915_gem_chipset_flush(ring->dev); |
if (flush_domains & I915_GEM_DOMAIN_GTT) |
wmb(); |
/* Unconditionally invalidate gpu caches and ensure that we do flush |
* any residual writes from the previous batch. |
*/ |
return intel_ring_invalidate_all_caches(ring); |
} |
static bool |
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) |
{ |
return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0; |
} |
static int |
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, |
int count) |
{ |
int i; |
for (i = 0; i < count; i++) { |
char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr; |
int length; /* limited by fault_in_pages_readable() */ |
/* First check for malicious input causing overflow */ |
if (exec[i].relocation_count > |
INT_MAX / sizeof(struct drm_i915_gem_relocation_entry)) |
return -EINVAL; |
length = exec[i].relocation_count * |
sizeof(struct drm_i915_gem_relocation_entry); |
// if (!access_ok(VERIFY_READ, ptr, length)) |
// return -EFAULT; |
/* we may also need to update the presumed offsets */ |
// if (!access_ok(VERIFY_WRITE, ptr, length)) |
// return -EFAULT; |
// if (fault_in_multipages_readable(ptr, length)) |
// return -EFAULT; |
} |
return 0; |
} |
static void |
i915_gem_execbuffer_move_to_active(struct list_head *objects, |
struct intel_ring_buffer *ring) |
{ |
struct drm_i915_gem_object *obj; |
list_for_each_entry(obj, objects, exec_list) { |
u32 old_read = obj->base.read_domains; |
u32 old_write = obj->base.write_domain; |
obj->base.read_domains = obj->base.pending_read_domains; |
obj->base.write_domain = obj->base.pending_write_domain; |
obj->fenced_gpu_access = obj->pending_fenced_gpu_access; |
i915_gem_object_move_to_active(obj, ring); |
if (obj->base.write_domain) { |
obj->dirty = 1; |
obj->last_write_seqno = intel_ring_get_seqno(ring); |
if (obj->pin_count) /* check for potential scanout */ |
intel_mark_fb_busy(obj); |
} |
trace_i915_gem_object_change_domain(obj, old_read, old_write); |
} |
} |
static void |
i915_gem_execbuffer_retire_commands(struct drm_device *dev, |
struct drm_file *file, |
struct intel_ring_buffer *ring) |
{ |
/* Unconditionally force add_request to emit a full flush. */ |
ring->gpu_caches_dirty = true; |
/* Add a breadcrumb for the completion of the batch buffer */ |
(void)i915_add_request(ring, file, NULL); |
} |
static int |
i915_reset_gen7_sol_offsets(struct drm_device *dev, |
struct intel_ring_buffer *ring) |
{ |
drm_i915_private_t *dev_priv = dev->dev_private; |
int ret, i; |
if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) |
return 0; |
ret = intel_ring_begin(ring, 4 * 3); |
if (ret) |
return ret; |
for (i = 0; i < 4; i++) { |
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); |
intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i)); |
intel_ring_emit(ring, 0); |
} |
intel_ring_advance(ring); |
return 0; |
} |
static int |
i915_gem_do_execbuffer(struct drm_device *dev, void *data, |
struct drm_file *file, |
struct drm_i915_gem_execbuffer2 *args, |
struct drm_i915_gem_exec_object2 *exec) |
{ |
drm_i915_private_t *dev_priv = dev->dev_private; |
struct list_head objects; |
struct eb_objects *eb; |
struct drm_i915_gem_object *batch_obj; |
struct drm_clip_rect *cliprects = NULL; |
struct intel_ring_buffer *ring; |
u32 ctx_id = i915_execbuffer2_get_context_id(*args); |
u32 exec_start, exec_len; |
u32 mask; |
u32 flags; |
int ret, mode, i; |
if (!i915_gem_check_execbuffer(args)) { |
DRM_DEBUG("execbuf with invalid offset/length\n"); |
return -EINVAL; |
} |
ret = validate_exec_list(exec, args->buffer_count); |
if (ret) |
return ret; |
flags = 0; |
if (args->flags & I915_EXEC_SECURE) { |
// if (!file->is_master || !capable(CAP_SYS_ADMIN)) |
// return -EPERM; |
flags |= I915_DISPATCH_SECURE; |
} |
if (args->flags & I915_EXEC_IS_PINNED) |
flags |= I915_DISPATCH_PINNED; |
switch (args->flags & I915_EXEC_RING_MASK) { |
case I915_EXEC_DEFAULT: |
case I915_EXEC_RENDER: |
ring = &dev_priv->ring[RCS]; |
break; |
case I915_EXEC_BSD: |
ring = &dev_priv->ring[VCS]; |
if (ctx_id != 0) { |
DRM_DEBUG("Ring %s doesn't support contexts\n", |
ring->name); |
return -EPERM; |
} |
break; |
case I915_EXEC_BLT: |
ring = &dev_priv->ring[BCS]; |
if (ctx_id != 0) { |
DRM_DEBUG("Ring %s doesn't support contexts\n", |
ring->name); |
return -EPERM; |
} |
break; |
default: |
DRM_DEBUG("execbuf with unknown ring: %d\n", |
(int)(args->flags & I915_EXEC_RING_MASK)); |
return -EINVAL; |
} |
if (!intel_ring_initialized(ring)) { |
DRM_DEBUG("execbuf with invalid ring: %d\n", |
(int)(args->flags & I915_EXEC_RING_MASK)); |
return -EINVAL; |
} |
mode = args->flags & I915_EXEC_CONSTANTS_MASK; |
mask = I915_EXEC_CONSTANTS_MASK; |
switch (mode) { |
case I915_EXEC_CONSTANTS_REL_GENERAL: |
case I915_EXEC_CONSTANTS_ABSOLUTE: |
case I915_EXEC_CONSTANTS_REL_SURFACE: |
if (ring == &dev_priv->ring[RCS] && |
mode != dev_priv->relative_constants_mode) { |
if (INTEL_INFO(dev)->gen < 4) |
return -EINVAL; |
if (INTEL_INFO(dev)->gen > 5 && |
mode == I915_EXEC_CONSTANTS_REL_SURFACE) |
return -EINVAL; |
/* The HW changed the meaning on this bit on gen6 */ |
if (INTEL_INFO(dev)->gen >= 6) |
mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; |
} |
break; |
default: |
DRM_DEBUG("execbuf with unknown constants: %d\n", mode); |
return -EINVAL; |
} |
if (args->buffer_count < 1) { |
DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); |
return -EINVAL; |
} |
if (args->num_cliprects != 0) { |
if (ring != &dev_priv->ring[RCS]) { |
DRM_DEBUG("clip rectangles are only valid with the render ring\n"); |
return -EINVAL; |
} |
if (INTEL_INFO(dev)->gen >= 5) { |
DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); |
return -EINVAL; |
} |
if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) { |
DRM_DEBUG("execbuf with %u cliprects\n", |
args->num_cliprects); |
return -EINVAL; |
} |
cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects), |
GFP_KERNEL); |
if (cliprects == NULL) { |
ret = -ENOMEM; |
goto pre_mutex_err; |
} |
if (copy_from_user(cliprects, |
(struct drm_clip_rect __user *)(uintptr_t) |
args->cliprects_ptr, |
sizeof(*cliprects)*args->num_cliprects)) { |
ret = -EFAULT; |
goto pre_mutex_err; |
} |
} |
ret = i915_mutex_lock_interruptible(dev); |
if (ret) |
goto pre_mutex_err; |
if (dev_priv->mm.suspended) { |
mutex_unlock(&dev->struct_mutex); |
ret = -EBUSY; |
goto pre_mutex_err; |
} |
eb = eb_create(args->buffer_count); |
if (eb == NULL) { |
mutex_unlock(&dev->struct_mutex); |
ret = -ENOMEM; |
goto pre_mutex_err; |
} |
/* Look up object handles */ |
INIT_LIST_HEAD(&objects); |
for (i = 0; i < args->buffer_count; i++) { |
struct drm_i915_gem_object *obj; |
if (exec[i].handle == (u32)-2) /* port-specific magic: -2 selects the scanout framebuffer (see get_fb_obj() in intel_fb.c) */ |
obj = get_fb_obj(); |
else |
obj = to_intel_bo(drm_gem_object_lookup(dev, file, |
exec[i].handle)); |
if (&obj->base == NULL) { |
DRM_DEBUG("Invalid object handle %d at index %d\n", |
exec[i].handle, i); |
/* prevent error path from reading uninitialized data */ |
ret = -ENOENT; |
goto err; |
} |
if (!list_empty(&obj->exec_list)) { |
DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", |
obj, exec[i].handle, i); |
ret = -EINVAL; |
goto err; |
} |
list_add_tail(&obj->exec_list, &objects); |
obj->exec_handle = exec[i].handle; |
obj->exec_entry = &exec[i]; |
eb_add_object(eb, obj); |
} |
/* take note of the batch buffer before we might reorder the lists */ |
batch_obj = list_entry(objects.prev, |
struct drm_i915_gem_object, |
exec_list); |
/* Move the objects en-masse into the GTT, evicting if necessary. */ |
ret = i915_gem_execbuffer_reserve(ring, file, &objects); |
if (ret) |
goto err; |
/* The objects are in their final locations, apply the relocations. */ |
ret = i915_gem_execbuffer_relocate(dev, eb, &objects); |
if (ret) { |
if (ret == -EFAULT) { |
ret = i915_gem_execbuffer_relocate_slow(dev, file, ring, |
&objects, eb, |
exec, |
args->buffer_count); |
BUG_ON(!mutex_is_locked(&dev->struct_mutex)); |
} |
if (ret) |
goto err; |
} |
/* Set the pending read domains for the batch buffer to COMMAND */ |
if (batch_obj->base.pending_write_domain) { |
DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); |
ret = -EINVAL; |
goto err; |
} |
batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; |
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure |
* batch" bit. Hence we need to pin secure batches into the global gtt. |
* hsw should have this fixed, but let's be paranoid and do it |
* unconditionally for now. */ |
if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) |
i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); |
ret = i915_gem_execbuffer_move_to_gpu(ring, &objects); |
if (ret) |
goto err; |
ret = i915_switch_context(ring, file, ctx_id); |
if (ret) |
goto err; |
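/* INSTPM is a masked register: bits 31:16 of the written value select which |
 * of bits 15:0 actually change, hence "mask << 16 | mode" below, and any |
 * bit cleared from 'mask' above is simply left untouched by the hardware. |
 * The leading MI_NOOP pads the 3-dword LRI to the 4 dwords reserved so the |
 * ring tail stays qword-aligned. */ |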
if (ring == &dev_priv->ring[RCS] && |
mode != dev_priv->relative_constants_mode) { |
ret = intel_ring_begin(ring, 4); |
if (ret) |
goto err; |
intel_ring_emit(ring, MI_NOOP); |
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); |
intel_ring_emit(ring, INSTPM); |
intel_ring_emit(ring, mask << 16 | mode); |
intel_ring_advance(ring); |
dev_priv->relative_constants_mode = mode; |
} |
if (args->flags & I915_EXEC_GEN7_SOL_RESET) { |
ret = i915_reset_gen7_sol_offsets(dev, ring); |
if (ret) |
goto err; |
} |
exec_start = batch_obj->gtt_offset + args->batch_start_offset; |
exec_len = args->batch_len; |
if (cliprects) { |
// for (i = 0; i < args->num_cliprects; i++) { |
// ret = i915_emit_box(dev, &cliprects[i], |
// args->DR1, args->DR4); |
// if (ret) |
// goto err; |
// ret = ring->dispatch_execbuffer(ring, |
// exec_start, exec_len, |
// flags); |
// if (ret) |
// goto err; |
// } |
} else { |
ret = ring->dispatch_execbuffer(ring, |
exec_start, exec_len, |
flags); |
if (ret) |
goto err; |
} |
// i915_gem_execbuffer_move_to_active(&objects, ring); |
// i915_gem_execbuffer_retire_commands(dev, file, ring); |
ring->gpu_caches_dirty = true; |
intel_ring_flush_all_caches(ring); |
err: |
eb_destroy(eb); |
while (!list_empty(&objects)) { |
struct drm_i915_gem_object *obj; |
obj = list_first_entry(&objects, |
struct drm_i915_gem_object, |
exec_list); |
list_del_init(&obj->exec_list); |
drm_gem_object_unreference(&obj->base); |
} |
mutex_unlock(&dev->struct_mutex); |
pre_mutex_err: |
kfree(cliprects); |
return ret; |
} |
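Both user-controlled allocations in this path guard their size computation before it can wrap: num_cliprects is checked against UINT_MAX / sizeof(*cliprects) above, and buffer_count against UINT_MAX / sizeof(*exec2_list) in i915_gem_execbuffer2() below. A standalone sketch of the pattern (the helper name is illustrative; malloc() is the port's allocator):

        /* Illustrative only: refuse any element count whose byte size would
         * overflow before it reaches the allocator. */
        static void *alloc_array_checked(unsigned int count, size_t elem_size)
        {
                if (count > UINT_MAX / elem_size)
                        return NULL;    /* count * elem_size would wrap */
                return malloc((size_t)count * elem_size);
        }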
int |
i915_gem_execbuffer2(struct drm_device *dev, void *data, |
struct drm_file *file) |
{ |
struct drm_i915_gem_execbuffer2 *args = data; |
struct drm_i915_gem_exec_object2 *exec2_list = NULL; |
int ret; |
if (args->buffer_count < 1 || |
args->buffer_count > UINT_MAX / sizeof(*exec2_list)) { |
DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); |
return -EINVAL; |
} |
/* Try kmalloc first, then fall back to plain malloc; the error paths below |
 * free with free() either way, so the two allocators must be interchangeable |
 * on this port. */ |
exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, 0); |
if (exec2_list == NULL) |
exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count); |
if (exec2_list == NULL) { |
DRM_DEBUG("Failed to allocate exec list for %d buffers\n", |
args->buffer_count); |
return -ENOMEM; |
} |
ret = copy_from_user(exec2_list, |
(struct drm_i915_relocation_entry __user *) |
(uintptr_t) args->buffers_ptr, |
sizeof(*exec2_list) * args->buffer_count); |
if (ret != 0) { |
DRM_DEBUG("copy %d exec entries failed %d\n", |
args->buffer_count, ret); |
free(exec2_list); |
return -EFAULT; |
} |
ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); |
if (!ret) { |
/* Copy the new buffer offsets back to the user's exec list. */ |
ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr, |
exec2_list, |
sizeof(*exec2_list) * args->buffer_count); |
if (ret) { |
ret = -EFAULT; |
DRM_DEBUG("failed to copy %d exec entries " |
"back to user (%d)\n", |
args->buffer_count, ret); |
} |
} |
free(exec2_list); |
return ret; |
} |
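On this port the ioctl path is replaced by the service dispatcher in main.c (the SRV_I915_GEM_EXECBUFFER2 case added below), so a client passes in the same struct drm_i915_gem_execbuffer2 the Linux ioctl would carry. A caller-side sketch; call_service() is a hypothetical wrapper around the driver's service interface, not an API defined in this diff:

        /* Hypothetical caller: submit one batch on the render ring. The batch
         * object must be the last entry in the exec list (see the
         * list_entry(objects.prev, ...) lookup in i915_gem_do_execbuffer). */
        static int submit_batch(u32 batch_handle, u32 batch_len)
        {
                struct drm_i915_gem_exec_object2 obj = {
                        .handle = batch_handle,
                };
                struct drm_i915_gem_execbuffer2 args = {
                        .buffers_ptr        = (uintptr_t)&obj,
                        .buffer_count       = 1,
                        .batch_start_offset = 0,
                        .batch_len          = batch_len,
                        .flags              = I915_EXEC_RENDER,
                };

                return call_service(SRV_I915_GEM_EXECBUFFER2, &args);
        }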
/drivers/video/drm/i915/i915_trace.h |
---|
23,5 → 23,7 |
#define trace_i915_reg_rw(a, b, c, d) |
#define trace_i915_ring_wait_begin(a) |
#define trace_i915_gem_object_pwrite(a, b, c) |
#define trace_i915_gem_request_add(a, b) |
#define trace_i915_gem_ring_dispatch(a, b, c) |
#endif |
/drivers/video/drm/i915/intel_display.c |
---|
6985,6 → 6985,8 |
struct drm_device *dev = obj->base.dev; |
struct drm_crtc *crtc; |
ENTER(); |
if (!i915_powersave) |
return; |
7002,6 → 7004,8 |
struct drm_device *dev = obj->base.dev; |
struct drm_crtc *crtc; |
ENTER(); |
if (!i915_powersave) |
return; |
/drivers/video/drm/i915/intel_fb.c |
---|
43,7 → 43,13 |
#include <drm/i915_drm.h> |
#include "i915_drv.h" |
static struct drm_i915_gem_object *fb_obj; |
struct drm_i915_gem_object *get_fb_obj(void) |
{ |
return fb_obj; |
}; |
struct fb_info *framebuffer_alloc(size_t size, struct device *dev) |
{ |
#define BYTES_PER_LONG (BITS_PER_LONG/8) |
144,6 → 150,10 |
obj->gtt_space = &lfb_vm_node; |
obj->gtt_offset = 0; |
obj->pin_count = 2; |
obj->cache_level = I915_CACHE_NONE; |
obj->base.write_domain = 0; |
obj->base.read_domains = I915_GEM_DOMAIN_GTT; |
} |
/***********************************************************************/ |
182,7 → 192,7 |
info->fix.smem_start = dev->mode_config.fb_base + obj->gtt_offset; |
info->fix.smem_len = size; |
info->screen_base = 0xFE000000; |
info->screen_base = (void*) 0xFE000000; |
info->screen_size = size; |
// memset(info->screen_base, 0, size); |
200,6 → 210,8 |
mutex_unlock(&dev->struct_mutex); |
// vga_switcheroo_client_fb_set(dev->pdev, info); |
fb_obj = obj; |
return 0; |
out_unpin: |
/drivers/video/drm/i915/kms_display.c |
---|
624,8 → 624,25 |
return old; |
}; |
struct sna_fb |
{ |
uint32_t width; |
uint32_t height; |
uint32_t pitch; |
uint32_t tiling; |
}; |
int i915_fbinfo(struct sna_fb *fb) |
{ |
fb->width = os_display->width; |
fb->height = os_display->height; |
fb->pitch = os_display->pitch; |
fb->tiling = 0; |
return 0; |
}; |
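i915_fbinfo() just copies the current display geometry into the caller's struct; it is reached through the SRV_FBINFO case added to the main.c dispatcher below. A caller-side sketch, reusing the hypothetical call_service() wrapper from the execbuffer2 example:

        struct sna_fb fb;

        if (call_service(SRV_FBINFO, &fb) == 0)
                printf("fb: %ux%u, pitch %u, tiling %u\n",
                       fb.width, fb.height, fb.pitch, fb.tiling);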
#ifdef __HWA__ |
extern struct hmm bm_mm; |
/drivers/video/drm/i915/main.c |
---|
70,8 → 70,8 |
if(!dbg_open(log)) |
{ |
// strcpy(log, "/tmp1/1/i915.log"); |
strcpy(log, "/RD/1/DRIVERS/i915.log"); |
strcpy(log, "/tmp1/1/i915.log"); |
// strcpy(log, "/RD/1/DRIVERS/i915.log"); |
if(!dbg_open(log)) |
{ |
132,8 → 132,14 |
#define SRV_I915_GEM_PWRITE 27 |
#define SRV_I915_GEM_BUSY 28 |
#define SRV_I915_GEM_SET_DOMAIN 29 |
#define SRV_I915_GEM_MMAP 30 |
#define SRV_I915_GEM_THROTTLE 32 |
#define SRV_FBINFO 33 |
#define SRV_I915_GEM_EXECBUFFER2 34 |
#define check_input(size) \ |
if (unlikely((inp == NULL) || (io->inp_size != (size)))) \ |
break; |
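check_input() is built to sit at the top of a dispatcher case: it breaks out of the switch when the input block is missing or its declared size does not match what the handler expects. A hedged sketch of the intended placement; this fragment belongs inside the dispatcher's switch, and pairing SRV_I915_GEM_MMAP with sizeof(struct drm_i915_gem_mmap) is an assumption, not taken from this diff:

        case SRV_I915_GEM_MMAP:
                /* reject a NULL or mis-sized input block before using it */
                check_input(sizeof(struct drm_i915_gem_mmap));
                retval = i915_gem_mmap_ioctl(main_device, inp, file);
                break;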
247,6 → 253,22 |
retval = i915_gem_set_domain_ioctl(main_device, inp, file); |
break; |
case SRV_I915_GEM_THROTTLE: |
retval = i915_gem_throttle_ioctl(main_device, inp, file); |
break; |
case SRV_I915_GEM_MMAP: |
retval = i915_gem_mmap_ioctl(main_device, inp, file); |
break; |
case SRV_FBINFO: |
retval = i915_fbinfo(inp); |
break; |
case SRV_I915_GEM_EXECBUFFER2: |
retval = i915_gem_execbuffer2(main_device, inp, file); |
break; |
}; |
return retval; |
/drivers/video/drm/i915/utils.c |
---|
1,3 → 1,5 |
#include <ddk.h> |
#include <linux/mm.h> |
#include <drm/drmP.h> |
#include <drm/i915_drm.h> |
#include "i915_drv.h" |
57,3 → 59,39 |
return page; |
}; |
unsigned long vm_mmap(struct file *file, unsigned long addr, |
unsigned long len, unsigned long prot, |
unsigned long flag, unsigned long offset) |
{ |
char *mem, *ptr; |
unsigned long i; /* match 'offset' so the page arithmetic cannot truncate */ |
if (unlikely(offset + PAGE_ALIGN(len) < offset)) |
return -EINVAL; |
if (unlikely(offset & ~PAGE_MASK)) |
return -EINVAL; |
mem = UserAlloc(len); |
if(unlikely(mem == NULL)) |
return -ENOMEM; |
for (i = offset, ptr = mem; i < offset + len; i += PAGE_SIZE, ptr += PAGE_SIZE) |
{ |
struct page *page; |
page = shmem_read_mapping_page_gfp(file, i / PAGE_SIZE, 0); |
if (unlikely(IS_ERR(page))) |
goto err; |
MapPage(ptr, (addr_t)page, PG_SHARED|PG_UW); |
} |
return (unsigned long)mem; |
err: |
UserFree(mem); |
return -ENOMEM; |
}; |
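vm_mmap() above allocates a fresh user range with UserAlloc() and maps the file's shmem pages into it one page at a time; prot and flag are accepted only for signature compatibility and are ignored. A usage sketch, assuming obj is a struct drm_i915_gem_object whose backing store sits in obj->base.filp:

        /* Sketch: map a GEM object's whole backing store into the caller's
         * address space. vm_mmap() returns a negative errno cast to unsigned
         * long on failure, hence the IS_ERR_VALUE() test. */
        static void *map_gem_object(struct drm_i915_gem_object *obj)
        {
                unsigned long addr;

                addr = vm_mmap(obj->base.filp, 0, obj->base.size, 0, 0, 0);
                if (IS_ERR_VALUE(addr))
                        return NULL;
                return (void *)addr;
        }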