/drivers/video/Intel-2D/kgem-sna.c |
---|
File deleted |
/drivers/video/Intel-2D/brw/brw_eu.h |
---|
2239,8 → 2239,6 |
struct brw_reg src0, |
struct brw_reg src1); |
void brw_print_reg(struct brw_reg reg); |
static inline void brw_math_invert(struct brw_compile *p, |
struct brw_reg dst, |
struct brw_reg src) |
/drivers/video/Intel-2D/compiler.h |
---|
37,21 → 37,48 |
#define must_check __attribute__((warn_unused_result)) |
#define constant __attribute__((const)) |
#define pure __attribute__((pure)) |
#define __packed__ __attribute__((__packed__)) |
#define tightly_packed __attribute__((__packed__)) |
#define flatten __attribute__((flatten)) |
#define page_aligned __attribute__((aligned(4096))) |
#else
/* Non-GCC fallbacks: expand every annotation macro to nothing (or to the
 * nearest portable equivalent) so the code still compiles, just without
 * the layout/optimisation hints. */
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#define noinline
/* Fix: this was defined twice in a row ("#define force_inline" immediately
 * followed by "#define force_inline inline"), which is a macro redefinition
 * with a different replacement list — a constraint violation every compiler
 * diagnoses.  Keep the single, useful definition. */
#define force_inline inline
#define fastcall
#define must_check
#define constant
#define pure
#define __packed__
/* Fix: this was misspelled "tighly_packed"; the GCC branch above defines
 * "tightly_packed", so any non-GCC build using that annotation failed to
 * compile. */
#define tightly_packed
#define flatten
#define page_aligned
#endif
/* HAS_GCC(major, minor): non-zero when compiling with GCC >= major.minor.
 *
 * Fix: the previous version expanded to "defined(__GNUC__) && ...".  If the
 * token "defined" is produced by macro expansion inside an #if/#elif
 * condition, the behavior is undefined (C11 6.10.1p4) — some preprocessors
 * reject it outright.  Hoist the compiler check out of the expansion so the
 * macro always expands to a plain integer expression, which also makes it
 * usable in ordinary (non-preprocessor) expressions. */
#ifdef __GNUC__
#define HAS_GCC(major, minor) \
	(__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
#define HAS_GCC(major, minor) 0
#endif
/* Per-function ISA and optimisation hints; only usable on newer GCC releases.
 * NOTE(review): there is deliberately no #else branch for sse2/sse4_2/avx2 —
 * later code appears to probe them with "#if defined(sse2)", so leaving them
 * undefined on old compilers is the feature test.  Confirm before adding
 * empty fallbacks. */
#if HAS_GCC(4, 5)
/* Compile the annotated function for SSE2, doing float math in SSE. */
#define sse2 __attribute__((target("sse2,fpmath=sse")))
#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse")))
#endif
#if HAS_GCC(4, 7)
#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse")))
#endif
/* "fast": build the annotated function with -Ofast, but only when the
 * translation unit is being optimised at all. */
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast __attribute__((optimize("Ofast")))
#else
#define fast
#endif
/* "fast_memcpy": as "fast", additionally asking GCC to inline all
 * string/memory operations where the target attribute is supported. */
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((target("inline-all-stringops")))
#else
#define fast_memcpy
#endif
#ifdef HAVE_VALGRIND |
#define VG(x) x |
#else |
/drivers/video/Intel-2D/gen3_render.c |
---|
107,7 → 107,15 |
{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false}, |
{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false}, |
{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false}, |
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false} |
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false}, |
{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false}, |
{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false}, |
{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false}, |
{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true}, |
{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false}, |
{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true}, |
{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false}, |
{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true}, |
}; |
#define xFixedToDouble(f) pixman_fixed_to_double(f) |
187,7 → 195,14 |
switch (format) { |
case PICT_a8r8g8b8: |
case PICT_x8r8g8b8: |
case PICT_r5g6b5: |
case PICT_a1r5g5b5: |
case PICT_x1r5g5b5: |
case PICT_a2r10g10b10: |
case PICT_x2r10g10b10: |
case PICT_a8: |
case PICT_a4r4g4b4: |
case PICT_x4r4g4b4: |
return false; |
default: |
return true; |
207,15 → 222,257 |
case PICT_a8b8g8r8: |
case PICT_x8b8g8r8: |
return BIAS | COLR_BUF_ARGB8888; |
case PICT_r5g6b5: |
case PICT_b5g6r5: |
return BIAS | COLR_BUF_RGB565; |
case PICT_a1r5g5b5: |
case PICT_x1r5g5b5: |
case PICT_a1b5g5r5: |
case PICT_x1b5g5r5: |
return BIAS | COLR_BUF_ARGB1555; |
case PICT_a2r10g10b10: |
case PICT_x2r10g10b10: |
case PICT_a2b10g10r10: |
case PICT_x2b10g10r10: |
return BIAS | COLR_BUF_ARGB2AAA; |
case PICT_a8: |
return BIAS | COLR_BUF_8BIT; |
case PICT_a4r4g4b4: |
case PICT_x4r4g4b4: |
case PICT_a4b4g4r4: |
case PICT_x4b4g4r4: |
return BIAS | COLR_BUF_ARGB4444; |
} |
#undef BIAS |
} |
#if 0 |
/* Report whether the picture's repeat mode is one the gen3 sampler can
 * express directly; anything else must be fixed up in software. */
static bool gen3_check_repeat(PicturePtr p)
{
	if (!p->repeat)
		return true;

	return p->repeatType == RepeatNone ||
	       p->repeatType == RepeatNormal ||
	       p->repeatType == RepeatPad ||
	       p->repeatType == RepeatReflect;
}
static uint32_t gen3_filter(uint32_t filter) |
{ |
switch (filter) { |
default: |
assert(0); |
case PictFilterNearest: |
return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | |
FILTER_NEAREST << SS2_MIN_FILTER_SHIFT | |
MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); |
case PictFilterBilinear: |
return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT | |
FILTER_LINEAR << SS2_MIN_FILTER_SHIFT | |
MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); |
} |
} |
/* Only nearest and bilinear filtering are supported by the gen3 sampler. */
static bool gen3_check_filter(PicturePtr p)
{
	return p->filter == PictFilterNearest ||
	       p->filter == PictFilterBilinear;
}
/* Emit one composite rectangle (three vertices: lower-right, lower-left,
 * upper-left) whose source is a gradient with an identity transform — the
 * texture coordinates are simply the offset source coordinates. */
fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
						const struct sna_composite_op *op,
						const struct sna_composite_rectangles *r)
{
	int16_t dst_x, dst_y;
	int16_t src_x, src_y;

	dst_x = r->dst.x + op->dst.x;
	dst_y = r->dst.y + op->dst.y;
	src_x = r->src.x + op->src.offset[0];
	src_y = r->src.y + op->src.offset[1];

	/* lower-right corner */
	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	OUT_VERTEX(src_x + r->width);
	OUT_VERTEX(src_y + r->height);

	/* lower-left corner */
	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	OUT_VERTEX(src_x);
	OUT_VERTEX(src_y + r->height);

	/* upper-left corner */
	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
	OUT_VERTEX(src_x);
	OUT_VERTEX(src_y);
}
fastcall static void |
gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, |
const BoxRec *box, int nbox, |
float *v) |
{ |
do { |
v[0] = box->x2; |
v[1] = box->y2; |
v[2] = box->x2 + op->src.offset[0]; |
v[3] = box->y2 + op->src.offset[1]; |
v[4] = box->x1; |
v[5] = box->y2; |
v[6] = box->x1 + op->src.offset[0]; |
v[7] = box->y2 + op->src.offset[1]; |
v[8] = box->x1; |
v[9] = box->y1; |
v[10] = box->x1 + op->src.offset[0]; |
v[11] = box->y1 + op->src.offset[1]; |
v += 12; |
box++; |
} while (--nbox); |
} |
/* Write the vertex data for a batch of boxes with an affine-transformed
 * gradient source: each texture coordinate is run through the picture
 * transform (and scale) by _sna_get_transformed_scaled().  3 vertices per
 * box, 4 floats per vertex; nbox must be >= 1. */
fastcall static void
gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
					  const BoxRec *box, int nbox,
					  float *v)
{
	const PictTransform *transform = op->src.transform;

	do {
		/* lower-right */
		v[0] = box->x2;
		v[1] = box->y2;
		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[2], &v[3]);

		/* lower-left */
		v[4] = box->x1;
		v[5] = box->y2;
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[6], &v[7]);

		/* upper-left */
		v[8] = box->x1;
		v[9] = box->y1;
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y1 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[10], &v[11]);

		box++;
		v += 12;
	} while (--nbox);
}
fastcall static void |
gen3_emit_composite_primitive_identity_source(struct sna *sna, |
const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r) |
{ |
float w = r->width; |
float h = r->height; |
float *v; |
v = sna->render.vertices + sna->render.vertex_used; |
sna->render.vertex_used += 12; |
v[8] = v[4] = r->dst.x + op->dst.x; |
v[0] = v[4] + w; |
v[9] = r->dst.y + op->dst.y; |
v[5] = v[1] = v[9] + h; |
v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; |
v[2] = v[6] + w * op->src.scale[0]; |
v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; |
v[7] = v[3] = v[11] + h * op->src.scale[1]; |
} |
/* Write the vertex data for a batch of boxes with an untransformed texture
 * source.  Layout per box: 3 vertices x (x, y, u, v); vertices are emitted
 * lower-right, lower-left, upper-left.  nbox must be >= 1. */
fastcall static void
gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
					  const BoxRec *box, int nbox,
					  float *v)
{
	do {
		v[0] = box->x2 + op->dst.x;
		v[8] = v[4] = box->x1 + op->dst.x;
		v[5] = v[1] = box->y2 + op->dst.y;
		v[9] = box->y1 + op->dst.y;

		/* Texture coordinates are pre-scaled into [0,1] texel space. */
		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];

		v += 12;
		box++;
	} while (--nbox);
}
/* Fast path of gen3_emit_composite_primitive_identity_source for the case
 * where all destination/source offsets are zero, so the rectangle's own
 * coordinates can be used directly. */
fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
							const struct sna_composite_op *op,
							const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	/* 3 vertices: lower-right (v[0..3]), lower-left (v[4..7]),
	 * upper-left (v[8..11]); each is (x, y, u, v). */
	v[8] = v[4] = r->dst.x;
	v[9] = r->dst.y;
	v[0] = v[4] + w;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = r->src.x * op->src.scale[0];
	v[11] = r->src.y * op->src.scale[1];
	v[2] = v[6] + w * op->src.scale[0];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}
/* Emit one rectangle for a constant-colour source modulated by an
 * untransformed texture mask: the per-vertex texture coordinates come from
 * the mask channel rather than the source. */
fastcall static void
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
						     const struct sna_composite_op *op,
						     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	/* 3 vertices: lower-right, lower-left, upper-left; (x, y, u, v). */
	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;
	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];
	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1] == v[11] ? v[11] + h * op->mask.scale[1] : v[11] + h * op->mask.scale[1];
}
#endif |
fastcall static void |
gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, |
const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r) |
884,6 → 1141,7 |
state->current_dst = bo->unique_id; |
} |
assert(bo->exec); |
kgem_bo_mark_dirty(bo); |
} |
938,6 → 1196,7 |
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); |
ss2 |= S2_TEXCOORD_FMT(tex_count, |
op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D); |
assert(op->src.card_format); |
map[tex_count * 2 + 0] = |
op->src.card_format | |
gen3_ms_tiling(op->src.bo->tiling) | |
973,6 → 1232,7 |
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); |
ss2 |= S2_TEXCOORD_FMT(tex_count, |
op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D); |
assert(op->mask.card_format); |
map[tex_count * 2 + 0] = |
op->mask.card_format | |
gen3_ms_tiling(op->mask.bo->tiling) | |
1357,8 → 1617,8 |
goto start; |
} |
assert(op->floats_per_rect >= vertex_space(sna)); |
assert(rem <= vertex_space(sna)); |
assert(op->floats_per_rect <= rem); |
if (want > 1 && want * op->floats_per_rect > rem) |
want = rem / op->floats_per_rect; |
sna->render.vertex_index += 3*want; |
1494,83 → 1754,674 |
return false; |
} |
#if 0 |
/* Bind `picture` to a composite channel, choosing between hardware shaders
 * (solid / linear / radial / texture) and software fixups.  Per the caller
 * in gen3_render_composite(): returns 1 when the channel is usable, 0 when
 * the picture reduces to nothing (treat as clear), and -1 on failure. */
static int
gen3_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_op *op,
		       struct sna_composite_channel *channel,
		       int16_t x, int16_t y,
		       int16_t w, int16_t h,
		       int16_t dst_x, int16_t dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->card_format = 0;

	/* No drawable: the picture is a procedural source (solid/gradient). */
	if (picture->pDrawable == NULL) {
		SourcePict *source = picture->pSourcePict;
		int ret = -1;

		switch (source->type) {
		case SourcePictTypeSolidFill:
			DBG(("%s: solid fill [%08x], format %08x\n",
			     __FUNCTION__,
			     (unsigned)source->solidFill.color,
			     (unsigned)picture->format));
			ret = gen3_init_solid(channel, source->solidFill.color);
			break;
		case SourcePictTypeLinear:
			ret = gen3_init_linear(sna, picture, op, channel,
					       x - dst_x, y - dst_y);
			break;
		case SourcePictTypeRadial:
			ret = gen3_init_radial(sna, picture, op, channel,
					       x - dst_x, y - dst_y);
			break;
		}

		/* Gradient the hardware cannot evaluate: rasterise it on the
		 * CPU, approximately first if imprecise results are allowed. */
		if (ret == -1) {
			if (!precise)
				ret = sna_render_picture_approximate_gradient(sna, picture, channel,
									      x, y, w, h, dst_x, dst_y);
			if (ret == -1)
				ret = sna_render_picture_fixup(sna, picture, channel,
							       x, y, w, h, dst_x, dst_y);
		}
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	/* A drawable that samples to a single colour can use the solid path. */
	if (sna_picture_is_solid(picture, &color)) {
		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
		return gen3_init_solid(channel, color);
	}

	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
		return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
	}

	/* Unsupported repeat or filter modes must be resolved in software. */
	if (!gen3_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen3_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;
	channel->pict_format = picture->format;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	/* A pure integer translation can be folded into the coordinates,
	 * dropping the transform (and the need for filtering) entirely. */
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else {
		channel->transform = picture->transform;
		channel->is_affine = sna_transform_is_affine(picture->transform);
	}

	/* If neither the native nor an alternate layout matches, convert. */
	if (!gen3_composite_channel_set_format(channel, picture->format) &&
	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);
	assert(channel->card_format);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
		     __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height,
		     x, y, w, h));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
static inline bool |
source_use_blt(struct sna *sna, PicturePtr picture) |
{ |
/* If it is a solid, try to use the BLT paths */ |
if (!picture->pDrawable) |
return picture->pSourcePict->type == SourcePictTypeSolidFill; |
if (picture->pDrawable->width == 1 && |
picture->pDrawable->height == 1 && |
picture->repeat) |
return true; |
if (too_large(picture->pDrawable->width, picture->pDrawable->height)) |
return true; |
return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER); |
} |
/* Heuristic: should this composite be attempted on the BLT engine first?
 * Returns true when switching to (or staying on) the blitter looks cheaper
 * than a render-pipeline setup. */
static bool
try_blt(struct sna *sna,
	PicturePtr dst,
	PicturePtr src,
	int width, int height)
{
	/* Already in BLT mode: avoid a 2D -> 3D context switch. */
	if (sna->kgem.mode != KGEM_RENDER) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return true;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		return true;
	}

	if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
		DBG(("%s: target too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__,
		     dst->pDrawable->width, dst->pDrawable->height));
		return true;
	}

	/* is the source picture only in cpu memory e.g. a shm pixmap? */
	return source_use_blt(sna, src);
}
#endif |
/* Re-align the vertex buffer cursor when the vertex layout changes size.
 * The index is rounded up to the next whole vertex of the new stride so
 * previously emitted data is never overwritten; flushes first if the
 * remaining space cannot hold even two rectangles of the new layout. */
static void
gen3_align_vertex(struct sna *sna,
		  const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen3_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen3.last_floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		/* Round up to a whole vertex of the new stride. */
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
		sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
	}
}
static inline bool is_constant_ps(uint32_t type) |
{ |
switch (type) { |
case SHADER_NONE: /* be warned! */ |
case SHADER_ZERO: |
case SHADER_BLACK: |
case SHADER_WHITE: |
case SHADER_CONSTANT: |
return true; |
default: |
return false; |
} |
} |
#if 0 |
/* Decide whether the whole composite should be done on the CPU.  Returns
 * true to fall back; false to proceed on the GPU.  The ordering matters:
 * hard "must fallback" cases first, then "anything already on the GPU wins",
 * then CPU-residency heuristics. */
static bool
gen3_composite_fallback(struct sna *sna,
			uint8_t op,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen3_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* Component-alpha needs both source colour and alpha in the blend;
	 * only PictOpOver can be rewritten (via the magic CA pass). */
	if (mask &&
	    mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
	    gen3_blend_op[op].src_alpha &&
	    gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
	    op != PictOpOver) {
		DBG(("%s: component-alpha mask with op=%d, should fallback\n",
		     __FUNCTION__, op));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place onthe CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask && mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
	return dst_use_cpu(dst_pixmap);
}
static bool |
gen3_render_composite(struct sna *sna, |
uint8_t op, |
PicturePtr src, |
PicturePtr mask, |
PicturePtr dst, |
int16_t src_x, int16_t src_y, |
int16_t mask_x, int16_t mask_y, |
int16_t dst_x, int16_t dst_y, |
int16_t width, int16_t height, |
struct sna_composite_op *tmp) |
{ |
DBG(("%s()\n", __FUNCTION__)); |
if (op >= ARRAY_SIZE(gen3_blend_op)) { |
DBG(("%s: fallback due to unhandled blend op: %d\n", |
__FUNCTION__, op)); |
return false; |
} |
/* Try to use the BLT engine unless it implies a |
* 3D -> 2D context switch. |
*/ |
if (mask == NULL && |
try_blt(sna, dst, src, width, height) && |
sna_blt_composite(sna, |
op, src, dst, |
src_x, src_y, |
dst_x, dst_y, |
width, height, |
tmp, false)) |
return true; |
if (gen3_composite_fallback(sna, op, src, mask, dst)) |
return false; |
if (need_tiling(sna, width, height)) |
return sna_tiling_composite(op, src, mask, dst, |
src_x, src_y, |
mask_x, mask_y, |
dst_x, dst_y, |
width, height, |
tmp); |
if (!gen3_composite_set_target(sna, tmp, dst, |
dst_x, dst_y, width, height)) { |
DBG(("%s: unable to set render target\n", |
__FUNCTION__)); |
return false; |
} |
tmp->op = op; |
tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format); |
if (too_large(tmp->dst.width, tmp->dst.height) || |
!gen3_check_pitch_3d(tmp->dst.bo)) { |
if (!sna_render_composite_redirect(sna, tmp, |
dst_x, dst_y, width, height, |
op > PictOpSrc || dst->pCompositeClip->data)) |
return false; |
} |
tmp->u.gen3.num_constants = 0; |
tmp->src.u.gen3.type = SHADER_TEXTURE; |
tmp->src.is_affine = true; |
DBG(("%s: preparing source\n", __FUNCTION__)); |
switch (gen3_composite_picture(sna, src, tmp, &tmp->src, |
src_x, src_y, |
width, height, |
dst_x, dst_y, |
dst->polyMode == PolyModePrecise)) { |
case -1: |
goto cleanup_dst; |
case 0: |
tmp->src.u.gen3.type = SHADER_ZERO; |
break; |
case 1: |
if (mask == NULL && tmp->src.bo && |
sna_blt_composite__convert(sna, |
dst_x, dst_y, width, height, |
tmp)) |
return true; |
gen3_composite_channel_convert(&tmp->src); |
break; |
} |
DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type)); |
tmp->mask.u.gen3.type = SHADER_NONE; |
tmp->mask.is_affine = true; |
tmp->need_magic_ca_pass = false; |
tmp->has_component_alpha = false; |
if (mask && tmp->src.u.gen3.type != SHADER_ZERO) { |
if (!reuse_source(sna, |
src, &tmp->src, src_x, src_y, |
mask, &tmp->mask, mask_x, mask_y)) { |
tmp->mask.u.gen3.type = SHADER_TEXTURE; |
DBG(("%s: preparing mask\n", __FUNCTION__)); |
switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask, |
mask_x, mask_y, |
width, height, |
dst_x, dst_y, |
dst->polyMode == PolyModePrecise)) { |
case -1: |
goto cleanup_src; |
case 0: |
tmp->mask.u.gen3.type = SHADER_ZERO; |
break; |
case 1: |
gen3_composite_channel_convert(&tmp->mask); |
break; |
} |
} |
DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type)); |
if (tmp->mask.u.gen3.type == SHADER_ZERO) { |
if (tmp->src.bo) { |
kgem_bo_destroy(&sna->kgem, |
tmp->src.bo); |
tmp->src.bo = NULL; |
} |
tmp->src.u.gen3.type = SHADER_ZERO; |
tmp->mask.u.gen3.type = SHADER_NONE; |
} |
if (tmp->mask.u.gen3.type != SHADER_NONE) { |
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { |
/* Check if it's component alpha that relies on a source alpha |
* and on the source value. We can only get one of those |
* into the single source value that we get to blend with. |
*/ |
DBG(("%s: component-alpha mask: %d\n", |
__FUNCTION__, tmp->mask.u.gen3.type)); |
tmp->has_component_alpha = true; |
if (tmp->mask.u.gen3.type == SHADER_WHITE) { |
tmp->mask.u.gen3.type = SHADER_NONE; |
tmp->has_component_alpha = false; |
} else if (gen3_blend_op[op].src_alpha && |
gen3_blend_op[op].src_blend != BLENDFACT_ZERO) { |
if (op != PictOpOver) |
goto cleanup_mask; |
tmp->need_magic_ca_pass = true; |
tmp->op = PictOpOutReverse; |
} |
} else { |
if (tmp->mask.is_opaque) { |
tmp->mask.u.gen3.type = SHADER_NONE; |
} else if (is_constant_ps(tmp->src.u.gen3.type) && |
is_constant_ps(tmp->mask.u.gen3.type)) { |
uint32_t v; |
v = multa(tmp->src.u.gen3.mode, |
tmp->mask.u.gen3.mode, |
24); |
v |= multa(tmp->src.u.gen3.mode, |
tmp->mask.u.gen3.mode, |
16); |
v |= multa(tmp->src.u.gen3.mode, |
tmp->mask.u.gen3.mode, |
8); |
v |= multa(tmp->src.u.gen3.mode, |
tmp->mask.u.gen3.mode, |
0); |
DBG(("%s: combining constant source/mask: %x x %x -> %x\n", |
__FUNCTION__, |
tmp->src.u.gen3.mode, |
tmp->mask.u.gen3.mode, |
v)); |
static void |
gen3_align_vertex(struct sna *sna, |
const struct sna_composite_op *op) |
{ |
if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) { |
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) |
gen3_vertex_finish(sna); |
tmp->src.u.gen3.type = SHADER_CONSTANT; |
tmp->src.u.gen3.mode = v; |
tmp->src.is_opaque = false; |
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", |
sna->render_state.gen3.last_floats_per_vertex, |
op->floats_per_vertex, |
sna->render.vertex_index, |
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); |
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; |
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; |
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect); |
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex; |
tmp->mask.u.gen3.type = SHADER_NONE; |
} |
} |
} |
} |
DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__, |
tmp->src.u.gen3.type, tmp->mask.u.gen3.type, |
tmp->src.is_affine, tmp->mask.is_affine)); |
tmp->prim_emit = gen3_emit_composite_primitive; |
if (is_constant_ps(tmp->mask.u.gen3.type)) { |
switch (tmp->src.u.gen3.type) { |
case SHADER_NONE: |
case SHADER_ZERO: |
case SHADER_BLACK: |
case SHADER_WHITE: |
case SHADER_CONSTANT: |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2; |
tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_constant; |
tmp->emit_boxes = gen3_emit_composite_boxes_constant; |
} |
break; |
case SHADER_LINEAR: |
case SHADER_RADIAL: |
if (tmp->src.transform == NULL) { |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2; |
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient; |
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient; |
} |
} else if (tmp->src.is_affine) { |
tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2]; |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2; |
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient; |
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient; |
} |
} |
break; |
case SHADER_TEXTURE: |
if (tmp->src.transform == NULL) { |
if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) { |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2; |
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset; |
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset; |
} |
} else { |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2; |
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_identity_source; |
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source; |
} |
} |
} else if (tmp->src.is_affine) { |
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; |
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2; |
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_affine_source; |
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source; |
} |
} |
break; |
} |
} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) { |
if (tmp->mask.transform == NULL) { |
if (is_constant_ps(tmp->src.u.gen3.type)) { |
if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) { |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset; |
} |
} else { |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask; |
} |
} |
} else if (tmp->src.transform == NULL) { |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask; |
} |
} else if (tmp->src.is_affine) { |
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; |
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; |
#if defined(sse2) && !defined(__x86_64__) |
if (sna->cpu_features & SSE2) { |
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2; |
} else |
#endif |
{ |
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask; |
} |
} |
} |
} |
tmp->floats_per_vertex = 2; |
if (!is_constant_ps(tmp->src.u.gen3.type)) |
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4; |
if (!is_constant_ps(tmp->mask.u.gen3.type)) |
tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4; |
DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__, |
!is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0, |
!is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0, |
tmp->floats_per_vertex, |
tmp->prim_emit != gen3_emit_composite_primitive)); |
tmp->floats_per_rect = 3 * tmp->floats_per_vertex; |
tmp->blt = gen3_render_composite_blt; |
tmp->box = gen3_render_composite_box; |
tmp->boxes = gen3_render_composite_boxes__blt; |
if (tmp->emit_boxes) { |
tmp->boxes = gen3_render_composite_boxes; |
tmp->thread_boxes = gen3_render_composite_boxes__thread; |
} |
tmp->done = gen3_render_composite_done; |
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, |
NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, |
NULL)) |
goto cleanup_mask; |
} |
gen3_emit_composite_state(sna, tmp); |
gen3_align_vertex(sna, tmp); |
return true; |
cleanup_mask: |
if (tmp->mask.bo) |
kgem_bo_destroy(&sna->kgem, tmp->mask.bo); |
cleanup_src: |
if (tmp->src.bo) |
kgem_bo_destroy(&sna->kgem, tmp->src.bo); |
cleanup_dst: |
if (tmp->redirect.real_bo) |
kgem_bo_destroy(&sna->kgem, tmp->dst.bo); |
return false; |
} |
#endif |
1710,19 → 2561,6 |
static inline bool is_constant_ps(uint32_t type) |
{ |
switch (type) { |
case SHADER_NONE: /* be warned! */ |
case SHADER_ZERO: |
case SHADER_BLACK: |
case SHADER_WHITE: |
case SHADER_CONSTANT: |
return true; |
default: |
return false; |
} |
} |
1820,17 → 2658,58 |
/* Flush any buffered vertex data; by this point all relocations and the
 * vertex offset must already have been consumed. */
static void gen3_render_flush(struct sna *sna)
{
	gen3_vertex_close(sna);

	assert(sna->render.vertex_reloc[0] == 0);
	assert(sna->render.vertex_offset == 0);
}
/* Teardown hook for the gen3 renderer; nothing to release. */
static void
gen3_render_fini(struct sna *sna)
{
}
/* Install the gen3 render backend callbacks and limits on sna->render.
 * The full composite/copy/fill entry points are compiled out (#if 0) in
 * this build; only the blit_tex path and housekeeping hooks are wired up.
 * Returns the backend name string. */
const char *gen3_render_init(struct sna *sna, const char *backend)
{
	struct sna_render *render = &sna->render;

#if 0
#if !NO_COMPOSITE
	render->composite = gen3_render_composite;
	render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	render->check_composite_spans = gen3_check_composite_spans;
	render->composite_spans = gen3_render_composite_spans;
	render->prefer_gpu |= PREFER_GPU_SPANS;
#endif

	render->video = gen3_render_video;

	render->copy_boxes = gen3_render_copy_boxes;
	render->copy = gen3_render_copy;

	render->fill_boxes = gen3_render_fill_boxes;
	render->fill = gen3_render_fill;
	render->fill_one = gen3_render_fill_one;
#endif

	render->blit_tex = gen3_blit_tex;
	render->caps = HW_BIT_BLIT | HW_TEX_BLIT;

	render->reset = gen3_render_reset;
	render->flush = gen3_render_flush;
	render->fini = gen3_render_fini;

	render->max_3d_size = MAX_3D_SIZE;
	render->max_3d_pitch = MAX_3D_PITCH;

	sna->kgem.retire = gen3_render_retire;
	sna->kgem.expire = gen3_render_expire;
	return "Alviso (gen3)";
}
static bool |
gen3_blit_tex(struct sna *sna, |
1933,39 → 2812,3 |
gen3_align_vertex(sna, tmp); |
return true; |
} |
/* NOTE(review): this appears to be a second copy of gen3_render_flush from
 * an older diff hunk of the same file -- two definitions of this symbol
 * cannot coexist in one translation unit; confirm which hunk is current. */
static void gen3_render_flush(struct sna *sna)
{
	gen3_vertex_close(sna);

	assert(sna->render.vertex_reloc[0] == 0);
	assert(sna->render.vertex_offset == 0);
}
/* NOTE(review): duplicate of gen3_render_fini above -- older hunk of the
 * same diff; confirm which version is live before building. */
static void
gen3_render_fini(struct sna *sna)
{
}
/* NOTE(review): older variant of gen3_render_init (bool return, no backend
 * name) retained by the diff alongside the newer const char * variant --
 * the two conflict; only one can be compiled. */
bool gen3_render_init(struct sna *sna)
{
	struct sna_render *render = &sna->render;

	/* Video path disabled in this port. */
//    render->video = gen3_render_video;

	render->blit_tex = gen3_blit_tex;

	render->reset = gen3_render_reset;
	render->flush = gen3_render_flush;
	render->fini = gen3_render_fini;

	/* gen3 3D pipe limits. */
	render->max_3d_size = MAX_3D_SIZE;
	render->max_3d_pitch = MAX_3D_PITCH;
	render->caps = HW_BIT_BLIT | HW_TEX_BLIT;

	sna->kgem.retire = gen3_render_retire;
	sna->kgem.expire = gen3_render_expire;
	return true;
}
/drivers/video/Intel-2D/gen4_render.c |
---|
51,6 → 51,7 |
*/ |
#define FORCE_SPANS 0 |
#define FORCE_NONRECTILINEAR_SPANS -1 |
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */ |
#define NO_COMPOSITE 0 |
#define NO_COMPOSITE_SPANS 0 |
61,6 → 62,8 |
#define NO_FILL_BOXES 0 |
#define NO_VIDEO 0 |
#define MAX_FLUSH_VERTICES 6 |
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) |
/* Set up a default static partitioning of the URB, which is supposed to |
286,8 → 289,24 |
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; |
case PICT_x8r8g8b8: |
return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM; |
case PICT_a8b8g8r8: |
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; |
case PICT_x8b8g8r8: |
return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM; |
case PICT_a2r10g10b10: |
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; |
case PICT_x2r10g10b10: |
return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM; |
case PICT_r8g8b8: |
return GEN4_SURFACEFORMAT_R8G8B8_UNORM; |
case PICT_r5g6b5: |
return GEN4_SURFACEFORMAT_B5G6R5_UNORM; |
case PICT_a1r5g5b5: |
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; |
case PICT_a8: |
return GEN4_SURFACEFORMAT_A8_UNORM; |
case PICT_a4r4g4b4: |
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; |
} |
} |
299,8 → 318,22 |
case PICT_a8r8g8b8: |
case PICT_x8r8g8b8: |
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; |
case PICT_a8b8g8r8: |
case PICT_x8b8g8r8: |
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; |
case PICT_a2r10g10b10: |
case PICT_x2r10g10b10: |
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; |
case PICT_r5g6b5: |
return GEN4_SURFACEFORMAT_B5G6R5_UNORM; |
case PICT_x1r5g5b5: |
case PICT_a1r5g5b5: |
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; |
case PICT_a8: |
return GEN4_SURFACEFORMAT_A8_UNORM; |
case PICT_a4r4g4b4: |
case PICT_x4r4g4b4: |
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; |
} |
} |
393,7 → 426,7 |
assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo)); |
/* After the first bind, we manage the cache domains within the batch */ |
offset = kgem_bo_get_binding(bo, format); |
offset = kgem_bo_get_binding(bo, format | is_dst << 31); |
if (offset) { |
if (is_dst) |
kgem_bo_mark_dirty(bo); |
408,9 → 441,10 |
GEN4_SURFACE_BLEND_ENABLED | |
format << GEN4_SURFACE_FORMAT_SHIFT); |
if (is_dst) |
if (is_dst) { |
ss[0] |= GEN4_SURFACE_RC_READ_WRITE; |
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; |
else |
} else |
domains = I915_GEM_DOMAIN_SAMPLER << 16; |
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); |
421,7 → 455,7 |
ss[4] = 0; |
ss[5] = 0; |
kgem_bo_set_binding(bo, format, offset); |
kgem_bo_set_binding(bo, format | is_dst << 31, offset); |
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", |
offset, bo->handle, ss[1], |
476,7 → 510,7 |
static bool gen4_rectangle_begin(struct sna *sna, |
const struct sna_composite_op *op) |
{ |
int id = op->u.gen4.ve_id; |
unsigned int id = 1 << op->u.gen4.ve_id; |
int ndwords; |
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) |
484,13 → 518,14 |
/* 7xpipelined pointers + 6xprimitive + 1xflush */ |
ndwords = op->need_magic_ca_pass? 20 : 6; |
if ((sna->render.vb_id & (1 << id)) == 0) |
if ((sna->render.vb_id & id) == 0) |
ndwords += 5; |
ndwords += 2*FORCE_FLUSH; |
if (!kgem_check_batch(&sna->kgem, ndwords)) |
return false; |
if ((sna->render.vb_id & (1 << id)) == 0) |
if ((sna->render.vb_id & id) == 0) |
gen4_emit_vertex_buffer(sna, op); |
if (sna->render.vertex_offset == 0) |
gen4_emit_primitive(sna); |
508,7 → 543,8 |
return rem; |
} |
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6)) |
if (!kgem_check_batch(&sna->kgem, |
2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6))) |
return 0; |
if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) |
return 0; |
534,6 → 570,27 |
int rem; |
assert(want); |
#if FORCE_FLUSH |
rem = sna->render.vertex_offset; |
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) |
rem = sna->kgem.nbatch - 5; |
if (rem) { |
rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3; |
if (rem <= 0) { |
if (sna->render.vertex_offset) { |
gen4_vertex_flush(sna); |
if (gen4_magic_ca_pass(sna, op)) |
gen4_emit_pipelined_pointers(sna, op, op->op, |
op->u.gen4.wm_kernel); |
} |
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); |
rem = MAX_FLUSH_VERTICES; |
} |
} else |
rem = MAX_FLUSH_VERTICES; |
if (want > rem) |
want = rem; |
#endif |
start: |
rem = vertex_space(sna); |
552,8 → 609,8 |
goto start; |
} |
assert(op->floats_per_rect >= vertex_space(sna)); |
assert(rem <= vertex_space(sna)); |
assert(op->floats_per_rect <= rem); |
if (want > 1 && want * op->floats_per_rect > rem) |
want = rem / op->floats_per_rect; |
673,7 → 730,7 |
{ |
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); |
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { |
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) { |
DBG(("%s: flushing batch: %d < %d+%d\n", |
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, |
150, 4*8)); |
904,6 → 961,8 |
{ |
bool flush; |
assert(op->dst.bo->exec); |
flush = wm_binding_table & 1; |
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { |
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__, |
984,23 → 1043,450 |
op->prim_emit(sna, op, r); |
} |
#if 0 |
fastcall static void |
gen4_render_composite_box(struct sna *sna, |
const struct sna_composite_op *op, |
const BoxRec *box) |
{ |
struct sna_composite_rectangles r; |
DBG((" %s: (%d, %d), (%d, %d)\n", |
__FUNCTION__, |
box->x1, box->y1, box->x2, box->y2)); |
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); |
r.dst.x = box->x1; |
r.dst.y = box->y1; |
r.width = box->x2 - box->x1; |
r.height = box->y2 - box->y1; |
r.mask = r.src = r.dst; |
op->prim_emit(sna, op, &r); |
} |
static void |
gen4_render_composite_boxes__blt(struct sna *sna, |
const struct sna_composite_op *op, |
const BoxRec *box, int nbox) |
{ |
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", |
__FUNCTION__, nbox, op->dst.x, op->dst.y, |
op->src.offset[0], op->src.offset[1], |
op->src.width, op->src.height, |
op->mask.offset[0], op->mask.offset[1], |
op->mask.width, op->mask.height)); |
do { |
int nbox_this_time; |
nbox_this_time = gen4_get_rectangles(sna, op, nbox, |
gen4_bind_surfaces); |
nbox -= nbox_this_time; |
do { |
struct sna_composite_rectangles r; |
DBG((" %s: (%d, %d), (%d, %d)\n", |
__FUNCTION__, |
box->x1, box->y1, box->x2, box->y2)); |
r.dst.x = box->x1; |
r.dst.y = box->y1; |
r.width = box->x2 - box->x1; |
r.height = box->y2 - box->y1; |
r.mask = r.src = r.dst; |
op->prim_emit(sna, op, &r); |
box++; |
} while (--nbox_this_time); |
} while (nbox); |
} |
static void |
gen4_render_composite_boxes(struct sna *sna, |
const struct sna_composite_op *op, |
const BoxRec *box, int nbox) |
{ |
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); |
do { |
int nbox_this_time; |
float *v; |
nbox_this_time = gen4_get_rectangles(sna, op, nbox, |
gen4_bind_surfaces); |
assert(nbox_this_time); |
nbox -= nbox_this_time; |
v = sna->render.vertices + sna->render.vertex_used; |
sna->render.vertex_used += nbox_this_time * op->floats_per_rect; |
op->emit_boxes(op, box, nbox_this_time, v); |
box += nbox_this_time; |
} while (nbox); |
} |
#if !FORCE_FLUSH |
/* Threaded variant of the bulk box emitter: vertex space is reserved under
 * the render lock, then the lock is dropped (while holding a vertex
 * reference) so emit_boxes can run concurrently with other emitters.
 * The acquire/unlock ... lock/release pairing is strictly ordered --
 * do not reorder these calls. */
static void
gen4_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		/* Reserve batch + vertex space while locked. */
		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Pin the reserved vertices, then emit outside the lock. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif |
#ifndef MAX |
#define MAX(a,b) ((a) > (b) ? (a) : (b)) |
#endif |
/* Emit a SURFACE_STATE entry for one plane of a video frame, bound for
 * sampler reads only, and return its byte offset within the batch (the
 * value stored in the binding table). Width/height/pitch are programmed
 * as value-minus-one per the hardware encoding. */
static uint32_t gen4_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen4_surface_state *surf;

	/* Carve a fresh, zeroed surface-state slot out of the batch tail. */
	sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	surf = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*surf));

	surf->ss0.surface_type = GEN4_SURFACE_2D;
	surf->ss0.surface_format = src_surf_format;
	surf->ss0.color_blend = 1;

	/* Relocation patches the bo address (+plane offset) into ss1. */
	surf->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	surf->ss2.width = src_width - 1;
	surf->ss2.height = src_height - 1;
	surf->ss3.pitch = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
/* Bind all surfaces for a video blit: slot 0 is the render target, slots
 * 1..n are the source planes. Planar FOURCCs get six R8 source surfaces
 * (Y twice, then V and U pairs at half resolution); packed formats get a
 * single YCrCb surface. Finishes by (re)emitting the pipeline state. */
static void gen4_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;

	/* Plane base offsets within frame->bo: Y at 0, then V, then U.
	 * Each is listed twice to match the shader's sampler layout --
	 * presumably required by WM_KERNEL_VIDEO_PLANAR; confirm against
	 * the kernel source. */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
		src_width[1] = src_width[0] = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1] = src_pitch[0] = frame->pitch[1];
		/* Chroma planes are subsampled to half size in each axis. */
		src_width[4] = src_width[5] = src_width[2] = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		/* Packed YUV: single surface, byte order per FOURCC. */
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0] = frame->width;
		src_height[0] = frame->height;
		src_pitch[0] = frame->pitch[0];
		n_src = 1;
	}

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	/* Slot 0: destination, bound for render-target writes. */
	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen4_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	/* The low bit of the offset carries the dirty flag into emit_state. */
	gen4_emit_state(sna, op, offset | dirty);
}
/* Render a video frame onto a pixmap via the 3D pipe, scaling the source
 * rectangle over the destination region and clipping to each box of
 * dstRegion. Returns false only if the pixmap cannot be moved to the GPU.
 * The 'video' argument is not used in the visible body. */
static bool
gen4_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	struct sna_pixmap *priv;
	BoxPtr box;

	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	/* Build a minimal composite op describing the video blit. */
	memset(&tmp, 0, sizeof(tmp));

	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	/* 1:1 copies can use nearest sampling; anything scaled is filtered. */
	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen4.wm_kernel =
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
	tmp.u.gen4.ve_id = 2;
	tmp.is_affine = true;
	/* 3 floats/vertex (x, y, plus one texcoord pair packed), 3 vertices
	 * per rectangle -> 9 floats per rect. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;

	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
	}

	gen4_video_bind_surfaces(sna, &tmp);
	gen4_align_vertex(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	/* Map destination pixels to normalised source texture coordinates. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	do {
		int n;

		n = gen4_get_rectangles(sna, &tmp, nbox,
					gen4_video_bind_surfaces);
		assert(n);
		nbox -= n;

		do {
			BoxRec r;

			r.x1 = box->x1 + pix_xoff;
			r.x2 = box->x2 + pix_xoff;
			r.y1 = box->y1 + pix_yoff;
			r.y2 = box->y2 + pix_yoff;

			/* Three vertices per rect: bottom-right, bottom-left,
			 * top-left, each with its texcoord. */
			OUT_VERTEX(r.x2, r.y2);
			OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y2);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

			OUT_VERTEX(r.x1, r.y1);
			OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
			OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

			if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
				sna_damage_add_box(&priv->gpu_damage, &r);
				sna_damage_subtract_box(&priv->cpu_damage, &r);
			}
			box++;
		} while (--n);
	} while (nbox);

	gen4_vertex_flush(sna);
	return true;
}
/* Prepare one composite channel (src or mask) from a Picture.
 * Returns 1 on success with the channel bound to a bo, 0 when the picture
 * reduced to nothing usable (caller substitutes a solid), and -1 on hard
 * failure. Gradients, alpha maps, and unsupported repeat/filter modes are
 * routed through the software fixup paths. */
static int
gen4_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	/* Solid colours bypass all sampling machinery. */
	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	/* No drawable => procedural source (gradient etc.). */
	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		/* Imprecise rendering may use the cheaper approximation. */
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_repeat(picture)) {
		DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen4_check_filter(picture)) {
		DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* Pure integer translations are folded into (x, y) so the sampler
	 * needs no transform (and nearest filtering suffices). */
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen4_get_card_format(picture->format);
	if (channel->card_format == -1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Oversized pixmaps are cropped to the sampled window. */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
static void gen4_composite_channel_convert(struct sna_composite_channel *channel) |
{ |
DBG(("%s: repeat %d -> %d, filter %d -> %d\n", |
__FUNCTION__, |
channel->repeat, gen4_repeat(channel->repeat), |
channel->filter, gen4_repeat(channel->filter))); |
channel->repeat = gen4_repeat(channel->repeat); |
channel->filter = gen4_filter(channel->filter); |
if (channel->card_format == (unsigned)-1) |
channel->card_format = gen4_get_card_format(channel->pict_format); |
} |
#endif |
static void |
gen4_render_composite_done(struct sna *sna, |
const struct sna_composite_op *op) |
1014,122 → 1500,426 |
} |
#if 0 |
/* Bind the destination Picture to the composite op: pick (or force) a GPU
 * bo for the drawable, record its geometry and damage pointer, and redirect
 * through a proxy target if the pixmap exceeds the 3D pipe limits.
 * Returns false if no usable GPU target could be obtained. */
static bool
gen4_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;
	op->dst.format = dst->format;
	/* Damage only the actual operation extents when known; otherwise
	 * fall back to the full picture extents. */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
					  &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	/* Too big for the 3D pipe: composite via a temporary proxy bo. */
	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;

	return true;
}
/* Heuristic: should this composite be attempted on the blitter rather than
 * the 3D pipe? True when we are already in BLT mode, when the operation or
 * destination exceeds 3D limits, for solid sources, or when the source
 * lives only in CPU memory. */
static bool
try_blt(struct sna *sna,
	PicturePtr dst, PicturePtr src,
	int width, int height)
{
	if (sna->kgem.mode != KGEM_RENDER) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return true;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		return true;
	}

	if (too_large(dst->pDrawable->width, dst->pDrawable->height))
		return true;

	/* The blitter is much faster for solids */
	if (sna_picture_is_solid(src, NULL))
		return true;

	/* is the source picture only in cpu memory e.g. a shm pixmap? */
	return picture_is_cpu(sna, src);
}
static bool |
check_gradient(PicturePtr picture, bool precise) |
{ |
switch (picture->pSourcePict->type) { |
case SourcePictTypeSolidFill: |
case SourcePictTypeLinear: |
return false; |
default: |
return precise; |
} |
} |
/* True if the picture carries a separate alpha map (unsupported by the
 * gen4 sampler setup here, so it forces a fallback). */
static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap != NULL;
}
/* True if the picture is an untransformed drawable that currently lives
 * off-GPU, i.e. using it would force an upload. */
static bool
need_upload(struct sna *sna, PicturePtr p)
{
	return p->pDrawable && untransformed(p) &&
		!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}
/* True if the pixmap's backing storage is currently busy on the GPU (or
 * holds GPU-only damage), meaning a CPU readback would stall -- used to
 * bias the fallback decision towards staying on the GPU. */
static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);
	if (priv == NULL)
		return false;

	/* A cleared pixmap is trivially reproducible on the CPU. */
	if (priv->clear)
		return false;

	if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
		return true;

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	return priv->gpu_damage && !priv->cpu_damage;
}
/* Decide whether this source picture would force a software fallback:
 * unsupported gradients, repeat modes or formats are hard failures, while
 * alpha maps, filters and CPU-resident sources are soft ones -- tolerated
 * when the pixmap is already busy on the GPU. */
static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
		return true;

	/* soft errors: prefer to upload/compute rather than readback */
	if (pixmap && source_is_busy(pixmap))
		return false;

	return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}
/* Master fallback decision for a composite: returns true when the whole
 * operation should run on the CPU. The rule of thumb: if anything involved
 * is already on the GPU, keep the operation there; if the destination is on
 * the CPU and a source would need readback or fixup, stay on the CPU. */
static bool
gen4_composite_fallback(struct sna *sna,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen4_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(sna, src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(sna, mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
	 * render one of the sources using the CPU, we may
	 * as well do the entire operation in place on the CPU.
	 */
	if (src_fallback) {
		DBG(("%s: dst is on the CPU and src will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (mask_fallback) {
		DBG(("%s: dst is on the CPU and mask will fallback\n",
		     __FUNCTION__));
		return true;
	}

	if (too_large(dst_pixmap->drawable.width,
		      dst_pixmap->drawable.height) &&
	    dst_is_cpu(dst_pixmap)) {
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
		return true;
	}

	DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
	     __FUNCTION__));
	return dst_use_cpu(dst_pixmap);
}
/* Try to initialise the mask channel by reusing the already-prepared
 * source channel (same picture, or same drawable with compatible sampling
 * parameters, at identical coordinates). Returns non-zero on success; on
 * the solid-mask shortcut the return value is that of channel_init_solid.
 * Each successful reuse takes an extra reference on the shared bo. */
static int
reuse_source(struct sna *sna,
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
	uint32_t color;

	/* Reuse only works when both channels sample the same origin. */
	if (src_x != msk_x || src_y != msk_y)
		return false;

	if (src == mask) {
		DBG(("%s: mask is source\n", __FUNCTION__));
		*mc = *sc;
		mc->bo = kgem_bo_reference(mc->bo);
		return true;
	}

	if (sna_picture_is_solid(mask, &color))
		return gen4_channel_init_solid(sna, mc, color);

	if (sc->is_solid)
		return false;

	if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
		return false;

	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

	if (!sna_transform_equal(src->transform, mask->transform))
		return false;

	if (!sna_picture_alphamap_equal(src, mask))
		return false;

	if (!gen4_check_repeat(mask))
		return false;

	if (!gen4_check_filter(mask))
		return false;

	if (!gen4_check_format(mask->format))
		return false;

	DBG(("%s: reusing source channel for mask with a twist\n",
	     __FUNCTION__));

	/* Same bo, but the mask may use different repeat/filter/format. */
	*mc = *sc;
	mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
	mc->filter = gen4_filter(mask->filter);
	mc->pict_format = mask->format;
	mc->card_format = gen4_get_card_format(mask->format);
	mc->bo = kgem_bo_reference(mc->bo);
	return true;
}
static bool |
gen4_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp) |
gen4_render_composite(struct sna *sna, |
uint8_t op, |
PicturePtr src, |
PicturePtr mask, |
PicturePtr dst, |
int16_t src_x, int16_t src_y, |
int16_t msk_x, int16_t msk_y, |
int16_t dst_x, int16_t dst_y, |
int16_t width, int16_t height, |
struct sna_composite_op *tmp) |
{ |
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, |
width, height, sna->kgem.mode)); |
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, |
width, height, sna->kgem.ring)); |
if (op >= ARRAY_SIZE(gen4_blend_op)) |
return false; |
tmp->op = PictOpSrc; |
if (mask == NULL && |
try_blt(sna, dst, src, width, height) && |
sna_blt_composite(sna, op, |
src, dst, |
src_x, src_y, |
dst_x, dst_y, |
width, height, |
tmp, false)) |
return true; |
tmp->dst.pixmap = dst; |
tmp->dst.bo = dst_bo; |
tmp->dst.width = dst->drawable.width; |
tmp->dst.height = dst->drawable.height; |
tmp->dst.format = PICT_x8r8g8b8; |
if (gen4_composite_fallback(sna, src, mask, dst)) |
return false; |
if (need_tiling(sna, width, height)) |
return sna_tiling_composite(op, src, mask, dst, |
src_x, src_y, |
msk_x, msk_y, |
dst_x, dst_y, |
width, height, |
tmp); |
tmp->src.repeat = RepeatNone; |
tmp->src.filter = PictFilterNearest; |
tmp->src.is_affine = true; |
if (!gen4_composite_set_target(sna, tmp, dst, |
dst_x, dst_y, width, height, |
op > PictOpSrc || dst->pCompositeClip->data)) { |
DBG(("%s: failed to set composite target\n", __FUNCTION__)); |
return false; |
} |
tmp->src.bo = src_bo; |
tmp->src.pict_format = PICT_x8r8g8b8; |
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format); |
tmp->src.width = src->drawable.width; |
tmp->src.height = src->drawable.height; |
tmp->op = op; |
switch (gen4_composite_picture(sna, src, &tmp->src, |
src_x, src_y, |
width, height, |
dst_x, dst_y, |
dst->polyMode == PolyModePrecise)) { |
case -1: |
DBG(("%s: failed to prepare source\n", __FUNCTION__)); |
goto cleanup_dst; |
case 0: |
if (!gen4_channel_init_solid(sna, &tmp->src, 0)) |
goto cleanup_dst; |
/* fall through to fixup */ |
case 1: |
if (mask == NULL && |
sna_blt_composite__convert(sna, |
dst_x, dst_y, width, height, |
tmp)) |
return true; |
gen4_composite_channel_convert(&tmp->src); |
break; |
} |
tmp->is_affine = tmp->src.is_affine; |
tmp->has_component_alpha = false; |
tmp->need_magic_ca_pass = false; |
tmp->mask.repeat = SAMPLER_EXTEND_NONE; |
tmp->mask.filter = SAMPLER_FILTER_NEAREST; |
tmp->mask.is_affine = true; |
if (mask) { |
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { |
tmp->has_component_alpha = true; |
tmp->mask.bo = mask_bo; |
tmp->mask.pict_format = PIXMAN_a8; |
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format); |
tmp->mask.width = mask->drawable.width; |
tmp->mask.height = mask->drawable.height; |
/* Check if it's component alpha that relies on a source alpha and on |
* the source value. We can only get one of those into the single |
* source value that we get to blend with. |
*/ |
if (gen4_blend_op[op].src_alpha && |
(gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) { |
if (op != PictOpOver) { |
DBG(("%s -- fallback: unhandled component alpha blend\n", |
__FUNCTION__)); |
if( scale ) |
{ |
tmp->src.scale[0] = 1.f/width; |
tmp->src.scale[1] = 1.f/height; |
} |
else |
{ |
tmp->src.scale[0] = 1.f/src->drawable.width; |
tmp->src.scale[1] = 1.f/src->drawable.height; |
} |
// tmp->src.offset[0] = -dst_x; |
// tmp->src.offset[1] = -dst_y; |
goto cleanup_src; |
} |
tmp->need_magic_ca_pass = true; |
tmp->op = PictOpOutReverse; |
} |
} |
tmp->mask.scale[0] = 1.f/mask->drawable.width; |
tmp->mask.scale[1] = 1.f/mask->drawable.height; |
// tmp->mask.offset[0] = -dst_x; |
// tmp->mask.offset[1] = -dst_y; |
if (!reuse_source(sna, |
src, &tmp->src, src_x, src_y, |
mask, &tmp->mask, msk_x, msk_y)) { |
switch (gen4_composite_picture(sna, mask, &tmp->mask, |
msk_x, msk_y, |
width, height, |
dst_x, dst_y, |
dst->polyMode == PolyModePrecise)) { |
case -1: |
DBG(("%s: failed to prepare mask\n", __FUNCTION__)); |
goto cleanup_src; |
case 0: |
if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) |
goto cleanup_src; |
/* fall through to fixup */ |
case 1: |
gen4_composite_channel_convert(&tmp->mask); |
break; |
} |
} |
tmp->is_affine &= tmp->mask.is_affine; |
} |
tmp->u.gen4.wm_kernel = |
gen4_choose_composite_kernel(tmp->op, |
tmp->mask.bo != NULL, |
tmp->has_component_alpha, |
tmp->is_affine); |
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp); |
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp); |
tmp->blt = gen4_render_composite_blt; |
tmp->box = gen4_render_composite_box; |
tmp->boxes = gen4_render_composite_boxes__blt; |
if (tmp->emit_boxes) { |
tmp->boxes = gen4_render_composite_boxes; |
#if !FORCE_FLUSH |
tmp->thread_boxes = gen4_render_composite_boxes__thread; |
#endif |
} |
tmp->done = gen4_render_composite_done; |
if (!kgem_check_bo(&sna->kgem, |
1136,13 → 1926,70 |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, |
NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, |
NULL)) |
goto cleanup_mask; |
} |
gen4_bind_surfaces(sna, tmp); |
gen4_align_vertex(sna, tmp); |
return true; |
cleanup_mask: |
if (tmp->mask.bo) |
kgem_bo_destroy(&sna->kgem, tmp->mask.bo); |
cleanup_src: |
if (tmp->src.bo) |
kgem_bo_destroy(&sna->kgem, tmp->src.bo); |
cleanup_dst: |
if (tmp->redirect.real_bo) |
kgem_bo_destroy(&sna->kgem, tmp->dst.bo); |
return false; |
} |
#endif |
static void |
gen4_render_flush(struct sna *sna) |
{ |
1230,7 → 2077,7 |
} |
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream, |
int gen, uint32_t kernel) |
uint32_t kernel) |
{ |
struct gen4_sf_unit_state *sf; |
1384,7 → 2231,7 |
} |
state->vs = gen4_create_vs_unit_state(&general); |
state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf); |
state->sf = gen4_create_sf_state(&general, sf); |
wm_state = sna_static_stream_map(&general, |
sizeof(*wm_state) * KERNEL_COUNT * |
1421,20 → 2268,52 |
return state->general_bo != NULL; |
} |
bool gen4_render_init(struct sna *sna) |
const char *gen4_render_init(struct sna *sna, const char *backend) |
{ |
if (!gen4_render_setup(sna)) |
return false; |
return backend; |
sna->kgem.retire = gen4_render_retire; |
sna->kgem.expire = gen4_render_expire; |
#if 0 |
#if !NO_COMPOSITE |
sna->render.composite = gen4_render_composite; |
sna->render.prefer_gpu |= PREFER_GPU_RENDER; |
#endif |
#if !NO_COMPOSITE_SPANS |
sna->render.check_composite_spans = gen4_check_composite_spans; |
sna->render.composite_spans = gen4_render_composite_spans; |
if (0) |
sna->render.prefer_gpu |= PREFER_GPU_SPANS; |
#endif |
#if !NO_VIDEO |
sna->render.video = gen4_render_video; |
#endif |
#if !NO_COPY_BOXES |
sna->render.copy_boxes = gen4_render_copy_boxes; |
#endif |
#if !NO_COPY |
sna->render.copy = gen4_render_copy; |
#endif |
#if !NO_FILL_BOXES |
sna->render.fill_boxes = gen4_render_fill_boxes; |
#endif |
#if !NO_FILL |
sna->render.fill = gen4_render_fill; |
#endif |
#if !NO_FILL_ONE |
sna->render.fill_one = gen4_render_fill_one; |
#endif |
#endif |
sna->render.blit_tex = gen4_blit_tex; |
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; |
sna->render.flush = gen4_render_flush; |
sna->render.reset = gen4_render_reset; |
sna->render.fini = gen4_render_fini; |
1441,8 → 2320,95 |
sna->render.max_3d_size = GEN4_MAX_3D_SIZE; |
sna->render.max_3d_pitch = 1 << 18; |
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; |
return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)"; |
} |
static bool |
gen4_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp) |
{ |
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, |
width, height, sna->kgem.ring)); |
tmp->op = PictOpSrc; |
tmp->dst.pixmap = dst; |
tmp->dst.bo = dst_bo; |
tmp->dst.width = dst->drawable.width; |
tmp->dst.height = dst->drawable.height; |
tmp->dst.format = PICT_x8r8g8b8; |
tmp->src.repeat = RepeatNone; |
tmp->src.filter = PictFilterNearest; |
tmp->src.is_affine = true; |
tmp->src.bo = src_bo; |
tmp->src.pict_format = PICT_x8r8g8b8; |
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format); |
tmp->src.width = src->drawable.width; |
tmp->src.height = src->drawable.height; |
tmp->is_affine = tmp->src.is_affine; |
tmp->has_component_alpha = false; |
tmp->need_magic_ca_pass = false; |
tmp->mask.repeat = SAMPLER_EXTEND_NONE; |
tmp->mask.filter = SAMPLER_FILTER_NEAREST; |
tmp->mask.is_affine = true; |
tmp->mask.bo = mask_bo; |
tmp->mask.pict_format = PIXMAN_a8; |
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format); |
tmp->mask.width = mask->drawable.width; |
tmp->mask.height = mask->drawable.height; |
if( scale ) |
{ |
tmp->src.scale[0] = 1.f/width; |
tmp->src.scale[1] = 1.f/height; |
} |
else |
{ |
tmp->src.scale[0] = 1.f/src->drawable.width; |
tmp->src.scale[1] = 1.f/src->drawable.height; |
} |
// tmp->src.offset[0] = -dst_x; |
// tmp->src.offset[1] = -dst_y; |
tmp->mask.scale[0] = 1.f/mask->drawable.width; |
tmp->mask.scale[1] = 1.f/mask->drawable.height; |
// tmp->mask.offset[0] = -dst_x; |
// tmp->mask.offset[1] = -dst_y; |
tmp->u.gen4.wm_kernel = |
gen4_choose_composite_kernel(tmp->op, |
tmp->mask.bo != NULL, |
tmp->has_component_alpha, |
tmp->is_affine); |
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp); |
tmp->blt = gen4_render_composite_blt; |
tmp->done = gen4_render_composite_done; |
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, |
NULL)) { |
kgem_submit(&sna->kgem); |
} |
gen4_bind_surfaces(sna, tmp); |
gen4_align_vertex(sna, tmp); |
return true; |
} |
/drivers/video/Intel-2D/gen4_vertex.c |
---|
34,6 → 34,10 |
#include "sna_render_inline.h" |
#include "gen4_vertex.h" |
#ifndef sse2 |
#define sse2 |
#endif |
void gen4_vertex_flush(struct sna *sna) |
{ |
DBG(("%s[%x] = %d\n", __FUNCTION__, |
223,7 → 227,601 |
kgem_bo_destroy(&sna->kgem, free_bo); |
} |
fastcall static void |
/* specialised vertex emission routines */ |
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */ |
#define OUT_VERTEX_F(v) vertex_emit(sna, v) |
force_inline static float |
compute_linear(const struct sna_composite_channel *channel, |
int16_t x, int16_t y) |
{ |
return ((x+channel->offset[0]) * channel->u.linear.dx + |
(y+channel->offset[1]) * channel->u.linear.dy + |
channel->u.linear.offset); |
} |
sse2 inline static void |
emit_texcoord(struct sna *sna, |
const struct sna_composite_channel *channel, |
int16_t x, int16_t y) |
{ |
if (channel->is_solid) { |
OUT_VERTEX_F(x); |
return; |
} |
x += channel->offset[0]; |
y += channel->offset[1]; |
if (channel->is_affine) { |
float s, t; |
sna_get_transformed_coordinates(x, y, |
channel->transform, |
&s, &t); |
OUT_VERTEX_F(s * channel->scale[0]); |
OUT_VERTEX_F(t * channel->scale[1]); |
} else { |
float s, t, w; |
sna_get_transformed_coordinates_3d(x, y, |
channel->transform, |
&s, &t, &w); |
OUT_VERTEX_F(s * channel->scale[0]); |
OUT_VERTEX_F(t * channel->scale[1]); |
OUT_VERTEX_F(w); |
} |
} |
sse2 force_inline static void |
emit_vertex(struct sna *sna, |
const struct sna_composite_op *op, |
int16_t srcX, int16_t srcY, |
int16_t mskX, int16_t mskY, |
int16_t dstX, int16_t dstY) |
{ |
OUT_VERTEX(dstX, dstY); |
emit_texcoord(sna, &op->src, srcX, srcY); |
} |
sse2 fastcall static void |
emit_primitive(struct sna *sna, |
const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r) |
{ |
emit_vertex(sna, op, |
r->src.x + r->width, r->src.y + r->height, |
r->mask.x + r->width, r->mask.y + r->height, |
r->dst.x + r->width, r->dst.y + r->height); |
emit_vertex(sna, op, |
r->src.x, r->src.y + r->height, |
r->mask.x, r->mask.y + r->height, |
r->dst.x, r->dst.y + r->height); |
emit_vertex(sna, op, |
r->src.x, r->src.y, |
r->mask.x, r->mask.y, |
r->dst.x, r->dst.y); |
} |
sse2 force_inline static void |
emit_vertex_mask(struct sna *sna, |
const struct sna_composite_op *op, |
int16_t srcX, int16_t srcY, |
int16_t mskX, int16_t mskY, |
int16_t dstX, int16_t dstY) |
{ |
OUT_VERTEX(dstX, dstY); |
emit_texcoord(sna, &op->src, srcX, srcY); |
emit_texcoord(sna, &op->mask, mskX, mskY); |
} |
sse2 fastcall static void |
emit_primitive_mask(struct sna *sna, |
const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r) |
{ |
emit_vertex_mask(sna, op, |
r->src.x + r->width, r->src.y + r->height, |
r->mask.x + r->width, r->mask.y + r->height, |
r->dst.x + r->width, r->dst.y + r->height); |
emit_vertex_mask(sna, op, |
r->src.x, r->src.y + r->height, |
r->mask.x, r->mask.y + r->height, |
r->dst.x, r->dst.y + r->height); |
emit_vertex_mask(sna, op, |
r->src.x, r->src.y, |
r->mask.x, r->mask.y, |
r->dst.x, r->dst.y); |
} |
sse2 fastcall static void |
emit_primitive_solid(struct sna *sna, |
const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r) |
{ |
float *v; |
union { |
struct sna_coordinate p; |
float f; |
} dst; |
assert(op->floats_per_rect == 6); |
assert((sna->render.vertex_used % 2) == 0); |
v = sna->render.vertices + sna->render.vertex_used; |
sna->render.vertex_used += 6; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
dst.p.x = r->dst.x + r->width; |
dst.p.y = r->dst.y + r->height; |
v[0] = dst.f; |
dst.p.x = r->dst.x; |
v[2] = dst.f; |
dst.p.y = r->dst.y; |
v[4] = dst.f; |
v[5] = v[3] = v[1] = .5; |
} |
sse2 fastcall static void |
emit_boxes_solid(const struct sna_composite_op *op, |
const BoxRec *box, int nbox, |
float *v) |
{ |
do { |
union { |
struct sna_coordinate p; |
float f; |
} dst; |
dst.p.x = box->x2; |
dst.p.y = box->y2; |
v[0] = dst.f; |
dst.p.x = box->x1; |
v[2] = dst.f; |
dst.p.y = box->y1; |
v[4] = dst.f; |
v[5] = v[3] = v[1] = .5; |
box++; |
v += 6; |
} while (--nbox); |
} |
sse2 fastcall static void |
emit_primitive_linear(struct sna *sna, |
const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r) |
{ |
float *v; |
union { |
struct sna_coordinate p; |
float f; |
} dst; |
assert(op->floats_per_rect == 6); |
assert((sna->render.vertex_used % 2) == 0); |
v = sna->render.vertices + sna->render.vertex_used; |
sna->render.vertex_used += 6; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
dst.p.x = r->dst.x + r->width; |
dst.p.y = r->dst.y + r->height; |
v[0] = dst.f; |
dst.p.x = r->dst.x; |
v[2] = dst.f; |
dst.p.y = r->dst.y; |
v[4] = dst.f; |
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); |
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); |
v[5] = compute_linear(&op->src, r->src.x, r->src.y); |
} |
sse2 fastcall static void |
emit_boxes_linear(const struct sna_composite_op *op, |
const BoxRec *box, int nbox, |
float *v) |
{ |
union { |
struct sna_coordinate p; |
float f; |
} dst; |
do { |
dst.p.x = box->x2; |
dst.p.y = box->y2; |
v[0] = dst.f; |
dst.p.x = box->x1; |
v[2] = dst.f; |
dst.p.y = box->y1; |
v[4] = dst.f; |
v[1] = compute_linear(&op->src, box->x2, box->y2); |
v[3] = compute_linear(&op->src, box->x1, box->y2); |
v[5] = compute_linear(&op->src, box->x1, box->y1); |
v += 6; |
box++; |
} while (--nbox); |
} |
/* Specialised vertex emitters: each packs one rectangle (or a run of
 * boxes) as three vertices straight into the vertex buffer.  The float
 * layout per vertex is fixed by the selected vertex-element setup, so
 * the write order and the v[i] aliasing chains below are load-bearing. */

/* Identity (untransformed) source: 3 floats per vertex (pos, s, t). */
sse2 fastcall static void
emit_primitive_identity_source(struct sna *sna,
			       const struct sna_composite_op *op,
			       const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	/* Packed destination corners: bottom-right, bottom-left, top-left. */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[3] = dst.f;
	dst.p.y = r->dst.y;
	v[6] = dst.f;

	/* Shared texcoords: left edge s, right edge s, top t, bottom t. */
	v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[1] = v[4] + r->width * op->src.scale[0];

	v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}

/* Box-run form of emit_primitive_identity_source (9 floats per box). */
sse2 fastcall static void
emit_boxes_identity_source(const struct sna_composite_op *op,
			   const BoxRec *box, int nbox,
			   float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[3] = dst.f;
		dst.p.y = box->y1;
		v[6] = dst.f;

		v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];

		v += 9;
		box++;
	} while (--nbox);
}

/* Axis-aligned scale+translate source transform (matrix terms hoisted). */
sse2 fastcall static void
emit_primitive_simple_source(struct sna *sna,
			     const struct sna_composite_op *op,
			     const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*3;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;

	dst.p.x = r->dst.x;
	v[3] = dst.f;
	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;

	dst.p.y = r->dst.y;
	v[6] = dst.f;
	v[8] = ((r->src.y + ty) * yy + y0) * sy;
}

/* Box-run form of emit_primitive_simple_source. */
sse2 fastcall static void
emit_boxes_simple_source(const struct sna_composite_op *op,
			 const BoxRec *box, int nbox,
			 float *v)
{
	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[1] = ((box->x2 + tx) * xx + x0) * sx;
		v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

		dst.p.x = box->x1;
		v[3] = dst.f;
		v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;

		dst.p.y = box->y1;
		v[6] = dst.f;
		v[8] = ((box->y1 + ty) * yy + y0) * sy;

		v += 9;
		box++;
	} while (--nbox);
}

/* General affine source transform: each corner run through
 * _sna_get_transformed_scaled(). */
sse2 fastcall static void
emit_primitive_affine_source(struct sna *sna,
			     const struct sna_composite_op *op,
			     const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
				    op->src.offset[1] + r->src.y + r->height,
				    op->src.transform, op->src.scale,
				    &v[1], &v[2]);

	dst.p.x = r->dst.x;
	v[3] = dst.f;
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
				    op->src.offset[1] + r->src.y + r->height,
				    op->src.transform, op->src.scale,
				    &v[4], &v[5]);

	dst.p.y = r->dst.y;
	v[6] = dst.f;
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
				    op->src.offset[1] + r->src.y,
				    op->src.transform, op->src.scale,
				    &v[7], &v[8]);
}

/* Box-run form of emit_primitive_affine_source. */
sse2 fastcall static void
emit_boxes_affine_source(const struct sna_composite_op *op,
			 const BoxRec *box, int nbox,
			 float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		_sna_get_transformed_scaled(op->src.offset[0] + box->x2,
					    op->src.offset[1] + box->y2,
					    op->src.transform, op->src.scale,
					    &v[1], &v[2]);

		dst.p.x = box->x1;
		v[3] = dst.f;
		_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
					    op->src.offset[1] + box->y2,
					    op->src.transform, op->src.scale,
					    &v[4], &v[5]);

		dst.p.y = box->y1;
		v[6] = dst.f;
		_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
					    op->src.offset[1] + box->y1,
					    op->src.transform, op->src.scale,
					    &v[7], &v[8]);

		box++;
		v += 9;
	} while (--nbox);
}

/* Solid source + identity mask: 4 floats per vertex (pos, dummy .5 source
 * coord, mask s, mask t). */
sse2 fastcall static void
emit_primitive_identity_mask(struct sna *sna,
			     const struct sna_composite_op *op,
			     const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;

	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];

	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];

	v[9] = v[5] = v[1] = .5;
}

/* Box-run form of emit_primitive_identity_mask (12 floats per box). */
sse2 fastcall static void
emit_boxes_identity_mask(const struct sna_composite_op *op,
			 const BoxRec *box, int nbox,
			 float *v)
{
	float msk_x = op->mask.offset[0];
	float msk_y = op->mask.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];

		dst.p.x = box->x1;
		v[4] = dst.f;
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];

		dst.p.y = box->y1;
		v[8] = dst.f;
		v[11] = (msk_y + box->y1) * op->mask.scale[1];

		v[9] = v[5] = v[1] = .5;
		v += 12;
		box++;
	} while (--nbox);
}

/* Linear-gradient source + identity mask: as identity_mask but with the
 * gradient value in place of the dummy .5 source coordinate. */
sse2 fastcall static void
emit_primitive_linear_identity_mask(struct sna *sna,
				    const struct sna_composite_op *op,
				    const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;

	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];

	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];

	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}

/* Box-run form of emit_primitive_linear_identity_mask. */
sse2 fastcall static void
emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
				const BoxRec *box, int nbox,
				float *v)
{
	float msk_x = op->mask.offset[0];
	float msk_y = op->mask.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];

		dst.p.x = box->x1;
		v[4] = dst.f;
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];

		dst.p.y = box->y1;
		v[8] = dst.f;
		v[11] = (msk_y + box->y1) * op->mask.scale[1];

		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y2);
		v[9] = compute_linear(&op->src, box->x1, box->y1);

		v += 12;
		box++;
	} while (--nbox);
}
sse2 fastcall static void |
emit_primitive_identity_source_mask(struct sna *sna, |
const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r) |
272,8 → 870,831 |
v[14] = msk_y * op->mask.scale[1]; |
} |
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) |
/* Scale+translate source with identity mask: 5 floats per vertex
 * (pos, src s, src t, mask s, mask t). */
sse2 fastcall static void
emit_primitive_simple_source_identity(struct sna *sna,
				      const struct sna_composite_op *op,
				      const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];
	float msk_x = r->mask.x + op->mask.offset[0];
	float msk_y = r->mask.y + op->mask.offset[1];
	float w = r->width, h = r->height;

	assert(op->floats_per_rect == 15);
	assert((sna->render.vertex_used % 5) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*5;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
	v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
	v[3] = (msk_x + w) * op->mask.scale[0];
	v[4] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[5] = dst.f;
	v[6] = ((r->src.x + tx) * xx + x0) * sx;
	v[7] = v[2];
	v[8] = msk_x * op->mask.scale[0];
	v[9] = v[4];

	dst.p.y = r->dst.y;
	v[10] = dst.f;
	v[11] = v[6];
	v[12] = ((r->src.y + ty) * yy + y0) * sy;
	v[13] = v[8];
	v[14] = msk_y * op->mask.scale[1];
}

/* General affine source with identity mask: 5 floats per vertex, source
 * coords via _sna_get_transformed_scaled(). */
sse2 fastcall static void
emit_primitive_affine_source_identity(struct sna *sna,
				      const struct sna_composite_op *op,
				      const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x = r->mask.x + op->mask.offset[0];
	float msk_y = r->mask.y + op->mask.offset[1];
	float w = r->width, h = r->height;

	assert(op->floats_per_rect == 15);
	assert((sna->render.vertex_used % 5) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*5;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
				    op->src.offset[1] + r->src.y + r->height,
				    op->src.transform, op->src.scale,
				    &v[1], &v[2]);
	v[3] = (msk_x + w) * op->mask.scale[0];
	v[4] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[5] = dst.f;
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
				    op->src.offset[1] + r->src.y + r->height,
				    op->src.transform, op->src.scale,
				    &v[6], &v[7]);
	v[8] = msk_x * op->mask.scale[0];
	v[9] = v[4];

	dst.p.y = r->dst.y;
	v[10] = dst.f;
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
				    op->src.offset[1] + r->src.y,
				    op->src.transform, op->src.scale,
				    &v[11], &v[12]);
	v[13] = v[8];
	v[14] = msk_y * op->mask.scale[1];
}
/* SSE4_2 */
/* Compiler-multiversioned clones of the emitters above, built with the
 * sse4.2 target attribute (see compiler.h).  Bodies are intentionally
 * byte-for-byte copies of their sse2 counterparts; keep them in sync. */
#if defined(sse4_2)

/* sse4.2 clone of emit_primitive_linear. */
sse4_2 fastcall static void
emit_primitive_linear__sse4_2(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	assert(op->floats_per_rect == 6);
	assert((sna->render.vertex_used % 2) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[2] = dst.f;
	dst.p.y = r->dst.y;
	v[4] = dst.f;

	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}

/* sse4.2 clone of emit_boxes_linear. */
sse4_2 fastcall static void
emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
			  const BoxRec *box, int nbox,
			  float *v)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	do {
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[2] = dst.f;
		dst.p.y = box->y1;
		v[4] = dst.f;

		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[3] = compute_linear(&op->src, box->x1, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y1);

		v += 6;
		box++;
	} while (--nbox);
}

/* sse4.2 clone of emit_primitive_identity_source. */
sse4_2 fastcall static void
emit_primitive_identity_source__sse4_2(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[3] = dst.f;
	dst.p.y = r->dst.y;
	v[6] = dst.f;

	v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[1] = v[4] + r->width * op->src.scale[0];

	v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}

/* sse4.2 clone of emit_boxes_identity_source. */
sse4_2 fastcall static void
emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
				   const BoxRec *box, int nbox,
				   float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[3] = dst.f;
		dst.p.y = box->y1;
		v[6] = dst.f;

		v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];

		v += 9;
		box++;
	} while (--nbox);
}

/* sse4.2 clone of emit_primitive_simple_source. */
sse4_2 fastcall static void
emit_primitive_simple_source__sse4_2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*3;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;

	dst.p.x = r->dst.x;
	v[3] = dst.f;
	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;

	dst.p.y = r->dst.y;
	v[6] = dst.f;
	v[8] = ((r->src.y + ty) * yy + y0) * sy;
}

/* sse4.2 clone of emit_boxes_simple_source. */
sse4_2 fastcall static void
emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[1] = ((box->x2 + tx) * xx + x0) * sx;
		v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

		dst.p.x = box->x1;
		v[3] = dst.f;
		v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;

		dst.p.y = box->y1;
		v[6] = dst.f;
		v[8] = ((box->y1 + ty) * yy + y0) * sy;

		v += 9;
		box++;
	} while (--nbox);
}

/* sse4.2 clone of emit_primitive_identity_mask. */
sse4_2 fastcall static void
emit_primitive_identity_mask__sse4_2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;

	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];

	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];

	v[9] = v[5] = v[1] = .5;
}

/* sse4.2 clone of emit_boxes_identity_mask. */
sse4_2 fastcall static void
emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	float msk_x = op->mask.offset[0];
	float msk_y = op->mask.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];

		dst.p.x = box->x1;
		v[4] = dst.f;
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];

		dst.p.y = box->y1;
		v[8] = dst.f;
		v[11] = (msk_y + box->y1) * op->mask.scale[1];

		v[9] = v[5] = v[1] = .5;
		v += 12;
		box++;
	} while (--nbox);
}

/* sse4.2 clone of emit_primitive_linear_identity_mask. */
sse4_2 fastcall static void
emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
					    const struct sna_composite_op *op,
					    const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;

	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];

	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];

	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}

/* sse4.2 clone of emit_boxes_linear_identity_mask. */
sse4_2 fastcall static void
emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
					const BoxRec *box, int nbox,
					float *v)
{
	float msk_x = op->mask.offset[0];
	float msk_y = op->mask.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];

		dst.p.x = box->x1;
		v[4] = dst.f;
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];

		dst.p.y = box->y1;
		v[8] = dst.f;
		v[11] = (msk_y + box->y1) * op->mask.scale[1];

		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y2);
		v[9] = compute_linear(&op->src, box->x1, box->y1);

		v += 12;
		box++;
	} while (--nbox);
}
#endif
/* AVX2 */
/* Compiler-multiversioned clones built with the avx2 target attribute
 * (see compiler.h).  Bodies are byte-for-byte copies of the sse2
 * originals; keep them in sync. */
#if defined(avx2)

/* avx2 clone of emit_primitive_linear. */
avx2 fastcall static void
emit_primitive_linear__avx2(struct sna *sna,
			    const struct sna_composite_op *op,
			    const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	assert(op->floats_per_rect == 6);
	assert((sna->render.vertex_used % 2) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[2] = dst.f;
	dst.p.y = r->dst.y;
	v[4] = dst.f;

	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}

/* avx2 clone of emit_boxes_linear. */
avx2 fastcall static void
emit_boxes_linear__avx2(const struct sna_composite_op *op,
			const BoxRec *box, int nbox,
			float *v)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	do {
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[2] = dst.f;
		dst.p.y = box->y1;
		v[4] = dst.f;

		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[3] = compute_linear(&op->src, box->x1, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y1);

		v += 6;
		box++;
	} while (--nbox);
}

/* avx2 clone of emit_primitive_identity_source. */
avx2 fastcall static void
emit_primitive_identity_source__avx2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[3] = dst.f;
	dst.p.y = r->dst.y;
	v[6] = dst.f;

	v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[1] = v[4] + r->width * op->src.scale[0];

	v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}

/* avx2 clone of emit_boxes_identity_source. */
avx2 fastcall static void
emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[3] = dst.f;
		dst.p.y = box->y1;
		v[6] = dst.f;

		v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];

		v += 9;
		box++;
	} while (--nbox);
}

/* avx2 clone of emit_primitive_simple_source. */
avx2 fastcall static void
emit_primitive_simple_source__avx2(struct sna *sna,
				   const struct sna_composite_op *op,
				   const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		struct sna_coordinate p;
		float f;
	} dst;

	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*3;

	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;

	dst.p.x = r->dst.x;
	v[3] = dst.f;
	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;

	dst.p.y = r->dst.y;
	v[6] = dst.f;
	v[8] = ((r->src.y + ty) * yy + y0) * sy;
}

/* avx2 clone of emit_boxes_simple_source. */
avx2 fastcall static void
emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
			       const BoxRec *box, int nbox,
			       float *v)
{
	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	do {
		union {
			struct sna_coordinate p;
			float f;
		} dst;

		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[1] = ((box->x2 + tx) * xx + x0) * sx;
		v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

		dst.p.x = box->x1;
		v[3] = dst.f;
		v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;

		dst.p.y = box->y1;
		v[6] = dst.f;
		v[8] = ((box->y1 + ty) * yy + y0) * sy;

		v += 9;
		box++;
	} while (--nbox);
}
avx2 fastcall static void
emit_primitive_identity_mask__avx2(struct sna *sna,
				   const struct sna_composite_op *op,
				   const struct sna_composite_rectangles *r)
{
	/* Emit one rectangle for a solid source with an untransformed
	 * mask: 3 vertices x 4 floats (packed dst, source slot, mask u/v). */
	union {
		struct sna_coordinate p;
		float f;	/* type-pun: packed x/y pair read back as one float */
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;
	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;
	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	/* Corners in the order (x2,y2), (x1,y2), (x1,y1). */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];
	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];
	/* Source slot of every vertex is the constant .5; the solid
	 * colour itself is presumably supplied elsewhere (e.g. by the
	 * WM kernel) -- NOTE(review): confirm against the shader setup. */
	v[9] = v[5] = v[1] = .5;
}
avx2 fastcall static void |
emit_boxes_identity_mask__avx2(const struct sna_composite_op *op, |
const BoxRec *box, int nbox, |
float *v) |
{ |
float msk_x = op->mask.offset[0]; |
float msk_y = op->mask.offset[1]; |
do { |
union { |
struct sna_coordinate p; |
float f; |
} dst; |
dst.p.x = box->x2; |
dst.p.y = box->y2; |
v[0] = dst.f; |
v[2] = (msk_x + box->x2) * op->mask.scale[0]; |
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; |
dst.p.x = box->x1; |
v[4] = dst.f; |
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; |
dst.p.y = box->y1; |
v[8] = dst.f; |
v[11] = (msk_y + box->y1) * op->mask.scale[1]; |
v[9] = v[5] = v[1] = .5; |
v += 12; |
box++; |
} while (--nbox); |
} |
avx2 fastcall static void
emit_primitive_linear_identity_mask__avx2(struct sna *sna,
					  const struct sna_composite_op *op,
					  const struct sna_composite_rectangles *r)
{
	/* Emit one rectangle for a linear-gradient source with an
	 * untransformed mask: 3 vertices x 4 floats (packed dst,
	 * gradient value, mask u/v). */
	union {
		struct sna_coordinate p;
		float f;	/* type-pun: packed x/y pair read back as one float */
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;
	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;
	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;
	/* Corners in the order (x2,y2), (x1,y2), (x1,y1). */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];
	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];
	/* Evaluate the gradient at each corner's source position. */
	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
avx2 fastcall static void |
emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op, |
const BoxRec *box, int nbox, |
float *v) |
{ |
float msk_x = op->mask.offset[0]; |
float msk_y = op->mask.offset[1]; |
do { |
union { |
struct sna_coordinate p; |
float f; |
} dst; |
dst.p.x = box->x2; |
dst.p.y = box->y2; |
v[0] = dst.f; |
v[2] = (msk_x + box->x2) * op->mask.scale[0]; |
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; |
dst.p.x = box->x1; |
v[4] = dst.f; |
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; |
dst.p.y = box->y1; |
v[8] = dst.f; |
v[11] = (msk_y + box->y1) * op->mask.scale[1]; |
v[1] = compute_linear(&op->src, box->x2, box->y2); |
v[5] = compute_linear(&op->src, box->x1, box->y2); |
v[9] = compute_linear(&op->src, box->x1, box->y1); |
v += 12; |
box++; |
} while (--nbox); |
} |
#endif |
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp) |
{ |
unsigned vb; |
if (tmp->mask.bo) { |
280,8 → 1701,38 |
if (tmp->mask.transform == NULL) { |
if (tmp->src.is_solid) { |
DBG(("%s: solid, identity mask\n", __FUNCTION__)); |
#if defined(avx2) |
if (sna->cpu_features & AVX2) { |
tmp->prim_emit = emit_primitive_identity_mask__avx2; |
} else |
#endif |
#if defined(sse4_2) |
if (sna->cpu_features & SSE4_2) { |
tmp->prim_emit = emit_primitive_identity_mask__sse4_2; |
} else |
#endif |
{ |
tmp->prim_emit = emit_primitive_identity_mask; |
} |
tmp->floats_per_vertex = 4; |
vb = 1 | 2 << 2; |
} else if (tmp->src.is_linear) { |
DBG(("%s: linear, identity mask\n", __FUNCTION__)); |
#if defined(avx2) |
if (sna->cpu_features & AVX2) { |
tmp->prim_emit = emit_primitive_linear_identity_mask__avx2; |
} else |
#endif |
#if defined(sse4_2) |
if (sna->cpu_features & SSE4_2) { |
tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2; |
} else |
#endif |
{ |
tmp->prim_emit = emit_primitive_linear_identity_mask; |
} |
tmp->floats_per_vertex = 4; |
vb = 1 | 2 << 2; |
} else if (tmp->src.transform == NULL) { |
DBG(("%s: identity source, identity mask\n", __FUNCTION__)); |
tmp->prim_emit = emit_primitive_identity_source_mask; |
288,17 → 1739,130 |
tmp->floats_per_vertex = 5; |
vb = 2 << 2 | 2; |
} else if (tmp->src.is_affine) { |
DBG(("%s: simple src, identity mask\n", __FUNCTION__)); |
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; |
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; |
if (!sna_affine_transform_is_rotation(tmp->src.transform)) { |
DBG(("%s: simple src, identity mask\n", __FUNCTION__)); |
tmp->prim_emit = emit_primitive_simple_source_identity; |
} else { |
DBG(("%s: affine src, identity mask\n", __FUNCTION__)); |
tmp->prim_emit = emit_primitive_affine_source_identity; |
} |
tmp->floats_per_vertex = 5; |
vb = 2 << 2 | 2; |
} else { |
DBG(("%s: projective source, identity mask\n", __FUNCTION__)); |
tmp->prim_emit = emit_primitive_mask; |
tmp->floats_per_vertex = 6; |
vb = 2 << 2 | 3; |
} |
} else { |
tmp->prim_emit = emit_primitive_mask; |
tmp->floats_per_vertex = 1; |
vb = 0; |
if (tmp->mask.is_solid) { |
tmp->floats_per_vertex += 1; |
vb |= 1 << 2; |
} else if (tmp->mask.is_affine) { |
tmp->floats_per_vertex += 2; |
vb |= 2 << 2; |
}else { |
tmp->floats_per_vertex += 3; |
vb |= 3 << 2; |
} |
if (tmp->src.is_solid) { |
tmp->floats_per_vertex += 1; |
vb |= 1; |
} else if (tmp->src.is_affine) { |
tmp->floats_per_vertex += 2; |
vb |= 2 ; |
}else { |
tmp->floats_per_vertex += 3; |
vb |= 3; |
} |
DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n", |
__FUNCTION__,tmp->floats_per_vertex, vb)); |
} |
} else { |
#if 0 |
if (tmp->src.is_solid) { |
DBG(("%s: solid, no mask\n", __FUNCTION__)); |
tmp->prim_emit = emit_primitive_solid; |
if (tmp->src.is_opaque && tmp->op == PictOpOver) |
tmp->op = PictOpSrc; |
tmp->floats_per_vertex = 2; |
vb = 1; |
} else if (tmp->src.is_linear) { |
DBG(("%s: linear, no mask\n", __FUNCTION__)); |
#if defined(avx2) |
if (sna->cpu_features & AVX2) { |
tmp->prim_emit = emit_primitive_linear__avx2; |
} else |
#endif |
#if defined(sse4_2) |
if (sna->cpu_features & SSE4_2) { |
tmp->prim_emit = emit_primitive_linear__sse4_2; |
} else |
#endif |
{ |
tmp->prim_emit = emit_primitive_linear; |
} |
tmp->floats_per_vertex = 2; |
vb = 1; |
} else if (tmp->src.transform == NULL) { |
DBG(("%s: identity src, no mask\n", __FUNCTION__)); |
#if defined(avx2) |
if (sna->cpu_features & AVX2) { |
tmp->prim_emit = emit_primitive_identity_source__avx2; |
} else |
#endif |
#if defined(sse4_2) |
if (sna->cpu_features & SSE4_2) { |
tmp->prim_emit = emit_primitive_identity_source__sse4_2; |
} else |
#endif |
{ |
tmp->prim_emit = emit_primitive_identity_source; |
} |
tmp->floats_per_vertex = 3; |
vb = 2; |
} else if (tmp->src.is_affine) { |
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; |
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; |
if (!sna_affine_transform_is_rotation(tmp->src.transform)) { |
DBG(("%s: simple src, no mask\n", __FUNCTION__)); |
#if defined(avx2) |
if (sna->cpu_features & AVX2) { |
tmp->prim_emit = emit_primitive_simple_source__avx2; |
} else |
#endif |
#if defined(sse4_2) |
if (sna->cpu_features & SSE4_2) { |
tmp->prim_emit = emit_primitive_simple_source__sse4_2; |
} else |
#endif |
{ |
tmp->prim_emit = emit_primitive_simple_source; |
} |
} else { |
DBG(("%s: affine src, no mask\n", __FUNCTION__)); |
tmp->prim_emit = emit_primitive_affine_source; |
} |
tmp->floats_per_vertex = 3; |
vb = 2; |
} else { |
DBG(("%s: projective src, no mask\n", __FUNCTION__)); |
assert(!tmp->src.is_solid); |
tmp->prim_emit = emit_primitive; |
tmp->floats_per_vertex = 4; |
vb = 3; |
} |
#endif |
} |
tmp->floats_per_rect = 3 * tmp->floats_per_vertex; |
return vb; |
} |
/drivers/video/Intel-2D/gen4_vertex.h |
---|
10,7 → 10,6 |
int gen4_vertex_finish(struct sna *sna); |
void gen4_vertex_close(struct sna *sna); |
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp); |
//unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp); |
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp); |
#endif /* GEN4_VERTEX_H */ |
/drivers/video/Intel-2D/gen5_render.c |
---|
279,8 → 279,24 |
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; |
case PICT_x8r8g8b8: |
return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM; |
case PICT_a8b8g8r8: |
return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM; |
case PICT_x8b8g8r8: |
return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM; |
case PICT_a2r10g10b10: |
return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM; |
case PICT_x2r10g10b10: |
return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM; |
case PICT_r8g8b8: |
return GEN5_SURFACEFORMAT_R8G8B8_UNORM; |
case PICT_r5g6b5: |
return GEN5_SURFACEFORMAT_B5G6R5_UNORM; |
case PICT_a1r5g5b5: |
return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM; |
case PICT_a8: |
return GEN5_SURFACEFORMAT_A8_UNORM; |
case PICT_a4r4g4b4: |
return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM; |
} |
} |
292,8 → 308,22 |
case PICT_a8r8g8b8: |
case PICT_x8r8g8b8: |
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; |
case PICT_a8b8g8r8: |
case PICT_x8b8g8r8: |
return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM; |
case PICT_a2r10g10b10: |
case PICT_x2r10g10b10: |
return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM; |
case PICT_r5g6b5: |
return GEN5_SURFACEFORMAT_B5G6R5_UNORM; |
case PICT_x1r5g5b5: |
case PICT_a1r5g5b5: |
return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM; |
case PICT_a8: |
return GEN5_SURFACEFORMAT_A8_UNORM; |
case PICT_a4r4g4b4: |
case PICT_x4r4g4b4: |
return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM; |
} |
} |
typedef struct gen5_surface_state_padded { |
384,7 → 414,7 |
/* After the first bind, we manage the cache domains within the batch */ |
if (!DBG_NO_SURFACE_CACHE) { |
offset = kgem_bo_get_binding(bo, format); |
offset = kgem_bo_get_binding(bo, format | is_dst << 31); |
if (offset) { |
if (is_dst) |
kgem_bo_mark_dirty(bo); |
400,9 → 430,10 |
GEN5_SURFACE_BLEND_ENABLED | |
format << GEN5_SURFACE_FORMAT_SHIFT); |
if (is_dst) |
if (is_dst) { |
ss[0] |= GEN5_SURFACE_RC_READ_WRITE; |
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; |
else |
} else |
domains = I915_GEM_DOMAIN_SAMPLER << 16; |
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); |
413,7 → 444,7 |
ss[4] = 0; |
ss[5] = 0; |
kgem_bo_set_binding(bo, format, offset); |
kgem_bo_set_binding(bo, format | is_dst << 31, offset); |
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", |
offset, bo->handle, ss[1], |
541,8 → 572,8 |
goto start; |
} |
assert(op->floats_per_rect >= vertex_space(sna)); |
assert(rem <= vertex_space(sna)); |
assert(op->floats_per_rect <= rem); |
if (want > 1 && want * op->floats_per_rect > rem) |
want = rem / op->floats_per_rect; |
649,8 → 680,13 |
* |
* However, the kernel flushes the pipeline between batches, |
* so we should be safe.... |
* OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); |
* |
* On the other hand, after using BLT we must use a non-pipelined |
* operation... |
*/ |
if (sna->kgem.nreloc) |
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); |
OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D); |
gen5_emit_state_base_address(sna); |
732,12 → 768,15 |
kernel); |
bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format); |
DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp)); |
key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31; |
DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n", |
__FUNCTION__, sp, bp, key, |
sna->render_state.gen5.last_pipelined_pointers & 0xffff, |
(sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff, |
sna->render_state.gen5.last_pipelined_pointers)); |
if (key == sna->render_state.gen5.last_pipelined_pointers) |
return false; |
OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5); |
OUT_BATCH(sna->render_state.gen5.vs); |
OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */ |
746,11 → 785,15 |
OUT_BATCH(sna->render_state.gen5.wm + sp); |
OUT_BATCH(sna->render_state.gen5.cc + bp); |
bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16); |
sna->render_state.gen5.last_pipelined_pointers = key; |
return true; |
gen5_emit_urb(sna); |
return bp; |
} |
static void |
static bool |
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) |
{ |
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); |
762,7 → 805,7 |
if (!DBG_NO_STATE_CACHE && |
sna->render_state.gen5.drawrect_limit == limit && |
sna->render_state.gen5.drawrect_offset == offset) |
return; |
return false; |
sna->render_state.gen5.drawrect_offset = offset; |
sna->render_state.gen5.drawrect_limit = limit; |
771,6 → 814,7 |
OUT_BATCH(0x00000000); |
OUT_BATCH(limit); |
OUT_BATCH(offset); |
return true; |
} |
static void |
892,11 → 936,35 |
} |
} |
inline static void
gen5_emit_pipe_flush(struct sna *sna)
{
	/* Emit a 4-dword PIPE_CONTROL requesting a write-cache flush.
	 * "(4 - 2)" is the command-length bias; the trailing zeros are
	 * the unused address/immediate dwords. */
	OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
static void |
gen5_emit_state(struct sna *sna, |
const struct sna_composite_op *op, |
uint16_t offset) |
{ |
bool flush = false; |
assert(op->dst.bo->exec); |
/* drawrect must be first for Ironlake BLT workaround */ |
if (gen5_emit_drawing_rectangle(sna, op)) |
offset &= ~1; |
gen5_emit_binding_table(sna, offset & ~1); |
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)){ |
DBG(("%s: changed blend state, flush required? %d\n", |
__FUNCTION__, (offset & 1) && op->op > PictOpSrc)); |
flush = (offset & 1) && op->op > PictOpSrc; |
} |
gen5_emit_vertex_elements(sna, op); |
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { |
DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__, |
kgem_bo_is_dirty(op->src.bo), |
904,19 → 972,18 |
OUT_BATCH(MI_FLUSH); |
kgem_clear_dirty(&sna->kgem); |
kgem_bo_mark_dirty(op->dst.bo); |
flush = false; |
} |
/* drawrect must be first for Ironlake BLT workaround */ |
gen5_emit_drawing_rectangle(sna, op); |
gen5_emit_binding_table(sna, offset); |
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) |
gen5_emit_urb(sna); |
gen5_emit_vertex_elements(sna, op); |
if (flush) { |
DBG(("%s: forcing flush\n", __FUNCTION__)); |
gen5_emit_pipe_flush(sna); |
} |
} |
static void gen5_bind_surfaces(struct sna *sna, |
const struct sna_composite_op *op) |
{ |
bool dirty = kgem_bo_is_dirty(op->dst.bo); |
uint32_t *binding_table; |
uint16_t offset; |
953,7 → 1020,7 |
offset = sna->render_state.gen5.surface_table; |
} |
gen5_emit_state(sna, op, offset); |
gen5_emit_state(sna, op, offset | dirty); |
} |
fastcall static void |
972,8 → 1039,335 |
op->prim_emit(sna, op, r); |
} |
#if 0 |
fastcall static void
gen5_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	/* Composite a single box: reserve space for one rectangle in the
	 * vertex buffer, convert the box to a rectangle (src and mask are
	 * taken at the same offset as dst) and hand it to the op's
	 * per-primitive emitter. */
	struct sna_composite_rectangles r;
	DBG((" %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));
	gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.mask = r.src = r.dst;
	op->prim_emit(sna, op, &r);
}
static void
gen5_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	/* Composite a run of boxes one rectangle at a time (fallback when
	 * the op has no bulk emit_boxes vector).  gen5_get_rectangles
	 * reserves as many rectangles as fit in the current batch, so the
	 * outer loop restarts after each batch-full. */
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));
	do {
		int nbox_this_time;
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
						     gen5_bind_surfaces);
		nbox -= nbox_this_time;
		do {
			struct sna_composite_rectangles r;
			DBG((" %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));
			/* src and mask share the dst offset for this path. */
			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
static void
gen5_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	/* Bulk composite path: reserve vertex space for as many boxes as
	 * fit in the batch, then let op->emit_boxes fill the whole run of
	 * vertices in one call. */
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
	do {
		int nbox_this_time;
		float *v;
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
						     gen5_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;
		/* Claim the vertex range up front; emit_boxes writes it. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
static void
gen5_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	/* Threaded bulk composite: the vertex range is claimed under the
	 * render vertex lock, then the lock is dropped while emit_boxes
	 * fills the vertices so other threads can reserve concurrently.
	 * acquire/release bracket the unlocked write into the claimed
	 * range. */
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;
		nbox_this_time = gen5_get_rectangles(sna, op, nbox,
						     gen5_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
		/* Mark the range in-flight, then emit outside the lock. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#ifndef MAX |
#define MAX(a,b) ((a) > (b) ? (a) : (b)) |
#endif |
static uint32_t gen5_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	/* Allocate a surface-state entry at the top of the batch (the
	 * surface area grows downwards) describing one plane of a video
	 * frame for sampling, and return its byte offset in the batch. */
	struct gen5_surface_state *ss;
	sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN5_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;
	ss->ss0.color_blend = 1;
	/* Relocation: sampler-read domain, plane base at src_offset. */
	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);
	/* Hardware encodes dimensions/pitch as value-minus-one. */
	ss->ss2.width = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch = src_pitch - 1;
	return sna->kgem.surface * sizeof(uint32_t);
}
static void gen5_video_bind_surfaces(struct sna *sna,
				     const struct sna_composite_op *op)
{
	/* Build the binding table for a video blit: one render-target
	 * entry for the destination plus one sampler entry per source
	 * plane, then emit the 3D state.  Planar formats bind six source
	 * views (Y twice, V twice, U twice); packed formats bind one. */
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	int n_src, n;
	/* Plane base offsets within the frame bo; entries 0/1 are the
	 * Y plane at offset 0. */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;
	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
		src_width[1] = src_width[0] = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1] = src_pitch[0] = frame->pitch[1];
		/* Chroma planes are subsampled to half size. */
		src_width[4] = src_width[5] = src_width[2] = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;
		src_width[0] = frame->width;
		src_height[0] = frame->height;
		src_pitch[0] = frame->pitch[0];
		n_src = 1;
	}
	gen5_get_batch(sna, op);
	binding_table = gen5_composite_get_binding_table(sna, &offset);
	binding_table[0] =
		gen5_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen5_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen5_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}
	/* Low bit of the offset carries the dst-dirty flag into
	 * gen5_emit_state. */
	gen5_emit_state(sna, op, offset | dirty);
}
static bool
gen5_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	/* Blit a video frame onto the pixmap, one textured rectangle per
	 * clip box of dstRegion, scaling from the frame's source rect to
	 * the region extents.  Returns false only if the pixmap cannot be
	 * moved to the GPU. */
	struct sna_composite_op tmp;
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	struct sna_pixmap *priv;
	BoxPtr box;
	DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
	     src_width, src_height, dst_width, dst_height));
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;
	/* Hand-rolled composite op: opaque copy, pad-extended sampling,
	 * bilinear only when actually scaling. */
	memset(&tmp, 0, sizeof(tmp));
	tmp.op = PictOpSrc;
	tmp.dst.pixmap = pixmap;
	tmp.dst.width = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;
	if (src_width == dst_width && src_height == dst_height)
		tmp.src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp.src.filter = SAMPLER_FILTER_BILINEAR;
	tmp.src.repeat = SAMPLER_EXTEND_PAD;
	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;
	tmp.u.gen5.wm_kernel =
		is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
	tmp.u.gen5.ve_id = 2;
	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.priv = frame;
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		/* After a fresh batch the bos must fit. */
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
	}
	gen5_video_bind_surfaces(sna, &tmp);
	gen5_align_vertex(sna, &tmp);
	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif
	/* Map destination pixels to normalised frame coordinates. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	while (nbox--) {
		BoxRec r;
		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;
		gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
		/* Three vertices per rectangle: (x2,y2), (x1,y2), (x1,y1),
		 * each with its normalised source texcoord. */
		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}
	gen4_vertex_flush(sna);
	return true;
}
#endif |
static void |
gen5_render_composite_done(struct sna *sna, |
const struct sna_composite_op *op) |
{ |
986,84 → 1380,198 |
} |
#if 0 |
static bool
gen5_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	/* Resolve the destination picture into op->dst: pick the bo to
	 * render to (preferring the GPU), record pixmap deltas, and
	 * redirect through a proxy if the pixmap exceeds the hardware
	 * render-target limits.  Returns false if no usable bo. */
	BoxRec box;
	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;
	op->dst.format = dst->format;
	/* Damage/bo choice is based on the hinted extents when given,
	 * otherwise the whole picture. */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);
	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
					  &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;
	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);
	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));
	assert(op->dst.bo->proxy == NULL);
	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h, partial))
		return false;
	return true;
}
static bool |
gen5_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp) |
gen5_render_composite(struct sna *sna, |
uint8_t op, |
PicturePtr src, |
PicturePtr mask, |
PicturePtr dst, |
int16_t src_x, int16_t src_y, |
int16_t msk_x, int16_t msk_y, |
int16_t dst_x, int16_t dst_y, |
int16_t width, int16_t height, |
struct sna_composite_op *tmp) |
{ |
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, |
width, height, sna->kgem.mode)); |
tmp->op = PictOpSrc; |
if (op >= ARRAY_SIZE(gen5_blend_op)) { |
DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op)); |
return false; |
} |
tmp->dst.pixmap = dst; |
tmp->dst.bo = dst_bo; |
tmp->dst.width = dst->drawable.width; |
tmp->dst.height = dst->drawable.height; |
tmp->dst.format = PICT_x8r8g8b8; |
if (mask == NULL && |
try_blt(sna, dst, src, width, height) && |
sna_blt_composite(sna, op, |
src, dst, |
src_x, src_y, |
dst_x, dst_y, |
width, height, |
tmp, false)) |
return true; |
if (gen5_composite_fallback(sna, src, mask, dst)) |
return false; |
tmp->src.repeat = RepeatNone; |
tmp->src.filter = PictFilterNearest; |
tmp->src.is_affine = true; |
if (need_tiling(sna, width, height)) |
return sna_tiling_composite(op, src, mask, dst, |
src_x, src_y, |
msk_x, msk_y, |
dst_x, dst_y, |
width, height, |
tmp); |
tmp->src.bo = src_bo; |
tmp->src.pict_format = PICT_x8r8g8b8; |
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format); |
tmp->src.width = src->drawable.width; |
tmp->src.height = src->drawable.height; |
if (!gen5_composite_set_target(sna, tmp, dst, |
dst_x, dst_y, width, height, |
op > PictOpSrc || dst->pCompositeClip->data)) { |
DBG(("%s: failed to set composite target\n", __FUNCTION__)); |
return false; |
} |
DBG(("%s: preparing source\n", __FUNCTION__)); |
tmp->op = op; |
switch (gen5_composite_picture(sna, src, &tmp->src, |
src_x, src_y, |
width, height, |
dst_x, dst_y, |
dst->polyMode == PolyModePrecise)) { |
case -1: |
DBG(("%s: failed to prepare source picture\n", __FUNCTION__)); |
goto cleanup_dst; |
case 0: |
if (!gen4_channel_init_solid(sna, &tmp->src, 0)) |
goto cleanup_dst; |
/* fall through to fixup */ |
case 1: |
if (mask == NULL && |
sna_blt_composite__convert(sna, |
dst_x, dst_y, width, height, |
tmp)) |
return true; |
tmp->is_affine = tmp->src.is_affine; |
gen5_composite_channel_convert(&tmp->src); |
break; |
} |
tmp->is_affine = tmp->src.is_affine; |
tmp->has_component_alpha = false; |
tmp->need_magic_ca_pass = false; |
tmp->mask.is_affine = true; |
tmp->mask.repeat = SAMPLER_EXTEND_NONE; |
tmp->mask.filter = SAMPLER_FILTER_NEAREST; |
tmp->mask.bo = mask_bo; |
tmp->mask.pict_format = PIXMAN_a8; |
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format); |
tmp->mask.width = mask->drawable.width; |
tmp->mask.height = mask->drawable.height; |
if( scale ) |
{ |
tmp->src.scale[0] = 1.f/width; |
tmp->src.scale[1] = 1.f/height; |
} |
else |
{ |
tmp->src.scale[0] = 1.f/src->drawable.width; |
tmp->src.scale[1] = 1.f/src->drawable.height; |
} |
if (mask) { |
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { |
tmp->has_component_alpha = true; |
tmp->mask.scale[0] = 1.f/mask->drawable.width; |
tmp->mask.scale[1] = 1.f/mask->drawable.height; |
/* Check if it's component alpha that relies on a source alpha and on |
* the source value. We can only get one of those into the single |
* source value that we get to blend with. |
*/ |
if (gen5_blend_op[op].src_alpha && |
(gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) { |
if (op != PictOpOver) { |
DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op)); |
goto cleanup_src; |
} |
tmp->need_magic_ca_pass = true; |
tmp->op = PictOpOutReverse; |
} |
} |
if (!reuse_source(sna, |
src, &tmp->src, src_x, src_y, |
mask, &tmp->mask, msk_x, msk_y)) { |
DBG(("%s: preparing mask\n", __FUNCTION__)); |
switch (gen5_composite_picture(sna, mask, &tmp->mask, |
msk_x, msk_y, |
width, height, |
dst_x, dst_y, |
dst->polyMode == PolyModePrecise)) { |
case -1: |
DBG(("%s: failed to prepare mask picture\n", __FUNCTION__)); |
goto cleanup_src; |
case 0: |
if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) |
goto cleanup_src; |
/* fall through to fixup */ |
case 1: |
gen5_composite_channel_convert(&tmp->mask); |
break; |
} |
} |
tmp->is_affine &= tmp->mask.is_affine; |
} |
tmp->u.gen5.wm_kernel = |
gen5_choose_composite_kernel(tmp->op, |
tmp->mask.bo != NULL, |
tmp->has_component_alpha, |
tmp->is_affine); |
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp); |
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp); |
tmp->blt = gen5_render_composite_blt; |
// tmp->box = gen5_render_composite_box; |
tmp->box = gen5_render_composite_box; |
tmp->boxes = gen5_render_composite_boxes__blt; |
if (tmp->emit_boxes) { |
tmp->boxes = gen5_render_composite_boxes; |
tmp->thread_boxes = gen5_render_composite_boxes__thread; |
} |
tmp->done = gen5_render_composite_done; |
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) |
goto cleanup_mask; |
} |
gen5_bind_surfaces(sna, tmp); |
1070,11 → 1578,448 |
gen5_align_vertex(sna, tmp); |
return true; |
cleanup_mask: |
if (tmp->mask.bo) |
kgem_bo_destroy(&sna->kgem, tmp->mask.bo); |
cleanup_src: |
if (tmp->src.bo) |
kgem_bo_destroy(&sna->kgem, tmp->src.bo); |
cleanup_dst: |
if (tmp->redirect.real_bo) |
kgem_bo_destroy(&sna->kgem, tmp->dst.bo); |
return false; |
} |
#if !NO_COMPOSITE_SPANS |
/* Emit a single opacity-weighted box for a spans composite operation.
 * Reserves room for one rectangle in the vertex buffer (rebinding the
 * surfaces via gen5_bind_surfaces if a new batch was started) and then
 * delegates vertex emission to the operation's prim_emit callback. */
fastcall static void
gen5_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
	op->prim_emit(sna, op, box, opacity);
}
/* Emit a batch of boxes, all sharing one opacity value.  The outer loop
 * requests as many rectangles as still fit in the current vertex
 * buffer/batch; the inner loop emits that allotment before asking for
 * more.  gen5_get_rectangles() never returns 0 here — it flushes and
 * retries internally. */
static void
gen5_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		/* How many boxes can be emitted before the batch fills up? */
		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
						     gen5_bind_surfaces);
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}
/* Threaded variant of the spans box emitter: each element of 'box'
 * carries its own opacity (struct sna_opacity_box).
 *
 * Locking protocol: the vertex lock serializes reservation of space in
 * the shared vertex buffer.  Space for this chunk is reserved (and
 * vertex_used advanced) while holding the lock; the lock is then
 * dropped so that the actual vertex writing in emit_boxes() can proceed
 * concurrently with other threads, with the acquire/release pair
 * keeping the buffer alive during the unlocked write.  NOTE(review):
 * this relies on emit_boxes writing only into the [v, v + nbox *
 * floats_per_rect) slice reserved above — confirm against the emitter
 * implementations. */
fastcall static void
gen5_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
						     gen5_bind_surfaces);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Reserve the vertex range under the lock... */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* ...and fill it without holding the lock. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
/* Finish a spans composite: flush any pending vertices, drop the
 * reference taken on the source bo during setup, and undo any
 * destination redirection. */
fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	DBG(("%s()\n", __FUNCTION__));

	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
	sna_render_composite_redirect_done(sna, &op->base);
}
/* Decide whether a spans composite can/should run on the gen5 render
 * pipeline.  Returns false to force a software or BLT fallback. */
static bool
gen5_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	/* Only operators present in the blend table are supported. */
	if (op >= ARRAY_SIZE(gen5_blend_op))
		return false;

	/* No mask is involved in a spans composite, hence NULL. */
	if (gen5_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	/* Tiled (oversized) operations are only worthwhile if the
	 * destination already lives on the GPU. */
	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* Non-rectilinear spans are expensive; use the GPU only when the
	 * destination is busy on the GPU (or spans are preferred). */
	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
		assert(priv);

		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
			return true;

		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
			return false;

		/* a8 spans are faster inplace unless the GPU is preferred. */
		if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
		    dst->format == PICT_a8)
			return false;

		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
	}

	return true;
}
/* Prepare a spans composite operation (a composite whose coverage is
 * weighted by a per-box opacity, used for trapezoid rendering).
 *
 * Sets up the destination target and source channel, selects the
 * opacity WM kernel and vertex emitters, and binds the surfaces.
 * Returns false (after releasing anything acquired) when the operation
 * must fall back. */
static bool
gen5_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));

	/* Too large for one pass through the 3D pipeline: split into tiles. */
	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen5_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height,
				       true))
		return false;

	switch (gen5_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		/* Source reduced to a constant: substitute solid black. */
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen5_composite_channel_convert(&tmp->base.src);
		break;
	}

	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.has_component_alpha = false;
	tmp->base.need_magic_ca_pass = false;

	/* The opacity kernel comes in affine/projective flavours at
	 * adjacent indices, selected by !is_affine. */
	tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
	tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;

	tmp->box   = gen5_render_composite_spans_box;
	tmp->boxes = gen5_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
	tmp->done  = gen5_render_composite_spans_done;

	/* Make sure the bos fit in the aperture; flush once and retry
	 * before giving up. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
	}

	gen5_bind_surfaces(sna, &tmp->base);
	gen5_align_vertex(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
#endif |
/* Copy a list of boxes from src to dst using the 3D pipeline, with the
 * BLT engine and tiled paths as fallbacks.
 *
 * Strategy, in order:
 *   1. try a straight BLT copy (cheapest when depths match);
 *   2. if the alu is not a plain copy/clear, or src == dst, fall back
 *      to a (possibly software) BLT copy;
 *   3. otherwise render-copy, redirecting the destination and/or
 *      taking a partial view of the source when either exceeds the
 *      3D pipeline size limits;
 *   4. on aperture exhaustion, unwind to the tiled-copy fallback.
 */
static bool
gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
		       PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
		       const BoxRec *box, int n, unsigned flags)
{
	struct sna_composite_op tmp;

	DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
	     __FUNCTION__, alu,
	     src->drawable.serialNumber, src_bo->handle,
	     dst->drawable.serialNumber, dst_bo->handle,
	     n, box->x1, box->y1, box->x2, box->y2,
	     flags));

	/* Fast path: BLT copy when the depths are compatible. */
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->drawable.bitsPerPixel,
			       box, n))
		return true;

	/* The render path only implements copy/clear, and cannot read and
	 * write the same bo. */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
fallback_blt:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy_boxes_fallback(sna, alu,
						   src, src_bo, src_dx, src_dy,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, n);
	}

	memset(&tmp, 0, sizeof(tmp));

	/* Matching depths let us use the canonical render format (keeps the
	 * alpha channel interpretation consistent across the copy). */
	if (dst->drawable.depth == src->drawable.depth) {
		tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		tmp.src.pict_format = tmp.dst.format;
	} else {
		tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen5_check_format(tmp.src.pict_format)) {
		DBG(("%s: unsupported source format, %x, use BLT\n",
		     __FUNCTION__, tmp.src.pict_format));
		goto fallback_blt;
	}

	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));

	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.dst.bo = dst_bo;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);

	/* Destination too big for the pipeline: redirect rendering into a
	 * proxy covering just the extents of the boxes. */
	if (too_large(tmp.dst.width, tmp.dst.height)) {
		BoxRec extents = box[0];
		int i;

		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1 + dst_dx,
						   extents.y1 + dst_dy,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			goto fallback_tiled;
	}

	tmp.src.filter = SAMPLER_FILTER_NEAREST;
	tmp.src.repeat = SAMPLER_EXTEND_NONE;
	tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);

	/* Source too big: sample from a partial view over the extents. */
	if (too_large(src->drawable.width, src->drawable.height)) {
		BoxRec extents = box[0];
		int i;

		for (i = 1; i < n; i++) {
			if (box[i].x1 < extents.x1)
				extents.x1 = box[i].x1;
			if (box[i].y1 < extents.y1)
				extents.y1 = box[i].y1;

			if (box[i].x2 > extents.x2)
				extents.x2 = box[i].x2;
			if (box[i].y2 > extents.y2)
				extents.y2 = box[i].y2;
		}

		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
					       extents.x1 + src_dx,
					       extents.y1 + src_dy,
					       extents.x2 - extents.x1,
					       extents.y2 - extents.y1))
			goto fallback_tiled_dst;
	} else {
		tmp.src.bo = kgem_bo_reference(src_bo);
		tmp.src.width  = src->drawable.width;
		tmp.src.height = src->drawable.height;
		tmp.src.offset[0] = tmp.src.offset[1] = 0;
		/* Texture coordinates are normalized to [0,1]. */
		tmp.src.scale[0] = 1.f/src->drawable.width;
		tmp.src.scale[1] = 1.f/src->drawable.height;
	}

	tmp.is_affine = true;
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;
	tmp.u.gen5.wm_kernel = WM_KERNEL;
	tmp.u.gen5.ve_id = 2;

	/* Flush once and retry if the aperture is full. */
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
			DBG(("%s: aperture check failed\n", __FUNCTION__));
			goto fallback_tiled_src;
		}
	}

	/* Fold any redirection offset into the per-box deltas. */
	dst_dx += tmp.dst.x;
	dst_dy += tmp.dst.y;
	tmp.dst.x = tmp.dst.y = 0;

	src_dx += tmp.src.offset[0];
	src_dy += tmp.src.offset[1];

	gen5_copy_bind_surfaces(sna, &tmp);
	gen5_align_vertex(sna, &tmp);

	do {
		int n_this_time;

		n_this_time = gen5_get_rectangles(sna, &tmp, n,
						  gen5_copy_bind_surfaces);
		n -= n_this_time;

		do {
			DBG(("  (%d, %d) -> (%d, %d) + (%d, %d)\n",
			     box->x1 + src_dx, box->y1 + src_dy,
			     box->x1 + dst_dx, box->y1 + dst_dy,
			     box->x2 - box->x1, box->y2 - box->y1));

			/* Each rectangle is three vertices: lower-right,
			 * lower-left, upper-left; the hardware infers the
			 * fourth corner. */
			OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
			OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);

			OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);

			OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
			OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);

			box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	sna_render_composite_redirect_done(sna, &tmp);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	return true;

fallback_tiled_src:
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
	if (tmp.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
	/* Last resorts: retry the BLT, then a tiled copy. */
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy_boxes(sna, alu,
			       src_bo, src_dx, src_dy,
			       dst_bo, dst_dx, dst_dy,
			       dst->drawable.bitsPerPixel,
			       box, n))
		return true;

	DBG(("%s: tiled fallback\n", __FUNCTION__));
	return sna_tiling_copy_boxes(sna, alu,
				     src, src_bo, src_dx, src_dy,
				     dst, dst_bo, dst_dx, dst_dy,
				     box, n);
}
#endif |
static void |
gen5_render_flush(struct sna *sna) |
{ |
gen4_vertex_close(sna); |
1397,16 → 2342,38 |
return state->general_bo != NULL; |
} |
bool gen5_render_init(struct sna *sna) |
const char *gen5_render_init(struct sna *sna, const char *backend) |
{ |
if (!gen5_render_setup(sna)) |
return false; |
return backend; |
sna->kgem.context_switch = gen5_render_context_switch; |
sna->kgem.retire = gen5_render_retire; |
sna->kgem.expire = gen5_render_expire; |
#if 0 |
#if !NO_COMPOSITE |
sna->render.composite = gen5_render_composite; |
sna->render.prefer_gpu |= PREFER_GPU_RENDER; |
#endif |
#if !NO_COMPOSITE_SPANS |
sna->render.check_composite_spans = gen5_check_composite_spans; |
sna->render.composite_spans = gen5_render_composite_spans; |
if (sna->PciInfo->device_id == 0x0044) |
sna->render.prefer_gpu |= PREFER_GPU_SPANS; |
#endif |
sna->render.video = gen5_render_video; |
sna->render.copy_boxes = gen5_render_copy_boxes; |
sna->render.copy = gen5_render_copy; |
sna->render.fill_boxes = gen5_render_fill_boxes; |
sna->render.fill = gen5_render_fill; |
sna->render.fill_one = gen5_render_fill_one; |
#endif |
sna->render.blit_tex = gen5_blit_tex; |
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; |
sna->render.flush = gen5_render_flush; |
sna->render.reset = gen5_render_reset; |
1414,7 → 2381,89 |
sna->render.max_3d_size = MAX_3D_SIZE; |
sna->render.max_3d_pitch = 1 << 18; |
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; |
return "Ironlake (gen5)"; |
}; |
static bool |
gen5_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp) |
{ |
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, |
width, height, sna->kgem.mode)); |
tmp->op = PictOpSrc; |
tmp->dst.pixmap = dst; |
tmp->dst.bo = dst_bo; |
tmp->dst.width = dst->drawable.width; |
tmp->dst.height = dst->drawable.height; |
tmp->dst.format = PICT_x8r8g8b8; |
tmp->src.repeat = RepeatNone; |
tmp->src.filter = PictFilterNearest; |
tmp->src.is_affine = true; |
tmp->src.bo = src_bo; |
tmp->src.pict_format = PICT_x8r8g8b8; |
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format); |
tmp->src.width = src->drawable.width; |
tmp->src.height = src->drawable.height; |
tmp->is_affine = tmp->src.is_affine; |
tmp->has_component_alpha = false; |
tmp->need_magic_ca_pass = false; |
tmp->mask.is_affine = true; |
tmp->mask.repeat = SAMPLER_EXTEND_NONE; |
tmp->mask.filter = SAMPLER_FILTER_NEAREST; |
tmp->mask.bo = mask_bo; |
tmp->mask.pict_format = PIXMAN_a8; |
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format); |
tmp->mask.width = mask->drawable.width; |
tmp->mask.height = mask->drawable.height; |
if( scale ) |
{ |
tmp->src.scale[0] = 1.f/width; |
tmp->src.scale[1] = 1.f/height; |
} |
else |
{ |
tmp->src.scale[0] = 1.f/src->drawable.width; |
tmp->src.scale[1] = 1.f/src->drawable.height; |
} |
tmp->mask.scale[0] = 1.f/mask->drawable.width; |
tmp->mask.scale[1] = 1.f/mask->drawable.height; |
tmp->u.gen5.wm_kernel = |
gen5_choose_composite_kernel(tmp->op, |
tmp->mask.bo != NULL, |
tmp->has_component_alpha, |
tmp->is_affine); |
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp); |
tmp->blt = gen5_render_composite_blt; |
tmp->done = gen5_render_composite_done; |
if (!kgem_check_bo(&sna->kgem, |
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { |
kgem_submit(&sna->kgem); |
} |
gen5_bind_surfaces(sna, tmp); |
gen5_align_vertex(sna, tmp); |
return true; |
} |
/drivers/video/Intel-2D/gen6_render.c |
---|
1,3508 → 1,3585 |
/* |
* Copyright © 2006,2008,2011 Intel Corporation |
* Copyright © 2007 Red Hat, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Wang Zhenyu <zhenyu.z.wang@sna.com> |
* Eric Anholt <eric@anholt.net> |
* Carl Worth <cworth@redhat.com> |
* Keith Packard <keithp@keithp.com> |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include "sna.h" |
#include "sna_reg.h" |
#include "sna_render.h" |
#include "sna_render_inline.h" |
//#include "sna_video.h" |
#include "brw/brw.h" |
#include "gen6_render.h" |
#include "gen4_source.h" |
#include "gen4_vertex.h" |
#define NO_COMPOSITE 0 |
#define NO_COMPOSITE_SPANS 0 |
#define NO_COPY 0 |
#define NO_COPY_BOXES 0 |
#define NO_FILL 0 |
#define NO_FILL_BOXES 0 |
#define NO_FILL_ONE 0 |
#define NO_FILL_CLEAR 0 |
#define NO_RING_SWITCH 1 |
#define PREFER_RENDER 0 |
#define USE_8_PIXEL_DISPATCH 1 |
#define USE_16_PIXEL_DISPATCH 1 |
#define USE_32_PIXEL_DISPATCH 0 |
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH |
#error "Must select at least 8, 16 or 32 pixel dispatch" |
#endif |
#define GEN6_MAX_SIZE 8192 |
/* Per-GT (GT1/GT2) hardware limits used when programming the fixed
 * functions: maximum thread counts per shader stage and the URB
 * partitioning parameters. */
struct gt_info {
	int max_vs_threads;
	int max_gs_threads;
	int max_wm_threads;
	struct {
		int size;            /* URB entry allocation size */
		int max_vs_entries;
		int max_gs_entries;
	} urb;
};
/* Sandy Bridge GT1 limits. */
static const struct gt_info gt1_info = {
	.max_vs_threads = 24,
	.max_gs_threads = 21,
	.max_wm_threads = 40,
	.urb = { 32, 256, 256 },
};

/* Sandy Bridge GT2 limits (larger EU array, bigger URB entries). */
static const struct gt_info gt2_info = {
	.max_vs_threads = 60,
	.max_gs_threads = 60,
	.max_wm_threads = 80,
	.urb = { 64, 256, 256 },
};
static const uint32_t ps_kernel_packed[][4] = { |
#include "exa_wm_src_affine.g6b" |
#include "exa_wm_src_sample_argb.g6b" |
#include "exa_wm_yuv_rgb.g6b" |
#include "exa_wm_write.g6b" |
}; |
static const uint32_t ps_kernel_planar[][4] = { |
#include "exa_wm_src_affine.g6b" |
#include "exa_wm_src_sample_planar.g6b" |
#include "exa_wm_yuv_rgb.g6b" |
#include "exa_wm_write.g6b" |
}; |
#define NOKERNEL(kernel_enum, func, ns) \ |
[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns} |
#define KERNEL(kernel_enum, kernel, ns) \ |
[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns} |
/* Table of WM (pixel shader) kernels, indexed by GEN6_WM_KERNEL_*.
 * NOKERNEL entries are assembled at runtime by the named brw_wm_kernel__*
 * generator (size 0); KERNEL entries are prebuilt binaries.  num_surfaces
 * is the number of binding-table entries each kernel samples/writes. */
static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;
	unsigned int num_surfaces;
} wm_kernels[] = {
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL |
/* Render blend factors for each Porter-Duff operator, indexed by
 * PictOp*.  src_alpha marks operators whose source-alpha usage needs
 * adjustment under component alpha (see gen6_get_blend). */
static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen6_blend_op[] = {
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};
/** |
* Highest-valued BLENDFACTOR used in gen6_blend_op. |
* |
* This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR, |
* GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, |
* GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} |
*/ |
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1) |
#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) |
#define BLEND_OFFSET(s, d) \ |
(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE) |
#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO) |
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO) |
#define SAMPLER_OFFSET(sf, se, mf, me) \ |
(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state)) |
#define VERTEX_2s2s 0 |
#define COPY_SAMPLER 0 |
#define COPY_VERTEX VERTEX_2s2s |
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX) |
#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state)) |
#define FILL_VERTEX VERTEX_2s2s |
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) |
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) |
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0) |
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0) |
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf) |
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf) |
#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) |
#define OUT_BATCH(v) batch_emit(sna, v) |
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) |
#define OUT_VERTEX_F(v) vertex_emit(sna, v) |
static inline bool too_large(int width, int height) |
{ |
return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; |
} |
/* Return the offset of the blend state matching the operation.
 *
 * NOTE(review): in this port the parameters are currently ignored and
 * the blend is hard-wired to (ONE, INV_SRC_ALPHA), i.e. pre-multiplied
 * Over; the full operator mapping from upstream is preserved in the
 * disabled block below. */
static uint32_t gen6_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = GEN6_BLENDFACTOR_ONE;            //gen6_blend_op[op].src_blend;
	dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA;  //gen6_blend_op[op].dst_blend;
//	dst = GEN6_BLENDFACTOR_ZERO;           //gen6_blend_op[op].dst_blend;

#if 0
	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ONE;
		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)BLEND_OFFSET(src, dst)));
#endif

	return BLEND_OFFSET(src, dst);
}
/* Map a PICT_* format to the gen6 surface format used for sampling.
 * Only the three formats this port actually uses are handled; anything
 * else returns -1 (caller treats that as unsupported).  The disabled
 * switch below is the full upstream mapping, kept for reference. */
static uint32_t gen6_get_card_format(PictFormat format)
{
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	};

/*
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
*/
}
/* Map a PICT_* format to the gen6 render-target surface format.
 * This port always renders to B8G8R8A8; the disabled switch below is
 * the full upstream mapping, kept for reference. */
static uint32_t gen6_get_dest_format(PictFormat format)
{
	return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;

#if 0
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
#endif
}
#if 0 |
static bool gen6_check_dst_format(PictFormat format) |
{ |
if (gen6_get_dest_format(format) != -1) |
return true; |
DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); |
return false; |
} |
static bool gen6_check_format(uint32_t format) |
{ |
if (gen6_get_card_format(format) != -1) |
return true; |
DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); |
return false; |
} |
static uint32_t gen6_filter(uint32_t filter) |
{ |
switch (filter) { |
default: |
assert(0); |
case PictFilterNearest: |
return SAMPLER_FILTER_NEAREST; |
case PictFilterBilinear: |
return SAMPLER_FILTER_BILINEAR; |
} |
} |
static uint32_t gen6_check_filter(PicturePtr picture) |
{ |
switch (picture->filter) { |
case PictFilterNearest: |
case PictFilterBilinear: |
return true; |
default: |
return false; |
} |
} |
static uint32_t gen6_repeat(uint32_t repeat) |
{ |
switch (repeat) { |
default: |
assert(0); |
case RepeatNone: |
return SAMPLER_EXTEND_NONE; |
case RepeatNormal: |
return SAMPLER_EXTEND_REPEAT; |
case RepeatPad: |
return SAMPLER_EXTEND_PAD; |
case RepeatReflect: |
return SAMPLER_EXTEND_REFLECT; |
} |
} |
static bool gen6_check_repeat(PicturePtr picture) |
{ |
if (!picture->repeat) |
return true; |
switch (picture->repeatType) { |
case RepeatNone: |
case RepeatNormal: |
case RepeatPad: |
case RepeatReflect: |
return true; |
default: |
return false; |
} |
} |
#endif |
static int |
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) |
{ |
int base; |
if (has_mask) { |
if (is_ca) { |
if (gen6_blend_op[op].src_alpha) |
base = GEN6_WM_KERNEL_MASKSA; |
else |
base = GEN6_WM_KERNEL_MASKCA; |
} else |
base = GEN6_WM_KERNEL_MASK; |
} else |
base = GEN6_WM_KERNEL_NOMASK; |
return base + !is_affine; |
} |
/* Program the URB partitioning: all entries to the VS, none to the GS
 * (no GS thread is used). */
static void
gen6_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
/* Emit STATE_BASE_ADDRESS: surface state is relocated against the batch
 * itself (NULL target), dynamic and instruction state against the
 * driver's general_bo; the upper-bound fields are left disabled. */
static void
gen6_emit_state_base_address(struct sna *sna)
{
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}
/* Point all viewport state at 0 (no clip/SF viewports; CC viewport
 * marked for modification but left null). */
static void
gen6_emit_viewports(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
/* Disable the vertex shader stage: no constant buffer, no kernel,
 * vertices pass straight through to the next stage. */
static void
gen6_emit_vs(struct sna *sna)
{
	/* disable VS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
/* Disable the geometry shader stage: no constant buffer, no kernel,
 * pass-through. */
static void
gen6_emit_gs(struct sna *sna)
{
	/* disable GS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
/* Disable the clipper: geometry passes through unclipped (2D blits are
 * always within the render target). */
static void
gen6_emit_clip(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
}
/* Disable the pixel-shader (WM) constant buffer — the kernels take all
 * inputs via samplers and payload. */
static void
gen6_emit_wm_constants(struct sna *sna)
{
	/* disable WM constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
/* Bind a null depth buffer (no depth/stencil testing is ever used by
 * the 2D render paths) and clear the clear-params. */
static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
	OUT_BATCH(0);
}
/* Emit all state that never changes between 2D operations: select the
 * 3D pipeline, single-sample rendering, then the static per-stage
 * setup.  Called once per batch; clears the needs_invariant flag. */
static void
gen6_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen6_emit_urb(sna);

	gen6_emit_state_base_address(sna);

	gen6_emit_viewports(sna);
	gen6_emit_vs(sna);
	gen6_emit_gs(sna);
	gen6_emit_clip(sna);
	gen6_emit_wm_constants(sna);
	gen6_emit_null_depth_buffer(sna);

	sna->render_state.gen6.needs_invariant = false;
}
/* Point the hardware at the blend state for 'blend' (an offset into the
 * precomputed cc_blend table), caching the last value to skip redundant
 * emissions.  Returns whether blending is actually enabled.
 *
 * NOTE(review): when the cached value is the reset sentinel (-1) the
 * depth-stencil and color-calc pointer dwords are written with their
 * valid bits set — presumably to initialize those (unused) pointers
 * exactly once after a state reset; confirm against the gen6 PRM. */
static bool
gen6_emit_cc(struct sna *sna, int blend)
{
	struct gen6_render_state *render = &sna->render_state.gen6;

	if (render->blend == blend)
		return blend != NO_BLEND;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend));

	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
	OUT_BATCH((render->cc_blend + blend) | 1);
	if (render->blend == (unsigned)-1) {
		OUT_BATCH(1);
		OUT_BATCH(1);
	} else {
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	render->blend = blend;
	return blend != NO_BLEND;
}
/* Select the sampler-state block (offset into wm_state) for the PS.
 * Cached: skipped when the requested state is already current. */
static void
gen6_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen6.samplers == state)
		return;

	sna->render_state.gen6.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	/* only the PS pointer is modified; VS/GS have no samplers */
	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0); /* VS */
	OUT_BATCH(0); /* GS */
	OUT_BATCH(sna->render_state.gen6.wm_state + state);
}
/* Program the SF (strips-and-fans) unit: one vertex attribute output for
 * source-only operations, two when a mask channel is present.  Cached on
 * num_sf_outputs. */
static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
	/* last vertex of the rectlist triangle provokes the attributes */
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW9 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW14 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW19 */
}
/* Program the WM (pixel shader) stage for the chosen composite kernel.
 * kernels[] holds the 8/16/32-pixel dispatch variants; a zero entry means
 * that dispatch width is unavailable for this kernel.  Cached on kernel. */
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
	const uint32_t *kernels;

	if (sna->render_state.gen6.kernel == kernel)
		return;

	sna->render_state.gen6.kernel = kernel;
	kernels = sna->render_state.gen6.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
	/* first available kernel offset (GNU ?: picks first non-zero) */
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch space */
	/* start GRF depends on which dispatch widths exist */
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}
/* Point the PS at the surface binding table at 'offset' (in dwords).
 * Returns true if the table changed (caller may then need a stall). */
static bool
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
	if (sna->render_state.gen6.surface_table == offset)
		return false;

	/* Binding table pointers */
	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);

	sna->render_state.gen6.surface_table = offset;
	return true;
}
/* Set the drawing rectangle (clip extents + origin offset) for the
 * destination.  Non-pipelined state, so the documented SNB PIPE_CONTROL
 * workaround sequence is emitted first.  Returns true if the rectangle
 * changed. */
static bool
gen6_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen6.drawrect_limit == limit &&
	    sna->render_state.gen6.drawrect_offset == offset)
		return false;

	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
	 * produced by non-pipelined state commands), software needs to first
	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
	 * 0.
	 *
	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
	 * BEFORE the pipe-control with a post-sync op and no write-cache
	 * flushes.
	 */
	if (!sna->render_state.gen6.first_state_packet) {
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	/* post-sync write into the scratch page of general_bo */
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
				 I915_GEM_DOMAIN_INSTRUCTION,
				 64));
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);

	sna->render_state.gen6.drawrect_offset = offset;
	sna->render_state.gen6.drawrect_limit = limit;
	return true;
}
/* Describe the vertex buffer layout (VERTEX_ELEMENTS) for vertex id 'id':
 * low 2 bits of id encode the number of floats in texcoord 0, bits 2+ the
 * number in texcoord 1 (0 == no mask channel).  Cached on ve_id. */
static void
gen6_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 */
	struct gen6_render_state *render = &sna->render_state.gen6;
	uint32_t src_format, dw;
	int id = GEN6_VERTEX(op->u.gen6.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* element 0: VUE header padding, all constant zeros */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
	case 0:
		/* 0 floats: coordinates are the 16-bit x,y again */
		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		/* mask coords start after the source coords (min 1 float) */
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
		case 2:
			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
/* Flush the render and texture caches with a stalling PIPE_CONTROL,
 * used between dependent rendering passes. */
static void
gen6_emit_flush(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
		  GEN6_PIPE_CONTROL_TC_FLUSH |
		  GEN6_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
/* Emit all per-operation state for 'op'.  The low bit of
 * wm_binding_table carries the destination-dirty flag from
 * gen6_get_batch(); a trailing stall is emitted only when no other
 * emitted packet already provided one. */
static void
gen6_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	bool need_stall = wm_binding_table & 1;

	if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
		need_stall = false;
	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_vertex_elements(sna, op);

	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
	if (gen6_emit_drawing_rectangle(sna, op))
		need_stall = false;
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		/* reading from a freshly written bo: flush caches first */
		gen6_emit_flush(sna);
		kgem_clear_dirty(&sna->kgem);
		if (op->dst.bo->exec)
			kgem_bo_mark_dirty(op->dst.bo);
		need_stall = false;
	}
	if (need_stall) {
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}
	sna->render_state.gen6.first_state_packet = false;
}
/* Second pass for component-alpha rendering: replay the vertices already
 * emitted this primitive with a PictOpAdd blend and the CA kernel.
 * Returns true if the extra pass was emitted. */
static bool gen6_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen6_render_state *state = &sna->render_state.gen6;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen6_emit_flush(sna);

	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
	gen6_emit_wm(sna,
		     gen6_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine),
		     true);

	/* replay the rectlist over the same vertex range */
	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}
/* Surface-state entries padded to 32 bytes so the binding table can
 * index them with a fixed stride. */
typedef struct gen6_surface_state_padded {
	struct gen6_surface_state state;
	char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;
/* Reserve a zeroed 64-byte region at the start of the static stream. */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
/* Reserve a 64-byte scratch region in the static stream. */
static void scratch_create(struct sna_static_stream *stream)
{
	/* 64 bytes of scratch space for random writes, such as
	 * the pipe-control w/a.
	 */
	sna_static_stream_map(stream, 64, 64);
}
/* Fill one SAMPLER_STATE entry from the generic filter/extend enums.
 * Unknown values fall through to NEAREST / NONE respectively. */
static void
sampler_state_init(struct gen6_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;

	switch (filter) {
	default:
	case SAMPLER_FILTER_NEAREST:
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
		break;
	case SAMPLER_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
		break;
	}

	switch (extend) {
	default:
	case SAMPLER_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		break;
	case SAMPLER_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		break;
	}
}
/* Sampler pair for copies: nearest filtering with unnormalized (pixel)
 * coordinates on the source channel. */
static void
sampler_copy_init(struct gen6_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
/* Sampler pair for fills: repeating nearest sampling with unnormalized
 * coordinates on the source channel. */
static void
sampler_fill_init(struct gen6_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
/* Translate a bo tiling mode into SURFACE_STATE tiling bits.
 *
 * In this port tiled surfaces are not used, so every surface is
 * programmed as linear and the translation is a constant zero.  (The
 * original driver mapped I915_TILING_X/Y to GEN6_SURFACE_TILED and
 * GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y here.)
 */
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
	(void)tiling; /* intentionally ignored: all surfaces are linear */
	return 0;
}
/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 *
 * Returns the byte offset of the (possibly cached) surface-state entry
 * within the batch.  Entries are cached per (format, scanout) key on the
 * bo and re-used across binds; a cache hit for a destination still marks
 * the bo dirty.
 */
static int
gen6_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	uint16_t offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
	if (offset) {
		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
		     offset, bo->handle, format,
		     is_dst ? "render" : "sampler"));
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* carve a new padded surface-state slot from the top of the batch */
	offset = sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
		 GEN6_SURFACE_BLEND_ENABLED |
		 format << GEN6_SURFACE_FORMAT_SHIFT);
	if (is_dst)
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
	assert(bo->pitch <= (1 << 18));
	ss[3] = (gen6_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	/* NOTE(review): ss[5] = 3 << 16 appears to be a cache-control
	 * setting skipped for scanout surfaces -- confirm against the PRM */
	ss[5] = is_scanout ? 0 : 3 << 16;

	kgem_bo_set_binding(bo, format | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
/* Emit a 3DSTATE_VERTEX_BUFFERS packet for the op's vertex id.  The
 * buffer address slot is recorded in vertex_reloc[] and patched at flush
 * time once the vbo is known. */
static void gen6_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN6_VERTEX(op->u.gen6.flags);

	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
	/* address patched later, remember where it lives */
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}
/* Start (or continue) a RECTLIST primitive.  If the previous batch entry
 * was also a primitive we simply reopen it by pointing vertex_offset at
 * its vertex-count dword; otherwise a fresh 3DPRIMITIVE is emitted with
 * the count left at 0 to be filled in by the vertex flush. */
static void gen6_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_start,
		     sna->render.vertex_index));
		/* reuse the still-open primitive's count dword */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;
	DBG(("%s: started new primitive: index=%d\n",
	     __FUNCTION__, sna->render.vertex_start));

	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
/* Ensure a vertex buffer and an open primitive exist for 'op'.
 * Returns false if the batch lacks room and must be submitted first. */
static bool gen6_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
	int ndwords;

	/* another thread may have opened the primitive while we waited */
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* worst case: primitive + possible CA replay (+ vb setup) */
	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen6_emit_vertex_buffer(sna, op);

	gen6_emit_primitive(sna);
	return true;
}
/* Slow path of gen6_get_rectangles(): flush the current vertices (and
 * any CA pass) then start a fresh vbo.  Returns the float space now
 * available, or 0 if the whole batch must be submitted. */
static int gen6_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen6_magic_ca_pass(sna, op)) {
			/* the CA pass changed blend/kernel state; restore */
			gen6_emit_flush(sna);
			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
			gen6_emit_wm(sna,
				     GEN6_KERNEL(op->u.gen6.flags),
				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
		}
	}

	return gen4_vertex_finish(sna);
}
/* Reserve vertex space for up to 'want' rectangles (3 vertices each).
 * May flush vertices and/or submit the batch, re-emitting state through
 * 'emit_state' and retrying.  Returns the number of rectangles granted
 * (>= 1). */
inline static int gen6_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
	int rem;

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen6_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0 &&
		     !gen6_rectangle_begin(sna, op)))
		goto flush;

	/* clamp to however many rects actually fit */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* batch full: close the primitive, submit, redo state, retry */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
/* Allocate a zeroed binding-table slot at the top of the batch and
 * return a pointer into it; *offset receives its dword offset. */
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen6_surface_state_padded));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
/* Prepare the batch for a render operation: switch to RENDER mode,
 * submit if there is not enough room for the state + surfaces, and emit
 * the invariant state if needed.  Returns whether the destination bo is
 * dirty (pending GPU writes). */
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen6.needs_invariant)
		gen6_emit_invariant(sna);

	return kgem_bo_is_dirty(op->dst.bo);
}
/* Build the binding table (dst, src, optional mask) for a composite op
 * and emit the full per-op state.  If the freshly built table matches
 * the previous one, the slot is released and the old table re-used. */
static void gen6_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen6_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen6_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* compare the first two entries as one 64-bit word, the mask
	 * entry separately, against the currently bound table */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	/* low bit of the offset carries the dirty flag */
	gen6_emit_state(sna, op, offset | dirty);
}
static void |
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) |
{ |
assert (sna->render.vertex_offset == 0); |
if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { |
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) |
gen4_vertex_finish(sna); |
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", |
sna->render_state.gen6.floats_per_vertex, |
op->floats_per_vertex, |
sna->render.vertex_index, |
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); |
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; |
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; |
sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; |
} |
assert((sna->render.vertex_used % op->floats_per_vertex) == 0); |
} |
/* Emit one composite rectangle: reserve space then delegate vertex
 * emission to the op's prim_emit. */
fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, r);
}
#if 0
/* (disabled in this port) Composite a single box by converting it to a
 * sna_composite_rectangles with src == mask == dst offsets. */
fastcall static void
gen6_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}
/* (disabled in this port) Composite a list of boxes one rectangle at a
 * time via prim_emit, batching as many as fit per reservation. */
static void
gen6_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
/* (disabled in this port) Composite boxes using the op's bulk
 * emit_boxes vector path instead of per-rect prim_emit. */
static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
/* (disabled in this port) Threaded variant of composite_boxes: reserves
 * vertex space under the render lock, then fills vertices with the lock
 * dropped so other threads can reserve concurrently. */
static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* hold a reference to the reserved span while unlocked */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif
#ifndef MAX |
#define MAX(a,b) ((a) > (b) ? (a) : (b)) |
#endif |
/* Pre-build a BLEND_STATE entry for every (src, dst) blend-factor pair
 * in the static stream; lookups later index by src*COUNT+dst.  Blending
 * is disabled only for the no-op ONE/ZERO combination.  Returns the
 * stream offset of the table. */
static uint32_t
gen6_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
			struct gen6_blend_state *blend =
				(struct gen6_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
			blend->blend0.blend_enable =
				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
#if 0
/* (disabled in this port) Bind one plane of a video frame as a sampler
 * surface; src_offset selects the plane within the frame bo.  Returns
 * the byte offset of the surface state in the batch. */
static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen6_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN6_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
/* (disabled in this port) Build the binding table for a video blit:
 * the render target plus one surface per source plane (6 for planar
 * formats -- Y twice, then V and U pairs -- or 1 for packed YUV). */
static void gen6_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;
	int n_src, n;

	dirty = gen6_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		/* chroma planes are subsampled 2x2 */
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen6_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen6_emit_state(sna, op, offset | dirty);
}
/* (disabled in this port) Render a video frame into the destination
 * pixmap, one textured rectangle per clip box, scaling from src to drw
 * dimensions.  Returns false if the pixmap cannot be moved to the GPU. */
static bool
gen6_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  short src_w, short src_h,
		  short drw_w, short drw_h,
		  short dx, short dy,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	int nbox, pix_xoff, pix_yoff;
	float src_scale_x, src_scale_y;
	struct sna_pixmap *priv;
	unsigned filter;
	BoxPtr box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__, src_w, src_h, drw_w, drw_h,
	     REGION_NUM_RECTS(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* 1:1 blits can use nearest sampling */
	if (src_w == drw_w && src_h == drw_h)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN6_WM_KERNEL_VIDEO_PLANAR :
			       GEN6_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_video_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	/* Use normalized texture coordinates */
	src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
	src_scale_y = ((float)src_h / frame->height) / (float)drw_h;

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	while (nbox--) {
		BoxRec r;

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);

		/* rectlist: bottom-right, bottom-left, top-left */
		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
		OUT_VERTEX_F((box->y2 - dy) * src_scale_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
		OUT_VERTEX_F((box->y2 - dy) * src_scale_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
		OUT_VERTEX_F((box->y1 - dy) * src_scale_y);

		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}
	priv->clear = false;

	gen4_vertex_flush(sna);
	return true;
}
/* Prepare one composite channel (source or mask) from an X Picture.
 *
 * Resolves solid colours, gradients, alpha maps and unsupported
 * repeat/filter modes to fixed-up bo-backed channels, strips integer
 * translations out of the transform, and extracts/converts pixmaps that
 * are too large or in a format the gen6 sampler cannot read.
 *
 * Returns the value of the underlying sna_render_* helper: -1 on
 * failure, 0/1 on success (presumably 0 = "reduced to solid", 1 = bo
 * channel, judging by the callers' switch statements -- verify against
 * sna_render.c).
 */
static int
gen6_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;	/* -1 == not yet chosen */

	/* A constant-colour picture needs no sampling at all. */
	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	/* No drawable: this is a gradient (SourcePict). */
	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		/* Imprecise rendering may use a cheaper approximation. */
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	/* Alpha maps cannot be expressed as a single sampler read. */
	if (picture->alphaMap) {
		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	/* Repeat/filter modes the gen6 sampler cannot do in hardware. */
	if (!gen6_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen6_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into (x, y), letting us
	 * drop the transform and use nearest sampling. */
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen6_get_card_format(picture->format);
	/* No matching surface format: convert the pixels instead. */
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Pixmaps beyond the 3D pipeline limits must be extracted. */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel) |
{ |
channel->repeat = gen6_repeat(channel->repeat); |
channel->filter = gen6_filter(channel->filter); |
if (channel->card_format == (unsigned)-1) |
channel->card_format = gen6_get_card_format(channel->pict_format); |
assert(channel->card_format != (unsigned)-1); |
} |
#endif |
/* Finish a composite operation: flush any pending vertices and, if the
 * op required the two-pass component-alpha trick, run the second pass.
 * The flush must precede the CA pass since the pass re-reads the
 * emitted rectangles. */
static void gen6_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));
	assert(!sna->render.active);

	/* Only flush if rectangles were actually emitted. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}

	/* Redirect teardown disabled in this port -- see upstream. */
//	sna_render_composite_redirect_done(sna, op);
}
#if 0 |
static bool |
gen6_composite_set_target(struct sna *sna, |
struct sna_composite_op *op, |
PicturePtr dst, |
int x, int y, int w, int h) |
{ |
BoxRec box; |
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); |
op->dst.format = dst->format; |
op->dst.width = op->dst.pixmap->drawable.width; |
op->dst.height = op->dst.pixmap->drawable.height; |
if (w && h) { |
box.x1 = x; |
box.y1 = y; |
box.x2 = x + w; |
box.y2 = y + h; |
} else |
sna_render_picture_extents(dst, &box); |
// op->dst.bo = sna_drawable_use_bo (dst->pDrawable, |
// PREFER_GPU | FORCE_GPU | RENDER_GPU, |
// &box, &op->damage); |
if (op->dst.bo == NULL) |
return false; |
get_drawable_deltas(dst->pDrawable, op->dst.pixmap, |
&op->dst.x, &op->dst.y); |
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", |
__FUNCTION__, |
op->dst.pixmap, (int)op->dst.format, |
op->dst.width, op->dst.height, |
op->dst.bo->pitch, |
op->dst.x, op->dst.y, |
op->damage ? *op->damage : (void *)-1)); |
assert(op->dst.bo->proxy == NULL); |
if (too_large(op->dst.width, op->dst.height) && |
!sna_render_composite_redirect(sna, op, x, y, w, h)) |
return false; |
return true; |
} |
/* Build a full gen6 render-engine composite operation (src [+ mask] ->
 * dst) into *tmp.  On success the caller emits rectangles and calls
 * tmp->done; on failure all acquired bos are released via the cleanup
 * chain and false is returned so the caller can fall back.
 */
static bool
gen6_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	/* Reject Render operators we have no blend table entry for. */
	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	/* Clear is Src with a solid transparent colour. */
	if (op == PictOpClear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen6_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height))
		return false;

	/* Prepare the source channel: -1 = error, 0 = solid fallback,
	 * 1 = channel ready. */
	switch (gen6_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen6_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value. We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen6_blend_op[op].src_alpha &&
			    (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Over needs the two-pass CA trick. */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* Reuse the source channel when the mask is the same picture. */
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen6_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen6_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend, WM kernel and vertex-emitter selections into
	 * the per-op state word. */
	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen6_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(tmp));

	/* Per-rectangle emitters disabled in this port. */
//	tmp->blt   = gen6_render_composite_blt;
//	tmp->box   = gen6_render_composite_box;
//	tmp->boxes = gen6_render_composite_boxes__blt;
//	if (tmp->emit_boxes) {
//		tmp->boxes = gen6_render_composite_boxes;
//		tmp->thread_boxes = gen6_render_composite_boxes__thread;
//	}
	tmp->done  = gen6_render_composite_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	/* Make sure all three bos fit in one batch; retry once after a
	 * submit before giving up. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_composite_state(sna, tmp);
	gen6_align_vertex(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}
#if !NO_COMPOSITE_SPANS |
/* Emit a single opacity-weighted span rectangle: reserve room for one
 * rectangle (re-emitting state if the batch wrapped) and delegate the
 * vertex writes to the op's prim_emit callback. */
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}
static void |
gen6_render_composite_spans_boxes(struct sna *sna, |
const struct sna_composite_spans_op *op, |
const BoxRec *box, int nbox, |
float opacity) |
{ |
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", |
__FUNCTION__, nbox, |
op->base.src.offset[0], op->base.src.offset[1], |
opacity, |
op->base.dst.x, op->base.dst.y)); |
do { |
int nbox_this_time; |
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, |
gen6_emit_composite_state); |
nbox -= nbox_this_time; |
do { |
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, |
box->x1, box->y1, |
box->x2 - box->x1, |
box->y2 - box->y1)); |
op->prim_emit(sna, op, box++, opacity); |
} while (--nbox_this_time); |
} while (nbox); |
} |
/* Threaded span emission.  The vertex buffer is carved up under the
 * render lock; the actual vertex writes (emit_boxes) run outside the
 * lock, bracketed by acquire/release so concurrent emitters do not
 * stall each other.  The lock/unlock ordering here is load-bearing --
 * do not reorder. */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Reserve this chunk's slice of the vertex buffer while the
		 * lock is held... */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* ...then fill it without blocking other threads. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
/* Tear down a spans operation: flush outstanding vertices, drop the
 * source bo reference and undo any destination redirection. */
fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	DBG(("%s()\n", __FUNCTION__));
	assert(!sna->render.active);

	/* Flush only if rectangles were emitted. */
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}
static bool |
gen6_check_composite_spans(struct sna *sna, |
uint8_t op, PicturePtr src, PicturePtr dst, |
int16_t width, int16_t height, |
unsigned flags) |
{ |
DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", |
__FUNCTION__, op, width, height, flags)); |
if (op >= ARRAY_SIZE(gen6_blend_op)) |
return false; |
if (gen6_composite_fallback(sna, src, NULL, dst)) { |
DBG(("%s: operation would fallback\n", __FUNCTION__)); |
return false; |
} |
if (need_tiling(sna, width, height) && |
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { |
DBG(("%s: fallback, tiled operation not on GPU\n", |
__FUNCTION__)); |
return false; |
} |
if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { |
struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); |
assert(priv); |
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) |
return true; |
if (flags & COMPOSITE_SPANS_INPLACE_HINT) |
return false; |
return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); |
} |
return true; |
} |
/* Build a spans composite operation (source weighted by per-box opacity
 * against dst) into *tmp.  Oversized operations are delegated to the
 * tiling path.  Returns false on failure with all acquired bos
 * released. */
static bool
gen6_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

	/* Too wide for one pass through the pipeline: tile it. */
	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen6_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height))
		return false;

	/* Prepare the source channel: -1 = error, 0 = solid fallback,
	 * 1 = channel ready. */
	switch (gen6_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen6_composite_channel_convert(&tmp->base.src);
		break;
	}

	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* The mask sampler slot carries the opacity ramp for spans. */
	tmp->base.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(tmp));

	tmp->box   = gen6_render_composite_spans_box;
	tmp->boxes = gen6_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
	tmp->done  = gen6_render_composite_spans_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	/* Ensure both bos fit in one batch; retry once after a submit. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_composite_state(sna, &tmp->base);
	gen6_align_vertex(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
#endif |
/* Emit the surface/binding-table state for a render-engine copy: slot 0
 * is the destination (writable), slot 1 the source. */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* If this two-entry table matches the previous one, discard it and
	 * reuse the cached table.  NOTE(review): the uint64_t compare
	 * type-puns the uint32_t batch -- relies on -fno-strict-aliasing
	 * or compiler leniency; confirm build flags. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
static inline bool prefer_blt_copy(struct sna *sna, |
struct kgem_bo *src_bo, |
struct kgem_bo *dst_bo, |
unsigned flags) |
{ |
if (flags & COPY_SYNC) |
return false; |
if (PREFER_RENDER) |
return PREFER_RENDER > 0; |
if (sna->kgem.ring == KGEM_BLT) |
return true; |
if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) |
return true; |
if (untiled_tlb_miss(src_bo) || |
untiled_tlb_miss(dst_bo)) |
return true; |
if (!prefer_blt_ring(sna, dst_bo, flags)) |
return false; |
return (prefer_blt_bo(sna, src_bo) >= 0 && |
prefer_blt_bo(sna, dst_bo) > 0); |
} |
/* Compute the bounding box (union) of n >= 1 boxes into *extents. */
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
{
	int i;

	*extents = box[0];
	for (i = 1; i < n; i++) {
		if (extents->x1 > box[i].x1)
			extents->x1 = box[i].x1;
		if (extents->x2 < box[i].x2)
			extents->x2 = box[i].x2;

		if (extents->y1 > box[i].y1)
			extents->y1 = box[i].y1;
		if (extents->y2 < box[i].y2)
			extents->y2 = box[i].y2;
	}
}
static inline bool |
overlaps(struct sna *sna, |
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int n, BoxRec *extents) |
{ |
if (src_bo != dst_bo) |
return false; |
boxes_extents(box, n, extents); |
return (extents->x2 + src_dx > extents->x1 + dst_dx && |
extents->x1 + src_dx < extents->x2 + dst_dx && |
extents->y2 + src_dy > extents->y1 + dst_dy && |
extents->y1 + src_dy < extents->y2 + dst_dy); |
} |
static bool |
gen6_render_copy_boxes(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int n, unsigned flags) |
{ |
struct sna_composite_op tmp; |
BoxRec extents; |
DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n", |
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, |
src_bo == dst_bo, |
overlaps(sna, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
box, n, &extents))); |
if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && |
sna_blt_compare_depth(&src->drawable, &dst->drawable) && |
sna_blt_copy_boxes(sna, alu, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
dst->drawable.bitsPerPixel, |
box, n)) |
return true; |
if (!(alu == GXcopy || alu == GXclear)) { |
fallback_blt: |
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) |
return false; |
return sna_blt_copy_boxes_fallback(sna, alu, |
src, src_bo, src_dx, src_dy, |
dst, dst_bo, dst_dx, dst_dy, |
box, n); |
} |
if (overlaps(sna, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
box, n, &extents)) { |
if (too_large(extents.x2-extents.x1, extents.y2-extents.y1)) |
goto fallback_blt; |
if (can_switch_to_blt(sna, dst_bo, flags) && |
sna_blt_compare_depth(&src->drawable, &dst->drawable) && |
sna_blt_copy_boxes(sna, alu, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
dst->drawable.bitsPerPixel, |
box, n)) |
return true; |
return sna_render_copy_boxes__overlap(sna, alu, |
src, src_bo, src_dx, src_dy, |
dst, dst_bo, dst_dx, dst_dy, |
box, n, &extents); |
} |
if (dst->drawable.depth == src->drawable.depth) { |
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth); |
tmp.src.pict_format = tmp.dst.format; |
} else { |
tmp.dst.format = sna_format_for_depth(dst->drawable.depth); |
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); |
} |
if (!gen6_check_format(tmp.src.pict_format)) |
goto fallback_blt; |
tmp.dst.pixmap = dst; |
tmp.dst.width = dst->drawable.width; |
tmp.dst.height = dst->drawable.height; |
tmp.dst.bo = dst_bo; |
tmp.dst.x = tmp.dst.y = 0; |
tmp.damage = NULL; |
sna_render_composite_redirect_init(&tmp); |
if (too_large(tmp.dst.width, tmp.dst.height)) { |
int i; |
extents = box[0]; |
for (i = 1; i < n; i++) { |
if (box[i].x1 < extents.x1) |
extents.x1 = box[i].x1; |
if (box[i].y1 < extents.y1) |
extents.y1 = box[i].y1; |
if (box[i].x2 > extents.x2) |
extents.x2 = box[i].x2; |
if (box[i].y2 > extents.y2) |
extents.y2 = box[i].y2; |
} |
if (!sna_render_composite_redirect(sna, &tmp, |
extents.x1 + dst_dx, |
extents.y1 + dst_dy, |
extents.x2 - extents.x1, |
extents.y2 - extents.y1)) |
goto fallback_tiled; |
dst_dx += tmp.dst.x; |
dst_dy += tmp.dst.y; |
tmp.dst.x = tmp.dst.y = 0; |
} |
tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); |
if (too_large(src->drawable.width, src->drawable.height)) { |
int i; |
extents = box[0]; |
for (i = 1; i < n; i++) { |
if (extents.x1 < box[i].x1) |
extents.x1 = box[i].x1; |
if (extents.y1 < box[i].y1) |
extents.y1 = box[i].y1; |
if (extents.x2 > box[i].x2) |
extents.x2 = box[i].x2; |
if (extents.y2 > box[i].y2) |
extents.y2 = box[i].y2; |
} |
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, |
extents.x1 + src_dx, |
extents.y1 + src_dy, |
extents.x2 - extents.x1, |
extents.y2 - extents.y1)) { |
DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__)); |
goto fallback_tiled_dst; |
} |
src_dx += tmp.src.offset[0]; |
src_dy += tmp.src.offset[1]; |
} else { |
tmp.src.bo = src_bo; |
tmp.src.width = src->drawable.width; |
tmp.src.height = src->drawable.height; |
} |
tmp.mask.bo = NULL; |
tmp.floats_per_vertex = 2; |
tmp.floats_per_rect = 6; |
tmp.need_magic_ca_pass = 0; |
tmp.u.gen6.flags = COPY_FLAGS(alu); |
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); |
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); |
assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); |
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); |
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { |
DBG(("%s: too large for a single operation\n", |
__FUNCTION__)); |
goto fallback_tiled_src; |
} |
_kgem_set_mode(&sna->kgem, KGEM_RENDER); |
} |
gen6_emit_copy_state(sna, &tmp); |
gen6_align_vertex(sna, &tmp); |
do { |
int16_t *v; |
int n_this_time; |
n_this_time = gen6_get_rectangles(sna, &tmp, n, |
gen6_emit_copy_state); |
n -= n_this_time; |
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); |
sna->render.vertex_used += 6 * n_this_time; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
do { |
DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", |
box->x1 + src_dx, box->y1 + src_dy, |
box->x1 + dst_dx, box->y1 + dst_dy, |
box->x2 - box->x1, box->y2 - box->y1)); |
v[0] = box->x2 + dst_dx; |
v[2] = box->x2 + src_dx; |
v[1] = v[5] = box->y2 + dst_dy; |
v[3] = v[7] = box->y2 + src_dy; |
v[8] = v[4] = box->x1 + dst_dx; |
v[10] = v[6] = box->x1 + src_dx; |
v[9] = box->y1 + dst_dy; |
v[11] = box->y1 + src_dy; |
v += 12; box++; |
} while (--n_this_time); |
} while (n); |
gen4_vertex_flush(sna); |
sna_render_composite_redirect_done(sna, &tmp); |
if (tmp.src.bo != src_bo) |
kgem_bo_destroy(&sna->kgem, tmp.src.bo); |
return true; |
fallback_tiled_src: |
if (tmp.src.bo != src_bo) |
kgem_bo_destroy(&sna->kgem, tmp.src.bo); |
fallback_tiled_dst: |
if (tmp.redirect.real_bo) |
kgem_bo_destroy(&sna->kgem, tmp.dst.bo); |
fallback_tiled: |
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && |
sna_blt_copy_boxes(sna, alu, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
dst->drawable.bitsPerPixel, |
box, n)) |
return true; |
return sna_tiling_copy_boxes(sna, alu, |
src, src_bo, src_dx, src_dy, |
dst, dst_bo, dst_dx, dst_dy, |
box, n); |
} |
static void |
gen6_render_copy_blt(struct sna *sna, |
const struct sna_copy_op *op, |
int16_t sx, int16_t sy, |
int16_t w, int16_t h, |
int16_t dx, int16_t dy) |
{ |
int16_t *v; |
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state); |
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; |
sna->render.vertex_used += 6; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
v[0] = dx+w; v[1] = dy+h; |
v[2] = sx+w; v[3] = sy+h; |
v[4] = dx; v[5] = dy+h; |
v[6] = sx; v[7] = sy+h; |
v[8] = dx; v[9] = dy; |
v[10] = sx; v[11] = sy; |
} |
/* Finish a copy operation: flush any vertices still pending in the
 * current primitive. */
static void
gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	DBG(("%s()\n", __FUNCTION__));
	assert(!sna->render.active);

	/* Only flush if blt() emitted at least one rectangle. */
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
}
static bool |
gen6_render_copy(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
struct sna_copy_op *op) |
{ |
DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", |
__FUNCTION__, alu, |
src->drawable.width, src->drawable.height, |
dst->drawable.width, dst->drawable.height)); |
fallback: |
op->base.dst.format = PIXMAN_a8r8g8b8; |
op->base.src.pict_format = op->base.dst.format; |
op->base.dst.pixmap = dst; |
op->base.dst.width = dst->drawable.width; |
op->base.dst.height = dst->drawable.height; |
op->base.dst.bo = dst_bo; |
op->base.src.bo = src_bo; |
op->base.src.card_format = |
gen6_get_card_format(op->base.src.pict_format); |
op->base.src.width = src->drawable.width; |
op->base.src.height = src->drawable.height; |
op->base.mask.bo = NULL; |
op->base.floats_per_vertex = 2; |
op->base.floats_per_rect = 6; |
op->base.u.gen6.flags = COPY_FLAGS(alu); |
assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); |
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); |
assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); |
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); |
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) |
goto fallback; |
_kgem_set_mode(&sna->kgem, KGEM_RENDER); |
} |
gen6_emit_copy_state(sna, &op->base); |
gen6_align_vertex(sna, &op->base); |
op->blt = gen6_render_copy_blt; |
op->done = gen6_render_copy_done; |
return true; |
} |
#endif |
/* Composite a textured source through an a8 mask onto dst (used by the
 * KolibriOS blitter entry point).  Always uses PictOpSrc with x8r8g8b8
 * src/dst; sampling is nearest when the source matches the target size,
 * bilinear otherwise.  `scale` selects whether texture coordinates are
 * normalised by the target (width/height) or by the source pixmap size.
 */
static bool
gen6_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask,struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	/* The incoming `op` is ignored; this path always blits as Src. */
	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	tmp->src.repeat = SAMPLER_EXTEND_NONE;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	/* 1:1 blit needs no filtering. */
	if ( (tmp->src.width  == width) &&
	     (tmp->src.height == height) )
		tmp->src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp->src.filter = SAMPLER_FILTER_BILINEAR;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	/* Normalised texture coordinates: divide by target or source size. */
	if( scale )
	{
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	}
	else
	{
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}
//	tmp->src.offset[0] = -dst_x;
//	tmp->src.offset[1] = -dst_y;

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;
//	tmp->mask.offset[0] = -dst_x;
//	tmp->mask.offset[1] = -dst_y;

	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
			      tmp->mask.bo != NULL,
			      tmp->has_component_alpha,
			      tmp->is_affine),
*/
			       GEN6_WM_KERNEL_MASK,
			       gen4_choose_composite_emitter(tmp));

	tmp->blt   = gen6_render_composite_blt;
//	tmp->box   = gen6_render_composite_box;
	tmp->done  = gen6_render_composite_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	/* NOTE(review): unlike gen6_render_composite, the re-check after
	 * kgem_submit is not performed here -- confirm the freshly
	 * flushed batch can always accommodate these three bos. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_composite_state(sna, tmp);
	gen6_align_vertex(sna, tmp);
	return true;
}
#if 0 |
/* Emit surface state for a solid fill: slot 0 is the destination
 * (writable), slot 1 a 1x1 solid-colour bo sampled as B8G8R8A8. */
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* Reuse the previous binding table if this one is identical
	 * (same uint64_t type-punning caveat as gen6_emit_copy_state). */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
static inline bool prefer_blt_fill(struct sna *sna, |
struct kgem_bo *bo) |
{ |
if (PREFER_RENDER) |
return PREFER_RENDER < 0; |
if (untiled_tlb_miss(bo)) |
return true; |
return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0; |
} |
/* Fill n boxes of dst with a constant colour under Render operator op.
 * Tries the BLT engine first where profitable, then renders via a 1x1
 * solid texture; oversized targets are redirected or tiled. */
static bool
gen6_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       PixmapPtr dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen6_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* BLT path: only Clear/Src can be expressed as a raster-op fill. */
	if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->drawable.bitsPerPixel,
				       pixel, box, n))
			return true;

		/* Render path also cannot handle this format: give up. */
		if (!gen6_check_dst_format(format))
			return false;
	}

	/* Render path: reduce the colour to an a8r8g8b8 pixel for the
	 * solid-cache texture. */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->drawable.width, dst->drawable.height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS(op, format);
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	/* Emit dst coords plus constant (1,1)/(0,0) texture coords that
	 * sample the 1x1 solid bo. */
	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG((" (%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3] = v[7] = 1;
			v[6] = v[10] = v[11] = 0;

			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
static void |
gen6_render_op_fill_blt(struct sna *sna, |
const struct sna_fill_op *op, |
int16_t x, int16_t y, int16_t w, int16_t h) |
{ |
int16_t *v; |
DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); |
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); |
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; |
sna->render.vertex_used += 6; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
v[0] = x+w; |
v[4] = v[8] = x; |
v[1] = v[5] = y+h; |
v[9] = y; |
v[2] = v[3] = v[7] = 1; |
v[6] = v[10] = v[11] = 0; |
} |
fastcall static void |
gen6_render_op_fill_box(struct sna *sna, |
const struct sna_fill_op *op, |
const BoxRec *box) |
{ |
int16_t *v; |
DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, |
box->x1, box->y1, box->x2, box->y2)); |
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); |
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; |
sna->render.vertex_used += 6; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
v[0] = box->x2; |
v[8] = v[4] = box->x1; |
v[5] = v[1] = box->y2; |
v[9] = box->y1; |
v[7] = v[2] = v[3] = 1; |
v[6] = v[10] = v[11] = 0; |
} |
fastcall static void |
gen6_render_op_fill_boxes(struct sna *sna, |
const struct sna_fill_op *op, |
const BoxRec *box, |
int nbox) |
{ |
DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, |
box->x1, box->y1, box->x2, box->y2, nbox)); |
do { |
int nbox_this_time; |
int16_t *v; |
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, |
gen6_emit_fill_state); |
nbox -= nbox_this_time; |
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; |
sna->render.vertex_used += 6 * nbox_this_time; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
do { |
v[0] = box->x2; |
v[8] = v[4] = box->x1; |
v[5] = v[1] = box->y2; |
v[9] = box->y1; |
v[7] = v[2] = v[3] = 1; |
v[6] = v[10] = v[11] = 0; |
box++; v += 12; |
} while (--nbox_this_time); |
} while (nbox); |
} |
static void |
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) |
{ |
DBG(("%s()\n", __FUNCTION__)); |
assert(!sna->render.active); |
if (sna->render.vertex_offset) |
gen4_vertex_flush(sna); |
kgem_bo_destroy(&sna->kgem, op->base.src.bo); |
} |
static bool |
gen6_render_fill(struct sna *sna, uint8_t alu, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
uint32_t color, |
struct sna_fill_op *op) |
{ |
DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); |
if (prefer_blt_fill(sna, dst_bo) && |
sna_blt_fill(sna, alu, |
dst_bo, dst->drawable.bitsPerPixel, |
color, |
op)) |
return true; |
if (!(alu == GXcopy || alu == GXclear) || |
too_large(dst->drawable.width, dst->drawable.height)) |
return sna_blt_fill(sna, alu, |
dst_bo, dst->drawable.bitsPerPixel, |
color, |
op); |
if (alu == GXclear) |
color = 0; |
op->base.dst.pixmap = dst; |
op->base.dst.width = dst->drawable.width; |
op->base.dst.height = dst->drawable.height; |
op->base.dst.format = sna_format_for_depth(dst->drawable.depth); |
op->base.dst.bo = dst_bo; |
op->base.dst.x = op->base.dst.y = 0; |
op->base.src.bo = |
sna_render_get_solid(sna, |
sna_rgba_for_color(color, |
dst->drawable.depth)); |
op->base.mask.bo = NULL; |
op->base.need_magic_ca_pass = false; |
op->base.floats_per_vertex = 2; |
op->base.floats_per_rect = 6; |
op->base.u.gen6.flags = FILL_FLAGS_NOBLEND; |
assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); |
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER); |
assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX); |
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { |
kgem_submit(&sna->kgem); |
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); |
} |
gen6_emit_fill_state(sna, &op->base); |
gen6_align_vertex(sna, &op->base); |
op->blt = gen6_render_op_fill_blt; |
op->box = gen6_render_op_fill_box; |
op->boxes = gen6_render_op_fill_boxes; |
op->done = gen6_render_op_fill_done; |
return true; |
} |
static bool |
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, |
uint32_t color, |
int16_t x1, int16_t y1, int16_t x2, int16_t y2, |
uint8_t alu) |
{ |
BoxRec box; |
box.x1 = x1; |
box.y1 = y1; |
box.x2 = x2; |
box.y2 = y2; |
return sna_blt_fill_boxes(sna, alu, |
bo, dst->drawable.bitsPerPixel, |
color, &box, 1); |
} |
/* Fill the single rectangle (x1,y1)-(x2,y2) on dst with color, trying the
 * BLT first and falling back to a one-shot RENDER fill.  Returns false
 * only if the batch cannot accommodate the target bo.
 */
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo) &&
	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	/* GXclear ignores the supplied colour. */
	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid source from the colour cache; released before returning. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Flush and retry once; fail gracefully (unlike gen6_render_fill). */
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* One rect: corners (x2,y2), (x1,y2), (x1,y1) with unit texcoords. */
	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
static bool |
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) |
{ |
BoxRec box; |
box.x1 = 0; |
box.y1 = 0; |
box.x2 = dst->drawable.width; |
box.y2 = dst->drawable.height; |
return sna_blt_fill_boxes(sna, GXclear, |
bo, dst->drawable.bitsPerPixel, |
0, &box, 1); |
} |
/* Clear the whole of dst to zero, using the BLT when it is already
 * engaged or when the target exceeds the 3D pipeline limits, otherwise
 * emitting a single full-surface RENDER rectangle.
 */
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if, and only if, already engaged */
	if (sna->kgem.ring == KGEM_BLT &&
	    gen6_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid black/transparent source; released before returning. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Flush and retry once; fail gracefully if still no room. */
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Full-surface rectangle: (w,h), (0,h), (0,0). */
	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2] = v[3] = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
#endif |
/* Flush outstanding vertices and unbind the vertex buffer. */
static void gen6_render_flush(struct sna *sna)
{
	gen4_vertex_close(sna);

	/* After closing, no vbo may remain bound or mid-primitive. */
	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}
static void |
gen6_render_context_switch(struct kgem *kgem, |
int new_mode) |
{ |
if (kgem->nbatch) { |
DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); |
_kgem_submit(kgem); |
} |
kgem->ring = new_mode; |
} |
/* kgem retire callback: revert to the preferred ring once it is safe,
 * and rewind the vertex bo for reuse if it has gone idle.
 */
static void
gen6_render_retire(struct kgem *kgem)
{
	struct sna *sna;

	/* NOTE(review): presumably safe to switch rings here only when
	 * semaphores serialize them or nothing is left to retire — confirm.
	 */
	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
		kgem->ring = kgem->mode;

	sna = container_of(kgem, struct sna, kgem);
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
		DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}
/* kgem expire callback: discard the vertex bo when it holds no pending
 * vertices, reverting to the small embedded vertex_data array.
 */
static void
gen6_render_expire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (sna->render.vbo && !sna->render.vertex_used) {
		DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
		kgem_bo_destroy(kgem, sna->render.vbo);
		assert(!sna->render.active);
		sna->render.vbo = NULL;
		/* Fall back to the static buffer until a new vbo is needed. */
		sna->render.vertices = sna->render.vertex_data;
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}
static void gen6_render_reset(struct sna *sna) |
{ |
sna->render_state.gen6.needs_invariant = true; |
sna->render_state.gen6.first_state_packet = true; |
sna->render_state.gen6.ve_id = 3 << 2; |
sna->render_state.gen6.last_primitive = -1; |
sna->render_state.gen6.num_sf_outputs = 0; |
sna->render_state.gen6.samplers = -1; |
sna->render_state.gen6.blend = -1; |
sna->render_state.gen6.kernel = -1; |
sna->render_state.gen6.drawrect_offset = -1; |
sna->render_state.gen6.drawrect_limit = -1; |
sna->render_state.gen6.surface_table = -1; |
sna->render.vertex_offset = 0; |
sna->render.nvertex_reloc = 0; |
sna->render.vb_id = 0; |
} |
/* Release the static-state bo allocated in gen6_render_setup(). */
static void gen6_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
static bool is_gt2(struct sna *sna) |
{ |
return DEVICE_ID(sna->PciInfo) & 0x30; |
} |
static bool is_mobile(struct sna *sna) |
{ |
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6; |
} |
static bool gen6_render_setup(struct sna *sna) |
{ |
struct gen6_render_state *state = &sna->render_state.gen6; |
struct sna_static_stream general; |
struct gen6_sampler_state *ss; |
int i, j, k, l, m; |
state->info = >1_info; |
if (is_gt2(sna)) |
state->info = >2_info; /* XXX requires GT_MODE WiZ disabled */ |
sna_static_stream_init(&general); |
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer |
* dumps, you know it points to zero. |
*/ |
null_create(&general); |
scratch_create(&general); |
for (m = 0; m < GEN6_KERNEL_COUNT; m++) { |
if (wm_kernels[m].size) { |
state->wm_kernel[m][1] = |
sna_static_stream_add(&general, |
wm_kernels[m].data, |
wm_kernels[m].size, |
64); |
} else { |
if (USE_8_PIXEL_DISPATCH) { |
state->wm_kernel[m][0] = |
sna_static_stream_compile_wm(sna, &general, |
wm_kernels[m].data, 8); |
} |
if (USE_16_PIXEL_DISPATCH) { |
state->wm_kernel[m][1] = |
sna_static_stream_compile_wm(sna, &general, |
wm_kernels[m].data, 16); |
} |
if (USE_32_PIXEL_DISPATCH) { |
state->wm_kernel[m][2] = |
sna_static_stream_compile_wm(sna, &general, |
wm_kernels[m].data, 32); |
} |
} |
if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) { |
state->wm_kernel[m][1] = |
sna_static_stream_compile_wm(sna, &general, |
wm_kernels[m].data, 16); |
} |
} |
ss = sna_static_stream_map(&general, |
2 * sizeof(*ss) * |
(2 + |
FILTER_COUNT * EXTEND_COUNT * |
FILTER_COUNT * EXTEND_COUNT), |
32); |
state->wm_state = sna_static_stream_offsetof(&general, ss); |
sampler_copy_init(ss); ss += 2; |
sampler_fill_init(ss); ss += 2; |
for (i = 0; i < FILTER_COUNT; i++) { |
for (j = 0; j < EXTEND_COUNT; j++) { |
for (k = 0; k < FILTER_COUNT; k++) { |
for (l = 0; l < EXTEND_COUNT; l++) { |
sampler_state_init(ss++, i, j); |
sampler_state_init(ss++, k, l); |
} |
} |
} |
} |
state->cc_blend = gen6_composite_create_blend_state(&general); |
state->general_bo = sna_static_stream_fini(sna, &general); |
return state->general_bo != NULL; |
} |
/* Public entry point: install the gen6 (Sandybridge) backend into the
 * render/kgem vtables.  Most RENDER paths are currently disabled
 * (commented out); only blit_tex plus housekeeping hooks are wired up.
 */
bool gen6_render_init(struct sna *sna)
{
	if (!gen6_render_setup(sna))
		return false;

	sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen6_render_expire;

//	sna->render.composite = gen6_render_composite;
//	sna->render.video = gen6_render_video;

//	sna->render.copy_boxes = gen6_render_copy_boxes;

	sna->render.blit_tex = gen6_blit_tex;

//	sna->render.copy = gen6_render_copy;

//	sna->render.fill_boxes = gen6_render_fill_boxes;
//	sna->render.fill = gen6_render_fill;
//	sna->render.fill_one = gen6_render_fill_one;
//	sna->render.clear = gen6_render_clear;

	sna->render.flush = gen6_render_flush;
	sna->render.reset = gen6_render_reset;
	sna->render.fini = gen6_render_fini;

	sna->render.max_3d_size = GEN6_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	/* Advertise both plain and textured blits to the core. */
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	return true;
}
/* |
* Copyright © 2006,2008,2011 Intel Corporation |
* Copyright © 2007 Red Hat, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Wang Zhenyu <zhenyu.z.wang@sna.com> |
* Eric Anholt <eric@anholt.net> |
* Carl Worth <cworth@redhat.com> |
* Keith Packard <keithp@keithp.com> |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
#include "sna.h" |
#include "sna_reg.h" |
#include "sna_render.h" |
#include "sna_render_inline.h" |
//#include "sna_video.h" |
#include "brw/brw.h" |
#include "gen6_render.h" |
#include "gen4_source.h" |
#include "gen4_vertex.h" |
/* Compile-time switches to disable individual acceleration paths
 * (0 = path enabled).
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

#define NO_RING_SWITCH 0
#define PREFER_RENDER 0

/* SIMD widths the WM kernels are compiled for (see gen6_render_setup). */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

/* Maximum surface dimension handled by the gen6 3D pipeline. */
#define GEN6_MAX_SIZE 8192
/* Per-GT hardware limits: thread counts and URB partitioning. */
struct gt_info {
	const char *name;
	int max_vs_threads;
	int max_gs_threads;
	int max_wm_threads;
	struct {
		int size;		/* URB size — units not stated here, TODO confirm */
		int max_vs_entries;
		int max_gs_entries;
	} urb;
};
/* Limits for the two Sandybridge variants; selected in
 * gen6_render_setup() via is_gt2().
 */
static const struct gt_info gt1_info = {
	.name = "Sandybridge (gen6, gt1)",
	.max_vs_threads = 24,
	.max_gs_threads = 21,
	.max_wm_threads = 40,
	.urb = { 32, 256, 256 },
};

static const struct gt_info gt2_info = {
	.name = "Sandybridge (gen6, gt2)",
	.max_vs_threads = 60,
	.max_gs_threads = 60,
	.max_wm_threads = 80,
	.urb = { 64, 256, 256 },
};
/* Prebuilt video pixel-shader binaries (packed/planar YUV -> RGB),
 * assembled from the exa_wm_* sources at build time.
 */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
/* WM kernel table, indexed by GEN6_WM_KERNEL_*.  NOKERNEL entries have
 * size 0 and are compiled from brw assembler sources at setup time;
 * KERNEL entries carry a prebuilt binary.
 */
#define NOKERNEL(kernel_enum, func, ns) \
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
#define KERNEL(kernel_enum, kernel, ns) \
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}

static const struct wm_kernel_info {
	const char *name;
	const void *data;
	unsigned int size;		/* 0 => compile at runtime */
	unsigned int num_surfaces;	/* binding-table entries used */
} wm_kernels[] = {
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL
/* Hardware blend factors for each Render (Porter-Duff) operator,
 * indexed by PictOp.  src_alpha marks operators whose destination
 * factor references source alpha (needs adjusting for component-alpha).
 */
static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen6_blend_op[] = {
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};
/**
 * Highest-valued BLENDFACTOR used in gen6_blend_op.
 *
 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN6_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen6_blend_state), 64)

/* Offset of a (src,dst) blend-factor pair within the blend-state table. */
#define BLEND_OFFSET(s, d) \
	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)

#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)

/* Offset of a (src,mask) filter/extend combination in the sampler table;
 * the "+ 2" skips the copy and fill sampler pairs at the front.
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)

/* Pack/unpack the per-op u.gen6.flags word:
 * high half = sampler offset | kernel id, low half = blend offset | vertex id.
 */
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
static inline bool too_large(int width, int height) |
{ |
return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; |
} |
/* Return the blend-state offset for the given operator/format.
 *
 * NOTE(review): the per-operator table lookup is currently disabled —
 * the factors are hard-coded to ONE/INV_SRC_ALPHA (premultiplied OVER)
 * regardless of op, has_component_alpha or dst_format.  Confirm this
 * is intentional for the blit_tex-only configuration.
 */
static uint32_t gen6_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src, dst;

	src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
	dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;

//	dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;

#if 0
	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ONE;
		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN6_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
	}

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)BLEND_OFFSET(src, dst)));
#endif
	return BLEND_OFFSET(src, dst);
}
/* Map a PictFormat onto the matching gen6 sampler surface format;
 * returns (uint32_t)-1 for formats the sampler cannot read.
 */
static uint32_t gen6_get_card_format(PictFormat format)
{
	switch (format) {
	case PICT_a8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	default:
		return -1;
	}
}
/* Map a PictFormat onto a render-target surface format.
 *
 * NOTE(review): currently short-circuited to always report B8G8R8A8;
 * the full mapping below is compiled out.  Confirm this is intended
 * for the blit_tex-only configuration.
 */
static uint32_t gen6_get_dest_format(PictFormat format)
{
	return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;

#if 0
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
#endif
}
#if 0
/* Disabled validation/translation helpers for the full RENDER composite
 * path (format, filter and repeat handling).  Compiled out while only
 * blit_tex is wired up in gen6_render_init().
 */
static bool gen6_check_dst_format(PictFormat format)
{
	if (gen6_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen6_check_format(uint32_t format)
{
	if (gen6_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static uint32_t gen6_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

/* NOTE(review): returns uint32_t but is used as a boolean predicate. */
static uint32_t gen6_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

static uint32_t gen6_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen6_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}
#endif
static int |
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) |
{ |
int base; |
if (has_mask) { |
if (is_ca) { |
if (gen6_blend_op[op].src_alpha) |
base = GEN6_WM_KERNEL_MASKSA; |
else |
base = GEN6_WM_KERNEL_MASKCA; |
} else |
base = GEN6_WM_KERNEL_MASK; |
} else |
base = GEN6_WM_KERNEL_NOMASK; |
return base + !is_affine; |
} |
/* Partition the URB: all entries to the (pass-through) VS, none to
 * the GS, which is disabled.
 */
static void
gen6_emit_urb(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
/* Emit STATE_BASE_ADDRESS: dynamic/instruction state is addressed
 * relative to the static general_bo; surface state relative to the
 * batch itself (NULL reloc target — TODO confirm).
 */
static void
gen6_emit_state_base_address(struct sna *sna)
{
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));
	OUT_BATCH(0); /* indirect */
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
				 sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}
/* Point all viewport state at offset zero (only the CC viewport flag
 * is set; clip/SF viewports are left untouched).
 */
static void
gen6_emit_viewports(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
/* Disable the vertex shader: no constant buffer, no kernel,
 * vertices pass straight through to the SF stage.
 */
static void
gen6_emit_vs(struct sna *sna)
{
	/* disable VS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* no VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
/* Disable the geometry shader entirely (no constants, no kernel). */
static void
gen6_emit_gs(struct sna *sna)
{
	/* disable GS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* no GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
/* Disable the clip stage (2D blits never need guard-band clipping). */
static void
gen6_emit_clip(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
}
/* Disable the pixel-shader push-constant buffer. */
static void
gen6_emit_wm_constants(struct sna *sna)
{
	/* disable WM constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
/* Bind a null depth buffer — the render paths never use depth/stencil. */
static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
	OUT_BATCH(0);
}
/* Emit the state that is constant for the lifetime of the context:
 * 3D pipeline select, single-sample multisampling, base addresses and
 * all the disabled pipeline stages.  Re-run after a reset.
 */
static void
gen6_emit_invariant(struct sna *sna)
{
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	gen6_emit_urb(sna);

	gen6_emit_state_base_address(sna);

	gen6_emit_viewports(sna);
	gen6_emit_vs(sna);
	gen6_emit_gs(sna);
	gen6_emit_clip(sna);
	gen6_emit_wm_constants(sna);
	gen6_emit_null_depth_buffer(sna);

	sna->render_state.gen6.needs_invariant = false;
}
/* Select the blend state at the given offset, skipping the packet when
 * unchanged.  Returns true when blending is enabled (i.e. not NO_BLEND).
 */
static bool
gen6_emit_cc(struct sna *sna, int blend)
{
	struct gen6_render_state *render = &sna->render_state.gen6;

	if (render->blend == blend)
		return blend != NO_BLEND;

	DBG(("%s: blend = %x\n", __FUNCTION__, blend));

	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
	OUT_BATCH((render->cc_blend + blend) | 1);
	/* First packet after reset also (re)writes the depth-stencil and
	 * colour-calc pointers; afterwards they are left untouched.
	 */
	if (render->blend == (unsigned)-1) {
		OUT_BATCH(1);
		OUT_BATCH(1);
	} else {
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	render->blend = blend;
	return blend != NO_BLEND;
}
/* Point the PS sampler state at the given offset within wm_state,
 * skipping the packet when unchanged.
 */
static void
gen6_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen6.samplers == state)
		return;

	sna->render_state.gen6.samplers = state;

	DBG(("%s: sampler = %x\n", __FUNCTION__, state));

	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0); /* VS */
	OUT_BATCH(0); /* GS */
	OUT_BATCH(sna->render_state.gen6.wm_state + state);
}
/* Configure the SF stage for 1 (src only) or 2 (src+mask) attribute
 * outputs, skipping the packet when unchanged.
 */
static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
	int num_sf_outputs = has_mask ? 2 : 1;

	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, 1, 0));

	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW9 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW14 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW19 */
}
/* Select the WM (pixel shader) kernel, enabling whichever SIMD widths
 * were compiled for it; skips the packet when unchanged.
 */
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
	const uint32_t *kernels;

	if (sna->render_state.gen6.kernel == kernel)
		return;

	sna->render_state.gen6.kernel = kernel;
	kernels = sna->render_state.gen6.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
	/* First available kernel offset (GNU ?: elides the middle operand) */
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch space */
	/* Start-GRF for each dispatch width that is present */
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}
static bool
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
	/* Point the PS at a new binding table.  Returns true if the state
	 * actually changed (caller uses this to decide on a stall).
	 */
	if (sna->render_state.gen6.surface_table == offset)
		return false;

	/* Binding table pointers */
	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0);		/* vs */
	OUT_BATCH(0);		/* gs */
	/* Only the PS uses the binding table */
	OUT_BATCH(offset*4);

	sna->render_state.gen6.surface_table = offset;
	return true;
}
static bool
gen6_emit_drawing_rectangle(struct sna *sna,
			    const struct sna_composite_op *op)
{
	/* Emit 3DSTATE_DRAWING_RECTANGLE for the destination, preceded by
	 * the SNB non-pipelined-state workaround PIPE_CONTROLs.  Returns
	 * true if new state was emitted (caller skips its own stall then).
	 */
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (sna->render_state.gen6.drawrect_limit == limit &&
	    sna->render_state.gen6.drawrect_offset == offset)
		return false;

	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
	 * produced by non-pipelined state commands), software needs to first
	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
	 * 0.
	 *
	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
	 * BEFORE the pipe-control with a post-sync op and no write-cache
	 * flushes.
	 */
	if (!sna->render_state.gen6.first_state_packet) {
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	/* Post-sync write (timestamp) into the scratch page of general_bo. */
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
				 sna->render_state.gen6.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
				 I915_GEM_DOMAIN_INSTRUCTION,
				 64));
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(limit);
	OUT_BATCH(offset);

	sna->render_state.gen6.drawrect_offset = offset;
	sna->render_state.gen6.drawrect_limit = limit;
	return true;
}
static void
gen6_emit_vertex_elements(struct sna *sna,
			  const struct sna_composite_op *op)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is true): same as above
	 *
	 * The vertex-element id packs both channel widths:
	 *    id & 3  = number of floats for the source coordinates,
	 *    id >> 2 = number of floats for the mask coordinates (0 = no mask).
	 */
	struct gen6_render_state *render = &sna->render_state.gen6;
	uint32_t src_format, dw;
	int id = GEN6_VERTEX(op->u.gen6.flags);
	bool has_mask;

	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

	if (render->ve_id == id)
		return;
	render->ve_id = id;

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	has_mask = (id >> 2) != 0;
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
		((2 * (3 + has_mask)) + 1 - 2));

	/* Element 0: the VUE header pad, all constant zeros. */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

	/* x,y */
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
		  0 << VE0_OFFSET_SHIFT);
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

	/* u0, v0, w0 */
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
	switch (id & 3) {
	default:
		assert(0);
		/* fallthrough: treat unknown widths as short coordinates */
	case 0:
		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 1:
		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 2:
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
		break;
	case 3:
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
		break;
	}
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
		  src_format << VE0_FORMAT_SHIFT |
		  4 << VE0_OFFSET_SHIFT);
	OUT_BATCH(dw);

	/* u1, v1, w1 */
	if (has_mask) {
		/* Mask channel follows the source floats; a 0-wide source
		 * channel still occupies one float (GNU "?:"). */
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
		switch (id >> 2) {
		case 1:
			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		default:
			assert(0);
			/* fallthrough: unknown widths treated as 2 floats */
		case 2:
			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
			break;
		case 3:
			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
			break;
		}
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
			  src_format << VE0_FORMAT_SHIFT |
			  offset << VE0_OFFSET_SHIFT);
		OUT_BATCH(dw);
	}
}
static void
gen6_emit_flush(struct sna *sna)
{
	/* Full render/texture-cache flush with a CS stall, used between
	 * passes that read what a previous pass wrote.
	 */
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
		  GEN6_PIPE_CONTROL_TC_FLUSH |
		  GEN6_PIPE_CONTROL_CS_STALL);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
static void
gen6_emit_state(struct sna *sna,
		const struct sna_composite_op *op,
		uint16_t wm_binding_table)
{
	/* Emit all pipeline state needed by op.  The low bit of
	 * wm_binding_table carries the caller's "destination dirty" flag;
	 * the remaining bits are the binding-table offset.  A stall is
	 * only emitted if no state packet above already implied one.
	 */
	bool need_stall = wm_binding_table & 1;

	assert(op->dst.bo->exec);

	if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
		need_stall = false;
	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
	gen6_emit_vertex_elements(sna, op);

	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
	if (gen6_emit_drawing_rectangle(sna, op))
		need_stall = false;
	/* Sampling a bo written earlier in this batch requires a flush. */
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		gen6_emit_flush(sna);
		kgem_clear_dirty(&sna->kgem);
		assert(op->dst.bo->exec);
		kgem_bo_mark_dirty(op->dst.bo);
		need_stall = false;
	}
	if (need_stall) {
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
		OUT_BATCH(0);
		OUT_BATCH(0);
	}
	sna->render_state.gen6.first_state_packet = false;
}
static bool gen6_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	/* Second pass for component-alpha rendering: replay the vertices
	 * already emitted for this op with an OpAdd blend and kernel,
	 * completing the two-pass CA composite.  Returns true if a pass
	 * was emitted.
	 */
	struct gen6_render_state *state = &sna->render_state.gen6;

	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
	     sna->render.vertex_start, sna->render.vertex_index));

	gen6_emit_flush(sna);

	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
	gen6_emit_wm(sna,
		     gen6_choose_composite_kernel(PictOpAdd,
						  true, true,
						  op->is_affine),
		     true);

	/* Redraw exactly the rectangles of the first pass. */
	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	state->last_primitive = sna->kgem.nbatch;
	return true;
}
/* Surface state padded out to the 32-byte alignment the hardware expects. */
typedef struct gen6_surface_state_padded {
	struct gen6_surface_state state;
	char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
static void scratch_create(struct sna_static_stream *stream)
{
	/* 64 bytes of scratch space for random writes, such as
	 * the pipe-control w/a.
	 */
	sna_static_stream_map(stream, 64, 64);
}
static void |
sampler_state_init(struct gen6_sampler_state *sampler_state, |
sampler_filter_t filter, |
sampler_extend_t extend) |
{ |
sampler_state->ss0.lod_preclamp = 1; /* GL mode */ |
/* We use the legacy mode to get the semantics specified by |
* the Render extension. */ |
sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; |
switch (filter) { |
default: |
case SAMPLER_FILTER_NEAREST: |
sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; |
sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; |
break; |
case SAMPLER_FILTER_BILINEAR: |
sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; |
sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; |
break; |
} |
switch (extend) { |
default: |
case SAMPLER_EXTEND_NONE: |
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; |
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; |
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; |
break; |
case SAMPLER_EXTEND_REPEAT: |
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; |
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; |
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; |
break; |
case SAMPLER_EXTEND_PAD: |
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; |
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; |
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; |
break; |
case SAMPLER_EXTEND_REFLECT: |
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; |
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; |
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; |
break; |
} |
} |
static void
sampler_copy_init(struct gen6_sampler_state *ss)
{
	/* Sampler pair for copies: unnormalized nearest sampling on the
	 * source, plus a default-initialised second (unused mask) slot.
	 */
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
static void
sampler_fill_init(struct gen6_sampler_state *ss)
{
	/* Sampler pair for fills: repeat the 1x1 source tile with
	 * unnormalized coordinates; second (mask) slot left at defaults.
	 */
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
static uint32_t |
gen6_tiling_bits(uint32_t tiling) |
{ |
switch (tiling) { |
default: assert(0); |
case I915_TILING_NONE: return 0; |
case I915_TILING_X: return GEN6_SURFACE_TILED; |
case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y; |
} |
} |
/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 *
 * Returns the byte offset of the surface state within the batch.  The
 * binding is cached on the bo (keyed by format plus dst/scanout flags)
 * so repeated binds of the same bo reuse the existing state.
 */
static int
gen6_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	uint16_t offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
	if (offset) {
		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
		     offset, bo->handle, format,
		     is_dst ? "render" : "sampler"));
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Surface states grow down from the end of the batch buffer. */
	offset = sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
		 GEN6_SURFACE_BLEND_ENABLED |
		 format << GEN6_SURFACE_FORMAT_SHIFT);
	if (is_dst) {
		ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
	assert(bo->pitch <= (1 << 18));
	ss[3] = (gen6_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	/* Cacheability control: bypass LLC for scanout/io buffers. */
	ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16;

	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
static void gen6_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	/* Bind the shared vertex buffer at slot `id`; the buffer address
	 * is relocated later (vertex_reloc records where to patch it).
	 */
	int id = GEN6_VERTEX(op->u.gen6.flags);

	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}
static void gen6_emit_primitive(struct sna *sna)
{
	/* Start a RECTLIST primitive, or re-open the previous one if it was
	 * the last thing emitted (its vertex count is patched at flush).
	 */
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_start,
		     sna->render.vertex_index));
		/* Point back at the vertex-count dword of the open primitive. */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  0 << 9 |
		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;
	DBG(("%s: started new primitive: index=%d\n",
	     __FUNCTION__, sna->render.vertex_start));

	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
static bool gen6_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	/* Ensure a vertex buffer is bound and a primitive is open before
	 * rectangles are emitted.  Returns false if the batch lacks room
	 * (caller must flush and retry).
	 */
	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* Worst case also includes the CA second pass. */
	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen6_emit_vertex_buffer(sna, op);

	gen6_emit_primitive(sna);
	return true;
}
static int gen6_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Slow path when the vbo is full: wait for other users, close the
	 * current primitive (including the CA pass if needed) and start a
	 * fresh vbo.  Returns the number of floats now available, or 0 if
	 * the whole batch must be submitted first.
	 */

	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		/* The CA pass invalidates cc/wm state; restore op's own. */
		if (gen6_magic_ca_pass(sna, op)) {
			gen6_emit_flush(sna);
			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
			gen6_emit_wm(sna,
				     GEN6_KERNEL(op->u.gen6.flags),
				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
		}
	}

	return gen4_vertex_finish(sna);
}
inline static int gen6_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
	/* Reserve vertex space for up to `want` rectangles and advance the
	 * vertex index accordingly (3 vertices per RECTLIST rectangle).
	 * Returns how many rectangles actually fit (>= 1); may submit the
	 * batch and re-emit state via emit_state along the way.
	 */
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen6_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen6_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Out of batch/vbo space: finish the open primitive, submit, and
	 * rebuild the render state before retrying. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	/* Carve a zeroed binding table out of the surface-state area at the
	 * end of the batch; writes its dword offset into *offset.
	 */
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen6_surface_state_padded));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	/* Switch to the render ring and make sure there is room for a
	 * full state emission plus four surface states, submitting the
	 * current batch if not.  Returns whether the destination bo is
	 * already dirty in this batch.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	/* A fresh batch needs the invariant state re-emitted once. */
	if (sna->render_state.gen6.needs_invariant)
		gen6_emit_invariant(sna);

	return kgem_bo_is_dirty(op->dst.bo);
}
static void gen6_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Build the binding table (dst, src, optional mask) and emit the
	 * full pipeline state for a composite op.
	 */
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen6_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen6_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* If this table matches the one currently bound, discard the copy
	 * we just built and keep using the previous one. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
	    (op->mask.bo == NULL ||
	     sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	/* Low bit of the offset carries the dst-dirty flag. */
	gen6_emit_state(sna, op, offset | dirty);
}
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	/* Re-align the shared vertex buffer when the vertex stride changes,
	 * so vertex_index * floats_per_vertex addresses the next free slot.
	 */
	assert (sna->render.vertex_offset == 0);
	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
		/* Not enough room left for a whole rectangle at the new
		 * stride: start a fresh vbo instead. */
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen4_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen6.floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
	}
	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	/* Emit one composite rectangle via the op's prim_emit callback. */
	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, r);
}
#if 0 |
fastcall static void
gen6_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	/* Composite a single box: src/mask offsets track dst 1:1. */
	struct sna_composite_rectangles r;

	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}
static void
gen6_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	/* Composite many boxes one rectangle at a time through prim_emit,
	 * batching as many as fit into the vertex buffer per iteration.
	 */
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	/* Fast path: let the op's emit_boxes callback write all vertices
	 * for each chunk of boxes directly into the vertex buffer.
	 */
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	/* Threaded variant of composite_boxes: reserve vertex space under
	 * the render lock, then drop the lock while emit_boxes fills it in.
	 */
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
#endif |
#ifndef MAX |
#define MAX(a,b) ((a) > (b) ? (a) : (b)) |
#endif |
static uint32_t
gen6_composite_create_blend_state(struct sna_static_stream *stream)
{
	/* Precompute a blend-state entry for every (src, dst) blend-factor
	 * pair so gen6_emit_cc can index into the table at runtime.
	 * Returns the stream offset of the table.
	 */
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
			struct gen6_blend_state *blend =
				(struct gen6_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
			/* (ONE, ZERO) is a plain copy: disable blending. */
			blend->blend0.blend_enable =
				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
#if 0 |
static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	/* Bind one plane of a video frame as a sampler surface at the given
	 * byte offset within src_bo.  Unlike gen6_bind_bo this is not
	 * cached, as each plane uses a distinct offset.  Returns the byte
	 * offset of the surface state within the batch.
	 */
	struct gen6_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN6_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;

	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
static void gen6_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	/* Bind the destination plus every source plane of the video frame
	 * and emit the pipeline state.  Planar formats bind six surfaces
	 * (Y, Y, V, V, U, U -- one pair per sampler); packed formats one.
	 */
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;
	int n_src, n;

	dirty = gen6_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		/* Chroma planes are subsampled 2x2. */
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen6_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen6_emit_state(sna, op, offset | dirty);
}
static bool
gen6_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  PixmapPtr pixmap)
{
	/* Draw a video frame onto pixmap, scaling frame->src to the extents
	 * of dstRegion and emitting one textured rectangle per clip box.
	 * Returns false only if the pixmap cannot be moved to the GPU.
	 */
	struct sna_composite_op tmp;
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
	int src_width = frame->src.x2 - frame->src.x1;
	int src_height = frame->src.y2 - frame->src.y1;
	float src_offset_x, src_offset_y;
	float src_scale_x, src_scale_y;
	int nbox, pix_xoff, pix_yoff;
	struct sna_pixmap *priv;
	unsigned filter;
	BoxPtr box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__,
	     src_width, src_height, dst_width, dst_height,
	     (long)REGION_NUM_RECTS(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* (x, y, u, v) per vertex, three vertices per rectangle. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* Only filter when scaling; 1:1 blits use nearest sampling. */
	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN6_WM_KERNEL_VIDEO_PLANAR :
			       GEN6_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_video_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	/* Map destination pixels to normalized source texture coords. */
	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	while (nbox--) {
		BoxRec r;

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);

		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}

	gen4_vertex_flush(sna);
	return true;
}
static int
gen6_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	/* Prepare a source/mask channel from a Render picture, falling back
	 * to software fixup paths (gradient rendering, format conversion,
	 * extraction) whenever the hardware cannot sample the picture
	 * directly.  Returns the channel-init result (<= 0 on failure).
	 */
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	if (picture->pDrawable == NULL) {
		/* Source-only pictures: linear gradients have a dedicated
		 * path, other gradients are rasterized to a pixmap. */
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen6_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen6_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into (x, y), letting us
	 * drop the transform and sample with nearest filtering. */
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen6_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
{
	/* Map generic Render repeat/filter/format values to their gen6
	 * hardware equivalents once the channel is fully initialised.
	 */
	channel->repeat = gen6_repeat(channel->repeat);
	channel->filter = gen6_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen6_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);
}
#endif |
static void gen6_render_composite_done(struct sna *sna, |
const struct sna_composite_op *op) |
{ |
DBG(("%s\n", __FUNCTION__)); |
assert(!sna->render.active); |
if (sna->render.vertex_offset) { |
gen4_vertex_flush(sna); |
gen6_magic_ca_pass(sna, op); |
} |
} |
#if 0 |
/* Fill in the destination half of a composite op from the target picture.
 * Returns false if no usable destination bo is available or the redirect
 * for an oversized target fails. */
static bool
gen6_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h,
			  bool partial)
{
	BoxRec box;

	/* Describe the destination pixmap. 'partial' is accepted for
	 * interface parity but is not referenced in this body. */
	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* Use the explicit extents when supplied, else the picture's own. */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	/* NOTE(review): the bo lookup below is commented out, yet
	 * op->dst.bo is tested immediately after — so either the caller
	 * primes op->dst.bo, or this port is incomplete. Verify before
	 * enabling this #if 0 region. */
//	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
//					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
//					  &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	/* Targets beyond the hardware limits are redirected through a
	 * temporary proxy surface. */
	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h))
		return false;

	return true;
}
static bool |
prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) |
{ |
if (untiled_tlb_miss(tmp->dst.bo) || |
untiled_tlb_miss(tmp->src.bo)) |
return true; |
if (kgem_bo_is_render(tmp->dst.bo) || |
kgem_bo_is_render(tmp->src.bo)) |
return false; |
if (!prefer_blt_ring(sna, tmp->dst.bo, 0)) |
return false; |
return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, tmp->src.bo); |
} |
/* Prepare a gen6 render-pipeline composite operation.
 *
 * Tries cheaper paths first (direct BLT, tiled split), then builds the
 * source and optional mask channels, selects blend/kernel/vertex flags,
 * and reserves batch space. Returns true with 'tmp' ready for emission,
 * false after releasing any buffers acquired along the way (the
 * cleanup_* goto chain unwinds in reverse acquisition order). */
static bool
gen6_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	/* Unsupported blend operator: cannot be expressed on gen6. */
	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	/* Unmasked operations may be serviced entirely by the blitter. */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen6_composite_fallback(sna, src, mask, dst))
		return false;

	/* Too large for a single pass: split into hardware-sized tiles. */
	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (op == PictOpClear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen6_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height,
				       op > PictOpSrc || dst->pCompositeClip->data))
		return false;

	/* Build the source channel: -1 = error, 0 = solid fallback,
	 * 1 = channel populated. */
	switch (gen6_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen6_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value. We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen6_blend_op[op].src_alpha &&
			    (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Split into two passes (the "magic CA" pass). */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* Reuse the source channel when the mask refers to the
		 * same picture; otherwise build the mask channel too. */
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen6_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen6_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend, kernel and vertex selections into one word. */
	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen6_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt   = gen6_render_composite_blt;
	tmp->box   = gen6_render_composite_box;
	tmp->boxes = gen6_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen6_render_composite_boxes;
		tmp->thread_boxes = gen6_render_composite_boxes__thread;
	}
	tmp->done  = gen6_render_composite_done;

	/* Make sure all three buffers fit in the batch, flushing once
	 * if necessary before giving up. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_composite_state(sna, tmp);
	gen6_align_vertex(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}
#if !NO_COMPOSITE_SPANS |
fastcall static void |
gen6_render_composite_spans_box(struct sna *sna, |
const struct sna_composite_spans_op *op, |
const BoxRec *box, float opacity) |
{ |
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", |
__FUNCTION__, |
op->base.src.offset[0], op->base.src.offset[1], |
opacity, |
op->base.dst.x, op->base.dst.y, |
box->x1, box->y1, |
box->x2 - box->x1, |
box->y2 - box->y1)); |
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state); |
op->prim_emit(sna, op, box, opacity); |
} |
static void |
gen6_render_composite_spans_boxes(struct sna *sna, |
const struct sna_composite_spans_op *op, |
const BoxRec *box, int nbox, |
float opacity) |
{ |
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", |
__FUNCTION__, nbox, |
op->base.src.offset[0], op->base.src.offset[1], |
opacity, |
op->base.dst.x, op->base.dst.y)); |
do { |
int nbox_this_time; |
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, |
gen6_emit_composite_state); |
nbox -= nbox_this_time; |
do { |
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, |
box->x1, box->y1, |
box->x2 - box->x1, |
box->y2 - box->y1)); |
op->prim_emit(sna, op, box++, opacity); |
} while (--nbox_this_time); |
} while (nbox); |
} |
/* Thread-safe span emission. Batch space is reserved while holding the
 * vertex mutex; the lock is then dropped (with an acquire marker) so the
 * actual vertex writes proceed concurrently with other threads, and
 * re-taken to release the marker. The lock/acquire/unlock ordering here
 * is the synchronisation protocol — do not reorder. */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim our slice of the vertex buffer while locked... */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* ...then fill it without holding the lock. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
fastcall static void |
gen6_render_composite_spans_done(struct sna *sna, |
const struct sna_composite_spans_op *op) |
{ |
DBG(("%s()\n", __FUNCTION__)); |
assert(!sna->render.active); |
if (sna->render.vertex_offset) |
gen4_vertex_flush(sna); |
if (op->base.src.bo) |
kgem_bo_destroy(&sna->kgem, op->base.src.bo); |
sna_render_composite_redirect_done(sna, &op->base); |
} |
static bool |
gen6_check_composite_spans(struct sna *sna, |
uint8_t op, PicturePtr src, PicturePtr dst, |
int16_t width, int16_t height, |
unsigned flags) |
{ |
DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", |
__FUNCTION__, op, width, height, flags)); |
if (op >= ARRAY_SIZE(gen6_blend_op)) |
return false; |
if (gen6_composite_fallback(sna, src, NULL, dst)) { |
DBG(("%s: operation would fallback\n", __FUNCTION__)); |
return false; |
} |
if (need_tiling(sna, width, height) && |
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { |
DBG(("%s: fallback, tiled operation not on GPU\n", |
__FUNCTION__)); |
return false; |
} |
if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { |
struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); |
assert(priv); |
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) |
return true; |
if (flags & COMPOSITE_SPANS_INPLACE_HINT) |
return false; |
return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); |
} |
return true; |
} |
/* Prepare a spans composite operation (composite weighted by per-box
 * opacity). Mirrors gen6_render_composite but with no mask channel:
 * opacity is fed through the OPACITY WM kernel instead. Returns true
 * with 'tmp' ready for emission; on failure unwinds acquired buffers
 * via the cleanup_* labels. */
static bool
gen6_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	/* Caller is expected to have vetted the operation already. */
	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen6_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height, true))
		return false;

	/* Build the source channel: -1 = error, 0 = solid fallback,
	 * 1 = channel populated. */
	switch (gen6_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen6_composite_channel_convert(&tmp->base.src);
		break;
	}

	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* Sampler/blend/kernel selection; the opacity kernel has affine
	 * and projective variants selected by the low bit. */
	tmp->base.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(sna, tmp));

	tmp->box   = gen6_render_composite_spans_box;
	tmp->boxes = gen6_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
	tmp->done  = gen6_render_composite_spans_done;

	/* Ensure both buffers fit in the batch, flushing once if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_composite_state(sna, &tmp->base);
	gen6_align_vertex(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
#endif |
/* Emit the surface/binding state for a render-pipeline copy:
 * slot 0 is the destination render target, slot 1 the source sampler
 * view. If the freshly-built table matches the previous one, reuse it
 * and reclaim the surface-state space just reserved. */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Both 32-bit entries are compared at once as a single 64-bit
	 * load against the cached table. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
static inline bool prefer_blt_copy(struct sna *sna, |
struct kgem_bo *src_bo, |
struct kgem_bo *dst_bo, |
unsigned flags) |
{ |
if (flags & COPY_SYNC) |
return false; |
if (PREFER_RENDER) |
return PREFER_RENDER > 0; |
if (sna->kgem.ring == KGEM_BLT) |
return true; |
if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) |
return true; |
if (untiled_tlb_miss(src_bo) || |
untiled_tlb_miss(dst_bo)) |
return true; |
if (kgem_bo_is_render(dst_bo) || |
kgem_bo_is_render(src_bo)) |
return false; |
if (!prefer_blt_ring(sna, dst_bo, flags)) |
return false; |
return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); |
} |
/* Compute the bounding box of n boxes (n must be >= 1). */
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
{
	int i;

	/* Seed with the first box, then grow to cover the rest. */
	*extents = box[0];
	for (i = 1; i < n; i++) {
		if (extents->x1 > box[i].x1)
			extents->x1 = box[i].x1;
		if (extents->x2 < box[i].x2)
			extents->x2 = box[i].x2;
		if (extents->y1 > box[i].y1)
			extents->y1 = box[i].y1;
		if (extents->y2 < box[i].y2)
			extents->y2 = box[i].y2;
	}
}
static inline bool |
overlaps(struct sna *sna, |
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int n, BoxRec *extents) |
{ |
if (src_bo != dst_bo) |
return false; |
boxes_extents(box, n, extents); |
return (extents->x2 + src_dx > extents->x1 + dst_dx && |
extents->x1 + src_dx < extents->x2 + dst_dx && |
extents->y2 + src_dy > extents->y1 + dst_dy && |
extents->y1 + src_dy < extents->y2 + dst_dy); |
} |
static bool |
gen6_render_copy_boxes(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int n, unsigned flags) |
{ |
struct sna_composite_op tmp; |
BoxRec extents; |
DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n", |
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, |
src_bo == dst_bo, |
overlaps(sna, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
box, n, &extents))); |
if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && |
sna_blt_compare_depth(&src->drawable, &dst->drawable) && |
sna_blt_copy_boxes(sna, alu, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
dst->drawable.bitsPerPixel, |
box, n)) |
return true; |
if (!(alu == GXcopy || alu == GXclear)) { |
fallback_blt: |
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) |
return false; |
return sna_blt_copy_boxes_fallback(sna, alu, |
src, src_bo, src_dx, src_dy, |
dst, dst_bo, dst_dx, dst_dy, |
box, n); |
} |
if (overlaps(sna, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
box, n, &extents)) { |
if (too_large(extents.x2-extents.x1, extents.y2-extents.y1)) |
goto fallback_blt; |
if (can_switch_to_blt(sna, dst_bo, flags) && |
sna_blt_compare_depth(&src->drawable, &dst->drawable) && |
sna_blt_copy_boxes(sna, alu, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
dst->drawable.bitsPerPixel, |
box, n)) |
return true; |
return sna_render_copy_boxes__overlap(sna, alu, |
src, src_bo, src_dx, src_dy, |
dst, dst_bo, dst_dx, dst_dy, |
box, n, &extents); |
} |
if (dst->drawable.depth == src->drawable.depth) { |
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth); |
tmp.src.pict_format = tmp.dst.format; |
} else { |
tmp.dst.format = sna_format_for_depth(dst->drawable.depth); |
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); |
} |
if (!gen6_check_format(tmp.src.pict_format)) |
goto fallback_blt; |
tmp.dst.pixmap = dst; |
tmp.dst.width = dst->drawable.width; |
tmp.dst.height = dst->drawable.height; |
tmp.dst.bo = dst_bo; |
tmp.dst.x = tmp.dst.y = 0; |
tmp.damage = NULL; |
sna_render_composite_redirect_init(&tmp); |
if (too_large(tmp.dst.width, tmp.dst.height)) { |
int i; |
extents = box[0]; |
for (i = 1; i < n; i++) { |
if (box[i].x1 < extents.x1) |
extents.x1 = box[i].x1; |
if (box[i].y1 < extents.y1) |
extents.y1 = box[i].y1; |
if (box[i].x2 > extents.x2) |
extents.x2 = box[i].x2; |
if (box[i].y2 > extents.y2) |
extents.y2 = box[i].y2; |
} |
if (!sna_render_composite_redirect(sna, &tmp, |
extents.x1 + dst_dx, |
extents.y1 + dst_dy, |
extents.x2 - extents.x1, |
extents.y2 - extents.y1, |
n > 1)) |
goto fallback_tiled; |
dst_dx += tmp.dst.x; |
dst_dy += tmp.dst.y; |
tmp.dst.x = tmp.dst.y = 0; |
} |
tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); |
if (too_large(src->drawable.width, src->drawable.height)) { |
int i; |
extents = box[0]; |
for (i = 1; i < n; i++) { |
if (box[i].x1 < extents.x1) |
extents.x1 = box[i].x1; |
if (box[i].y1 < extents.y1) |
extents.y1 = box[i].y1; |
if (box[i].x2 > extents.x2) |
extents.x2 = box[i].x2; |
if (box[i].y2 > extents.y2) |
extents.y2 = box[i].y2; |
} |
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, |
extents.x1 + src_dx, |
extents.y1 + src_dy, |
extents.x2 - extents.x1, |
extents.y2 - extents.y1)) { |
DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__)); |
goto fallback_tiled_dst; |
} |
src_dx += tmp.src.offset[0]; |
src_dy += tmp.src.offset[1]; |
} else { |
tmp.src.bo = src_bo; |
tmp.src.width = src->drawable.width; |
tmp.src.height = src->drawable.height; |
} |
tmp.mask.bo = NULL; |
tmp.floats_per_vertex = 2; |
tmp.floats_per_rect = 6; |
tmp.need_magic_ca_pass = 0; |
tmp.u.gen6.flags = COPY_FLAGS(alu); |
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); |
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); |
assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); |
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); |
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { |
kgem_submit(&sna->kgem); |
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { |
DBG(("%s: too large for a single operation\n", |
__FUNCTION__)); |
goto fallback_tiled_src; |
} |
_kgem_set_mode(&sna->kgem, KGEM_RENDER); |
} |
gen6_emit_copy_state(sna, &tmp); |
gen6_align_vertex(sna, &tmp); |
do { |
int16_t *v; |
int n_this_time; |
n_this_time = gen6_get_rectangles(sna, &tmp, n, |
gen6_emit_copy_state); |
n -= n_this_time; |
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); |
sna->render.vertex_used += 6 * n_this_time; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
do { |
DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", |
box->x1 + src_dx, box->y1 + src_dy, |
box->x1 + dst_dx, box->y1 + dst_dy, |
box->x2 - box->x1, box->y2 - box->y1)); |
v[0] = box->x2 + dst_dx; |
v[2] = box->x2 + src_dx; |
v[1] = v[5] = box->y2 + dst_dy; |
v[3] = v[7] = box->y2 + src_dy; |
v[8] = v[4] = box->x1 + dst_dx; |
v[10] = v[6] = box->x1 + src_dx; |
v[9] = box->y1 + dst_dy; |
v[11] = box->y1 + src_dy; |
v += 12; box++; |
} while (--n_this_time); |
} while (n); |
gen4_vertex_flush(sna); |
sna_render_composite_redirect_done(sna, &tmp); |
if (tmp.src.bo != src_bo) |
kgem_bo_destroy(&sna->kgem, tmp.src.bo); |
return true; |
fallback_tiled_src: |
if (tmp.src.bo != src_bo) |
kgem_bo_destroy(&sna->kgem, tmp.src.bo); |
fallback_tiled_dst: |
if (tmp.redirect.real_bo) |
kgem_bo_destroy(&sna->kgem, tmp.dst.bo); |
fallback_tiled: |
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && |
sna_blt_copy_boxes(sna, alu, |
src_bo, src_dx, src_dy, |
dst_bo, dst_dx, dst_dy, |
dst->drawable.bitsPerPixel, |
box, n)) |
return true; |
return sna_tiling_copy_boxes(sna, alu, |
src, src_bo, src_dx, src_dy, |
dst, dst_bo, dst_dx, dst_dy, |
box, n); |
} |
static void |
gen6_render_copy_blt(struct sna *sna, |
const struct sna_copy_op *op, |
int16_t sx, int16_t sy, |
int16_t w, int16_t h, |
int16_t dx, int16_t dy) |
{ |
int16_t *v; |
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state); |
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; |
sna->render.vertex_used += 6; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
v[0] = dx+w; v[1] = dy+h; |
v[2] = sx+w; v[3] = sy+h; |
v[4] = dx; v[5] = dy+h; |
v[6] = sx; v[7] = sy+h; |
v[8] = dx; v[9] = dy; |
v[10] = sx; v[11] = sy; |
} |
static void |
gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) |
{ |
DBG(("%s()\n", __FUNCTION__)); |
assert(!sna->render.active); |
if (sna->render.vertex_offset) |
gen4_vertex_flush(sna); |
} |
/* Set up a reusable copy operation (op->blt is then called per
 * rectangle). Prefers the blitter when heuristics favour it; falls back
 * to the BLT for non-copy ALUs, self-copies or oversized surfaces. */
static bool
gen6_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

	if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
	    sna_blt_copy(sna, alu,
			 src_bo, dst_bo,
			 dst->drawable.bitsPerPixel,
			 op))
		return true;

	/* The render path handles only GXcopy/GXclear on distinct,
	 * hardware-sized surfaces. */
	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
	    too_large(src->drawable.width, src->drawable.height) ||
	    too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
			return false;

		return sna_blt_copy(sna, alu, src_bo, dst_bo,
				    dst->drawable.bitsPerPixel,
				    op);
	}

	/* Pick compatible pict formats for the two surfaces. */
	if (dst->drawable.depth == src->drawable.depth) {
		op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = op->base.dst.format;
	} else {
		op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
		op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
	}
	if (!gen6_check_format(op->base.src.pict_format))
		goto fallback;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen6_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = COPY_FLAGS(alu);
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);

	/* Reserve batch space, flushing once before falling back. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_copy_state(sna, &op->base);
	gen6_align_vertex(sna, &op->base);

	op->blt  = gen6_render_copy_blt;
	op->done = gen6_render_copy_done;
	return true;
}
#endif |
#if 0 |
/* Emit the surface/binding state for a solid fill: slot 0 is the
 * destination render target, slot 1 a 1x1 solid-colour source. As with
 * the copy state, a table identical to the previous one is reused and
 * the reserved surface-state space reclaimed. */
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* Both 32-bit entries compared at once as a 64-bit load. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
static inline bool prefer_blt_fill(struct sna *sna, |
struct kgem_bo *bo) |
{ |
if (PREFER_RENDER) |
return PREFER_RENDER < 0; |
if (kgem_bo_is_render(bo)) |
return false; |
if (untiled_tlb_miss(bo)) |
return true; |
if (!prefer_blt_ring(sna, bo, 0)) |
return false; |
return prefer_blt_bo(sna, bo); |
} |
/* Fill a list of boxes with a constant colour through the render
 * pipeline, after first trying the blitter when heuristics favour it
 * or the target format is not renderable. Oversized targets are
 * redirected through a proxy or handed to the tiled path. */
static bool
gen6_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       PixmapPtr dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen6_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* BLT path: only Clear/Src can be expressed as a solid-pixel
	 * blit, and only when the colour maps onto the target format. */
	if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->drawable.bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen6_check_dst_format(format))
			return false;
	}

	/* Render path: normalise the colour to a8r8g8b8. */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->drawable.width, dst->drawable.height)) {
		/* Redirect through a proxy; if that fails, tile. */
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	/* The source is a cached 1x1 solid-colour bo. */
	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS(op, format);
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	/* Emit each box as three (x, y, texcoord) vertices sampling the
	 * single solid texel. */
	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
static void |
gen6_render_op_fill_blt(struct sna *sna, |
const struct sna_fill_op *op, |
int16_t x, int16_t y, int16_t w, int16_t h) |
{ |
int16_t *v; |
DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); |
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); |
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; |
sna->render.vertex_used += 6; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
v[0] = x+w; |
v[4] = v[8] = x; |
v[1] = v[5] = y+h; |
v[9] = y; |
v[2] = v[3] = v[7] = 1; |
v[6] = v[10] = v[11] = 0; |
} |
fastcall static void |
gen6_render_op_fill_box(struct sna *sna, |
const struct sna_fill_op *op, |
const BoxRec *box) |
{ |
int16_t *v; |
DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, |
box->x1, box->y1, box->x2, box->y2)); |
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); |
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; |
sna->render.vertex_used += 6; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
v[0] = box->x2; |
v[8] = v[4] = box->x1; |
v[5] = v[1] = box->y2; |
v[9] = box->y1; |
v[7] = v[2] = v[3] = 1; |
v[6] = v[10] = v[11] = 0; |
} |
fastcall static void |
gen6_render_op_fill_boxes(struct sna *sna, |
const struct sna_fill_op *op, |
const BoxRec *box, |
int nbox) |
{ |
DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, |
box->x1, box->y1, box->x2, box->y2, nbox)); |
do { |
int nbox_this_time; |
int16_t *v; |
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, |
gen6_emit_fill_state); |
nbox -= nbox_this_time; |
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; |
sna->render.vertex_used += 6 * nbox_this_time; |
assert(sna->render.vertex_used <= sna->render.vertex_size); |
do { |
v[0] = box->x2; |
v[8] = v[4] = box->x1; |
v[5] = v[1] = box->y2; |
v[9] = box->y1; |
v[7] = v[2] = v[3] = 1; |
v[6] = v[10] = v[11] = 0; |
box++; v += 12; |
} while (--nbox_this_time); |
} while (nbox); |
} |
static void |
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) |
{ |
DBG(("%s()\n", __FUNCTION__)); |
assert(!sna->render.active); |
if (sna->render.vertex_offset) |
gen4_vertex_flush(sna); |
kgem_bo_destroy(&sna->kgem, op->base.src.bo); |
} |
/* Set up a reusable solid-fill operation (op->blt/box/boxes are then
 * called per rectangle). Prefers the blitter when heuristics favour
 * it; falls back to the BLT for non-copy ALUs or oversized targets. */
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* The render path handles only GXcopy/GXclear on hardware-sized
	 * targets. */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* The source is a cached 1x1 solid-colour bo. */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_emit_fill_state(sna, &op->base);
	gen6_align_vertex(sna, &op->base);

	op->blt   = gen6_render_op_fill_blt;
	op->box   = gen6_render_op_fill_box;
	op->boxes = gen6_render_op_fill_boxes;
	op->done  = gen6_render_op_fill_done;
	return true;
}
static bool |
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, |
uint32_t color, |
int16_t x1, int16_t y1, int16_t x2, int16_t y2, |
uint8_t alu) |
{ |
BoxRec box; |
box.x1 = x1; |
box.y1 = y1; |
box.x2 = x2; |
box.y2 = y2; |
return sna_blt_fill_boxes(sna, alu, |
bo, dst->drawable.bitsPerPixel, |
color, &box, 1); |
} |
/* Fill the single rectangle (x1,y1)-(x2,y2) of dst/bo with a solid colour.
 * Prefers the BLT engine when it is already engaged, falls back to it when
 * RENDER cannot handle the op (alu other than copy/clear, or an oversized
 * pixmap), and otherwise emits one rectangle through the gen6 RENDER fill
 * pipeline.  Returns false only if the destination bo cannot fit in a
 * batch even after submitting. */
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
uint32_t color,
int16_t x1, int16_t y1,
int16_t x2, int16_t y2,
uint8_t alu)
{
struct sna_composite_op tmp;
int16_t *v;
/* Prefer to use the BLT if already engaged */
if (prefer_blt_fill(sna, bo) &&
gen6_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return true;
/* Must use the BLT if we can't RENDER... */
if (!(alu == GXcopy || alu == GXclear) ||
too_large(dst->drawable.width, dst->drawable.height))
return gen6_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu);
if (alu == GXclear)
color = 0;
/* Describe a solid-source, no-mask composite covering the pixmap. */
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.dst.bo = bo;
tmp.dst.x = tmp.dst.y = 0;
/* The solid source bo is cached by the render core; it must be
 * released (kgem_bo_destroy) on every exit path below. */
tmp.src.bo =
sna_render_get_solid(sna,
sna_rgba_for_color(color,
dst->drawable.depth));
tmp.mask.bo = NULL;
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
/* Make room in the batch; if even a fresh batch cannot take the bo,
 * drop the solid-colour reference and report failure. */
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return false;
}
}
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
/* One rectangle = 3 vertices x 2 floats; each float packs a pair of
 * int16 values, hence the 12 int16 slots written below (positions at
 * v[0..1], v[4..5], v[8..9]; the remaining pairs look like texture
 * coordinates (1,1)/(0,1)/(0,0) -- presumed, verify against the
 * FILL_VERTEX layout). */
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
v[0] = x2;
v[8] = v[4] = x1;
v[5] = v[1] = y2;
v[9] = y1;
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
}
static bool |
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) |
{ |
BoxRec box; |
box.x1 = 0; |
box.y1 = 0; |
box.x2 = dst->drawable.width; |
box.y2 = dst->drawable.height; |
return sna_blt_fill_boxes(sna, GXclear, |
bo, dst->drawable.bitsPerPixel, |
0, &box, 1); |
} |
/* Clear an entire pixmap to zero.  Uses the BLT only when that ring is
 * already active or when the pixmap exceeds RENDER limits; otherwise
 * emits one full-pixmap rectangle through the gen6 RENDER fill pipeline
 * with a cached solid-black source. */
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
struct sna_composite_op tmp;
int16_t *v;
DBG(("%s: %dx%d\n",
__FUNCTION__,
dst->drawable.width,
dst->drawable.height));
/* Prefer to use the BLT if, and only if, already engaged */
if (sna->kgem.ring == KGEM_BLT &&
gen6_render_clear_try_blt(sna, dst, bo))
return true;
/* Must use the BLT if we can't RENDER... */
if (too_large(dst->drawable.width, dst->drawable.height))
return gen6_render_clear_try_blt(sna, dst, bo);
/* Solid-source composite over the whole pixmap, no blending. */
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.dst.bo = bo;
tmp.dst.x = tmp.dst.y = 0;
/* Cached solid bo: released on every exit path below. */
tmp.src.bo = sna_render_get_solid(sna, 0);
tmp.mask.bo = NULL;
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return false;
}
}
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
/* 3 vertices x 2 floats, written as packed int16 pairs covering
 * (width,height)-(0,height)-(0,0); same layout as gen6_render_fill_one. */
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
v[0] = dst->drawable.width;
v[5] = v[1] = dst->drawable.height;
v[8] = v[4] = 0;
v[9] = 0;
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
}
#endif |
static void gen6_render_flush(struct sna *sna) |
{ |
gen4_vertex_close(sna); |
assert(sna->render.vb_id == 0); |
assert(sna->render.vertex_offset == 0); |
} |
static void |
gen6_render_context_switch(struct kgem *kgem, |
int new_mode) |
{ |
if (kgem->nbatch) { |
DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); |
_kgem_submit(kgem); |
} |
kgem->ring = new_mode; |
} |
static void |
gen6_render_retire(struct kgem *kgem) |
{ |
struct sna *sna; |
if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire)) |
kgem->ring = kgem->mode; |
sna = container_of(kgem, struct sna, kgem); |
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { |
DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); |
sna->render.vertex_used = 0; |
sna->render.vertex_index = 0; |
} |
} |
static void |
gen6_render_expire(struct kgem *kgem) |
{ |
struct sna *sna; |
sna = container_of(kgem, struct sna, kgem); |
if (sna->render.vbo && !sna->render.vertex_used) { |
DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); |
kgem_bo_destroy(kgem, sna->render.vbo); |
assert(!sna->render.active); |
sna->render.vbo = NULL; |
sna->render.vertices = sna->render.vertex_data; |
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
sna->render.vertex_used = 0; |
sna->render.vertex_index = 0; |
} |
} |
/* Invalidate all cached hardware state so the next emission re-sends
 * everything.  Called on batch reset; the -1 sentinels mean "nothing
 * currently bound" for the corresponding unit. */
static void gen6_render_reset(struct sna *sna)
{
sna->render_state.gen6.needs_invariant = true;
sna->render_state.gen6.first_state_packet = true;
/* Default vertex-element id: 2 components (see gen6 VE encoding). */
sna->render_state.gen6.ve_id = 3 << 2;
sna->render_state.gen6.last_primitive = -1;
sna->render_state.gen6.num_sf_outputs = 0;
sna->render_state.gen6.samplers = -1;
sna->render_state.gen6.blend = -1;
sna->render_state.gen6.kernel = -1;
sna->render_state.gen6.drawrect_offset = -1;
sna->render_state.gen6.drawrect_limit = -1;
sna->render_state.gen6.surface_table = -1;
/* Vertex bookkeeping restarts empty for the new batch. */
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
/* Teardown: release the bo holding the static state (WM kernels,
 * samplers, blend table) built by gen6_render_setup(). */
static void gen6_render_fini(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
static bool is_gt2(struct sna *sna) |
{ |
return sna->PciInfo->device_id & 0x30; |
} |
static bool is_mobile(struct sna *sna) |
{ |
return (sna->PciInfo->device_id & 0xf) == 0x6; |
} |
static bool gen6_render_setup(struct sna *sna) |
{ |
struct gen6_render_state *state = &sna->render_state.gen6; |
struct sna_static_stream general; |
struct gen6_sampler_state *ss; |
int i, j, k, l, m; |
state->info = >1_info; |
if (is_gt2(sna)) |
state->info = >2_info; /* XXX requires GT_MODE WiZ disabled */ |
sna_static_stream_init(&general); |
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer |
* dumps, you know it points to zero. |
*/ |
null_create(&general); |
scratch_create(&general); |
for (m = 0; m < GEN6_KERNEL_COUNT; m++) { |
if (wm_kernels[m].size) { |
state->wm_kernel[m][1] = |
sna_static_stream_add(&general, |
wm_kernels[m].data, |
wm_kernels[m].size, |
64); |
} else { |
if (USE_8_PIXEL_DISPATCH) { |
state->wm_kernel[m][0] = |
sna_static_stream_compile_wm(sna, &general, |
wm_kernels[m].data, 8); |
} |
if (USE_16_PIXEL_DISPATCH) { |
state->wm_kernel[m][1] = |
sna_static_stream_compile_wm(sna, &general, |
wm_kernels[m].data, 16); |
} |
if (USE_32_PIXEL_DISPATCH) { |
state->wm_kernel[m][2] = |
sna_static_stream_compile_wm(sna, &general, |
wm_kernels[m].data, 32); |
} |
} |
if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) { |
state->wm_kernel[m][1] = |
sna_static_stream_compile_wm(sna, &general, |
wm_kernels[m].data, 16); |
} |
} |
ss = sna_static_stream_map(&general, |
2 * sizeof(*ss) * |
(2 + |
FILTER_COUNT * EXTEND_COUNT * |
FILTER_COUNT * EXTEND_COUNT), |
32); |
state->wm_state = sna_static_stream_offsetof(&general, ss); |
sampler_copy_init(ss); ss += 2; |
sampler_fill_init(ss); ss += 2; |
for (i = 0; i < FILTER_COUNT; i++) { |
for (j = 0; j < EXTEND_COUNT; j++) { |
for (k = 0; k < FILTER_COUNT; k++) { |
for (l = 0; l < EXTEND_COUNT; l++) { |
sampler_state_init(ss++, i, j); |
sampler_state_init(ss++, k, l); |
} |
} |
} |
} |
state->cc_blend = gen6_composite_create_blend_state(&general); |
state->general_bo = sna_static_stream_fini(sna, &general); |
return state->general_bo != NULL; |
} |
/* Hook the gen6 RENDER backend into sna.  On success returns the GT
 * variant name for logging; if the static state upload fails, returns the
 * caller-supplied backend name unchanged and installs nothing.
 * Note: most acceleration entry points are compiled out (#if 0) in this
 * port -- only blit_tex plus the engine-management hooks are live. */
const char *gen6_render_init(struct sna *sna, const char *backend)
{
if (!gen6_render_setup(sna))
return backend;
sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
sna->kgem.expire = gen6_render_expire;
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen6_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen6_check_composite_spans;
sna->render.composite_spans = gen6_render_composite_spans;
if (is_mobile(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen6_render_video;
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen6_render_copy;
#endif
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen6_render_clear;
#endif
#endif
/* Live entry points for this port. */
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
sna->render.blit_tex = gen6_blit_tex;
sna->render.flush = gen6_render_flush;
sna->render.reset = gen6_render_reset;
sna->render.fini = gen6_render_fini;
sna->render.max_3d_size = GEN6_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->render_state.gen6.info->name;
}
/* Set up a textured blit (src modulated by an a8 mask) into dst as a
 * PictOpSrc composite on the gen6 RENDER ring, emitting state and leaving
 * *tmp ready for tmp->blt() calls.  Always returns true.  Source filtering
 * is nearest when the source already matches the target size, bilinear
 * otherwise.  The msk_x/msk_y/src_x/src_y/dst_x/dst_y offsets are unused
 * here (the offset computations are commented out below). */
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
/* The 'op' parameter is ignored: this path always composites as SRC. */
tmp->op = PictOpSrc;
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
/* 1:1 blits take the cheap nearest filter; anything scaled is
 * sampled bilinearly. */
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
/* The a8 mask is always sampled unscaled with nearest filtering. */
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
/* Texture-coordinate scale: when 'scale' is set the coordinates are
 * normalised by the target size, otherwise by the source size. */
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
*/
GEN6_WM_KERNEL_MASK,
gen4_choose_composite_emitter(sna, tmp));
tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
/* Switch to the RENDER ring and make sure all three bos fit in the
 * batch, submitting the current one if necessary. */
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
}
/drivers/video/Intel-2D/gen7_render.c |
---|
45,6 → 45,8 |
#include "gen4_source.h" |
#include "gen4_vertex.h" |
#define ALWAYS_FLUSH 0 |
#define NO_COMPOSITE 0 |
#define NO_COMPOSITE_SPANS 0 |
#define NO_COPY 0 |
75,6 → 77,7 |
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0) |
struct gt_info { |
const char *name; |
uint32_t max_vs_threads; |
uint32_t max_gs_threads; |
uint32_t max_wm_threads; |
82,57 → 85,107 |
int size; |
int max_vs_entries; |
int max_gs_entries; |
int push_ps_size; /* in 1KBs */ |
} urb; |
int gt; |
}; |
static const struct gt_info ivb_gt_info = { |
.name = "Ivybridge (gen7)", |
.max_vs_threads = 16, |
.max_gs_threads = 16, |
.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT, |
.urb = { 128, 64, 64 }, |
.urb = { 128, 64, 64, 8 }, |
.gt = 0, |
}; |
static const struct gt_info ivb_gt1_info = { |
.name = "Ivybridge (gen7, gt1)", |
.max_vs_threads = 36, |
.max_gs_threads = 36, |
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, |
.urb = { 128, 512, 192 }, |
.urb = { 128, 512, 192, 8 }, |
.gt = 1, |
}; |
static const struct gt_info ivb_gt2_info = { |
.name = "Ivybridge (gen7, gt2)", |
.max_vs_threads = 128, |
.max_gs_threads = 128, |
.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT, |
.urb = { 256, 704, 320 }, |
.urb = { 256, 704, 320, 8 }, |
.gt = 2, |
}; |
static const struct gt_info byt_gt_info = { |
.name = "Baytrail (gen7)", |
.urb = { 128, 64, 64 }, |
.max_vs_threads = 36, |
.max_gs_threads = 36, |
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, |
.urb = { 128, 512, 192, 8 }, |
.gt = 1, |
}; |
static const struct gt_info hsw_gt_info = { |
.name = "Haswell (gen7.5)", |
.max_vs_threads = 8, |
.max_gs_threads = 8, |
.max_wm_threads = |
(8 - 1) << HSW_PS_MAX_THREADS_SHIFT | |
1 << HSW_PS_SAMPLE_MASK_SHIFT, |
.urb = { 128, 64, 64 }, |
.urb = { 128, 64, 64, 8 }, |
.gt = 0, |
}; |
static const struct gt_info hsw_gt1_info = { |
.name = "Haswell (gen7.5, gt1)", |
.max_vs_threads = 70, |
.max_gs_threads = 70, |
.max_wm_threads = |
(102 - 1) << HSW_PS_MAX_THREADS_SHIFT | |
1 << HSW_PS_SAMPLE_MASK_SHIFT, |
.urb = { 128, 640, 256 }, |
.urb = { 128, 640, 256, 8 }, |
.gt = 1, |
}; |
static const struct gt_info hsw_gt2_info = { |
.name = "Haswell (gen7.5, gt2)", |
.max_vs_threads = 140, |
.max_gs_threads = 140, |
.max_wm_threads = |
(140 - 1) << HSW_PS_MAX_THREADS_SHIFT | |
1 << HSW_PS_SAMPLE_MASK_SHIFT, |
.urb = { 256, 1664, 640, 8 }, |
.gt = 2, |
}; |
static const struct gt_info hsw_gt3_info = { |
.name = "Haswell (gen7.5, gt3)", |
.max_vs_threads = 280, |
.max_gs_threads = 280, |
.max_wm_threads = |
(204 - 1) << HSW_PS_MAX_THREADS_SHIFT | |
(280 - 1) << HSW_PS_MAX_THREADS_SHIFT | |
1 << HSW_PS_SAMPLE_MASK_SHIFT, |
.urb = { 256, 1664, 640 }, |
.urb = { 512, 3328, 1280, 16 }, |
.gt = 3, |
}; |
inline static bool is_ivb(struct sna *sna) |
{ |
return sna->kgem.gen == 070; |
} |
inline static bool is_byt(struct sna *sna) |
{ |
return sna->kgem.gen == 071; |
} |
inline static bool is_hsw(struct sna *sna) |
{ |
return sna->kgem.gen == 075; |
} |
static const uint32_t ps_kernel_packed[][4] = { |
#include "exa_wm_src_affine.g7b" |
#include "exa_wm_src_sample_argb.g7b" |
294,8 → 347,24 |
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; |
case PICT_x8r8g8b8: |
return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM; |
case PICT_a8b8g8r8: |
return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; |
case PICT_x8b8g8r8: |
return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM; |
case PICT_a2r10g10b10: |
return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; |
case PICT_x2r10g10b10: |
return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM; |
case PICT_r8g8b8: |
return GEN7_SURFACEFORMAT_R8G8B8_UNORM; |
case PICT_r5g6b5: |
return GEN7_SURFACEFORMAT_B5G6R5_UNORM; |
case PICT_a1r5g5b5: |
return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; |
case PICT_a8: |
return GEN7_SURFACEFORMAT_A8_UNORM; |
case PICT_a4r4g4b4: |
return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; |
} |
} |
307,8 → 376,22 |
case PICT_a8r8g8b8: |
case PICT_x8r8g8b8: |
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; |
case PICT_a8b8g8r8: |
case PICT_x8b8g8r8: |
return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; |
case PICT_a2r10g10b10: |
case PICT_x2r10g10b10: |
return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; |
case PICT_r5g6b5: |
return GEN7_SURFACEFORMAT_B5G6R5_UNORM; |
case PICT_x1r5g5b5: |
case PICT_a1r5g5b5: |
return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; |
case PICT_a8: |
return GEN7_SURFACEFORMAT_A8_UNORM; |
case PICT_a4r4g4b4: |
case PICT_x4r4g4b4: |
return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; |
} |
} |
335,7 → 418,7 |
gen7_emit_urb(struct sna *sna) |
{ |
OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); |
OUT_BATCH(8); /* in 1KBs */ |
OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size); |
/* num of VS entries must be divisible by 8 if size < 9 */ |
OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2)); |
359,6 → 442,10 |
static void |
gen7_emit_state_base_address(struct sna *sna) |
{ |
uint32_t mocs; |
mocs = is_hsw(sna) ? 5 << 8 : 3 << 8; |
OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2)); |
OUT_BATCH(0); /* general */ |
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ |
366,17 → 453,17 |
NULL, |
I915_GEM_DOMAIN_INSTRUCTION << 16, |
BASE_ADDRESS_MODIFY)); |
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ |
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */ |
sna->kgem.nbatch, |
sna->render_state.gen7.general_bo, |
I915_GEM_DOMAIN_INSTRUCTION << 16, |
BASE_ADDRESS_MODIFY)); |
mocs | BASE_ADDRESS_MODIFY)); |
OUT_BATCH(0); /* indirect */ |
OUT_BATCH(kgem_add_reloc(&sna->kgem, |
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ |
sna->kgem.nbatch, |
sna->render_state.gen7.general_bo, |
I915_GEM_DOMAIN_INSTRUCTION << 16, |
BASE_ADDRESS_MODIFY)); |
mocs | BASE_ADDRESS_MODIFY)); |
/* upper bounds, disable */ |
OUT_BATCH(0); |
905,10 → 992,17 |
} |
inline static void |
gen7_emit_pipe_flush(struct sna *sna) |
gen7_emit_pipe_flush(struct sna *sna, bool need_stall) |
{ |
unsigned stall; |
stall = 0; |
if (need_stall) |
stall = (GEN7_PIPE_CONTROL_CS_STALL | |
GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); |
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); |
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH); |
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall); |
OUT_BATCH(0); |
OUT_BATCH(0); |
} |
930,8 → 1024,7 |
{ |
bool need_stall; |
if (sna->render_state.gen7.emit_flush) |
gen7_emit_pipe_flush(sna); |
assert(op->dst.bo->exec); |
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags)); |
gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags)); |
942,13 → 1035,18 |
need_stall = gen7_emit_binding_table(sna, wm_binding_table); |
need_stall &= gen7_emit_drawing_rectangle(sna, op); |
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { |
if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { |
gen7_emit_pipe_invalidate(sna); |
kgem_clear_dirty(&sna->kgem); |
if (op->dst.bo->exec) |
assert(op->dst.bo->exec); |
kgem_bo_mark_dirty(op->dst.bo); |
sna->render_state.gen7.emit_flush = false; |
need_stall = false; |
} |
if (sna->render_state.gen7.emit_flush) { |
gen7_emit_pipe_flush(sna, need_stall); |
need_stall = false; |
} |
if (need_stall) |
gen7_emit_pipe_stall(sna); |
1091,7 → 1189,7 |
COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32); |
/* After the first bind, we manage the cache domains within the batch */ |
offset = kgem_bo_get_binding(bo, format | is_scanout << 31); |
offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); |
if (offset) { |
if (is_dst) |
kgem_bo_mark_dirty(bo); |
1104,9 → 1202,12 |
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT | |
gen7_tiling_bits(bo->tiling) | |
format << GEN7_SURFACE_FORMAT_SHIFT); |
if (is_dst) |
if (bo->tiling == I915_TILING_Y) |
ss[0] |= GEN7_SURFACE_VALIGN_4; |
if (is_dst) { |
ss[0] |= GEN7_SURFACE_RC_READ_WRITE; |
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; |
else |
} else |
domains = I915_GEM_DOMAIN_SAMPLER << 16; |
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); |
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT | |
1113,13 → 1214,13 |
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT); |
ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT; |
ss[4] = 0; |
ss[5] = is_scanout ? 0 : 3 << 16; |
ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 5 << 16 : 3 << 16; |
ss[6] = 0; |
ss[7] = 0; |
if (sna->kgem.gen == 075) |
if (is_hsw(sna)) |
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); |
kgem_bo_set_binding(bo, format | is_scanout << 31, offset); |
kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); |
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", |
offset, bo->handle, ss[1], |
1242,8 → 1343,8 |
goto start; |
} |
assert(op->floats_per_rect >= vertex_space(sna)); |
assert(rem <= vertex_space(sna)); |
assert(op->floats_per_rect <= rem); |
if (want > 1 && want * op->floats_per_rect > rem) |
want = rem / op->floats_per_rect; |
1398,8 → 1499,258 |
return sna_static_stream_offsetof(stream, base); |
} |
#if 0 |
/* Allocate a SURFACE_STATE entry in the batch for one linear (untiled)
 * video source plane at byte 'offset' within bo, and return the binding
 * offset (in bytes) to store in the binding table. */
static uint32_t gen7_bind_video_source(struct sna *sna,
struct kgem_bo *bo,
uint32_t offset,
int width,
int height,
int pitch,
uint32_t format)
{
uint32_t *ss, bind;
/* Carve surface-state space from the top of the batch buffer. */
bind = sna->kgem.surface -=
sizeof(struct gen7_surface_state) / sizeof(uint32_t);
assert(bo->tiling == I915_TILING_NONE);
ss = sna->kgem.batch + bind;
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
format << GEN7_SURFACE_FORMAT_SHIFT);
/* Sampler-read domain; the reloc carries the plane's byte offset. */
ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
I915_GEM_DOMAIN_SAMPLER << 16,
offset);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
ss[5] = 0;
ss[6] = 0;
ss[7] = 0;
/* Haswell adds a channel-select field to dword 7. */
if (is_hsw(sna))
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
bind, bo->handle, ss[1],
format, width, height, pitch, offset));
return bind * sizeof(uint32_t);
}
/* Emit the surface bindings and pipeline state for a video blit: one
 * render-target binding for the destination plus one sampler binding per
 * source plane (six half/full-resolution planes for planar formats, one
 * packed YUV surface otherwise). */
static void gen7_emit_video_state(struct sna *sna,
const struct sna_composite_op *op)
{
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
gen7_get_batch(sna, op);
/* Plane base offsets within the frame bo: Y at 0, then V, then U
 * (each listed twice to feed the two sampler channels). */
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
if (is_planar_fourcc(frame->id)) {
/* Planar YUV: six R8 surfaces; chroma planes at half size. */
src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
/* Packed YUV: a single surface; byte order depends on fourcc. */
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
binding_table = gen7_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen7_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen7_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen7_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
gen7_emit_state(sna, op, offset);
}
/* Render a video frame into the destination pixmap, one textured
 * rectangle per clip box of dstRegion, scaling as needed.  Returns false
 * only if the pixmap cannot be moved to the GPU. */
static bool
gen7_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
REGION_EXTENTS(NULL, dstRegion)->y2));
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
/* Composite op: frame->bo sampled into the pixmap's gpu bo, no mask.
 * 3 floats per vertex: packed (x,y) position plus 2 texcoords. */
memset(&tmp, 0, sizeof(tmp));
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
/* Unscaled video can use the cheaper nearest filter. */
if (src_width == dst_width && src_height == dst_height)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
tmp.u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN7_WM_KERNEL_VIDEO_PLANAR :
GEN7_WM_KERNEL_VIDEO_PACKED,
2);
tmp.priv = frame;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
gen7_emit_video_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
/* Set up the offset for translating from the given region (in screen
 * coordinates) to the backing pixmap.
 */
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
__FUNCTION__,
frame->src.x1, frame->src.y1,
src_width, src_height,
dst_width, dst_height,
frame->width, frame->height));
/* Map destination pixels to normalised source texture coordinates:
 * tex = dst_pixel * scale + offset. */
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
__FUNCTION__,
src_scale_x, src_scale_y,
src_offset_x, src_offset_y));
/* One rectangle (3 vertices) per clip box, with damage tracking. */
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
__FUNCTION__,
box->x1, box->y1,
box->x2, box->y2,
pix_xoff, pix_yoff,
box->x1 * src_scale_x + src_offset_x,
box->y1 * src_scale_y + src_offset_y,
box->x2 * src_scale_x + src_offset_x,
box->y2 * src_scale_y + src_offset_y));
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}
gen4_vertex_flush(sna);
return true;
}
#endif |
static void gen7_render_composite_done(struct sna *sna, |
const struct sna_composite_op *op) |
{ |
1410,101 → 1761,24 |
} |
/* gen7 counterpart of gen6_blit_tex: set up a masked textured blit as a
 * PictOpSrc composite on the RENDER ring and leave *tmp ready for
 * tmp->blt() calls.  Always returns true.  Unlike the gen6 version the
 * source filter is fixed to nearest here. */
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
/* 'op' is ignored: this path always composites as SRC. */
tmp->op = PictOpSrc;
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
/* a8 mask sampled unscaled with nearest filtering. */
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
/* Texture-coordinate scale: normalised by target size when 'scale'
 * is set, by source size otherwise. */
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
tmp->u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen7_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen7_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine), */
GEN7_WM_KERNEL_MASK,
/* NOTE(review): the gen6 sibling calls
 * gen4_choose_composite_emitter(sna, tmp); verify which
 * signature this tree's gen4_vertex.h declares. */
gen4_choose_composite_emitter(tmp));
tmp->blt = gen7_render_composite_blt;
// tmp->box = gen7_render_composite_box;
tmp->done = gen7_render_composite_done;
/* Switch to RENDER and ensure all three bos fit in the batch. */
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return true;
}
1578,25 → 1852,141 |
#if 0
/*
 * Fill @n boxes of @dst with the solid colour @color, honouring the
 * Render composite operator @op, using the GEN7 3D pipeline.
 *
 * Returns true if the fill was handled here (possibly via the BLT engine
 * or the tiling fallback), false to ask the caller to use another path.
 */
static bool
gen7_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       PixmapPtr dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	/* Operators beyond the blend table cannot be expressed in HW. */
	if (op >= ARRAY_SIZE(gen7_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Prefer the cheaper BLT engine for simple SRC/CLEAR fills, and
	 * try it first whenever the 3D pipeline cannot render to @format
	 * at all. */
	if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->drawable.bitsPerPixel,
				       pixel, box, n))
			return true;

		/* BLT failed and 3D cannot handle this format either. */
		if (!gen7_check_dst_format(format))
			return false;
	}

	/* Normalise the request to a PictOpSrc fill with a premultiplied
	 * a8r8g8b8 pixel value. */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	/* Destinations beyond the HW size limits are rendered through a
	 * proxy covering just the dirty extents, or tiled as a last
	 * resort. */
	if (too_large(dst->drawable.width, dst->drawable.height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS(op, format);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen7_emit_fill_state(sna, &tmp);
	gen7_align_vertex(sna, &tmp);

	/* Emit the rectangles in chunks bounded by the space left in the
	 * vertex buffer (gen7_get_rectangles may flush in between). */
	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen7_get_rectangles(sna, &tmp, n,
						  gen7_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		/* 12 int16 slots per rectangle (three vertices); the
		 * constant 0/1 slots are the solid-source texcoords. */
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3] = v[7] = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
#endif
static void gen7_render_flush(struct sna *sna) |
{ |
gen4_vertex_close(sna); |
1676,14 → 2066,20 |
kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo); |
} |
/* Haswell only (gen stored in octal: 075 == HSW): GT3 parts have bit 5
 * set in the low byte of the PCI device id (0x0?2? pattern — e.g. 0x0422,
 * 0x0426, 0x042A). */
static bool is_gt3(struct sna *sna)
{
	assert(sna->kgem.gen == 075);
	return sna->PciInfo->device_id & 0x20;
}
static bool is_gt2(struct sna *sna) |
{ |
return DEVICE_ID(sna->PciInfo) & 0x20; |
return sna->PciInfo->device_id & (is_hsw(sna)? 0x30 : 0x20); |
} |
static bool is_mobile(struct sna *sna) |
{ |
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6; |
return (sna->PciInfo->device_id & 0xf) == 0x6; |
} |
static bool gen7_render_setup(struct sna *sna) |
1693,19 → 2089,24 |
struct gen7_sampler_state *ss; |
int i, j, k, l, m; |
if (sna->kgem.gen == 070) { |
if (is_ivb(sna)) { |
state->info = &ivb_gt_info; |
if (DEVICE_ID(sna->PciInfo) & 0xf) { |
if (sna->PciInfo->device_id & 0xf) { |
state->info = &ivb_gt1_info; |
if (is_gt2(sna)) |
state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */ |
} |
} else if (sna->kgem.gen == 075) { |
} else if (is_byt(sna)) { |
state->info = &byt_gt_info; |
} else if (is_hsw(sna)) { |
state->info = &hsw_gt_info; |
if (DEVICE_ID(sna->PciInfo) & 0xf) { |
if (sna->PciInfo->device_id & 0xf) { |
if (is_gt3(sna)) |
state->info = &hsw_gt3_info; |
else if (is_gt2(sna)) |
state->info = &hsw_gt2_info; |
else |
state->info = &hsw_gt1_info; |
if (is_gt2(sna)) |
state->info = &hsw_gt2_info; |
} |
} else |
return false; |
1772,17 → 2173,52 |
return state->general_bo != NULL; |
} |
bool gen7_render_init(struct sna *sna) |
const char *gen7_render_init(struct sna *sna, const char *backend) |
{ |
if (!gen7_render_setup(sna)) |
return false; |
return backend; |
sna->kgem.context_switch = gen7_render_context_switch; |
sna->kgem.retire = gen7_render_retire; |
sna->kgem.expire = gen7_render_expire; |
#if 0 |
#if !NO_COMPOSITE |
sna->render.composite = gen7_render_composite; |
sna->render.prefer_gpu |= PREFER_GPU_RENDER; |
#endif |
#if !NO_COMPOSITE_SPANS |
sna->render.check_composite_spans = gen7_check_composite_spans; |
sna->render.composite_spans = gen7_render_composite_spans; |
if (is_mobile(sna) || is_gt2(sna) || is_byt(sna)) |
sna->render.prefer_gpu |= PREFER_GPU_SPANS; |
#endif |
sna->render.video = gen7_render_video; |
#if !NO_COPY_BOXES |
sna->render.copy_boxes = gen7_render_copy_boxes; |
#endif |
#if !NO_COPY |
sna->render.copy = gen7_render_copy; |
#endif |
#if !NO_FILL_BOXES |
sna->render.fill_boxes = gen7_render_fill_boxes; |
#endif |
#if !NO_FILL |
sna->render.fill = gen7_render_fill; |
#endif |
#if !NO_FILL_ONE |
sna->render.fill_one = gen7_render_fill_one; |
#endif |
#if !NO_FILL_CLEAR |
sna->render.clear = gen7_render_clear; |
#endif |
#endif |
sna->render.blit_tex = gen7_blit_tex; |
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; |
sna->render.flush = gen7_render_flush; |
sna->render.reset = gen7_render_reset; |
sna->render.fini = gen7_render_fini; |
1789,9 → 2225,102 |
sna->render.max_3d_size = GEN7_MAX_SIZE; |
sna->render.max_3d_pitch = 1 << 18; |
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; |
return true; |
return sna->render_state.gen7.info->name; |
} |
/*
 * Prepare a textured blit on the GEN7 3D pipeline: sample @src (treated
 * as x8r8g8b8) through the a8 @mask and write into @dst. The prepared
 * state is recorded into @tmp; the caller emits rectangles via
 * tmp->blt() and finishes with tmp->done(). Always returns true.
 *
 * NOTE(review): the @op argument is accepted but ignored — the blend
 * state is always built for PictOpSrc; confirm callers expect this.
 */
static bool
gen7_blit_tex(struct sna *sna,
              uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask,struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
              int32_t src_x, int32_t src_y,
              int32_t msk_x, int32_t msk_y,
              int32_t dst_x, int32_t dst_y,
              int32_t width, int32_t height,
              struct sna_composite_op *tmp)
{
	tmp->op = PictOpSrc;

	/* Destination surface; format fixed to x8r8g8b8. */
	tmp->dst.pixmap = dst;
	tmp->dst.bo = dst_bo;
	tmp->dst.width = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	/* Source channel.
	 * NOTE(review): src uses Render constants (RepeatNone /
	 * PictFilterNearest) while mask below uses sampler constants
	 * (SAMPLER_EXTEND_NONE / SAMPLER_FILTER_NEAREST) — verify both
	 * namespaces are accepted by SAMPLER_OFFSET(). */
	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
	tmp->src.width = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	/* Mask channel: 8-bit alpha surface. */
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
	tmp->mask.width = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	/* Texture-coordinate normalisation: either against the requested
	 * blit size (scaled blit) or the full source dimensions (1:1). */
	if( scale )
	{
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	}
	else
	{
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;

	/* Pack sampler, blend, WM kernel and vertex-emitter selection into
	 * the per-op flags word. The dynamic kernel chooser is disabled in
	 * favour of the fixed masked kernel. */
	tmp->u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen7_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
/*			       gen7_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine), */
			       GEN7_WM_KERNEL_MASK,
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt   = gen7_render_composite_blt;
//	tmp->box   = gen7_render_composite_box;
	tmp->done  = gen7_render_composite_done;

	/* Make sure all three bos fit in the current batch; flush and
	 * restart it if not. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_emit_composite_state(sna, tmp);
	gen7_align_vertex(sna, tmp);
	return true;
}
/drivers/video/Intel-2D/gen7_render.h |
---|
1224,6 → 1224,8 |
/* Surface state DW0 */ |
#define GEN7_SURFACE_RC_READ_WRITE (1 << 8) |
#define GEN7_SURFACE_VALIGN_4 (1 << 16) |
#define GEN7_SURFACE_HALIGN_8 (1 << 15) |
#define GEN7_SURFACE_TILED (1 << 14) |
#define GEN7_SURFACE_TILED_Y (1 << 13) |
#define GEN7_SURFACE_FORMAT_SHIFT 18 |
/drivers/video/Intel-2D/i915_pciids.h |
---|
0,0 → 1,211 |
/* |
* Copyright 2013 Intel Corporation |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef _I915_PCIIDS_H |
#define _I915_PCIIDS_H |
/* |
* A pci_device_id struct { |
* __u32 vendor, device; |
* __u32 subvendor, subdevice; |
* __u32 class, class_mask; |
* kernel_ulong_t driver_data; |
* }; |
* Don't use C99 here because "class" is reserved and we want to |
* give userspace flexibility. |
*/ |
#define INTEL_VGA_DEVICE(id, info) { \ |
0x8086, id, \ |
~0, ~0, \ |
0x030000, 0xff0000, \ |
(unsigned long) info } |
#define INTEL_QUANTA_VGA_DEVICE(info) { \ |
0x8086, 0x16a, \ |
0x152d, 0x8990, \ |
0x030000, 0xff0000, \ |
(unsigned long) info } |
#define INTEL_I830_IDS(info) \ |
INTEL_VGA_DEVICE(0x3577, info) |
#define INTEL_I845G_IDS(info) \ |
INTEL_VGA_DEVICE(0x2562, info) |
#define INTEL_I85X_IDS(info) \ |
INTEL_VGA_DEVICE(0x3582, info), /* I855_GM */ \ |
INTEL_VGA_DEVICE(0x358e, info) |
#define INTEL_I865G_IDS(info) \ |
INTEL_VGA_DEVICE(0x2572, info) /* I865_G */ |
#define INTEL_I915G_IDS(info) \ |
INTEL_VGA_DEVICE(0x2582, info), /* I915_G */ \ |
INTEL_VGA_DEVICE(0x258a, info) /* E7221_G */ |
#define INTEL_I915GM_IDS(info) \ |
INTEL_VGA_DEVICE(0x2592, info) /* I915_GM */ |
#define INTEL_I945G_IDS(info) \ |
INTEL_VGA_DEVICE(0x2772, info) /* I945_G */ |
#define INTEL_I945GM_IDS(info) \ |
INTEL_VGA_DEVICE(0x27a2, info), /* I945_GM */ \ |
INTEL_VGA_DEVICE(0x27ae, info) /* I945_GME */ |
#define INTEL_I965G_IDS(info) \ |
INTEL_VGA_DEVICE(0x2972, info), /* I946_GZ */ \ |
INTEL_VGA_DEVICE(0x2982, info), /* G35_G */ \ |
INTEL_VGA_DEVICE(0x2992, info), /* I965_Q */ \ |
INTEL_VGA_DEVICE(0x29a2, info) /* I965_G */ |
#define INTEL_G33_IDS(info) \ |
INTEL_VGA_DEVICE(0x29b2, info), /* Q35_G */ \ |
INTEL_VGA_DEVICE(0x29c2, info), /* G33_G */ \ |
INTEL_VGA_DEVICE(0x29d2, info) /* Q33_G */ |
#define INTEL_I965GM_IDS(info) \ |
INTEL_VGA_DEVICE(0x2a02, info), /* I965_GM */ \ |
INTEL_VGA_DEVICE(0x2a12, info) /* I965_GME */ |
#define INTEL_GM45_IDS(info) \ |
INTEL_VGA_DEVICE(0x2a42, info) /* GM45_G */ |
#define INTEL_G45_IDS(info) \ |
INTEL_VGA_DEVICE(0x2e02, info), /* IGD_E_G */ \ |
INTEL_VGA_DEVICE(0x2e12, info), /* Q45_G */ \ |
INTEL_VGA_DEVICE(0x2e22, info), /* G45_G */ \ |
INTEL_VGA_DEVICE(0x2e32, info), /* G41_G */ \ |
INTEL_VGA_DEVICE(0x2e42, info), /* B43_G */ \ |
INTEL_VGA_DEVICE(0x2e92, info) /* B43_G.1 */ |
#define INTEL_PINEVIEW_IDS(info) \ |
INTEL_VGA_DEVICE(0xa001, info), \ |
INTEL_VGA_DEVICE(0xa011, info) |
#define INTEL_IRONLAKE_D_IDS(info) \ |
INTEL_VGA_DEVICE(0x0042, info) |
#define INTEL_IRONLAKE_M_IDS(info) \ |
INTEL_VGA_DEVICE(0x0046, info) |
#define INTEL_SNB_D_IDS(info) \ |
INTEL_VGA_DEVICE(0x0102, info), \ |
INTEL_VGA_DEVICE(0x0112, info), \ |
INTEL_VGA_DEVICE(0x0122, info), \ |
INTEL_VGA_DEVICE(0x010A, info) |
#define INTEL_SNB_M_IDS(info) \ |
INTEL_VGA_DEVICE(0x0106, info), \ |
INTEL_VGA_DEVICE(0x0116, info), \ |
INTEL_VGA_DEVICE(0x0126, info) |
#define INTEL_IVB_M_IDS(info) \ |
INTEL_VGA_DEVICE(0x0156, info), /* GT1 mobile */ \ |
INTEL_VGA_DEVICE(0x0166, info) /* GT2 mobile */ |
#define INTEL_IVB_D_IDS(info) \ |
INTEL_VGA_DEVICE(0x0152, info), /* GT1 desktop */ \ |
INTEL_VGA_DEVICE(0x0162, info), /* GT2 desktop */ \ |
INTEL_VGA_DEVICE(0x015a, info), /* GT1 server */ \ |
INTEL_VGA_DEVICE(0x016a, info) /* GT2 server */ |
#define INTEL_IVB_Q_IDS(info) \ |
INTEL_QUANTA_VGA_DEVICE(info) /* Quanta transcode */ |
/* Haswell desktop/server/reserved PCI IDs (incl. SDV, ULT and CRW
 * variants). Fix: the previous version ended with a stray
 * line-continuation '\' after the final 0x0D2E entry, which spliced the
 * following "#define INTEL_HSW_M_IDS" line into this macro's body. */
#define INTEL_HSW_D_IDS(info) \
	INTEL_VGA_DEVICE(0x0402, info), /* GT1 desktop */ \
	INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \
	INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \
	INTEL_VGA_DEVICE(0x040a, info), /* GT1 server */ \
	INTEL_VGA_DEVICE(0x041a, info), /* GT2 server */ \
	INTEL_VGA_DEVICE(0x042a, info), /* GT3 server */ \
	INTEL_VGA_DEVICE(0x040B, info), /* GT1 reserved */ \
	INTEL_VGA_DEVICE(0x041B, info), /* GT2 reserved */ \
	INTEL_VGA_DEVICE(0x042B, info), /* GT3 reserved */ \
	INTEL_VGA_DEVICE(0x040E, info), /* GT1 reserved */ \
	INTEL_VGA_DEVICE(0x041E, info), /* GT2 reserved */ \
	INTEL_VGA_DEVICE(0x042E, info), /* GT3 reserved */ \
	INTEL_VGA_DEVICE(0x0C02, info), /* SDV GT1 desktop */ \
	INTEL_VGA_DEVICE(0x0C12, info), /* SDV GT2 desktop */ \
	INTEL_VGA_DEVICE(0x0C22, info), /* SDV GT3 desktop */ \
	INTEL_VGA_DEVICE(0x0C0A, info), /* SDV GT1 server */ \
	INTEL_VGA_DEVICE(0x0C1A, info), /* SDV GT2 server */ \
	INTEL_VGA_DEVICE(0x0C2A, info), /* SDV GT3 server */ \
	INTEL_VGA_DEVICE(0x0C0B, info), /* SDV GT1 reserved */ \
	INTEL_VGA_DEVICE(0x0C1B, info), /* SDV GT2 reserved */ \
	INTEL_VGA_DEVICE(0x0C2B, info), /* SDV GT3 reserved */ \
	INTEL_VGA_DEVICE(0x0C0E, info), /* SDV GT1 reserved */ \
	INTEL_VGA_DEVICE(0x0C1E, info), /* SDV GT2 reserved */ \
	INTEL_VGA_DEVICE(0x0C2E, info), /* SDV GT3 reserved */ \
	INTEL_VGA_DEVICE(0x0A02, info), /* ULT GT1 desktop */ \
	INTEL_VGA_DEVICE(0x0A12, info), /* ULT GT2 desktop */ \
	INTEL_VGA_DEVICE(0x0A22, info), /* ULT GT3 desktop */ \
	INTEL_VGA_DEVICE(0x0A0A, info), /* ULT GT1 server */ \
	INTEL_VGA_DEVICE(0x0A1A, info), /* ULT GT2 server */ \
	INTEL_VGA_DEVICE(0x0A2A, info), /* ULT GT3 server */ \
	INTEL_VGA_DEVICE(0x0A0B, info), /* ULT GT1 reserved */ \
	INTEL_VGA_DEVICE(0x0A1B, info), /* ULT GT2 reserved */ \
	INTEL_VGA_DEVICE(0x0A2B, info), /* ULT GT3 reserved */ \
	INTEL_VGA_DEVICE(0x0D02, info), /* CRW GT1 desktop */ \
	INTEL_VGA_DEVICE(0x0D12, info), /* CRW GT2 desktop */ \
	INTEL_VGA_DEVICE(0x0D22, info), /* CRW GT3 desktop */ \
	INTEL_VGA_DEVICE(0x0D0A, info), /* CRW GT1 server */ \
	INTEL_VGA_DEVICE(0x0D1A, info), /* CRW GT2 server */ \
	INTEL_VGA_DEVICE(0x0D2A, info), /* CRW GT3 server */ \
	INTEL_VGA_DEVICE(0x0D0B, info), /* CRW GT1 reserved */ \
	INTEL_VGA_DEVICE(0x0D1B, info), /* CRW GT2 reserved */ \
	INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \
	INTEL_VGA_DEVICE(0x0D0E, info), /* CRW GT1 reserved */ \
	INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \
	INTEL_VGA_DEVICE(0x0D2E, info)  /* CRW GT3 reserved */
/* Haswell mobile PCI IDs (incl. SDV, ULT and CRW variants).
 * Comment fix: 0x0426 is the GT3 mobile part (0x042x == GT3 throughout
 * these tables), not GT2 as previously annotated. */
#define INTEL_HSW_M_IDS(info) \
	INTEL_VGA_DEVICE(0x0406, info), /* GT1 mobile */ \
	INTEL_VGA_DEVICE(0x0416, info), /* GT2 mobile */ \
	INTEL_VGA_DEVICE(0x0426, info), /* GT3 mobile */ \
	INTEL_VGA_DEVICE(0x0C06, info), /* SDV GT1 mobile */ \
	INTEL_VGA_DEVICE(0x0C16, info), /* SDV GT2 mobile */ \
	INTEL_VGA_DEVICE(0x0C26, info), /* SDV GT3 mobile */ \
	INTEL_VGA_DEVICE(0x0A06, info), /* ULT GT1 mobile */ \
	INTEL_VGA_DEVICE(0x0A16, info), /* ULT GT2 mobile */ \
	INTEL_VGA_DEVICE(0x0A26, info), /* ULT GT3 mobile */ \
	INTEL_VGA_DEVICE(0x0A0E, info), /* ULT GT1 reserved */ \
	INTEL_VGA_DEVICE(0x0A1E, info), /* ULT GT2 reserved */ \
	INTEL_VGA_DEVICE(0x0A2E, info), /* ULT GT3 reserved */ \
	INTEL_VGA_DEVICE(0x0D06, info), /* CRW GT1 mobile */ \
	INTEL_VGA_DEVICE(0x0D16, info), /* CRW GT2 mobile */ \
	INTEL_VGA_DEVICE(0x0D26, info)  /* CRW GT3 mobile */
#define INTEL_VLV_M_IDS(info) \ |
INTEL_VGA_DEVICE(0x0f30, info), \ |
INTEL_VGA_DEVICE(0x0f31, info), \ |
INTEL_VGA_DEVICE(0x0f32, info), \ |
INTEL_VGA_DEVICE(0x0f33, info), \ |
INTEL_VGA_DEVICE(0x0157, info) |
#define INTEL_VLV_D_IDS(info) \ |
INTEL_VGA_DEVICE(0x0155, info) |
#endif /* _I915_PCIIDS_H */ |
/drivers/video/Intel-2D/intel_driver.h |
---|
9,180 → 9,52 |
#define INTEL_VERSION_MINOR PACKAGE_VERSION_MINOR |
#define INTEL_VERSION_PATCH PACKAGE_VERSION_PATCHLEVEL |
#ifndef PCI_CHIP_I810 |
#define PCI_CHIP_I810 0x7121 |
#define PCI_CHIP_I810_DC100 0x7123 |
#define PCI_CHIP_I810_E 0x7125 |
#define PCI_CHIP_I815 0x1132 |
#define PCI_CHIP_I810_BRIDGE 0x7120 |
#define PCI_CHIP_I810_DC100_BRIDGE 0x7122 |
#define PCI_CHIP_I810_E_BRIDGE 0x7124 |
#define PCI_CHIP_I815_BRIDGE 0x1130 |
#endif |
#ifndef PCI_CHIP_I830_M |
#define PCI_CHIP_I830_M 0x3577 |
#define PCI_CHIP_I830_M_BRIDGE 0x3575 |
#endif |
#ifndef PCI_CHIP_845_G |
#define PCI_CHIP_845_G 0x2562 |
#define PCI_CHIP_845_G_BRIDGE 0x2560 |
#endif |
#ifndef PCI_CHIP_I854 |
#define PCI_CHIP_I854 0x358E |
#define PCI_CHIP_I854_BRIDGE 0x358C |
#endif |
#ifndef PCI_CHIP_I855_GM |
#define PCI_CHIP_I855_GM 0x3582 |
#define PCI_CHIP_I855_GM_BRIDGE 0x3580 |
#endif |
#ifndef PCI_CHIP_I865_G |
#define PCI_CHIP_I865_G 0x2572 |
#define PCI_CHIP_I865_G_BRIDGE 0x2570 |
#endif |
#ifndef PCI_CHIP_I915_G |
#define PCI_CHIP_I915_G 0x2582 |
#define PCI_CHIP_I915_G_BRIDGE 0x2580 |
#endif |
#ifndef PCI_CHIP_I915_GM |
#define PCI_CHIP_I915_GM 0x2592 |
#define PCI_CHIP_I915_GM_BRIDGE 0x2590 |
#endif |
#ifndef PCI_CHIP_E7221_G |
#define PCI_CHIP_E7221_G 0x258A |
/* Same as I915_G_BRIDGE */ |
#define PCI_CHIP_E7221_G_BRIDGE 0x2580 |
#endif |
#ifndef PCI_CHIP_I945_G |
#define PCI_CHIP_I945_G 0x2772 |
#define PCI_CHIP_I945_G_BRIDGE 0x2770 |
#endif |
#ifndef PCI_CHIP_I945_GM |
#define PCI_CHIP_I945_GM 0x27A2 |
#define PCI_CHIP_I945_GM_BRIDGE 0x27A0 |
#endif |
#ifndef PCI_CHIP_I945_GME |
#define PCI_CHIP_I945_GME 0x27AE |
#define PCI_CHIP_I945_GME_BRIDGE 0x27AC |
#endif |
#ifndef PCI_CHIP_PINEVIEW_M |
#define PCI_CHIP_PINEVIEW_M 0xA011 |
#define PCI_CHIP_PINEVIEW_M_BRIDGE 0xA010 |
#define PCI_CHIP_PINEVIEW_G 0xA001 |
#define PCI_CHIP_PINEVIEW_G_BRIDGE 0xA000 |
#endif |
#define PCI_CHIP_Q35_G 0x29B2 |
#define PCI_CHIP_G33_G 0x29C2 |
#define PCI_CHIP_Q33_G 0x29D2 |
#ifndef PCI_CHIP_G35_G |
#define PCI_CHIP_G35_G 0x2982 |
#define PCI_CHIP_G35_G_BRIDGE 0x2980 |
#endif |
#ifndef PCI_CHIP_I965_Q |
#define PCI_CHIP_I965_Q 0x2992 |
#define PCI_CHIP_I965_Q_BRIDGE 0x2990 |
#endif |
#ifndef PCI_CHIP_I965_G |
#define PCI_CHIP_I965_G 0x29A2 |
#define PCI_CHIP_I965_G_BRIDGE 0x29A0 |
#endif |
#ifndef PCI_CHIP_I946_GZ |
#define PCI_CHIP_I946_GZ 0x2972 |
#define PCI_CHIP_I946_GZ_BRIDGE 0x2970 |
#endif |
#ifndef PCI_CHIP_I965_GM |
#define PCI_CHIP_I965_GM 0x2A02 |
#define PCI_CHIP_I965_GM_BRIDGE 0x2A00 |
#endif |
#ifndef PCI_CHIP_I965_GME |
#define PCI_CHIP_I965_GME 0x2A12 |
#define PCI_CHIP_I965_GME_BRIDGE 0x2A10 |
#endif |
#ifndef PCI_CHIP_G33_G |
#define PCI_CHIP_G33_G 0x29C2 |
#define PCI_CHIP_G33_G_BRIDGE 0x29C0 |
#endif |
#ifndef PCI_CHIP_Q35_G |
#define PCI_CHIP_Q35_G 0x29B2 |
#define PCI_CHIP_Q35_G_BRIDGE 0x29B0 |
#endif |
#ifndef PCI_CHIP_Q33_G |
#define PCI_CHIP_Q33_G 0x29D2 |
#define PCI_CHIP_Q33_G_BRIDGE 0x29D0 |
#endif |
#ifndef PCI_CHIP_GM45_GM |
#define PCI_CHIP_GM45_GM 0x2A42 |
#define PCI_CHIP_GM45_BRIDGE 0x2A40 |
#endif |
#ifndef PCI_CHIP_G45_E_G |
#define PCI_CHIP_G45_E_G 0x2E02 |
#define PCI_CHIP_G45_E_G_BRIDGE 0x2E00 |
#endif |
#ifndef PCI_CHIP_G45_G |
#define PCI_CHIP_G45_G 0x2E22 |
#define PCI_CHIP_G45_G_BRIDGE 0x2E20 |
#endif |
#ifndef PCI_CHIP_Q45_G |
#define PCI_CHIP_Q45_G 0x2E12 |
#define PCI_CHIP_Q45_G_BRIDGE 0x2E10 |
#endif |
#ifndef PCI_CHIP_G41_G |
#define PCI_CHIP_G41_G 0x2E32 |
#define PCI_CHIP_G41_G_BRIDGE 0x2E30 |
#endif |
#ifndef PCI_CHIP_B43_G |
#define PCI_CHIP_B43_G 0x2E42 |
#define PCI_CHIP_B43_G_BRIDGE 0x2E40 |
#endif |
#ifndef PCI_CHIP_B43_G1 |
#define PCI_CHIP_B43_G1 0x2E92 |
#define PCI_CHIP_B43_G1_BRIDGE 0x2E90 |
#endif |
#ifndef PCI_CHIP_IRONLAKE_D_G |
#define PCI_CHIP_IRONLAKE_D_G 0x0042 |
#define PCI_CHIP_IRONLAKE_D_G_BRIDGE 0x0040 |
#endif |
#ifndef PCI_CHIP_IRONLAKE_M_G |
#define PCI_CHIP_IRONLAKE_M_G 0x0046 |
#define PCI_CHIP_IRONLAKE_M_G_BRIDGE 0x0044 |
#endif |
#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE |
#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */ |
#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 |
#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 |
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 |
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */ |
#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 |
#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 |
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 |
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */ |
#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A |
#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 |
194,88 → 66,52 |
#define PCI_CHIP_HASWELL_D_GT1 0x0402 |
#define PCI_CHIP_HASWELL_D_GT2 0x0412 |
#define PCI_CHIP_HASWELL_D_GT2_PLUS 0x0422 |
#define PCI_CHIP_HASWELL_D_GT3 0x0422 |
#define PCI_CHIP_HASWELL_M_GT1 0x0406 |
#define PCI_CHIP_HASWELL_M_GT2 0x0416 |
#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426 |
#define PCI_CHIP_HASWELL_M_GT3 0x0426 |
#define PCI_CHIP_HASWELL_S_GT1 0x040A |
#define PCI_CHIP_HASWELL_S_GT2 0x041A |
#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A |
#define PCI_CHIP_HASWELL_SDV_D_GT1 0x0C02 |
#define PCI_CHIP_HASWELL_SDV_D_GT2 0x0C12 |
#define PCI_CHIP_HASWELL_SDV_D_GT2_PLUS 0x0C22 |
#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 |
#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 |
#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26 |
#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A |
#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A |
#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A |
#define PCI_CHIP_HASWELL_S_GT3 0x042A |
#define PCI_CHIP_HASWELL_B_GT1 0x040B |
#define PCI_CHIP_HASWELL_B_GT2 0x041B |
#define PCI_CHIP_HASWELL_B_GT3 0x042B |
#define PCI_CHIP_HASWELL_E_GT1 0x040E |
#define PCI_CHIP_HASWELL_E_GT2 0x041E |
#define PCI_CHIP_HASWELL_E_GT3 0x042E |
#define PCI_CHIP_HASWELL_ULT_D_GT1 0x0A02 |
#define PCI_CHIP_HASWELL_ULT_D_GT2 0x0A12 |
#define PCI_CHIP_HASWELL_ULT_D_GT2_PLUS 0x0A22 |
#define PCI_CHIP_HASWELL_ULT_D_GT3 0x0A22 |
#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 |
#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 |
#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26 |
#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26 |
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A |
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A |
#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A |
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D12 |
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D22 |
#define PCI_CHIP_HASWELL_CRW_D_GT2_PLUS 0x0D32 |
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 |
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26 |
#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36 |
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A |
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A |
#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A |
#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A |
#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B |
#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B |
#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B |
#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E |
#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E |
#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E |
#define PCI_CHIP_VALLEYVIEW_PO 0x0f30 |
#define PCI_CHIP_VALLEYVIEW_1 0x0f31 |
#define PCI_CHIP_VALLEYVIEW_2 0x0f32 |
#define PCI_CHIP_VALLEYVIEW_3 0x0f33 |
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D02 |
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D12 |
#define PCI_CHIP_HASWELL_CRW_D_GT3 0x0D22 |
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 |
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 |
#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26 |
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A |
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A |
#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A |
#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B |
#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B |
#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B |
#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E |
#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E |
#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E |
#endif |
#define I85X_CAPID 0x44 |
#define I85X_VARIANT_MASK 0x7 |
#define I85X_VARIANT_SHIFT 5 |
#define I855_GME 0x0 |
#define I855_GM 0x4 |
#define I852_GME 0x2 |
#define I852_GM 0x5 |
#define I810_MEMBASE(p,n) (p)->regions[(n)].base_addr |
#define VENDOR_ID(p) (p)->vendor_id |
#define DEVICE_ID(p) (p)->device_id |
#define SUBVENDOR_ID(p) (p)->subvendor_id |
#define SUBSYS_ID(p) (p)->subdevice_id |
#define CHIP_REVISION(p) (p)->revision |
#define INTEL_INFO(intel) ((intel)->info) |
#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1)) |
#define IS_GEN1(intel) IS_GENx(intel, 1) |
#define IS_GEN2(intel) IS_GENx(intel, 2) |
#define IS_GEN3(intel) IS_GENx(intel, 3) |
#define IS_GEN4(intel) IS_GENx(intel, 4) |
#define IS_GEN5(intel) IS_GENx(intel, 5) |
#define IS_GEN6(intel) IS_GENx(intel, 6) |
#define IS_GEN7(intel) IS_GENx(intel, 7) |
#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075) |
/* Some chips have specific errata (or limits) that we need to workaround. */ |
#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M) |
#define IS_845G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_845_G) |
#define IS_I865G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I865_G) |
#define IS_I915G(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_G || DEVICE_ID(pI810->PciInfo) == PCI_CHIP_E7221_G) |
#define IS_I915GM(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_GM) |
#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q) |
/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */ |
#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040) |
#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060) |
struct intel_device_info { |
int gen; |
}; |
/drivers/video/Intel-2D/intel_list.h |
---|
107,6 → 107,7 |
* There are no requirements for a list head, any struct list can be a list |
* head. |
*/ |
struct list { |
struct list *next, *prev; |
}; |
/drivers/video/Intel-2D/kgem.c |
---|
0,0 → 1,5410 |
/* |
* Copyright (c) 2011 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include "sna.h" |
#include "sna_reg.h" |
#include <time.h> |
#include <errno.h> |
#include <fcntl.h> |
#ifdef HAVE_VALGRIND |
#include <valgrind.h> |
#include <memcheck.h> |
#endif |
#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM |
#include <sys/sysinfo.h> |
#endif |
#include "sna_cpuid.h" |
static struct kgem_bo * |
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); |
static struct kgem_bo * |
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); |
#define DBG_NO_HW 0 |
#define DBG_NO_TILING 0 |
#define DBG_NO_CACHE 0 |
#define DBG_NO_CACHE_LEVEL 0 |
#define DBG_NO_CPU 0 |
#define DBG_NO_CREATE2 1 |
#define DBG_NO_USERPTR 0 |
#define DBG_NO_UNSYNCHRONIZED_USERPTR 0 |
#define DBG_NO_LLC 0 |
#define DBG_NO_SEMAPHORES 0 |
#define DBG_NO_MADV 1 |
#define DBG_NO_UPLOAD_CACHE 0 |
#define DBG_NO_UPLOAD_ACTIVE 0 |
#define DBG_NO_MAP_UPLOAD 0 |
#define DBG_NO_RELAXED_FENCING 0 |
#define DBG_NO_SECURE_BATCHES 0 |
#define DBG_NO_PINNED_BATCHES 0 |
#define DBG_NO_FAST_RELOC 0 |
#define DBG_NO_HANDLE_LUT 1 |
#define DBG_NO_WT 0 |
#define DBG_DUMP 0 |
#define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */ |
#ifndef DEBUG_SYNC |
#define DEBUG_SYNC 0 |
#endif |
#if 0 |
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__)) |
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__)) |
#else |
#define ASSERT_IDLE(kgem__, handle__) |
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) |
#endif |
/* Worst case seems to be 965gm where we cannot write within a cacheline that |
* is being simultaneously being read by the GPU, or within the sampler |
* prefetch. In general, the chipsets seem to have a requirement that sampler |
* offsets be aligned to a cacheline (64 bytes). |
*/ |
#define UPLOAD_ALIGNMENT 128 |
#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE) |
#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE) |
#define MAX_GTT_VMA_CACHE 512 |
#define MAX_CPU_VMA_CACHE INT16_MAX |
#define MAP_PRESERVE_TIME 10 |
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) |
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3)) |
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2) |
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3) |
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring))) |
#define LOCAL_I915_PARAM_HAS_BLT 11 |
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12 |
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15 |
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 |
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 |
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 |
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25 |
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 |
#define LOCAL_I915_PARAM_HAS_WT 27 |
#define LOCAL_I915_EXEC_IS_PINNED (1<<10) |
#define LOCAL_I915_EXEC_NO_RELOC (1<<11) |
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) |
struct local_i915_gem_userptr { |
uint64_t user_ptr; |
uint64_t user_size; |
uint32_t flags; |
#define I915_USERPTR_READ_ONLY (1<<0) |
#define I915_USERPTR_UNSYNCHRONIZED (1<<31) |
uint32_t handle; |
}; |
#define UNCACHED 0 |
#define SNOOPED 1 |
#define DISPLAY 2 |
struct local_i915_gem_caching { |
uint32_t handle; |
uint32_t caching; |
}; |
#define LOCAL_IOCTL_I915_GEM_SET_CACHING SRV_I915_GEM_SET_CACHING |
struct local_fbinfo { |
int width; |
int height; |
int pitch; |
int tiling; |
}; |
struct kgem_buffer { |
struct kgem_bo base; |
void *mem; |
uint32_t used; |
uint32_t need_io : 1; |
uint32_t write : 2; |
uint32_t mmapped : 1; |
}; |
static struct kgem_bo *__kgem_freed_bo; |
static struct kgem_request *__kgem_freed_request; |
static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; |
/* Convenience wrapper: size of @bo in bytes (see __kgem_bo_size()). */
static inline int bytes(struct kgem_bo *bo)
{
	return __kgem_bo_size(bo);
}
#define bucket(B) (B)->size.pages.bucket |
#define num_pages(B) (B)->size.pages.count |
#ifdef DEBUG_MEMORY |
static void debug_alloc(struct kgem *kgem, size_t size) |
{ |
kgem->debug_memory.bo_allocs++; |
kgem->debug_memory.bo_bytes += size; |
} |
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo) |
{ |
debug_alloc(kgem, bytes(bo)); |
} |
#else |
#define debug_alloc(k, b) |
#define debug_alloc__bo(k, b) |
#endif |
#ifndef NDEBUG
/* Debug check: our cached tiling mode must match what the kernel reports. */
static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo)
{
    struct drm_i915_gem_get_tiling tiling;

    assert(bo);

    VG_CLEAR(tiling);
    tiling.handle = bo->handle;
    tiling.tiling_mode = -1;  /* poison so a failed ioctl is detectable */
    (void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
    assert(tiling.tiling_mode == bo->tiling);
}
#else
#define assert_tiling(kgem, bo)
#endif
static void kgem_sna_reset(struct kgem *kgem) |
{ |
struct sna *sna = container_of(kgem, struct sna, kgem); |
sna->render.reset(sna); |
sna->blt_state.fill_bo = 0; |
} |
static void kgem_sna_flush(struct kgem *kgem) |
{ |
struct sna *sna = container_of(kgem, struct sna, kgem); |
sna->render.flush(sna); |
// if (sna->render.solid_cache.dirty) |
// sna_render_flush_solid(sna); |
} |
static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) |
{ |
struct drm_i915_gem_set_tiling set_tiling; |
int ret; |
if (DBG_NO_TILING) |
return false; |
VG_CLEAR(set_tiling); |
do { |
set_tiling.handle = handle; |
set_tiling.tiling_mode = tiling; |
set_tiling.stride = stride; |
ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); |
} while (ret != 0); |
return ret == 0; |
} |
static bool gem_set_caching(int fd, uint32_t handle, int caching) |
{ |
struct local_i915_gem_caching arg; |
VG_CLEAR(arg); |
arg.handle = handle; |
arg.caching = caching; |
return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0; |
} |
static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags) |
{ |
if (flags & CREATE_NO_RETIRE) { |
DBG(("%s: not retiring per-request\n", __FUNCTION__)); |
return false; |
} |
if (!kgem->need_retire) { |
DBG(("%s: nothing to retire\n", __FUNCTION__)); |
return false; |
} |
if (kgem_retire(kgem)) |
return true; |
if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) { |
DBG(("%s: not throttling\n", __FUNCTION__)); |
return false; |
} |
kgem_throttle(kgem); |
return kgem_retire(kgem); |
} |
static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) |
{ |
struct drm_i915_gem_mmap_gtt mmap_arg; |
void *ptr; |
DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, |
bo->handle, bytes(bo))); |
assert(bo->proxy == NULL); |
assert(!bo->snoop); |
assert(kgem_bo_can_map(kgem, bo)); |
retry_gtt: |
VG_CLEAR(mmap_arg); |
mmap_arg.handle = bo->handle; |
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) { |
(void)__kgem_throttle_retire(kgem, 0); |
if (kgem_expire_cache(kgem)) |
goto retry_gtt; |
if (kgem->need_expire) { |
kgem_cleanup_cache(kgem); |
goto retry_gtt; |
} |
printf("%s: failed to retrieve GTT offset for handle=%d\n", |
__FUNCTION__, bo->handle); |
return NULL; |
} |
retry_mmap: |
ptr = (void*)(int)mmap_arg.offset; |
if (ptr == NULL) { |
ErrorF("%s: failed to mmap handle=%d, %d bytes, into GTT domain\n", |
__FUNCTION__, bo->handle, bytes(bo)); |
ptr = NULL; |
} |
return ptr; |
} |
static int __gem_write(int fd, uint32_t handle, |
int offset, int length, |
const void *src) |
{ |
struct drm_i915_gem_pwrite pwrite; |
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, |
handle, offset, length)); |
VG_CLEAR(pwrite); |
pwrite.handle = handle; |
pwrite.offset = offset; |
pwrite.size = length; |
pwrite.data_ptr = (uintptr_t)src; |
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); |
} |
static int gem_write(int fd, uint32_t handle, |
int offset, int length, |
const void *src) |
{ |
struct drm_i915_gem_pwrite pwrite; |
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, |
handle, offset, length)); |
VG_CLEAR(pwrite); |
pwrite.handle = handle; |
/* align the transfer to cachelines; fortuitously this is safe! */ |
if ((offset | length) & 63) { |
pwrite.offset = offset & ~63; |
pwrite.size = ALIGN(offset+length, 64) - pwrite.offset; |
pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset; |
} else { |
pwrite.offset = offset; |
pwrite.size = length; |
pwrite.data_ptr = (uintptr_t)src; |
} |
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); |
} |
bool __kgem_busy(struct kgem *kgem, int handle) |
{ |
struct drm_i915_gem_busy busy; |
VG_CLEAR(busy); |
busy.handle = handle; |
busy.busy = !kgem->wedged; |
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); |
DBG(("%s: handle=%d, busy=%d, wedged=%d\n", |
__FUNCTION__, handle, busy.busy, kgem->wedged)); |
return busy.busy; |
} |
/* If the bo's outstanding request has completed, clear its busy state
 * and retire finished requests; otherwise verify it is already idle. */
static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
{
    DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
         __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
         __kgem_busy(kgem, bo->handle)));
    assert(bo->exec == NULL);
    assert(list_is_empty(&bo->vma));

    if (bo->rq) {
        /* Still attached to a request: poll the kernel and retire if done. */
        if (!__kgem_busy(kgem, bo->handle)) {
            __kgem_bo_clear_busy(bo);
            kgem_retire(kgem);
        }
    } else {
        /* No request outstanding: the bo must already be idle. */
        assert(!bo->needs_flush);
        ASSERT_IDLE(kgem, bo->handle);
    }
}
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, |
const void *data, int length) |
{ |
assert(bo->refcnt); |
assert(!bo->purged); |
assert(bo->proxy == NULL); |
ASSERT_IDLE(kgem, bo->handle); |
assert(length <= bytes(bo)); |
if (gem_write(kgem->fd, bo->handle, 0, length, data)) |
return false; |
DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain)); |
if (bo->exec == NULL) { |
kgem_bo_retire(kgem, bo); |
bo->domain = DOMAIN_NONE; |
} |
bo->gtt_dirty = true; |
return true; |
} |
static uint32_t gem_create(int fd, int num_pages) |
{ |
struct drm_i915_gem_create create; |
VG_CLEAR(create); |
create.handle = 0; |
create.size = PAGE_SIZE * num_pages; |
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); |
return create.handle; |
} |
/* Mark the bo's pages as DONTNEED so the kernel may reclaim them.
 * Returns whether the pages are still resident; sets kgem->need_purge
 * when a GPU-domain bo was reaped behind our back. */
static bool
kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
    return true;
#else
    struct drm_i915_gem_madvise madv;

    assert(bo->exec == NULL);
    assert(!bo->purged);

    VG_CLEAR(madv);
    madv.handle = bo->handle;
    madv.madv = I915_MADV_DONTNEED;
    if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
        bo->purged = 1;
        kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
        return madv.retained;
    }

    /* ioctl failed: conservatively report the pages as retained. */
    return true;
#endif
}
static bool |
kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo) |
{ |
#if DBG_NO_MADV |
return true; |
#else |
struct drm_i915_gem_madvise madv; |
if (!bo->purged) |
return true; |
VG_CLEAR(madv); |
madv.handle = bo->handle; |
madv.madv = I915_MADV_DONTNEED; |
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) |
return madv.retained; |
return false; |
#endif |
} |
/* Re-mark a purgeable bo as WILLNEED before reuse.  Returns true and
 * clears bo->purged if the pages survived; flags kgem->need_purge when
 * a GPU-domain bo was lost. */
static bool
kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
    return true;
#else
    struct drm_i915_gem_madvise madv;

    assert(bo->purged);

    VG_CLEAR(madv);
    madv.handle = bo->handle;
    madv.madv = I915_MADV_WILLNEED;
    if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
        bo->purged = !madv.retained;
        kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
        return madv.retained;
    }

    return false;
#endif
}
static void gem_close(int fd, uint32_t handle) |
{ |
struct drm_gem_close close; |
VG_CLEAR(close); |
close.handle = handle; |
(void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close); |
} |
/* Find-last-set: index of the highest set bit.  The bsr path leaves
 * the result undefined for word == 0; the portable fallback returns 0
 * in that case.  Callers pass non-zero sizes. */
constant inline static unsigned long __fls(unsigned long word)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
    /* bsr yields the bit index directly. */
    asm("bsr %1,%0"
        : "=r" (word)
        : "rm" (word));
    return word;
#else
    unsigned int v = 0;
    while (word >>= 1)
        v++;
    return v;
#endif
}
/* Map a size in pages to its power-of-two cache bucket index. */
constant inline static int cache_bucket(int num_pages)
{
    return __fls(num_pages);
}
/* (Re)initialise a bo structure around a fresh GEM handle.  The caller
 * owns the single reference returned. */
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
                                      int handle, int num_pages)
{
    assert(num_pages);
    memset(bo, 0, sizeof(*bo));

    bo->refcnt = 1;
    bo->handle = handle;
    bo->target_handle = -1;  /* not yet placed in an execbuffer */
    num_pages(bo) = num_pages;
    bucket(bo) = cache_bucket(num_pages);
    bo->reusable = true;
    bo->domain = DOMAIN_CPU;  /* newly created objects start CPU coherent */
    list_init(&bo->request);
    list_init(&bo->list);
    list_init(&bo->vma);

    return bo;
}
static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages) |
{ |
struct kgem_bo *bo; |
if (__kgem_freed_bo) { |
bo = __kgem_freed_bo; |
__kgem_freed_bo = *(struct kgem_bo **)bo; |
} else { |
bo = malloc(sizeof(*bo)); |
if (bo == NULL) |
return NULL; |
} |
return __kgem_bo_init(bo, handle, num_pages); |
} |
static struct kgem_request *__kgem_request_alloc(struct kgem *kgem) |
{ |
struct kgem_request *rq; |
rq = __kgem_freed_request; |
if (rq) { |
__kgem_freed_request = *(struct kgem_request **)rq; |
} else { |
rq = malloc(sizeof(*rq)); |
if (rq == NULL) |
rq = &kgem->static_request; |
} |
list_init(&rq->buffers); |
rq->bo = NULL; |
rq->ring = 0; |
return rq; |
} |
static void __kgem_request_free(struct kgem_request *rq) |
{ |
_list_del(&rq->list); |
*(struct kgem_request **)rq = __kgem_freed_request; |
__kgem_freed_request = rq; |
} |
/* Bucketed cache list of inactive (idle, reusable) bos for this size. */
static struct list *inactive(struct kgem *kgem, int num_pages)
{
    assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
    assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
    return &kgem->inactive[cache_bucket(num_pages)];
}
/* Bucketed cache list of active bos for this size and tiling mode. */
static struct list *active(struct kgem *kgem, int num_pages, int tiling)
{
    assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
    assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
    return &kgem->active[cache_bucket(num_pages)][tiling];
}
/* Mappable (AGP/GTT) aperture size probe.  Not implemented on this
 * port: returning 0 makes kgem_init() fall back to the aperture size
 * reported by the kernel.  XXX assume that only future chipsets are
 * unknown and follow the post-gen2 PCI layout. */
static size_t
agp_aperture_size(struct pci_device *dev, unsigned gen)
{
    (void)dev;
    (void)gen;
    return 0;
}
/* Query system RAM via the KolibriOS syscall interface (int 0x40,
 * function 18, subfunction 20 — memory information written to `data`).
 * NOTE(review): assumes eax returns the size in bytes and -1 signals
 * failure — confirm against the KolibriOS sysfn documentation.
 * Returns 0 when the size cannot be determined. */
static size_t
total_ram_size(void)
{
    uint32_t data[9];
    size_t size = 0;

    asm volatile("int $0x40"
        : "=a" (size)
        : "a" (18),"b"(20), "c" (data)
        : "memory");

    /* size is unsigned; -1 converts to SIZE_MAX for the comparison. */
    return size != -1 ? size : 0;
}
/* Determine the size in bytes of the largest (last-level) CPU cache
 * by enumerating CPUID leaf 4.  Returns 0 when leaf 4 is unsupported. */
static unsigned
cpu_cache_size__cpuid4(void)
{
    /* Deterministic Cache Parameters (Function 04h)":
     * When EAX is initialized to a value of 4, the CPUID instruction
     * returns deterministic cache information in the EAX, EBX, ECX
     * and EDX registers.  This function requires ECX be initialized
     * with an index which indicates which cache to return information
     * about. The OS is expected to call this function (CPUID.4) with
     * ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches.
     * The order in which the caches are returned is not specified
     * and may change at Intel's discretion.
     *
     * Calculating the Cache Size in bytes:
     *          = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1)
     */

    unsigned int eax, ebx, ecx, edx;
    unsigned int llc_size = 0;
    int cnt = 0;

    if (__get_cpuid_max(BASIC_CPUID, NULL) < 4)
        return 0;

    do {
        unsigned associativity, line_partitions, line_size, sets;

        __cpuid_count(4, cnt++, eax, ebx, ecx, edx);

        /* EAX[4:0] == 0 terminates the enumeration. */
        if ((eax & 0x1f) == 0)
            break;

        associativity = ((ebx >> 22) & 0x3ff) + 1;
        line_partitions = ((ebx >> 12) & 0x3ff) + 1;
        line_size = (ebx & 0xfff) + 1;
        sets = ecx + 1;

        /* Caches are enumerated smallest-first, so the last computed
         * value corresponds to the largest (last-level) cache. */
        llc_size = associativity * line_partitions * line_size * sets;
    } while (1);

    return llc_size;
}
static int gem_param(struct kgem *kgem, int name) |
{ |
drm_i915_getparam_t gp; |
int v = -1; /* No param uses the sign bit, reserve it for errors */ |
VG_CLEAR(gp); |
gp.param = name; |
gp.value = &v; |
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp)) |
return -1; |
VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v))); |
return v; |
} |
/* execbuffer2 is assumed to always be available on this port. */
static bool test_has_execbuffer2(struct kgem *kgem)
{
    (void)kgem;
    return true;
}
static bool test_has_no_reloc(struct kgem *kgem) |
{ |
if (DBG_NO_FAST_RELOC) |
return false; |
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; |
} |
static bool test_has_handle_lut(struct kgem *kgem) |
{ |
if (DBG_NO_HANDLE_LUT) |
return false; |
return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; |
} |
static bool test_has_wt(struct kgem *kgem) |
{ |
if (DBG_NO_WT) |
return false; |
return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0; |
} |
static bool test_has_semaphores_enabled(struct kgem *kgem) |
{ |
bool detected = false; |
int ret; |
if (DBG_NO_SEMAPHORES) |
return false; |
ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES); |
if (ret != -1) |
return ret > 0; |
return detected; |
} |
static bool __kgem_throttle(struct kgem *kgem) |
{ |
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0) |
return false; |
return errno == EIO; |
} |
/* Decide whether this chipset is trusted enough to enable hardware
 * acceleration.  Generation numbers are octal-encoded (060 == gen6). */
static bool is_hw_supported(struct kgem *kgem,
                            struct pci_device *dev)
{
    if (DBG_NO_HW)
        return false;

    if (!test_has_execbuffer2(kgem))
        return false;

    if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
        return kgem->has_blt;

    /* Although pre-855gm the GMCH is fubar, it works mostly. So
     * let the user decide through "NoAccel" whether or not to risk
     * hw acceleration.
     */

    if (kgem->gen == 060 && dev->revision < 8) {
        /* pre-production SNB with dysfunctional BLT */
        return false;
    }

    if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
        return kgem->has_blt;

    return true;
}
static bool test_has_relaxed_fencing(struct kgem *kgem) |
{ |
if (kgem->gen < 040) { |
if (DBG_NO_RELAXED_FENCING) |
return false; |
return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0; |
} else |
return true; |
} |
/* Whether the CPU and GPU share the last-level cache.  Prefers the
 * kernel parameter; falls back to assuming LLC on gen6+ when the
 * parameter is unavailable. */
static bool test_has_llc(struct kgem *kgem)
{
    int has_llc = -1;

    if (DBG_NO_LLC)
        return false;

#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
    has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
#endif
    if (has_llc == -1) {
        DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
        has_llc = kgem->gen >= 060;
    }

    return has_llc;
}
static bool test_has_caching(struct kgem *kgem) |
{ |
uint32_t handle; |
bool ret; |
if (DBG_NO_CACHE_LEVEL) |
return false; |
/* Incoherent blt and sampler hangs the GPU */ |
if (kgem->gen == 040) |
return false; |
handle = gem_create(kgem->fd, 1); |
if (handle == 0) |
return false; |
ret = gem_set_caching(kgem->fd, handle, UNCACHED); |
gem_close(kgem->fd, handle); |
return ret; |
} |
/* Probe whether the kernel can wrap arbitrary user pages in a bo
 * (userptr).  Compiled out unless USE_USERPTR is defined. */
static bool test_has_userptr(struct kgem *kgem)
{
#if defined(USE_USERPTR)
    uint32_t handle;
    void *ptr;

    if (DBG_NO_USERPTR)
        return false;

    /* Incoherent blt and sampler hangs the GPU */
    if (kgem->gen == 040)
        return false;

    if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE))
        return false;

    /* NOTE(review): if gem_userptr fails, handle is 0 and gem_close
     * is issued on an invalid handle (a harmless ioctl error) —
     * consider guarding the close. */
    handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
    gem_close(kgem->fd, handle);
    free(ptr);

    return handle != 0;
#else
    return false;
#endif
}
/* Probe the (proposed) CREATE2 ioctl, which creates a bo with an
 * initial caching level.  Compiled out unless USE_CREATE2 is defined. */
static bool test_has_create2(struct kgem *kgem)
{
#if defined(USE_CREATE2)
    struct local_i915_gem_create2 args;

    if (DBG_NO_CREATE2)
        return false;

    memset(&args, 0, sizeof(args));
    args.size = PAGE_SIZE;
    args.caching = DISPLAY;
    if (drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0)
        gem_close(kgem->fd, args.handle);

    return args.handle != 0;
#else
    return false;
#endif
}
static bool test_has_secure_batches(struct kgem *kgem) |
{ |
if (DBG_NO_SECURE_BATCHES) |
return false; |
return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0; |
} |
static bool test_has_pinned_batches(struct kgem *kgem) |
{ |
if (DBG_NO_PINNED_BATCHES) |
return false; |
return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0; |
} |
/* Pre-allocate a small pool of pinned batch buffers (two 1-page, one
 * 2-page).  On any failure every pinned bo is destroyed and the lists
 * are repopulated with a single unpinned bo each as a fallback;
 * returns false in that case. */
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
    int count[2] = { 2, 1 };
    int size[2] = { 1, 2 };  /* pages per bo for each pool */
    int n, i;

    if (kgem->wedged)
        return true;

    for (n = 0; n < ARRAY_SIZE(count); n++) {
        for (i = 0; i < count[n]; i++) {
            struct drm_i915_gem_pin pin;
            struct kgem_bo *bo;

            VG_CLEAR(pin);

            pin.handle = gem_create(kgem->fd, size[n]);
            if (pin.handle == 0)
                goto err;

            DBG(("%s: new handle=%d, num_pages=%d\n",
                 __FUNCTION__, pin.handle, size[n]));

            bo = __kgem_bo_alloc(pin.handle, size[n]);
            if (bo == NULL) {
                gem_close(kgem->fd, pin.handle);
                goto err;
            }

            pin.alignment = 0;
            if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
                gem_close(kgem->fd, pin.handle);
                goto err;
            }
            /* A pinned bo's offset is stable for its lifetime. */
            bo->presumed_offset = pin.offset;
            debug_alloc__bo(kgem, bo);
            list_add(&bo->list, &kgem->pinned_batches[n]);
        }
    }

    return true;

err:
    /* Destroy everything pinned so far... */
    for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
        while (!list_is_empty(&kgem->pinned_batches[n])) {
            kgem_bo_destroy(kgem,
                            list_first_entry(&kgem->pinned_batches[n],
                                             struct kgem_bo, list));
        }
    }

    /* For simplicity populate the lists with a single unpinned bo */
    for (n = 0; n < ARRAY_SIZE(count); n++) {
        struct kgem_bo *bo;
        uint32_t handle;

        handle = gem_create(kgem->fd, size[n]);
        if (handle == 0)
            break;

        bo = __kgem_bo_alloc(handle, size[n]);
        if (bo == NULL) {
            gem_close(kgem->fd, handle);
            break;
        }

        debug_alloc__bo(kgem, bo);
        list_add(&bo->list, &kgem->pinned_batches[n]);
    }
    return false;
}
/* One-time initialisation of the GEM layer: zero the state, probe the
 * kernel for optional features, detect broken hardware, and derive the
 * batch/cache/aperture sizing limits used by the rest of the driver.
 * Generation numbers are octal-encoded (e.g. 060 == gen6). */
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
    struct drm_i915_gem_get_aperture aperture;
    size_t totalram;
    unsigned half_gpu_max;
    unsigned int i, j;

    DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));

    memset(kgem, 0, sizeof(*kgem));

    kgem->fd = fd;
    kgem->gen = gen;

    /* Initialise every bo tracking/cache list. */
    list_init(&kgem->requests[0]);
    list_init(&kgem->requests[1]);
    list_init(&kgem->batch_buffers);
    list_init(&kgem->active_buffers);
    list_init(&kgem->flushing);
    list_init(&kgem->large);
    list_init(&kgem->large_inactive);
    list_init(&kgem->snoop);
    list_init(&kgem->scanout);
    for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
        list_init(&kgem->pinned_batches[i]);
    for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
        list_init(&kgem->inactive[i]);
    for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
        for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
            list_init(&kgem->active[i][j]);
    }
    for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
        for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
            list_init(&kgem->vma[i].inactive[j]);
    }
    /* The vma counters start negative and count up towards zero
     * before cached mappings begin to be evicted. */
    kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
    kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;

    /* Probe the kernel for optional features. */
    kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
    DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
         kgem->has_blt));

    kgem->has_relaxed_delta =
        gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
    DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
         kgem->has_relaxed_delta));

    kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
    DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
         kgem->has_relaxed_fencing));

    kgem->has_llc = test_has_llc(kgem);
    DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
         kgem->has_llc));

    kgem->has_wt = test_has_wt(kgem);
    DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
         kgem->has_wt));

    kgem->has_caching = test_has_caching(kgem);
    DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
         kgem->has_caching));

    kgem->has_userptr = test_has_userptr(kgem);
    DBG(("%s: has userptr? %d\n", __FUNCTION__,
         kgem->has_userptr));

    kgem->has_create2 = test_has_create2(kgem);
    /* NOTE(review): create2 force-disabled on this port despite the
     * probe above — confirm this is intentional. */
    kgem->has_create2 = 0;
    DBG(("%s: has create2? %d\n", __FUNCTION__,
         kgem->has_create2));

    kgem->has_no_reloc = test_has_no_reloc(kgem);
    DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
         kgem->has_no_reloc));

    kgem->has_handle_lut = test_has_handle_lut(kgem);
    /* NOTE(review): handle-LUT force-disabled on this port despite the
     * probe above — confirm this is intentional. */
    kgem->has_handle_lut = 0;
    DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
         kgem->has_handle_lut));

    /* Semaphores only matter when there is a second (BLT) ring. */
    kgem->has_semaphores = false;
    if (kgem->has_blt && test_has_semaphores_enabled(kgem))
        kgem->has_semaphores = true;
    DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
         kgem->has_semaphores));

    kgem->can_blt_cpu = gen >= 030;
    DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
         kgem->can_blt_cpu));

    kgem->has_secure_batches = test_has_secure_batches(kgem);
    DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
         kgem->has_secure_batches));

    kgem->has_pinned_batches = test_has_pinned_batches(kgem);
    DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
         kgem->has_pinned_batches));

    /* Wedge (disable acceleration) on broken or hung hardware. */
    if (!is_hw_supported(kgem, dev)) {
        printf("Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
        kgem->wedged = 1;
    } else if (__kgem_throttle(kgem)) {
        printf("Detected a hung GPU, disabling acceleration.\n");
        kgem->wedged = 1;
    }

    /* Per-generation batch-size quirks. */
    kgem->batch_size = ARRAY_SIZE(kgem->batch);
    if (gen == 020 && !kgem->has_pinned_batches)
        /* Limited to what we can pin */
        kgem->batch_size = 4*1024;
    if (gen == 022)
        /* 865g cannot handle a batch spanning multiple pages */
        kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
    if ((gen >> 3) == 7)
        kgem->batch_size = 16*1024;
    if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
        kgem->batch_size = 4*1024;

    if (!kgem_init_pinned_batches(kgem) && gen == 020) {
        printf("Unable to reserve memory for GPU, disabling acceleration.\n");
        kgem->wedged = 1;
    }

    DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
         kgem->batch_size));

    kgem->min_alignment = 4;
    if (gen < 040)
        kgem->min_alignment = 64;

    /* >> 13: bytes to pages (>>12) and then halved. */
    kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
    DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
         __FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages));

    kgem->next_request = __kgem_request_alloc(kgem);

    DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
         !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching),
         kgem->has_llc, kgem->has_caching, kgem->has_userptr));

    /* Query the aperture; fall back to 64MiB if the kernel reports nothing. */
    VG_CLEAR(aperture);
    aperture.aper_size = 0;
    (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
    if (aperture.aper_size == 0)
        aperture.aper_size = 64*1024*1024;

    DBG(("%s: aperture size %lld, available now %lld\n",
         __FUNCTION__,
         (long long)aperture.aper_size,
         (long long)aperture.aper_available_size));

    kgem->aperture_total = aperture.aper_size;
    kgem->aperture_high = aperture.aper_size * 3/4;
    kgem->aperture_low = aperture.aper_size * 1/3;
    if (gen < 033) {
        /* Severe alignment penalties */
        kgem->aperture_high /= 2;
        kgem->aperture_low /= 2;
    }
    DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
         kgem->aperture_low, kgem->aperture_low / (1024*1024),
         kgem->aperture_high, kgem->aperture_high / (1024*1024)));

    kgem->aperture_mappable = agp_aperture_size(dev, gen);
    if (kgem->aperture_mappable == 0 ||
        kgem->aperture_mappable > aperture.aper_size)
        kgem->aperture_mappable = aperture.aper_size;
    DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
         kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));

    /* Upload buffer size: a power of two, scaled with the mappable
     * aperture but capped at half the CPU cache. */
    kgem->buffer_size = 64 * 1024;
    while (kgem->buffer_size < kgem->aperture_mappable >> 10)
        kgem->buffer_size *= 2;
    if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
        kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
    kgem->buffer_size = 1 << __fls(kgem->buffer_size);
    DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
         kgem->buffer_size, kgem->buffer_size / 1024));
    assert(kgem->buffer_size);

    /* Derive object-size limits from the aperture and system RAM. */
    kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
    kgem->max_gpu_size = kgem->max_object_size;
    if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
        kgem->max_gpu_size = MAX_CACHE_SIZE;

    totalram = total_ram_size();
    if (totalram == 0) {
        DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
             __FUNCTION__));
        totalram = kgem->aperture_total;
    }
    DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
    if (kgem->max_object_size > totalram / 2)
        kgem->max_object_size = totalram / 2;
    if (kgem->max_gpu_size > totalram / 4)
        kgem->max_gpu_size = totalram / 4;

    kgem->max_cpu_size = kgem->max_object_size;

    /* Clamp the tiled upload/copy staging sizes against the GPU
     * limit and the aperture watermarks. */
    half_gpu_max = kgem->max_gpu_size / 2;
    kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
    if (kgem->max_copy_tile_size > half_gpu_max)
        kgem->max_copy_tile_size = half_gpu_max;

    if (kgem->has_llc)
        kgem->max_upload_tile_size = kgem->max_copy_tile_size;
    else
        kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
    if (kgem->max_upload_tile_size > half_gpu_max)
        kgem->max_upload_tile_size = half_gpu_max;
    if (kgem->max_upload_tile_size > kgem->aperture_high/2)
        kgem->max_upload_tile_size = kgem->aperture_high/2;
    if (kgem->max_upload_tile_size > kgem->aperture_low)
        kgem->max_upload_tile_size = kgem->aperture_low;
    if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
        kgem->max_upload_tile_size = 16*PAGE_SIZE;

    kgem->large_object_size = MAX_CACHE_SIZE;
    if (kgem->large_object_size > half_gpu_max)
        kgem->large_object_size = half_gpu_max;
    if (kgem->max_copy_tile_size > kgem->aperture_high/2)
        kgem->max_copy_tile_size = kgem->aperture_high/2;
    if (kgem->max_copy_tile_size > kgem->aperture_low)
        kgem->max_copy_tile_size = kgem->aperture_low;
    if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
        kgem->max_copy_tile_size = 16*PAGE_SIZE;

    /* CPU bos are only usable with some form of coherency. */
    if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
        if (kgem->large_object_size > kgem->max_cpu_size)
            kgem->large_object_size = kgem->max_cpu_size;
    } else
        kgem->max_cpu_size = 0;
    if (DBG_NO_CPU)
        kgem->max_cpu_size = 0;

    DBG(("%s: maximum object size=%d\n",
         __FUNCTION__, kgem->max_object_size));
    DBG(("%s: large object thresold=%d\n",
         __FUNCTION__, kgem->large_object_size));
    DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
         __FUNCTION__,
         kgem->max_gpu_size, kgem->max_cpu_size,
         kgem->max_upload_tile_size, kgem->max_copy_tile_size));

    /* Convert the aperture thresholds to pages */
    kgem->aperture_low /= PAGE_SIZE;
    kgem->aperture_high /= PAGE_SIZE;

    /* Keep two fences spare for the kernel/other clients. */
    kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
    if ((int)kgem->fence_max < 0)
        kgem->fence_max = 5; /* minimum safe value for all hw */
    DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));

    /* Default execbuffer flags for every batch we submit. */
    kgem->batch_flags_base = 0;
    if (kgem->has_no_reloc)
        kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
    if (kgem->has_handle_lut)
        kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
    if (kgem->has_pinned_batches)
        kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
}
/* XXX hopefully a good approximation */ |
uint32_t kgem_get_unique_id(struct kgem *kgem) |
{ |
uint32_t id; |
id = ++kgem->unique_id; |
if (id == 0) |
id = ++kgem->unique_id; |
return id; |
} |
inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags) |
{ |
if (flags & CREATE_PRIME) |
return 256; |
if (flags & CREATE_SCANOUT) |
return 64; |
return kgem->min_alignment; |
} |
void kgem_get_tile_size(struct kgem *kgem, int tiling, |
int *tile_width, int *tile_height, int *tile_size) |
{ |
if (kgem->gen <= 030) { |
if (tiling) { |
if (kgem->gen < 030) { |
*tile_width = 128; |
*tile_height = 16; |
*tile_size = 2048; |
} else { |
*tile_width = 512; |
*tile_height = 8; |
*tile_size = 4096; |
} |
} else { |
*tile_width = 1; |
*tile_height = 1; |
*tile_size = 1; |
} |
} else switch (tiling) { |
default: |
case I915_TILING_NONE: |
*tile_width = 1; |
*tile_height = 1; |
*tile_size = 1; |
break; |
case I915_TILING_X: |
*tile_width = 512; |
*tile_height = 8; |
*tile_size = 4096; |
break; |
case I915_TILING_Y: |
*tile_width = 128; |
*tile_height = 32; |
*tile_size = 4096; |
break; |
} |
} |
/* Compute the allocation size (in bytes) and pitch for a surface of
 * the given geometry and tiling, honouring per-generation tile sizes
 * and pre-gen4 fence constraints.  Returns 0 when the surface is too
 * wide for the blitter. */
uint32_t kgem_surface_size(struct kgem *kgem,
                           bool relaxed_fencing,
                           unsigned flags,
                           uint32_t width,
                           uint32_t height,
                           uint32_t bpp,
                           uint32_t tiling,
                           uint32_t *pitch)
{
    uint32_t tile_width, tile_height;
    uint32_t size;

    assert(width <= MAXSHORT);
    assert(height <= MAXSHORT);
    assert(bpp >= 8);

    if (kgem->gen <= 030) {
        if (tiling) {
            /* gen2/3 tile geometry (doubled height for fencing). */
            if (kgem->gen < 030) {
                tile_width = 128;
                tile_height = 32;
            } else {
                tile_width = 512;
                tile_height = 16;
            }
        } else {
            /* Linear: align the pitch, use 2-row granularity. */
            tile_width = 2 * bpp >> 3;
            tile_width = ALIGN(tile_width,
                               kgem_pitch_alignment(kgem, flags));
            tile_height = 2;
        }
    } else switch (tiling) {
    default:
    case I915_TILING_NONE:
        tile_width = 2 * bpp >> 3;
        tile_width = ALIGN(tile_width,
                           kgem_pitch_alignment(kgem, flags));
        tile_height = 2;
        break;

        /* XXX align to an even tile row */
    case I915_TILING_X:
        tile_width = 512;
        tile_height = 16;
        break;
    case I915_TILING_Y:
        tile_width = 128;
        tile_height = 64;
        break;
    }

    *pitch = ALIGN(width * bpp / 8, tile_width);
    height = ALIGN(height, tile_height);
    if (kgem->gen >= 040)
        return PAGE_ALIGN(*pitch * height);

    /* If it is too wide for the blitter, don't even bother.  */
    if (tiling != I915_TILING_NONE) {
        if (*pitch > 8192)
            return 0;

        /* Pre-gen4 tiled pitches must be a power of two. */
        for (size = tile_width; size < *pitch; size <<= 1)
            ;
        *pitch = size;
    } else {
        if (*pitch >= 32768)
            return 0;
    }

    size = *pitch * height;
    if (relaxed_fencing || tiling == I915_TILING_NONE)
        return PAGE_ALIGN(size);

    /*  We need to allocate a pot fence region for a tiled buffer. */
    if (kgem->gen < 030)
        tile_width = 512 * 1024;
    else
        tile_width = 1024 * 1024;
    while (tile_width < size)
        tile_width *= 2;
    return tile_width;
}
static uint32_t kgem_aligned_height(struct kgem *kgem, |
uint32_t height, uint32_t tiling) |
{ |
uint32_t tile_height; |
if (kgem->gen <= 030) { |
tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1; |
} else switch (tiling) { |
/* XXX align to an even tile row */ |
default: |
case I915_TILING_NONE: |
tile_height = 1; |
break; |
case I915_TILING_X: |
tile_height = 16; |
break; |
case I915_TILING_Y: |
tile_height = 64; |
break; |
} |
return ALIGN(height, tile_height); |
} |
/* Append the bo to the current execbuffer object list and account for
 * its aperture usage.  Returns the (zeroed) exec entry for the caller
 * to fill in further. */
static struct drm_i915_gem_exec_object2 *
kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
{
    struct drm_i915_gem_exec_object2 *exec;

    DBG(("%s: handle=%d, index=%d\n",
         __FUNCTION__, bo->handle, kgem->nexec));

    assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
    /* With a handle-LUT the kernel indexes by exec slot, otherwise by
     * the raw GEM handle. */
    bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
    exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
    exec->handle = bo->handle;
    exec->offset = bo->presumed_offset;

    kgem->aperture += num_pages(bo);

    return exec;
}
/* Attach a bo to the batch being built: give it an exec slot, tie it
 * to the next request (tagged with the active ring) and propagate its
 * flush requirement to the batch. */
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
    bo->exec = kgem_add_handle(kgem, bo);
    bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);

    list_move_tail(&bo->request, &kgem->next_request->buffers);

    /* XXX is it worth working around gcc here? */
    kgem->flush |= bo->flush;
}
static uint32_t kgem_end_batch(struct kgem *kgem) |
{ |
kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END; |
if (kgem->nbatch & 1) |
kgem->batch[kgem->nbatch++] = MI_NOOP; |
return kgem->nbatch; |
} |
/* Patch relocations that point at the batch buffer itself (recorded
 * with target_handle == ~0U) now that the batch bo is known.  The
 * first 256 self-relocs are indexed directly; if that table filled
 * up, scan the remaining relocation entries for the ~0U marker. */
static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
{
    int n;

    assert(kgem->nreloc__self <= 256);
    if (kgem->nreloc__self == 0)
        return;

    for (n = 0; n < kgem->nreloc__self; n++) {
        int i = kgem->reloc__self[n];
        assert(kgem->reloc[i].target_handle == ~0U);
        kgem->reloc[i].target_handle = bo->target_handle;
        kgem->reloc[i].presumed_offset = bo->presumed_offset;
        /* Also rewrite the batch dword with the presumed address. */
        kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
            kgem->reloc[i].delta + bo->presumed_offset;
    }

    if (n == 256) {
        /* Overflow: fix up any self-relocs past the indexed table. */
        for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
            if (kgem->reloc[n].target_handle == ~0U) {
                kgem->reloc[n].target_handle = bo->target_handle;
                kgem->reloc[n].presumed_offset = bo->presumed_offset;
                kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
                    kgem->reloc[n].delta + bo->presumed_offset;
            }
        }
    }
}
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo) |
{ |
struct kgem_bo_binding *b; |
b = bo->binding.next; |
while (b) { |
struct kgem_bo_binding *next = b->next; |
free (b); |
b = next; |
} |
} |
/* Release the bo's cached GTT or CPU mapping (never a user-pointer
 * map) and drop it from the corresponding vma cache list. */
static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
{
    /* Low bits of bo->map encode the mapping type: 1 => CPU, 0 => GTT. */
    int type = IS_CPU_MAP(bo->map);

    assert(!IS_USER_MAP(bo->map));

    DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
         __FUNCTION__, type ? "CPU" : "GTT",
         bo->handle, kgem->vma[type].count));

    VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
    user_free(MAP(bo->map));
    bo->map = NULL;

    if (!list_is_empty(&bo->vma)) {
        list_del(&bo->vma);
        kgem->vma[type].count--;
    }
}
/* Final destruction of a bo: drop cached bindings and mappings, close
 * the GEM handle, and recycle the struct itself (plain bo structs are
 * kept on a singly-linked free list for cheap reallocation).
 */
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(bo->refcnt == 0);
	assert(bo->proxy == NULL);
	assert(bo->exec == NULL);
	assert(!bo->snoop || bo->rq == NULL);

#ifdef DEBUG_MEMORY
	kgem->debug_memory.bo_allocs--;
	kgem->debug_memory.bo_bytes -= bytes(bo);
#endif

	kgem_bo_binding_free(kgem, bo);

	if (IS_USER_MAP(bo->map)) {
		assert(bo->rq == NULL);
		assert(!__kgem_busy(kgem, bo->handle));
		assert(MAP(bo->map) != bo || bo->io || bo->flush);
		/* For a plain snooped upload the userptr backing store was
		 * allocated separately from the bo and must be freed here.
		 */
		if (!(bo->io || bo->flush)) {
			DBG(("%s: freeing snooped base\n", __FUNCTION__));
			assert(bo != MAP(bo->map));
			free(MAP(bo->map));
		}
		bo->map = NULL;
	}
	if (bo->map)
		kgem_bo_release_map(kgem, bo);
	assert(list_is_empty(&bo->vma));
	assert(bo->map == NULL);

	_list_del(&bo->list);
	_list_del(&bo->request);

	gem_close(kgem->fd, bo->handle);

	/* Recycle the allocation: reuse the first pointer-sized field of
	 * the struct as the free-list link.  io bo are really the larger
	 * struct kgem_buffer and cannot be recycled this way.
	 */
	if (!bo->io) {
		*(struct kgem_bo **)bo = __kgem_freed_bo;
		__kgem_freed_bo = bo;
	} else
		free(bo);
}
/* Park an idle, reusable bo on the appropriate inactive cache list so a
 * later allocation of similar size can reuse it.  Oversized bo go to a
 * single large_inactive list; others are bucketed by size.  Cached
 * mappings are either kept (and tracked in the vma cache) or dropped if
 * a GTT map is no longer usable.
 */
inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
					    struct kgem_bo *bo)
{
	DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));

	assert(bo->refcnt == 0);
	assert(bo->reusable);
	assert(bo->rq == NULL);
	assert(bo->exec == NULL);
	assert(bo->domain != DOMAIN_GPU);
	assert(!bo->proxy);
	assert(!bo->io);
	assert(!bo->scanout);
	assert(!bo->snoop);
	assert(!bo->flush);
	assert(!bo->needs_flush);
	assert(list_is_empty(&bo->vma));
	assert_tiling(kgem, bo);
	ASSERT_IDLE(kgem, bo->handle);

	kgem->need_expire = true;

	/* Buffers too big for the size buckets live on their own list */
	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
		list_move(&bo->list, &kgem->large_inactive);
		return;
	}

	assert(bo->flush == false);
	list_move(&bo->list, &kgem->inactive[bucket(bo)]);
	if (bo->map) {
		int type = IS_CPU_MAP(bo->map);
		/* Drop a GTT mapping that is no longer mappable through
		 * the aperture; otherwise keep the map cached.
		 */
		if (bucket(bo) >= NUM_CACHE_BUCKETS ||
		    (!type && !__kgem_bo_is_mappable(kgem, bo))) {
//			munmap(MAP(bo->map), bytes(bo));
			bo->map = NULL;
		}
		if (bo->map) {
			list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
			kgem->vma[type].count++;
		}
	}
}
/* An io bo is embedded in the larger struct kgem_buffer; before it can
 * enter the ordinary caches, transfer the GEM handle into a minimal
 * struct kgem_bo.  If the allocation fails, the bo is simply marked
 * non-reusable so it will be freed instead of cached.  Returns the bo
 * to use from now on (possibly a new allocation).
 */
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
	struct kgem_bo *base;

	if (!bo->io)
		return bo;

	assert(!bo->snoop);
	base = malloc(sizeof(*base));
	if (base) {
		DBG(("%s: transferring io handle=%d to bo\n",
		     __FUNCTION__, bo->handle));

		/* transfer the handle to a minimum bo */
		memcpy(base, bo, sizeof(*base));
		base->io = false;
		/* Re-home the embedded list links onto the new struct */
		list_init(&base->list);
		list_replace(&bo->request, &base->request);
		list_replace(&bo->vma, &base->vma);
		free(bo);
		bo = base;
	} else
		bo->reusable = false;

	return bo;
}
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem, |
struct kgem_bo *bo) |
{ |
DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle)); |
list_del(&bo->list); |
assert(bo->rq == NULL); |
assert(bo->exec == NULL); |
if (bo->map) { |
assert(!list_is_empty(&bo->vma)); |
list_del(&bo->vma); |
kgem->vma[IS_CPU_MAP(bo->map)].count--; |
} |
} |
inline static void kgem_bo_remove_from_active(struct kgem *kgem, |
struct kgem_bo *bo) |
{ |
DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle)); |
list_del(&bo->list); |
assert(bo->rq != NULL); |
if (bo->rq == (void *)kgem) |
list_del(&bo->request); |
assert(list_is_empty(&bo->vma)); |
} |
static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo) |
{ |
struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy; |
DBG(("%s: size=%d, offset=%d, parent used=%d\n", |
__FUNCTION__, bo->size.bytes, bo->delta, io->used)); |
if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used) |
io->used = bo->delta; |
} |
static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo) |
{ |
assert(bo->refcnt == 0); |
assert(bo->scanout); |
assert(bo->delta); |
assert(!bo->flush); |
assert(!bo->snoop); |
assert(!bo->io); |
if (bo->purged) { |
DBG(("%s: discarding purged scanout - external name?\n", |
__FUNCTION__)); |
kgem_bo_free(kgem, bo); |
return; |
} |
DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n", |
__FUNCTION__, bo->handle, bo->delta, bo->rq != NULL)); |
if (bo->rq) |
list_move_tail(&bo->list, &kgem->scanout); |
else |
list_move(&bo->list, &kgem->scanout); |
} |
static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) |
{ |
assert(bo->reusable); |
assert(!bo->flush); |
assert(!bo->needs_flush); |
assert(bo->refcnt == 0); |
assert(bo->exec == NULL); |
if (num_pages(bo) > kgem->max_cpu_size >> 13) { |
DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n", |
__FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13)); |
kgem_bo_free(kgem, bo); |
return; |
} |
assert(bo->tiling == I915_TILING_NONE); |
assert(bo->rq == NULL); |
DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle)); |
list_add(&bo->list, &kgem->snoop); |
} |
/* Look up the snoop cache for an idle CPU bo of at least @num_pages.
 * Prefers a close fit: a bo more than twice the requested size is only
 * remembered as a fallback ("first") and used if nothing tighter is
 * found.  Returns NULL when no suitable bo exists (and nothing could be
 * retired to produce one).
 */
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
	struct kgem_bo *bo, *first = NULL;

	DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));

	/* Snooped bo only exist with cacheable or userptr support */
	if ((kgem->has_caching | kgem->has_userptr) == 0)
		return NULL;

	if (list_is_empty(&kgem->snoop)) {
		DBG(("%s: inactive and cache empty\n", __FUNCTION__));
		if (!__kgem_throttle_retire(kgem, flags)) {
			DBG(("%s: nothing retired\n", __FUNCTION__));
			return NULL;
		}
	}

	list_for_each_entry(bo, &kgem->snoop, list) {
		assert(bo->refcnt == 0);
		assert(bo->snoop);
		assert(!bo->scanout);
		assert(!bo->purged);
		assert(bo->proxy == NULL);
		assert(bo->tiling == I915_TILING_NONE);
		assert(bo->rq == NULL);
		assert(bo->exec == NULL);

		if (num_pages > num_pages(bo))
			continue;

		/* Too generous a fit: keep as fallback, look for better */
		if (num_pages(bo) > 2*num_pages) {
			if (first == NULL)
				first = bo;
			continue;
		}

		list_del(&bo->list);
		bo->pitch = 0;
		bo->delta = 0;

		DBG(("  %s: found handle=%d (num_pages=%d) in snoop cache\n",
		     __FUNCTION__, bo->handle, num_pages(bo)));
		return bo;
	}

	if (first) {
		list_del(&first->list);
		first->pitch = 0;
		first->delta = 0;

		DBG(("  %s: found handle=%d (num_pages=%d) in snoop cache\n",
		     __FUNCTION__, first->handle, num_pages(first)));
		return first;
	}

	return NULL;
}
/* If @bo is the only bo referenced by the batch under construction,
 * discard the pending operations entirely by resetting the batch.
 * The temporary refcnt bump keeps kgem_reset() from freeing the bo
 * while it is being decoupled from the request.
 */
void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo)
{
	if (kgem->nexec != 1 || bo->exec == NULL)
		return;

	DBG(("%s: only handle in batch, discarding last operations for handle=%d\n",
	     __FUNCTION__, bo->handle));

	assert(bo->exec == &kgem->exec[0]);
	assert(kgem->exec[0].handle == bo->handle);
	assert(RQ(bo->rq) == kgem->next_request);

	bo->refcnt++;
	kgem_reset(kgem);
	bo->refcnt--;
}
/* Route a bo whose last reference was dropped to the right place:
 * snoop cache, scanout cache, active list (still busy on the GPU),
 * inactive cache, or outright destruction when it cannot be reused.
 */
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));

	assert(list_is_empty(&bo->list));
	assert(bo->refcnt == 0);
	assert(!bo->purged || !bo->reusable);
	assert(bo->proxy == NULL);
	assert_tiling(kgem, bo);

	bo->binding.offset = 0;

	if (DBG_NO_CACHE)
		goto destroy;

	if (bo->snoop && !bo->flush) {
		DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
		assert(bo->reusable);
		assert(list_is_empty(&bo->list));
		/* Re-check with the kernel whether the bo is really still
		 * busy before deciding it cannot be cached yet.
		 */
		if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
			__kgem_bo_clear_busy(bo);
		if (bo->rq == NULL)
			kgem_bo_move_to_snoop(kgem, bo);
		return;
	}
	if (!IS_USER_MAP(bo->map))
		bo->flush = false;

	if (bo->scanout) {
		kgem_bo_move_to_scanout(kgem, bo);
		return;
	}

	if (bo->io)
		bo = kgem_bo_replace_io(bo);
	if (!bo->reusable) {
		DBG(("%s: handle=%d, not reusable\n",
		     __FUNCTION__, bo->handle));
		goto destroy;
	}

	/* A CPU mapping of a non-LLC bo outside the CPU domain would need
	 * clflushing on reuse; cheaper to drop the cached map now.
	 */
	if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
		kgem_bo_release_map(kgem, bo);

	assert(list_is_empty(&bo->vma));
	assert(list_is_empty(&bo->list));
	assert(bo->flush == false);
	assert(bo->snoop == false);
	assert(bo->io == false);
	assert(bo->scanout == false);

	kgem_bo_undo(kgem, bo);
	assert(bo->refcnt == 0);

	if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
		__kgem_bo_clear_busy(bo);

	if (bo->rq) {
		struct list *cache;

		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
		if (bucket(bo) < NUM_CACHE_BUCKETS)
			cache = &kgem->active[bucket(bo)][bo->tiling];
		else
			cache = &kgem->large;
		list_add(&bo->list, cache);
		return;
	}

	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->request));

	if (!IS_CPU_MAP(bo->map)) {
		/* Only cache bo the kernel agrees to treat as purgeable */
		if (!kgem_bo_set_purgeable(kgem, bo))
			goto destroy;

		if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
			goto destroy;

		DBG(("%s: handle=%d, purged\n",
		     __FUNCTION__, bo->handle));
	}

	kgem_bo_move_to_inactive(kgem, bo);
	return;

destroy:
	if (!bo->exec)
		kgem_bo_free(kgem, bo);
}
static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo) |
{ |
assert(bo->refcnt); |
if (--bo->refcnt == 0) |
__kgem_bo_destroy(kgem, bo); |
} |
/* Detach every cached proxy still pointing into this upload buffer.
 * Each proxy's map points at storage whose first word records the proxy
 * itself (asserted below); clear that back-pointer and drop the proxy's
 * reference.
 */
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
	while (!list_is_empty(&bo->base.vma)) {
		struct kgem_bo *cached;

		cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
		assert(cached->proxy == &bo->base);
		list_del(&cached->vma);

		assert(*(struct kgem_bo **)cached->map == cached);
		*(struct kgem_bo **)cached->map = NULL;
		cached->map = NULL;

		kgem_bo_destroy(kgem, cached);
	}
}
/* Release upload buffers whose GPU work has completed.  Walks the
 * active_buffers list from the oldest end and stops at the first buffer
 * still attached to a request.  Returns true if anything was released.
 */
static bool kgem_retire__buffers(struct kgem *kgem)
{
	bool retired = false;

	while (!list_is_empty(&kgem->active_buffers)) {
		struct kgem_buffer *bo =
			list_last_entry(&kgem->active_buffers,
					struct kgem_buffer,
					base.list);

		/* Oldest entry still busy -- everything newer is too */
		if (bo->base.rq)
			break;

		DBG(("%s: releasing upload cache for handle=%d? %d\n",
		     __FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
		list_del(&bo->base.list);
		kgem_buffer_release(kgem, bo);
		kgem_bo_unref(kgem, &bo->base);
		retired = true;
	}

	return retired;
}
/* Retire bo parked on the flushing list (marked with rq == kgem): bo
 * that were still busy after their request completed.  Stops at the
 * first bo the kernel still reports busy.  Unreferenced bo are routed
 * to the appropriate cache or freed.  Returns true if any bo reached
 * the inactive cache.
 */
static bool kgem_retire__flushing(struct kgem *kgem)
{
	struct kgem_bo *bo, *next;
	bool retired = false;

	list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
		assert(bo->rq == (void *)kgem);
		assert(bo->exec == NULL);

		if (__kgem_busy(kgem, bo->handle))
			break;

		__kgem_bo_clear_busy(bo);

		if (bo->refcnt)
			continue;

		/* NB: kgem_bo_replace_io may substitute a new bo struct */
		if (bo->snoop) {
			kgem_bo_move_to_snoop(kgem, bo);
		} else if (bo->scanout) {
			kgem_bo_move_to_scanout(kgem, bo);
		} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
			   kgem_bo_set_purgeable(kgem, bo)) {
			kgem_bo_move_to_inactive(kgem, bo);
			retired = true;
		} else
			kgem_bo_free(kgem, bo);
	}
#if HAS_DEBUG_FULL
	{
		int count = 0;
		list_for_each_entry(bo, &kgem->flushing, request)
			count++;
		ErrorF("%s: %d bo on flushing list\n", __FUNCTION__, count);
	}
#endif

	kgem->need_retire |= !list_is_empty(&kgem->flushing);

	return retired;
}
/* Process a completed request: decouple every bo it holds, parking
 * still-dirty bo on the flushing list, caching or freeing unreferenced
 * ones, and finally retire the batch bo and free the request itself.
 * Returns true if any bo reached the inactive cache.
 */
static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
	bool retired = false;

	DBG(("%s: request %d complete\n",
	     __FUNCTION__, rq->bo->handle));

	while (!list_is_empty(&rq->buffers)) {
		struct kgem_bo *bo;

		bo = list_first_entry(&rq->buffers,
				      struct kgem_bo,
				      request);

		assert(RQ(bo->rq) == rq);
		assert(bo->exec == NULL);
		assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);

		list_del(&bo->request);

		/* Re-query the kernel: the write may have landed already */
		if (bo->needs_flush)
			bo->needs_flush = __kgem_busy(kgem, bo->handle);
		if (bo->needs_flush) {
			DBG(("%s: moving %d to flushing\n",
			     __FUNCTION__, bo->handle));
			list_add(&bo->request, &kgem->flushing);
			/* rq == kgem marks "on the flushing list" */
			bo->rq = (void *)kgem;
			continue;
		}

		bo->domain = DOMAIN_NONE;
		bo->rq = NULL;
		if (bo->refcnt)
			continue;

		if (bo->snoop) {
			kgem_bo_move_to_snoop(kgem, bo);
		} else if (bo->scanout) {
			kgem_bo_move_to_scanout(kgem, bo);
		} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
			   kgem_bo_set_purgeable(kgem, bo)) {
			kgem_bo_move_to_inactive(kgem, bo);
			retired = true;
		} else {
			DBG(("%s: closing %d\n",
			     __FUNCTION__, bo->handle));
			kgem_bo_free(kgem, bo);
		}
	}

	assert(rq->bo->rq == NULL);
	assert(list_is_empty(&rq->bo->request));

	/* Drop the request's reference on its batch bo */
	if (--rq->bo->refcnt == 0) {
		if (kgem_bo_set_purgeable(kgem, rq->bo)) {
			kgem_bo_move_to_inactive(kgem, rq->bo);
			retired = true;
		} else {
			DBG(("%s: closing %d\n",
			     __FUNCTION__, rq->bo->handle));
			kgem_bo_free(kgem, rq->bo);
		}
	}

	__kgem_request_free(rq);
	return retired;
}
/* Retire completed requests on one ring, in submission order, stopping
 * at the first request whose batch bo the kernel still reports busy.
 * Returns true if any bo reached the inactive cache.
 */
static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
{
	bool retired = false;

	while (!list_is_empty(&kgem->requests[ring])) {
		struct kgem_request *rq;

		rq = list_first_entry(&kgem->requests[ring],
				      struct kgem_request,
				      list);
		if (__kgem_busy(kgem, rq->bo->handle))
			break;

		retired |= __kgem_retire_rq(kgem, rq);
	}

#if HAS_DEBUG_FULL
	{
		struct kgem_bo *bo;
		int count = 0;

		list_for_each_entry(bo, &kgem->requests[ring], request)
			count++;

		bo = NULL;
		if (!list_is_empty(&kgem->requests[ring]))
			bo = list_first_entry(&kgem->requests[ring],
					      struct kgem_request,
					      list)->bo;

		ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n",
		       __FUNCTION__, ring, count, bo ? bo->handle : 0);
	}
#endif

	return retired;
}
static bool kgem_retire__requests(struct kgem *kgem) |
{ |
bool retired = false; |
int n; |
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) { |
retired |= kgem_retire__requests_ring(kgem, n); |
kgem->need_retire |= !list_is_empty(&kgem->requests[n]); |
} |
return retired; |
} |
bool kgem_retire(struct kgem *kgem) |
{ |
bool retired = false; |
DBG(("%s\n", __FUNCTION__)); |
kgem->need_retire = false; |
retired |= kgem_retire__flushing(kgem); |
retired |= kgem_retire__requests(kgem); |
retired |= kgem_retire__buffers(kgem); |
DBG(("%s -- retired=%d, need_retire=%d\n", |
__FUNCTION__, retired, kgem->need_retire)); |
kgem->retire(kgem); |
return retired; |
} |
/* Check whether a ring has gone idle by querying the newest request's
 * batch bo; since requests complete in order, an idle newest request
 * implies the whole ring is idle, in which case all of its requests
 * are retired before returning true.
 */
bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
	struct kgem_request *rq;

	assert(!list_is_empty(&kgem->requests[ring]));

	rq = list_last_entry(&kgem->requests[ring],
			     struct kgem_request, list);
	if (__kgem_busy(kgem, rq->bo->handle)) {
		DBG(("%s: last requests handle=%d still busy\n",
		     __FUNCTION__, rq->bo->handle));
		return false;
	}

	DBG(("%s: ring=%d idle (handle=%d)\n",
	     __FUNCTION__, ring, rq->bo->handle));

	kgem_retire__requests_ring(kgem, ring);
	assert(list_is_empty(&kgem->requests[ring]));
	return true;
}
/* After a successful execbuffer, update every bo in the request with
 * its new presumed offset and GPU domain, then either queue the request
 * for later retirement or, for the fallback static_request, wait for it
 * synchronously and clean up.
 */
static void kgem_commit(struct kgem *kgem)
{
	struct kgem_request *rq = kgem->next_request;
	struct kgem_bo *bo, *next;

	list_for_each_entry_safe(bo, next, &rq->buffers, request) {
		assert(next->request.prev == &bo->request);

		DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
		     __FUNCTION__, bo->handle, bo->proxy != NULL,
		     bo->gpu_dirty, bo->needs_flush, bo->snoop,
		     (unsigned)bo->exec->offset));

		assert(bo->exec);
		assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
		assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));

		/* Remember where the kernel placed the bo for relocations */
		bo->presumed_offset = bo->exec->offset;
		bo->exec = NULL;
		bo->target_handle = -1;

		if (!bo->refcnt && !bo->reusable) {
			assert(!bo->snoop);
			kgem_bo_free(kgem, bo);
			continue;
		}

		bo->binding.offset = 0;
		bo->domain = DOMAIN_GPU;
		bo->gpu_dirty = false;

		if (bo->proxy) {
			/* proxies are not used for domain tracking */
			bo->exec = NULL;
			__kgem_bo_clear_busy(bo);
		}

		kgem->scanout_busy |= bo->scanout;
	}

	if (rq == &kgem->static_request) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));

		/* Force a stall until the batch completes */
		VG_CLEAR(set_domain);
		set_domain.handle = rq->bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}

		kgem_retire(kgem);
		assert(list_is_empty(&rq->buffers));

		assert(rq->bo->map == NULL);
		gem_close(kgem->fd, rq->bo->handle);
		kgem_cleanup_cache(kgem);
	} else {
		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
		kgem->need_throttle = kgem->need_retire = 1;
	}

	kgem->next_request = NULL;
}
static void kgem_close_list(struct kgem *kgem, struct list *head) |
{ |
while (!list_is_empty(head)) |
kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list)); |
} |
static void kgem_close_inactive(struct kgem *kgem) |
{ |
unsigned int i; |
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) |
kgem_close_list(kgem, &kgem->inactive[i]); |
} |
static void kgem_finish_buffers(struct kgem *kgem) |
{ |
struct kgem_buffer *bo, *next; |
list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) { |
DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s\n", |
__FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL, |
bo->write, bo->mmapped ? IS_CPU_MAP(bo->base.map) ? "cpu" : "gtt" : "no")); |
assert(next->base.list.prev == &bo->base.list); |
assert(bo->base.io); |
assert(bo->base.refcnt >= 1); |
if (!bo->base.exec) { |
DBG(("%s: skipping unattached handle=%d, used=%d\n", |
__FUNCTION__, bo->base.handle, bo->used)); |
continue; |
} |
if (!bo->write) { |
assert(bo->base.exec || bo->base.refcnt > 1); |
goto decouple; |
} |
if (bo->mmapped) { |
int used; |
assert(!bo->need_io); |
used = ALIGN(bo->used, PAGE_SIZE); |
if (!DBG_NO_UPLOAD_ACTIVE && |
used + PAGE_SIZE <= bytes(&bo->base) && |
(kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) { |
DBG(("%s: retaining upload buffer (%d/%d)\n", |
__FUNCTION__, bo->used, bytes(&bo->base))); |
bo->used = used; |
list_move(&bo->base.list, |
&kgem->active_buffers); |
continue; |
} |
DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n", |
__FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map))); |
goto decouple; |
} |
if (!bo->used) { |
/* Unless we replace the handle in the execbuffer, |
* then this bo will become active. So decouple it |
* from the buffer list and track it in the normal |
* manner. |
*/ |
goto decouple; |
} |
assert(bo->need_io); |
assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); |
assert(bo->base.domain != DOMAIN_GPU); |
if (bo->base.refcnt == 1 && |
bo->base.size.pages.count > 1 && |
bo->used < bytes(&bo->base) / 2) { |
struct kgem_bo *shrink; |
unsigned alloc = NUM_PAGES(bo->used); |
shrink = search_snoop_cache(kgem, alloc, |
CREATE_INACTIVE | CREATE_NO_RETIRE); |
if (shrink) { |
void *map; |
int n; |
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", |
__FUNCTION__, |
bo->used, bytes(&bo->base), bytes(shrink), |
bo->base.handle, shrink->handle)); |
assert(bo->used <= bytes(shrink)); |
map = kgem_bo_map__cpu(kgem, shrink); |
if (map) { |
kgem_bo_sync__cpu(kgem, shrink); |
memcpy(map, bo->mem, bo->used); |
shrink->target_handle = |
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; |
for (n = 0; n < kgem->nreloc; n++) { |
if (kgem->reloc[n].target_handle == bo->base.target_handle) { |
kgem->reloc[n].target_handle = shrink->target_handle; |
kgem->reloc[n].presumed_offset = shrink->presumed_offset; |
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = |
kgem->reloc[n].delta + shrink->presumed_offset; |
} |
} |
bo->base.exec->handle = shrink->handle; |
bo->base.exec->offset = shrink->presumed_offset; |
shrink->exec = bo->base.exec; |
shrink->rq = bo->base.rq; |
list_replace(&bo->base.request, |
&shrink->request); |
list_init(&bo->base.request); |
shrink->needs_flush = bo->base.gpu_dirty; |
bo->base.exec = NULL; |
bo->base.rq = NULL; |
bo->base.gpu_dirty = false; |
bo->base.needs_flush = false; |
bo->used = 0; |
goto decouple; |
} |
__kgem_bo_destroy(kgem, shrink); |
} |
shrink = search_linear_cache(kgem, alloc, |
CREATE_INACTIVE | CREATE_NO_RETIRE); |
if (shrink) { |
int n; |
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", |
__FUNCTION__, |
bo->used, bytes(&bo->base), bytes(shrink), |
bo->base.handle, shrink->handle)); |
assert(bo->used <= bytes(shrink)); |
if (gem_write(kgem->fd, shrink->handle, |
0, bo->used, bo->mem) == 0) { |
shrink->target_handle = |
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; |
for (n = 0; n < kgem->nreloc; n++) { |
if (kgem->reloc[n].target_handle == bo->base.target_handle) { |
kgem->reloc[n].target_handle = shrink->target_handle; |
kgem->reloc[n].presumed_offset = shrink->presumed_offset; |
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = |
kgem->reloc[n].delta + shrink->presumed_offset; |
} |
} |
bo->base.exec->handle = shrink->handle; |
bo->base.exec->offset = shrink->presumed_offset; |
shrink->exec = bo->base.exec; |
shrink->rq = bo->base.rq; |
list_replace(&bo->base.request, |
&shrink->request); |
list_init(&bo->base.request); |
shrink->needs_flush = bo->base.gpu_dirty; |
bo->base.exec = NULL; |
bo->base.rq = NULL; |
bo->base.gpu_dirty = false; |
bo->base.needs_flush = false; |
bo->used = 0; |
goto decouple; |
} |
__kgem_bo_destroy(kgem, shrink); |
} |
} |
DBG(("%s: handle=%d, uploading %d/%d\n", |
__FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base))); |
ASSERT_IDLE(kgem, bo->base.handle); |
assert(bo->used <= bytes(&bo->base)); |
gem_write(kgem->fd, bo->base.handle, |
0, bo->used, bo->mem); |
bo->need_io = 0; |
decouple: |
DBG(("%s: releasing handle=%d\n", |
__FUNCTION__, bo->base.handle)); |
list_del(&bo->base.list); |
kgem_bo_unref(kgem, &bo->base); |
} |
} |
/* Emergency teardown after a GPU hang (wedged): abandon every pending
 * request on every ring, free the bo they reference where possible,
 * and close the inactive caches.
 */
static void kgem_cleanup(struct kgem *kgem)
{
	int n;

	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
		while (!list_is_empty(&kgem->requests[n])) {
			struct kgem_request *rq;

			rq = list_first_entry(&kgem->requests[n],
					      struct kgem_request,
					      list);
			while (!list_is_empty(&rq->buffers)) {
				struct kgem_bo *bo;

				bo = list_first_entry(&rq->buffers,
						      struct kgem_bo,
						      request);

				/* Forget any pending execution state */
				bo->exec = NULL;
				bo->gpu_dirty = false;
				__kgem_bo_clear_busy(bo);
				if (bo->refcnt == 0)
					kgem_bo_free(kgem, bo);
			}

			__kgem_request_free(rq);
		}
	}

	kgem_close_inactive(kgem);
}
/* Upload the batch commands (and any surface state) into the batch bo
 * of @size bytes.  Commands grow upward from dword 0 (nbatch used);
 * surface state grows downward from the end of the batch space
 * (kgem->surface is its lowest dword index).  Depending on how close
 * the two regions are, upload in one write or two.
 */
static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
{
	int ret;

	ASSERT_IDLE(kgem, handle);

	/* If there is no surface data, just upload the batch */
	if (kgem->surface == kgem->batch_size)
		return gem_write(kgem->fd, handle,
				 0, sizeof(uint32_t)*kgem->nbatch,
				 kgem->batch);

	/* Are the batch pages conjoint with the surface pages? */
	if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
		assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
		return gem_write(kgem->fd, handle,
				 0, kgem->batch_size*sizeof(uint32_t),
				 kgem->batch);
	}

	/* Disjoint surface/batch, upload separately */
	ret = gem_write(kgem->fd, handle,
			0, sizeof(uint32_t)*kgem->nbatch,
			kgem->batch);
	if (ret)
		return ret;

	/* ret = byte length of the surface-state tail to relocate */
	ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
	ret -= sizeof(uint32_t) * kgem->surface;
	assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
	return __gem_write(kgem->fd, handle,
			size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
			kgem->batch + kgem->surface);
}
/* Reset all per-batch state, abandoning any batch under construction.
 * Every bo attached to the pending request is decoupled: still-busy
 * dirty bo are parked on the flushing list, idle unreferenced ones are
 * cached or freed.  A fresh request is allocated for the next batch.
 */
void kgem_reset(struct kgem *kgem)
{
	if (kgem->next_request) {
		struct kgem_request *rq = kgem->next_request;

		while (!list_is_empty(&rq->buffers)) {
			struct kgem_bo *bo =
				list_first_entry(&rq->buffers,
						 struct kgem_bo,
						 request);
			list_del(&bo->request);

			assert(RQ(bo->rq) == rq);

			bo->binding.offset = 0;
			bo->exec = NULL;
			bo->target_handle = -1;
			bo->gpu_dirty = false;

			if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
				assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
				list_add(&bo->request, &kgem->flushing);
				/* rq == kgem marks "on the flushing list" */
				bo->rq = (void *)kgem;
			} else
				__kgem_bo_clear_busy(bo);

			if (bo->refcnt || bo->rq)
				continue;

			if (bo->snoop) {
				kgem_bo_move_to_snoop(kgem, bo);
			} else if (bo->scanout) {
				kgem_bo_move_to_scanout(kgem, bo);
			} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
				   kgem_bo_set_purgeable(kgem, bo)) {
				kgem_bo_move_to_inactive(kgem, bo);
			} else {
				DBG(("%s: closing %d\n",
				     __FUNCTION__, bo->handle));
				kgem_bo_free(kgem, bo);
			}
		}

		/* The static fallback request is never freed */
		if (rq != &kgem->static_request) {
			list_init(&rq->list);
			__kgem_request_free(rq);
		}
	}

	kgem->nfence = 0;
	kgem->nexec = 0;
	kgem->nreloc = 0;
	kgem->nreloc__self = 0;
	kgem->aperture = 0;
	kgem->aperture_fenced = 0;
	kgem->nbatch = 0;
	kgem->surface = kgem->batch_size;
	kgem->mode = KGEM_NONE;
	kgem->flush = 0;
	kgem->batch_flags = kgem->batch_flags_base;

	kgem->next_request = __kgem_request_alloc(kgem);

	kgem_sna_reset(kgem);
}
static int compact_batch_surface(struct kgem *kgem) |
{ |
int size, shrink, n; |
if (!kgem->has_relaxed_delta) |
return kgem->batch_size; |
/* See if we can pack the contents into one or two pages */ |
n = ALIGN(kgem->batch_size, 1024); |
size = n - kgem->surface + kgem->nbatch; |
size = ALIGN(size, 1024); |
shrink = n - size; |
if (shrink) { |
DBG(("shrinking from %d to %d\n", kgem->batch_size, size)); |
shrink *= sizeof(uint32_t); |
for (n = 0; n < kgem->nreloc; n++) { |
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION && |
kgem->reloc[n].target_handle == ~0U) |
kgem->reloc[n].delta -= shrink; |
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch) |
kgem->reloc[n].offset -= shrink; |
} |
} |
return size * sizeof(uint32_t); |
} |
/* Obtain a bo of at least @size bytes to hold the batch.  Small batches
 * try to reuse the pre-pinned 4096/16384-byte batch bo (retiring them if
 * they have just completed); gen2 (020) without pinned-batch support
 * stalls on the pinned bo instead.  Otherwise a fresh linear bo is
 * allocated.  Returns NULL only if a GPU hang is detected while
 * stalling.
 */
static struct kgem_bo *
kgem_create_batch(struct kgem *kgem, int size)
{
	struct drm_i915_gem_set_domain set_domain;
	struct kgem_bo *bo;

	if (size <= 4096) {
		bo = list_first_entry(&kgem->pinned_batches[0],
				      struct kgem_bo,
				      list);
		if (!bo->rq) {
out_4096:
			/* Rotate to the tail so reuse cycles fairly */
			list_move_tail(&bo->list, &kgem->pinned_batches[0]);
			return kgem_bo_reference(bo);
		}

		if (!__kgem_busy(kgem, bo->handle)) {
			assert(RQ(bo->rq)->bo == bo);
			__kgem_retire_rq(kgem, RQ(bo->rq));
			goto out_4096;
		}
	}

	if (size <= 16384) {
		bo = list_first_entry(&kgem->pinned_batches[1],
				      struct kgem_bo,
				      list);
		if (!bo->rq) {
out_16384:
			list_move_tail(&bo->list, &kgem->pinned_batches[1]);
			return kgem_bo_reference(bo);
		}

		if (!__kgem_busy(kgem, bo->handle)) {
			assert(RQ(bo->rq)->bo == bo);
			__kgem_retire_rq(kgem, RQ(bo->rq));
			goto out_16384;
		}
	}

	if (kgem->gen == 020 && !kgem->has_pinned_batches) {
		assert(size <= 16384);

		bo = list_first_entry(&kgem->pinned_batches[size > 4096],
				      struct kgem_bo,
				      list);
		list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);

		DBG(("%s: syncing due to busy batches\n", __FUNCTION__));

		/* Stall until the pinned batch becomes idle */
		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
			return NULL;
		}

		kgem_retire(kgem);
		assert(bo->rq == NULL);
		return kgem_bo_reference(bo);
	}

	return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
}
/* Submit the batch under construction: close the command stream, flush
 * upload buffers, allocate and fill the batch bo, then fire the
 * execbuffer2 ioctl (retrying on EBUSY after throttling).  On failure
 * the GPU is marked wedged and all pending state is cleaned up; in all
 * cases per-batch state is reset ready for the next batch.
 */
void _kgem_submit(struct kgem *kgem)
{
	struct kgem_request *rq;
	uint32_t batch_end;
	int size;

	assert(!DBG_NO_HW);
	assert(!kgem->wedged);

	assert(kgem->nbatch);
	assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
	assert(kgem->nbatch <= kgem->surface);

	batch_end = kgem_end_batch(kgem);
	kgem_sna_flush(kgem);

	DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
	     kgem->mode, kgem->ring, kgem->batch_flags,
	     batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
	     kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));

	assert(kgem->nbatch <= kgem->batch_size);
	assert(kgem->nbatch <= kgem->surface);
	assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
	assert(kgem->nfence <= kgem->fence_max);

	kgem_finish_buffers(kgem);

#if SHOW_BATCH
	__kgem_batch_debug(kgem, batch_end);
#endif

	rq = kgem->next_request;
	/* Size in bytes; compacted when surface state is present */
	if (kgem->surface != kgem->batch_size)
		size = compact_batch_surface(kgem);
	else
		size = kgem->nbatch * sizeof(kgem->batch[0]);
	rq->bo = kgem_create_batch(kgem, size);
	if (rq->bo) {
		uint32_t handle = rq->bo->handle;
		int i;

		assert(!rq->bo->needs_flush);

		/* The batch bo itself is the final execbuffer entry */
		i = kgem->nexec++;
		kgem->exec[i].handle = handle;
		kgem->exec[i].relocation_count = kgem->nreloc;
		kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
		kgem->exec[i].alignment = 0;
		kgem->exec[i].offset = rq->bo->presumed_offset;
		kgem->exec[i].flags = 0;
		kgem->exec[i].rsvd1 = 0;
		kgem->exec[i].rsvd2 = 0;

		rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
		rq->bo->exec = &kgem->exec[i];
		rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
		list_add(&rq->bo->request, &rq->buffers);
		rq->ring = kgem->ring == KGEM_BLT;

		kgem_fixup_self_relocs(kgem, rq->bo);

		if (kgem_batch_write(kgem, handle, size) == 0) {
			struct drm_i915_gem_execbuffer2 execbuf;
			int ret, retry = 3;

			memset(&execbuf, 0, sizeof(execbuf));
			execbuf.buffers_ptr = (uintptr_t)kgem->exec;
			execbuf.buffer_count = kgem->nexec;
			execbuf.batch_len = batch_end*sizeof(uint32_t);
			execbuf.flags = kgem->ring | kgem->batch_flags;

			ret = drmIoctl(kgem->fd,
				       DRM_IOCTL_I915_GEM_EXECBUFFER2,
				       &execbuf);
			/* EBUSY: wait for the GPU to drain and try again */
			while (ret == -1 && errno == EBUSY && retry--) {
				__kgem_throttle(kgem);
				ret = drmIoctl(kgem->fd,
					       DRM_IOCTL_I915_GEM_EXECBUFFER2,
					       &execbuf);
			}
			if (DEBUG_SYNC && ret == 0) {
				struct drm_i915_gem_set_domain set_domain;

				VG_CLEAR(set_domain);
				set_domain.handle = handle;
				set_domain.read_domains = I915_GEM_DOMAIN_GTT;
				set_domain.write_domain = I915_GEM_DOMAIN_GTT;

				ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
			}
			if (ret == -1) {
//				DBG(("%s: GPU hang detected [%d]\n",
//				     __FUNCTION__, errno));
				kgem_throttle(kgem);
				kgem->wedged = true;

#if 0
				ret = errno;
				ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
				       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
				       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno);

				for (i = 0; i < kgem->nexec; i++) {
					struct kgem_bo *bo, *found = NULL;

					list_for_each_entry(bo, &kgem->next_request->buffers, request) {
						if (bo->handle == kgem->exec[i].handle) {
							found = bo;
							break;
						}
					}
					ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
					       i,
					       kgem->exec[i].handle,
					       (int)kgem->exec[i].offset,
					       found ? kgem_bo_size(found) : -1,
					       found ? found->tiling : -1,
					       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
					       found ? found->snoop : -1,
					       found ? found->purged : -1);
				}
				for (i = 0; i < kgem->nreloc; i++) {
					ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
					       i,
					       (int)kgem->reloc[i].offset,
					       kgem->reloc[i].target_handle,
					       kgem->reloc[i].delta,
					       kgem->reloc[i].read_domains,
					       kgem->reloc[i].write_domain,
					       (int)kgem->reloc[i].presumed_offset);
				}

				if (DEBUG_SYNC) {
					int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
					if (fd != -1) {
						write(fd, kgem->batch, batch_end*sizeof(uint32_t));
						close(fd);
					}

					FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
				}
#endif
			}
		}

		kgem_commit(kgem);
	}
	if (kgem->wedged)
		kgem_cleanup(kgem);

	kgem_reset(kgem);

	assert(kgem->next_request != NULL);
}
void kgem_throttle(struct kgem *kgem) |
{ |
kgem->need_throttle = 0; |
if (kgem->wedged) |
return; |
kgem->wedged = __kgem_throttle(kgem); |
if (kgem->wedged) { |
printf("Detected a hung GPU, disabling acceleration.\n"); |
printf("When reporting this, please include i915_error_state from debugfs and the full dmesg.\n"); |
} |
} |
/* Walk every inactive cache bucket and free any bo whose purgeable
 * backing pages the kernel has reaped in the meantime, then clear the
 * pending need_purge flag. */
void kgem_purge_cache(struct kgem *kgem)
{
	struct kgem_bo *bo, *next;
	int i;

	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		/* _safe variant: kgem_bo_free() unlinks bo from this list. */
		list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
			/* presumably queries the kernel whether the pages
			 * survived being marked purgeable — TODO confirm
			 * against kgem_bo_is_retained() */
			if (!kgem_bo_is_retained(kgem, bo)) {
				DBG(("%s: purging %d\n",
				     __FUNCTION__, bo->handle));
				kgem_bo_free(kgem, bo);
			}
		}
	}

	kgem->need_purge = false;
}
void kgem_clean_large_cache(struct kgem *kgem) |
{ |
while (!list_is_empty(&kgem->large_inactive)) { |
kgem_bo_free(kgem, |
list_first_entry(&kgem->large_inactive, |
struct kgem_bo, list)); |
} |
} |
bool kgem_expire_cache(struct kgem *kgem) |
{ |
time_t now, expire; |
struct kgem_bo *bo; |
unsigned int size = 0, count = 0; |
bool idle; |
unsigned int i; |
time(&now); |
while (__kgem_freed_bo) { |
bo = __kgem_freed_bo; |
__kgem_freed_bo = *(struct kgem_bo **)bo; |
free(bo); |
} |
while (__kgem_freed_request) { |
struct kgem_request *rq = __kgem_freed_request; |
__kgem_freed_request = *(struct kgem_request **)rq; |
free(rq); |
} |
kgem_clean_large_cache(kgem); |
expire = 0; |
list_for_each_entry(bo, &kgem->snoop, list) { |
if (bo->delta) { |
expire = now - MAX_INACTIVE_TIME/2; |
break; |
} |
bo->delta = now; |
} |
if (expire) { |
while (!list_is_empty(&kgem->snoop)) { |
bo = list_last_entry(&kgem->snoop, struct kgem_bo, list); |
if (bo->delta > expire) |
break; |
kgem_bo_free(kgem, bo); |
} |
} |
#ifdef DEBUG_MEMORY |
{ |
long snoop_size = 0; |
int snoop_count = 0; |
list_for_each_entry(bo, &kgem->snoop, list) |
snoop_count++, snoop_size += bytes(bo); |
ErrorF("%s: still allocated %d bo, %ld bytes, in snoop cache\n", |
__FUNCTION__, snoop_count, snoop_size); |
} |
#endif |
kgem_retire(kgem); |
if (kgem->wedged) |
kgem_cleanup(kgem); |
kgem->expire(kgem); |
if (kgem->need_purge) |
kgem_purge_cache(kgem); |
expire = 0; |
idle = !kgem->need_retire; |
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { |
idle &= list_is_empty(&kgem->inactive[i]); |
list_for_each_entry(bo, &kgem->inactive[i], list) { |
if (bo->delta) { |
expire = now - MAX_INACTIVE_TIME; |
break; |
} |
bo->delta = now; |
} |
} |
if (idle) { |
DBG(("%s: idle\n", __FUNCTION__)); |
kgem->need_expire = false; |
return false; |
} |
if (expire == 0) |
return true; |
idle = !kgem->need_retire; |
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { |
struct list preserve; |
list_init(&preserve); |
while (!list_is_empty(&kgem->inactive[i])) { |
bo = list_last_entry(&kgem->inactive[i], |
struct kgem_bo, list); |
if (bo->delta > expire) { |
idle = false; |
break; |
} |
if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) { |
idle = false; |
list_move_tail(&bo->list, &preserve); |
} else { |
count++; |
size += bytes(bo); |
kgem_bo_free(kgem, bo); |
DBG(("%s: expiring %d\n", |
__FUNCTION__, bo->handle)); |
} |
} |
if (!list_is_empty(&preserve)) { |
preserve.prev->next = kgem->inactive[i].next; |
kgem->inactive[i].next->prev = preserve.prev; |
kgem->inactive[i].next = preserve.next; |
preserve.next->prev = &kgem->inactive[i]; |
} |
} |
#ifdef DEBUG_MEMORY |
{ |
long inactive_size = 0; |
int inactive_count = 0; |
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) |
list_for_each_entry(bo, &kgem->inactive[i], list) |
inactive_count++, inactive_size += bytes(bo); |
ErrorF("%s: still allocated %d bo, %ld bytes, in inactive cache\n", |
__FUNCTION__, inactive_count, inactive_size); |
} |
#endif |
DBG(("%s: expired %d objects, %d bytes, idle? %d\n", |
__FUNCTION__, count, size, idle)); |
kgem->need_expire = !idle; |
return !idle; |
(void)count; |
(void)size; |
} |
/* Tear down all caches: wait for outstanding requests to complete,
 * retire them, then free every cached bo (inactive, large, snoop) and
 * drain the deferred-free list.  Used on shutdown/reset paths. */
void kgem_cleanup_cache(struct kgem *kgem)
{
	unsigned int i;
	int n;

	/* sync to the most recent request */
	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
		if (!list_is_empty(&kgem->requests[n])) {
			struct kgem_request *rq;
			struct drm_i915_gem_set_domain set_domain;

			rq = list_first_entry(&kgem->requests[n],
					      struct kgem_request,
					      list);

			DBG(("%s: sync on cleanup\n", __FUNCTION__));

			/* Setting the GTT domain on the request's bo makes
			 * the kernel wait for that request to finish. */
			VG_CLEAR(set_domain);
			set_domain.handle = rq->bo->handle;
			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
			(void)drmIoctl(kgem->fd,
				       DRM_IOCTL_I915_GEM_SET_DOMAIN,
				       &set_domain);
		}
	}

	kgem_retire(kgem);
	kgem_cleanup(kgem);

	/* Everything is idle now; free all cached buffers. */
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		while (!list_is_empty(&kgem->inactive[i]))
			kgem_bo_free(kgem,
				     list_last_entry(&kgem->inactive[i],
						     struct kgem_bo, list));
	}

	kgem_clean_large_cache(kgem);

	while (!list_is_empty(&kgem->snoop))
		kgem_bo_free(kgem,
			     list_last_entry(&kgem->snoop,
					     struct kgem_bo, list));

	/* Drain the deferred-free list (intrusive singly-linked chain). */
	while (__kgem_freed_bo) {
		struct kgem_bo *bo = __kgem_freed_bo;
		__kgem_freed_bo = *(struct kgem_bo **)bo;
		free(bo);
	}

	kgem->need_purge = false;
	kgem->need_expire = false;
}
/* Search the caches for a reusable linear (untiled) bo of at least
 * num_pages pages.
 *
 * With CREATE_INACTIVE set only idle buffers are considered; otherwise
 * active (still referenced by the GPU) buffers may be returned as well.
 * Buffers larger than MAX_CACHE_SIZE live on the dedicated large lists.
 * Returns the bo removed from its cache list (refcnt still 0), or NULL
 * if nothing suitable was found. */
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
	struct kgem_bo *bo, *first = NULL;
	bool use_active = (flags & CREATE_INACTIVE) == 0;
	struct list *cache;

	DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n",
	     __FUNCTION__, num_pages, flags, use_active,
	     num_pages >= MAX_CACHE_SIZE / PAGE_SIZE,
	     MAX_CACHE_SIZE / PAGE_SIZE));
	assert(num_pages);

	if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) {
		DBG(("%s: searching large buffers\n", __FUNCTION__));
retry_large:
		cache = use_active ? &kgem->large : &kgem->large_inactive;
		/* NOTE: 'first' is reused here purely as the safe-iteration
		 * scratch pointer for list_for_each_entry_safe. */
		list_for_each_entry_safe(bo, first, cache, list) {
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);

			if (num_pages > num_pages(bo))
				goto discard;

			if (bo->tiling != I915_TILING_NONE) {
				if (use_active)
					goto discard;

				if (!gem_set_tiling(kgem->fd, bo->handle,
						    I915_TILING_NONE, 0))
					goto discard;

				bo->tiling = I915_TILING_NONE;
				bo->pitch = 0;
			}

			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo))
				goto discard;

			list_del(&bo->list);
			if (bo->rq == (void *)kgem)
				list_del(&bo->request);

			bo->delta = 0;
			assert_tiling(kgem, bo);
			return bo;

discard:
			/* Unsuitable inactive large buffers are freed
			 * outright rather than kept around. */
			if (!use_active)
				kgem_bo_free(kgem, bo);
		}

		/* Fall back from the active to the inactive large list. */
		if (use_active) {
			use_active = false;
			goto retry_large;
		}

		if (__kgem_throttle_retire(kgem, flags))
			goto retry_large;

		return NULL;
	}

	/* Inactive-only request with an empty bucket: try to retire some
	 * active buffers into it before giving up. */
	if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
		DBG(("%s: inactive and cache bucket empty\n",
		     __FUNCTION__));

		if (flags & CREATE_NO_RETIRE) {
			DBG(("%s: can not retire\n", __FUNCTION__));
			return NULL;
		}

		if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
			DBG(("%s: active cache bucket empty\n", __FUNCTION__));
			return NULL;
		}

		if (!__kgem_throttle_retire(kgem, flags)) {
			DBG(("%s: nothing retired\n", __FUNCTION__));
			return NULL;
		}

		if (list_is_empty(inactive(kgem, num_pages))) {
			DBG(("%s: active cache bucket still empty after retire\n",
			     __FUNCTION__));
			return NULL;
		}
	}

	/* Prefer a buffer that already carries the requested mapping. */
	if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
		int for_cpu = !!(flags & CREATE_CPU_MAP);
		DBG(("%s: searching for inactive %s map\n",
		     __FUNCTION__, for_cpu ? "cpu" : "gtt"));
		cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
		list_for_each_entry(bo, cache, vma) {
			assert(IS_CPU_MAP(bo->map) == for_cpu);
			assert(bucket(bo) == cache_bucket(num_pages));
			assert(bo->proxy == NULL);
			assert(bo->rq == NULL);
			assert(bo->exec == NULL);
			assert(!bo->scanout);

			if (num_pages > num_pages(bo)) {
				DBG(("inactive too small: %d < %d\n",
				     num_pages(bo), num_pages));
				continue;
			}

			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
				kgem_bo_free(kgem, bo);
				break;
			}

			if (I915_TILING_NONE != bo->tiling &&
			    !gem_set_tiling(kgem->fd, bo->handle,
					    I915_TILING_NONE, 0))
				continue;

			kgem_bo_remove_from_inactive(kgem, bo);

			bo->tiling = I915_TILING_NONE;
			bo->pitch = 0;
			bo->delta = 0;
			DBG(("  %s: found handle=%d (num_pages=%d) in linear vma cache\n",
			     __FUNCTION__, bo->handle, num_pages(bo)));
			assert(use_active || bo->domain != DOMAIN_GPU);
			assert(!bo->needs_flush);
			assert_tiling(kgem, bo);
			ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
			return bo;
		}

		if (flags & CREATE_EXACT)
			return NULL;

		if (flags & CREATE_CPU_MAP && !kgem->has_llc)
			return NULL;
	}

	/* General search; 'first' remembers a near-miss (fits but would
	 * need remapping/untiling) to fall back on if no exact fit. */
	cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
	list_for_each_entry(bo, cache, list) {
		assert(bo->refcnt == 0);
		assert(bo->reusable);
		assert(!!bo->rq == !!use_active);
		assert(bo->proxy == NULL);
		assert(!bo->scanout);

		if (num_pages > num_pages(bo))
			continue;

		if (use_active &&
		    kgem->gen <= 040 &&
		    bo->tiling != I915_TILING_NONE)
			continue;

		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
			kgem_bo_free(kgem, bo);
			break;
		}

		if (I915_TILING_NONE != bo->tiling) {
			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
				continue;

			if (first)
				continue;

			if (!gem_set_tiling(kgem->fd, bo->handle,
					    I915_TILING_NONE, 0))
				continue;

			bo->tiling = I915_TILING_NONE;
			bo->pitch = 0;
		}

		if (bo->map) {
			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
				int for_cpu = !!(flags & CREATE_CPU_MAP);
				if (IS_CPU_MAP(bo->map) != for_cpu) {
					if (first != NULL)
						break;

					first = bo;
					continue;
				}
			} else {
				if (first != NULL)
					break;

				first = bo;
				continue;
			}
		} else {
			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
				if (first != NULL)
					break;

				first = bo;
				continue;
			}
		}

		if (use_active)
			kgem_bo_remove_from_active(kgem, bo);
		else
			kgem_bo_remove_from_inactive(kgem, bo);

		assert(bo->tiling == I915_TILING_NONE);
		bo->pitch = 0;
		bo->delta = 0;
		DBG(("  %s: found handle=%d (num_pages=%d) in linear %s cache\n",
		     __FUNCTION__, bo->handle, num_pages(bo),
		     use_active ? "active" : "inactive"));
		assert(list_is_empty(&bo->list));
		assert(use_active || bo->domain != DOMAIN_GPU);
		assert(!bo->needs_flush || use_active);
		assert_tiling(kgem, bo);
		ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
		return bo;
	}

	/* No exact fit — settle for the recorded near-miss. */
	if (first) {
		assert(first->tiling == I915_TILING_NONE);

		if (use_active)
			kgem_bo_remove_from_active(kgem, first);
		else
			kgem_bo_remove_from_inactive(kgem, first);

		first->pitch = 0;
		first->delta = 0;
		DBG(("  %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
		     __FUNCTION__, first->handle, num_pages(first),
		     use_active ? "active" : "inactive"));
		assert(list_is_empty(&first->list));
		assert(use_active || first->domain != DOMAIN_GPU);
		assert(!first->needs_flush || use_active);
		ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
		return first;
	}

	return NULL;
}
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) |
{ |
struct kgem_bo *bo; |
uint32_t handle; |
DBG(("%s(%d)\n", __FUNCTION__, size)); |
assert(size); |
if (flags & CREATE_GTT_MAP && kgem->has_llc) { |
flags &= ~CREATE_GTT_MAP; |
flags |= CREATE_CPU_MAP; |
} |
size = NUM_PAGES(size); |
bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags); |
if (bo) { |
assert(bo->domain != DOMAIN_GPU); |
ASSERT_IDLE(kgem, bo->handle); |
bo->refcnt = 1; |
return bo; |
} |
if (flags & CREATE_CACHED) |
return NULL; |
handle = gem_create(kgem->fd, size); |
if (handle == 0) |
return NULL; |
DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size)); |
bo = __kgem_bo_alloc(handle, size); |
if (bo == NULL) { |
gem_close(kgem->fd, handle); |
return NULL; |
} |
debug_alloc__bo(kgem, bo); |
return bo; |
} |
inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo) |
{ |
unsigned int size; |
assert(bo->tiling); |
assert_tiling(kgem, bo); |
assert(kgem->gen < 040); |
if (kgem->gen < 030) |
size = 512 * 1024; |
else |
size = 1024 * 1024; |
while (size < bytes(bo)) |
size *= 2; |
return size; |
} |
/* Allocate (or reuse from the caches) a 2D bo for a width x height
 * surface of the given bpp and tiling.
 *
 * A negative tiling value requests that exact tiling mode.  The search
 * order is: scanout list (for CREATE_SCANOUT), the large-object lists
 * (for objects past the bucketed cache range), the mapped-vma cache,
 * the active caches (unless CREATE_INACTIVE), the inactive caches, and
 * finally a fresh kernel allocation.  Returns NULL on failure or when
 * CREATE_CACHED found nothing. */
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
			       int width,
			       int height,
			       int bpp,
			       int tiling,
			       uint32_t flags)
{
	struct list *cache;
	struct kgem_bo *bo;
	uint32_t pitch, tiled_height, size;
	uint32_t handle;
	int i, bucket, retry;
	bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT);

	if (tiling < 0)
		exact = true, tiling = -tiling;

	DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
	     width, height, bpp, tiling, exact,
	     !!(flags & CREATE_INACTIVE),
	     !!(flags & CREATE_CPU_MAP),
	     !!(flags & CREATE_GTT_MAP),
	     !!(flags & CREATE_SCANOUT),
	     !!(flags & CREATE_PRIME),
	     !!(flags & CREATE_TEMPORARY)));

	/* Determine the byte size and row pitch the surface needs, then
	 * convert to pages for the bucketed caches. */
	size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
				 width, height, bpp, tiling, &pitch);
	assert(size && size <= kgem->max_object_size);
	size /= PAGE_SIZE;
	bucket = cache_bucket(size);

	/* Stage 1: scanout buffers are recycled from their own list. */
	if (flags & CREATE_SCANOUT) {
		struct kgem_bo *last = NULL;

		list_for_each_entry_reverse(bo, &kgem->scanout, list) {
			assert(bo->scanout);
			assert(bo->delta);
			assert(!bo->flush);
			assert_tiling(kgem, bo);

			if (size > num_pages(bo) || num_pages(bo) > 2*size)
				continue;

			if (bo->tiling != tiling ||
			    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
				if (!gem_set_tiling(kgem->fd, bo->handle,
						    tiling, pitch))
					continue;

				bo->tiling = tiling;
				bo->pitch = pitch;
			}

			/* Still busy: remember it as a fallback when an
			 * inactive buffer was requested. */
			if (flags & CREATE_INACTIVE && bo->rq) {
				last = bo;
				continue;
			}

			list_del(&bo->list);

			bo->unique_id = kgem_get_unique_id(kgem);
			DBG(("  1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}

		if (last) {
			list_del(&last->list);

			last->unique_id = kgem_get_unique_id(kgem);
			DBG(("  1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     last->pitch, last->tiling, last->handle, last->unique_id));
			assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
			assert_tiling(kgem, last);
			last->refcnt = 1;
			return last;
		}

		/* Dedicated display allocation is disabled in this port. */
		bo = NULL; //__kgem_bo_create_as_display(kgem, size, tiling, pitch);
		if (bo)
			return bo;
	}

	/* Stage 2: objects beyond the bucketed range use the large lists. */
	if (bucket >= NUM_CACHE_BUCKETS) {
		DBG(("%s: large bo num pages=%d, bucket=%d\n",
		     __FUNCTION__, size, bucket));

		if (flags & CREATE_INACTIVE)
			goto large_inactive;

		tiled_height = kgem_aligned_height(kgem, height, tiling);

		list_for_each_entry(bo, &kgem->large, list) {
			assert(!bo->purged);
			assert(!bo->scanout);
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert_tiling(kgem, bo);

			if (kgem->gen < 040) {
				/* Pre-gen4 cannot change tiling on the fly;
				 * the existing pitch must already suffice. */
				if (bo->pitch < pitch) {
					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
					     bo->tiling, tiling,
					     bo->pitch, pitch));
					continue;
				}

				if (bo->pitch * tiled_height > bytes(bo))
					continue;
			} else {
				if (num_pages(bo) < size)
					continue;

				if (bo->pitch != pitch || bo->tiling != tiling) {
					if (!gem_set_tiling(kgem->fd, bo->handle,
							    tiling, pitch))
						continue;

					bo->pitch = pitch;
					bo->tiling = tiling;
				}
			}

			kgem_bo_remove_from_active(kgem, bo);

			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			bo->flush = true;
			return bo;
		}

large_inactive:
		__kgem_throttle_retire(kgem, flags);
		list_for_each_entry(bo, &kgem->large_inactive, list) {
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);
			assert_tiling(kgem, bo);

			if (size > num_pages(bo))
				continue;

			if (bo->tiling != tiling ||
			    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
				if (!gem_set_tiling(kgem->fd, bo->handle,
						    tiling, pitch))
					continue;

				bo->tiling = tiling;
				bo->pitch = pitch;
			}

			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
				kgem_bo_free(kgem, bo);
				break;
			}

			list_del(&bo->list);

			assert(bo->domain != DOMAIN_GPU);
			bo->unique_id = kgem_get_unique_id(kgem);
			bo->pitch = pitch;
			bo->delta = 0;
			DBG(("  1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}

		goto create;
	}

	/* Stage 3: prefer a bo that already carries the requested mapping. */
	if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
		int for_cpu = !!(flags & CREATE_CPU_MAP);
		if (kgem->has_llc && tiling == I915_TILING_NONE)
			for_cpu = 1;
		/* We presume that we will need to upload to this bo,
		 * and so would prefer to have an active VMA.
		 */
		cache = &kgem->vma[for_cpu].inactive[bucket];
		do {
			list_for_each_entry(bo, cache, vma) {
				assert(bucket(bo) == bucket);
				assert(bo->refcnt == 0);
				assert(!bo->scanout);
				assert(bo->map);
				assert(IS_CPU_MAP(bo->map) == for_cpu);
				assert(bo->rq == NULL);
				assert(list_is_empty(&bo->request));
				assert(bo->flush == false);
				assert_tiling(kgem, bo);

				if (size > num_pages(bo)) {
					DBG(("inactive too small: %d < %d\n",
					     num_pages(bo), size));
					continue;
				}

				/* Changing tiling would invalidate the map,
				 * so only exact tiling matches qualify. */
				if (bo->tiling != tiling ||
				    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
					DBG(("inactive vma with wrong tiling: %d < %d\n",
					     bo->tiling, tiling));
					continue;
				}

				if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
					kgem_bo_free(kgem, bo);
					break;
				}

				assert(bo->tiling == tiling);
				bo->pitch = pitch;
				bo->delta = 0;
				bo->unique_id = kgem_get_unique_id(kgem);
				bo->domain = DOMAIN_NONE;

				kgem_bo_remove_from_inactive(kgem, bo);

				DBG(("  from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->reusable);
				assert(bo->domain != DOMAIN_GPU);
				ASSERT_IDLE(kgem, bo->handle);
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		} while (!list_is_empty(cache) &&
			 __kgem_throttle_retire(kgem, flags));

		if (flags & CREATE_CPU_MAP && !kgem->has_llc) {
			if (list_is_empty(&kgem->active[bucket][tiling]) &&
			    list_is_empty(&kgem->inactive[bucket]))
				flags &= ~CREATE_CACHED;

			goto create;
		}
	}

	if (flags & CREATE_INACTIVE)
		goto skip_active_search;

	/* Stage 4: best active match, searching up to three larger
	 * buckets before giving up. */
	retry = NUM_CACHE_BUCKETS - bucket;
	if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
		retry = 3;
search_again:
	assert(bucket < NUM_CACHE_BUCKETS);
	cache = &kgem->active[bucket][tiling];
	if (tiling) {
		tiled_height = kgem_aligned_height(kgem, height, tiling);
		list_for_each_entry(bo, cache, list) {
			assert(!bo->purged);
			assert(bo->refcnt == 0);
			assert(bucket(bo) == bucket);
			assert(bo->reusable);
			assert(bo->tiling == tiling);
			assert(bo->flush == false);
			assert(!bo->scanout);
			assert_tiling(kgem, bo);

			if (kgem->gen < 040) {
				if (bo->pitch < pitch) {
					DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
					     bo->tiling, tiling,
					     bo->pitch, pitch));
					continue;
				}

				if (bo->pitch * tiled_height > bytes(bo))
					continue;
			} else {
				if (num_pages(bo) < size)
					continue;

				if (bo->pitch != pitch) {
					if (!gem_set_tiling(kgem->fd,
							    bo->handle,
							    tiling, pitch))
						continue;

					bo->pitch = pitch;
				}
			}

			kgem_bo_remove_from_active(kgem, bo);

			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}
	} else {
		list_for_each_entry(bo, cache, list) {
			assert(bucket(bo) == bucket);
			assert(!bo->purged);
			assert(bo->refcnt == 0);
			assert(bo->reusable);
			assert(!bo->scanout);
			assert(bo->tiling == tiling);
			assert(bo->flush == false);
			assert_tiling(kgem, bo);

			if (num_pages(bo) < size)
				continue;

			kgem_bo_remove_from_active(kgem, bo);

			bo->pitch = pitch;
			bo->unique_id = kgem_get_unique_id(kgem);
			bo->delta = 0;
			DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
			assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
			assert_tiling(kgem, bo);
			bo->refcnt = 1;
			return bo;
		}
	}

	if (--retry && exact) {
		/* Gen4+ can retile an active buffer, so consider the
		 * other tiling modes in this bucket too. */
		if (kgem->gen >= 040) {
			for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
				if (i == tiling)
					continue;

				cache = &kgem->active[bucket][i];
				list_for_each_entry(bo, cache, list) {
					assert(!bo->purged);
					assert(bo->refcnt == 0);
					assert(bo->reusable);
					assert(!bo->scanout);
					assert(bo->flush == false);
					assert_tiling(kgem, bo);

					if (num_pages(bo) < size)
						continue;

					if (!gem_set_tiling(kgem->fd,
							    bo->handle,
							    tiling, pitch))
						continue;

					kgem_bo_remove_from_active(kgem, bo);

					bo->unique_id = kgem_get_unique_id(kgem);
					bo->pitch = pitch;
					bo->tiling = tiling;
					bo->delta = 0;
					DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
					     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
					assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
					assert_tiling(kgem, bo);
					bo->refcnt = 1;
					return bo;
				}
			}
		}

		bucket++;
		goto search_again;
	}

	if (!exact) { /* allow an active near-miss? */
		/* Try progressively less tiled modes (Y -> X -> none). */
		i = tiling;
		while (--i >= 0) {
			tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
							 width, height, bpp, tiling, &pitch);
			cache = active(kgem, tiled_height / PAGE_SIZE, i);
			tiled_height = kgem_aligned_height(kgem, height, i);
			list_for_each_entry(bo, cache, list) {
				assert(!bo->purged);
				assert(bo->refcnt == 0);
				assert(bo->reusable);
				assert(!bo->scanout);
				assert(bo->flush == false);
				assert_tiling(kgem, bo);

				if (bo->tiling) {
					if (bo->pitch < pitch) {
						DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
						     bo->tiling, tiling,
						     bo->pitch, pitch));
						continue;
					}
				} else
					bo->pitch = pitch;

				if (bo->pitch * tiled_height > bytes(bo))
					continue;

				kgem_bo_remove_from_active(kgem, bo);

				bo->unique_id = kgem_get_unique_id(kgem);
				bo->delta = 0;
				DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
				assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
				assert_tiling(kgem, bo);
				bo->refcnt = 1;
				return bo;
			}
		}
	}

skip_active_search:
	/* Stage 5: inactive caches, again scanning up to three buckets. */
	bucket = cache_bucket(size);
	retry = NUM_CACHE_BUCKETS - bucket;
	if (retry > 3)
		retry = 3;
search_inactive:
	/* Now just look for a close match and prefer any currently active */
	assert(bucket < NUM_CACHE_BUCKETS);
	cache = &kgem->inactive[bucket];
	list_for_each_entry(bo, cache, list) {
		assert(bucket(bo) == bucket);
		assert(bo->reusable);
		assert(!bo->scanout);
		assert(bo->flush == false);
		assert_tiling(kgem, bo);

		if (size > num_pages(bo)) {
			DBG(("inactive too small: %d < %d\n",
			     num_pages(bo), size));
			continue;
		}

		if (bo->tiling != tiling ||
		    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
			if (!gem_set_tiling(kgem->fd, bo->handle,
					    tiling, pitch))
				continue;

			/* Retiling invalidates any existing mapping. */
			if (bo->map)
				kgem_bo_release_map(kgem, bo);
		}

		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
			kgem_bo_free(kgem, bo);
			break;
		}

		kgem_bo_remove_from_inactive(kgem, bo);

		bo->pitch = pitch;
		bo->tiling = tiling;

		bo->delta = 0;
		bo->unique_id = kgem_get_unique_id(kgem);
		assert(bo->pitch);
		DBG(("  from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
		     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
		assert(bo->refcnt == 0);
		assert(bo->reusable);
		assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
		ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
		assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
		assert_tiling(kgem, bo);
		bo->refcnt = 1;
		return bo;
	}

	/* Retiring may move active buffers onto the inactive list; after
	 * one retire we also accept still-active buffers. */
	if (flags & CREATE_INACTIVE &&
	    !list_is_empty(&kgem->active[bucket][tiling]) &&
	    __kgem_throttle_retire(kgem, flags)) {
		flags &= ~CREATE_INACTIVE;
		goto search_inactive;
	}

	if (--retry) {
		bucket++;
		flags &= ~CREATE_INACTIVE;
		goto search_inactive;
	}

create:
	/* Stage 6: nothing cached — allocate a fresh object. */
	if (flags & CREATE_CACHED)
		return NULL;

	if (bucket >= NUM_CACHE_BUCKETS)
		size = ALIGN(size, 1024);
	handle = gem_create(kgem->fd, size);
	if (handle == 0)
		return NULL;

	bo = __kgem_bo_alloc(handle, size);
	if (!bo) {
		gem_close(kgem->fd, handle);
		return NULL;
	}

	if (bucket >= NUM_CACHE_BUCKETS) {
		DBG(("%s: marking large bo for automatic flushing\n",
		     __FUNCTION__));
		bo->flush = true;
	}

	bo->unique_id = kgem_get_unique_id(kgem);
	if (tiling == I915_TILING_NONE ||
	    gem_set_tiling(kgem->fd, handle, tiling, pitch)) {
		bo->tiling = tiling;
		bo->pitch = pitch;
	} else {
		/* Could not apply the tiling; only fatal for CREATE_EXACT. */
		if (flags & CREATE_EXACT) {
			if (bo->pitch != pitch || bo->tiling != tiling) {
				kgem_bo_free(kgem, bo);
				return NULL;
			}
		}
	}

	assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
	assert_tiling(kgem, bo);

	debug_alloc__bo(kgem, bo);

	DBG(("  new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
	     bo->pitch, bo->tiling, bo->handle, bo->unique_id,
	     size, num_pages(bo), bucket(bo)));

	return bo;
}
#if 0
/* Disabled in this port: allocate a CPU-accessible 2D buffer, trying in
 * order an LLC-coherent GPU bo, the snoop cache, a snooped linear bo,
 * and finally a userptr wrapping of malloc'ed memory. */
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
				   int width,
				   int height,
				   int bpp,
				   uint32_t flags)
{
	struct kgem_bo *bo;
	int stride, size;

	if (DBG_NO_CPU)
		return NULL;

	DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));

	/* With an LLC any ordinary bo is CPU-coherent; just map it. */
	if (kgem->has_llc) {
		bo = kgem_create_2d(kgem, width, height, bpp,
				    I915_TILING_NONE, flags);
		if (bo == NULL)
			return bo;

		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);

		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}

		return bo;
	}

	assert(width > 0 && height > 0);
	stride = ALIGN(width, 2) * bpp >> 3;
	stride = ALIGN(stride, 4);
	size = stride * ALIGN(height, 2);
	assert(size >= PAGE_SIZE);

	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
	     __FUNCTION__, width, height, bpp, stride));

	bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
	if (bo) {
		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);
		assert(bo->snoop);
		bo->refcnt = 1;
		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	/* No cached snooped bo: create one via set_caching. */
	if (kgem->has_caching) {
		bo = kgem_create_linear(kgem, size, flags);
		if (bo == NULL)
			return NULL;

		assert(bo->tiling == I915_TILING_NONE);
		assert_tiling(kgem, bo);

		if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}
		bo->snoop = true;

		if (kgem_bo_map__cpu(kgem, bo) == NULL) {
			kgem_bo_destroy(kgem, bo);
			return NULL;
		}

		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	/* Last resort: wrap page-aligned malloc'ed memory via userptr. */
	if (kgem->has_userptr) {
		void *ptr;

		/* XXX */
		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
		if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
			return NULL;

		bo = kgem_create_map(kgem, ptr, size, false);
		if (bo == NULL) {
			free(ptr);
			return NULL;
		}

		bo->pitch = stride;
		bo->unique_id = kgem_get_unique_id(kgem);
		return bo;
	}

	return NULL;
}
#endif
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) |
{ |
DBG(("%s: handle=%d, proxy? %d\n", |
__FUNCTION__, bo->handle, bo->proxy != NULL)); |
if (bo->proxy) { |
_list_del(&bo->vma); |
_list_del(&bo->request); |
if (bo->io && bo->exec == NULL) |
_kgem_bo_delete_buffer(kgem, bo); |
kgem_bo_unref(kgem, bo->proxy); |
kgem_bo_binding_free(kgem, bo); |
free(bo); |
return; |
} |
__kgem_bo_destroy(kgem, bo); |
} |
static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) |
{ |
assert(bo->rq); |
assert(bo->exec == NULL); |
assert(bo->needs_flush); |
/* The kernel will emit a flush *and* update its own flushing lists. */ |
if (!__kgem_busy(kgem, bo->handle)) |
__kgem_bo_clear_busy(bo); |
DBG(("%s: handle=%d, busy?=%d\n", |
__FUNCTION__, bo->handle, bo->rq != NULL)); |
} |
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) |
{ |
kgem_bo_submit(kgem, bo); |
if (!bo->needs_flush) |
return; |
/* If the kernel fails to emit the flush, then it will be forced when |
* we assume direct access. And as the usual failure is EIO, we do |
* not actually care. |
*/ |
assert(bo->exec == NULL); |
if (bo->rq) |
__kgem_flush(kgem, bo); |
/* Whatever actually happens, we can regard the GTT write domain |
* as being flushed. |
*/ |
bo->gtt_dirty = false; |
bo->needs_flush = false; |
bo->domain = DOMAIN_NONE; |
} |
inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo) |
{ |
return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring; |
} |
bool kgem_check_bo(struct kgem *kgem, ...) |
{ |
va_list ap; |
struct kgem_bo *bo; |
int num_exec = 0; |
int num_pages = 0; |
bool flush = false; |
va_start(ap, kgem); |
while ((bo = va_arg(ap, struct kgem_bo *))) { |
while (bo->proxy) |
bo = bo->proxy; |
if (bo->exec) |
continue; |
if (needs_semaphore(kgem, bo)) |
return false; |
num_pages += num_pages(bo); |
num_exec++; |
flush |= bo->flush; |
} |
va_end(ap); |
DBG(("%s: num_pages=+%d, num_exec=+%d\n", |
__FUNCTION__, num_pages, num_exec)); |
if (!num_pages) |
return true; |
if (kgem_flush(kgem, flush)) |
return false; |
if (kgem->aperture > kgem->aperture_low && |
kgem_ring_is_idle(kgem, kgem->ring)) { |
DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n", |
__FUNCTION__, kgem->aperture, kgem->aperture_low)); |
return false; |
} |
if (num_pages + kgem->aperture > kgem->aperture_high) { |
DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n", |
__FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high)); |
return false; |
} |
if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) { |
DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__, |
kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem))); |
return false; |
} |
return true; |
} |
/* Record a relocation at batch position pos targeting bo (NULL for a
 * self-relocation into the batch itself) and return the presumed GPU
 * address (presumed_offset + delta) to write into the batch now.
 * read_write_domain packs the read domains in the high 16 bits and the
 * write domain in the low bits. */
uint32_t kgem_add_reloc(struct kgem *kgem,
			uint32_t pos,
			struct kgem_bo *bo,
			uint32_t read_write_domain,
			uint32_t delta)
{
	int index;

	DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
	     __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));

	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);

	/* NOTE(review): handle == -2 looks like a port-specific sentinel
	 * bo for which no relocation entry is recorded — confirm against
	 * the rest of this port. */
	if( bo != NULL && bo->handle == -2)
	{
		if (bo->exec == NULL)
			kgem_add_bo(kgem, bo);

		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
			__kgem_bo_mark_dirty(bo);
		}
		return 0;
	};

	index = kgem->nreloc++;
	assert(index < ARRAY_SIZE(kgem->reloc));
	kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
	if (bo) {
		assert(bo->refcnt);
		/* Proxies share the parent's handle at an offset; walk up
		 * to the real bo, accumulating each proxy's delta. */
		while (bo->proxy) {
			DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
			     __FUNCTION__, bo->delta, bo->handle));
			delta += bo->delta;
			assert(bo->handle == bo->proxy->handle);
			/* need to release the cache upon batch submit */
			if (bo->exec == NULL) {
				list_move_tail(&bo->request,
					       &kgem->next_request->buffers);
				bo->rq = MAKE_REQUEST(kgem->next_request,
						      kgem->ring);
				bo->exec = &_kgem_dummy_exec;
			}

			if (read_write_domain & 0x7fff && !bo->gpu_dirty)
				__kgem_bo_mark_dirty(bo);

			bo = bo->proxy;
			assert(bo->refcnt);
		}
		assert(bo->refcnt);

		if (bo->exec == NULL)
			kgem_add_bo(kgem, bo);
		assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
		assert(RQ_RING(bo->rq) == kgem->ring);

		/* Pre-gen4 tiled access goes through a fence register;
		 * account for the fence and flag the exec object. */
		if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
			if (bo->tiling &&
			    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
				assert(kgem->nfence < kgem->fence_max);
				kgem->aperture_fenced +=
					kgem_bo_fenced_size(kgem, bo);
				kgem->nfence++;
			}
			bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
		}

		kgem->reloc[index].delta = delta;
		kgem->reloc[index].target_handle = bo->target_handle;
		kgem->reloc[index].presumed_offset = bo->presumed_offset;

		if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
			assert(!bo->snoop || kgem->can_blt_cpu);
			__kgem_bo_mark_dirty(bo);
		}

		delta += bo->presumed_offset;
	} else {
		/* Self-relocation into the batch: remember the slot so it
		 * can presumably be fixed up at submission (reloc__self). */
		kgem->reloc[index].delta = delta;
		kgem->reloc[index].target_handle = ~0U;
		kgem->reloc[index].presumed_offset = 0;
		if (kgem->nreloc__self < 256)
			kgem->reloc__self[kgem->nreloc__self++] = index;
	}
	kgem->reloc[index].read_domains = read_write_domain >> 16;
	kgem->reloc[index].write_domain = read_write_domain & 0x7fff;

	return delta;
}
/* Evict cached-but-inactive mappings of the given type (CPU or GTT) to
 * keep the process well under the kernel's per-process vma limit.
 * Note: this walks the inactive buckets round-robin and keeps evicting
 * until none remain; 'bucket' is only used for debug output. */
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
	int i, j;

	DBG(("%s: type=%d, count=%d (bucket: %d)\n",
	     __FUNCTION__, type, kgem->vma[type].count, bucket));

	if (kgem->vma[type].count <= 0)
		return;

	if (kgem->need_purge)
		kgem_purge_cache(kgem);

	/* vma are limited on a per-process basis to around 64k.
	 * This includes all malloc arenas as well as other file
	 * mappings. In order to be fair and not hog the cache,
	 * and more importantly not to exhaust that limit and to
	 * start failing mappings, we keep our own number of open
	 * vma to within a conservative value.
	 */
	i = 0;
	while (kgem->vma[type].count > 0) {
		struct kgem_bo *bo = NULL;

		/* Scan the inactive buckets (resuming where we left off)
		 * for the least-recently used mapping to discard. */
		for (j = 0;
		     bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
		     j++) {
			struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
			if (!list_is_empty(head))
				bo = list_last_entry(head, struct kgem_bo, vma);
		}
		if (bo == NULL)
			break;

		DBG(("%s: discarding inactive %s vma cache for %d\n",
		     __FUNCTION__,
		     IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
		assert(IS_CPU_MAP(bo->map) == type);
		assert(bo->map);
		assert(bo->rq == NULL);

		VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
//		munmap(MAP(bo->map), bytes(bo));
		bo->map = NULL;
		list_del(&bo->vma);
		kgem->vma[type].count--;

		/* If the pages cannot be marked purgeable, the bo is
		 * no longer worth caching at all. */
		if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
			DBG(("%s: freeing unpurgeable old mapping\n",
			     __FUNCTION__));
			kgem_bo_free(kgem, bo);
		}
	}
}
/* Map a bo for CPU access WITHOUT any domain synchronisation — unlike
 * kgem_bo_map(), no SET_DOMAIN ioctl is issued, so the caller is
 * responsible for ordering against outstanding GPU work.  Untiled,
 * non-scanout bos on LLC hardware are given a coherent CPU mapping;
 * everything else gets a (cached) GTT mapping.  Returns NULL if no
 * mapping could be created. */
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
	     bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));

	assert(bo->proxy == NULL);
	assert(list_is_empty(&bo->list));
	assert(!IS_USER_MAP(bo->map));
	assert_tiling(kgem, bo);

	if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
		DBG(("%s: converting request for GTT map into CPU map\n",
		     __FUNCTION__));
		return kgem_bo_map__cpu(kgem, bo);
	}

	/* A stale CPU mapping cannot be reused for GTT access. */
	if (IS_CPU_MAP(bo->map))
		kgem_bo_release_map(kgem, bo);

	ptr = bo->map;
	if (ptr == NULL) {
		assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);

		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));

		ptr = __kgem_bo_map__gtt(kgem, bo);
		if (ptr == NULL)
			return NULL;

		/* Cache this mapping to avoid the overhead of an
		 * excruciatingly slow GTT pagefault. This is more an
		 * issue with compositing managers which need to frequently
		 * flush CPU damage to their GPU bo.
		 */
		bo->map = ptr;
		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
	}

	return ptr;
}
/* Map a bo for CPU access and synchronise it for read/write.  Untiled,
 * non-scanout bos get a coherent CPU mapping when possible (LLC, or
 * already in the CPU domain) and are synced to the CPU domain;
 * otherwise a GTT mapping is created (and cached on the bo) and the bo
 * is moved to the GTT domain.  Returns NULL if no mapping could be
 * created. */
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
	     bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));

	assert(bo->proxy == NULL);
	assert(list_is_empty(&bo->list));
	assert(!IS_USER_MAP(bo->map));
	assert(bo->exec == NULL);
	assert_tiling(kgem, bo);

	if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
	    (kgem->has_llc || bo->domain == DOMAIN_CPU)) {
		DBG(("%s: converting request for GTT map into CPU map\n",
		     __FUNCTION__));
		ptr = kgem_bo_map__cpu(kgem, bo);
		if (ptr)
			kgem_bo_sync__cpu(kgem, bo);
		return ptr;
	}

	/* A stale CPU mapping cannot be reused for GTT access. */
	if (IS_CPU_MAP(bo->map))
		kgem_bo_release_map(kgem, bo);

	ptr = bo->map;
	if (ptr == NULL) {
		assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
		assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);

		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));

		ptr = __kgem_bo_map__gtt(kgem, bo);
		if (ptr == NULL)
			return NULL;

		/* Cache this mapping to avoid the overhead of an
		 * excruciatingly slow GTT pagefault. This is more an
		 * issue with compositing managers which need to frequently
		 * flush CPU damage to their GPU bo.
		 */
		bo->map = ptr;
		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
	}

	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
		     bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));

		/* XXX use PROT_READ to avoid the write flush? */

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
			kgem_bo_retire(kgem, bo);
			bo->domain = DOMAIN_GTT;
			bo->gtt_dirty = true;
		}
	}

	return ptr;
}
/* Map a bo through the GTT aperture without any domain synchronisation.
 * The mapping is cached on the bo for reuse; any existing CPU mapping is
 * released first.  Returns NULL if the mapping fails. */
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
	     bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));

	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->list));
	assert(!IS_USER_MAP(bo->map));
	assert_tiling(kgem, bo);

	/* A stale CPU mapping cannot be reused for GTT access. */
	if (IS_CPU_MAP(bo->map))
		kgem_bo_release_map(kgem, bo);

	ptr = bo->map;
	if (ptr == NULL) {
		/* Stricter limit than kgem_bo_map(): a quarter of the
		 * mappable aperture. */
		assert(bytes(bo) <= kgem->aperture_mappable / 4);

		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));

		ptr = __kgem_bo_map__gtt(kgem, bo);
		if (ptr == NULL)
			return NULL;

		/* Cache this mapping to avoid the overhead of an
		 * excruciatingly slow GTT pagefault. This is more an
		 * issue with compositing managers which need to frequently
		 * flush CPU damage to their GPU bo.
		 */
		bo->map = ptr;
		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
	}

	return ptr;
}
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo) |
{ |
if (bo->map) |
return MAP(bo->map); |
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); |
return bo->map = __kgem_bo_map__gtt(kgem, bo); |
} |
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) |
{ |
struct drm_i915_gem_mmap mmap_arg; |
DBG(("%s(handle=%d, size=%d, mapped? %d)\n", |
__FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map))); |
assert(!bo->purged); |
assert(list_is_empty(&bo->list)); |
assert(bo->proxy == NULL); |
if (IS_CPU_MAP(bo->map)) |
return MAP(bo->map); |
if (bo->map) |
kgem_bo_release_map(kgem, bo); |
kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo)); |
retry: |
VG_CLEAR(mmap_arg); |
mmap_arg.handle = bo->handle; |
mmap_arg.offset = 0; |
mmap_arg.size = bytes(bo); |
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) { |
if (__kgem_throttle_retire(kgem, 0)) |
goto retry; |
if (kgem->need_expire) { |
kgem_cleanup_cache(kgem); |
goto retry; |
} |
ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain\n", |
__FUNCTION__, bo->handle, bytes(bo)); |
return NULL; |
} |
VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); |
DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); |
bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr); |
return (void *)(uintptr_t)mmap_arg.addr_ptr; |
} |
/* Create a CPU mmap of the bo without trimming the vma cache.  Unlike
 * kgem_bo_map__cpu(), an existing GTT mapping is only discarded if the
 * bo is already in the CPU domain; otherwise the fresh CPU mapping is
 * returned without being cached on the bo.  No domain synchronisation
 * is performed.  Returns NULL on unrecoverable failure. */
void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_i915_gem_mmap mmap_arg;

	DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
	     __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
	assert(bo->refcnt);
	assert(!bo->purged);
	assert(list_is_empty(&bo->list));
	assert(bo->proxy == NULL);

	if (IS_CPU_MAP(bo->map))
		return MAP(bo->map);

retry:
	VG_CLEAR(mmap_arg);
	mmap_arg.handle = bo->handle;
	mmap_arg.offset = 0;
	mmap_arg.size = bytes(bo);
	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
		/* snapshot errno before retire/cleanup can overwrite it */
		int err = errno;

		assert(err != EINVAL);

		if (__kgem_throttle_retire(kgem, 0))
			goto retry;

		if (kgem->need_expire) {
			kgem_cleanup_cache(kgem);
			goto retry;
		}

		ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
		       __FUNCTION__, bo->handle, bytes(bo), err);
		return NULL;
	}

	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
	if (bo->map && bo->domain == DOMAIN_CPU) {
		DBG(("%s: discarding GTT vma for %d\n", __FUNCTION__, bo->handle));
		kgem_bo_release_map(kgem, bo);
	}
	if (bo->map == NULL) {
		DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
		bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
	}
	return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
/* Synchronise a bo for coherent CPU access: submit any batch that
 * references it, then move it to the CPU domain via SET_DOMAIN (which
 * blocks on outstanding GPU work).  Proxies are resolved to their
 * backing bo first. */
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(!bo->scanout);
	kgem_bo_submit(kgem, bo);

	/* SHM pixmaps use proxies for subpage offsets */
	assert(!bo->purged);
	while (bo->proxy)
		bo = bo->proxy;
	assert(!bo->purged);

	if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__, bo->handle,
		     bo->needs_flush, bo->domain,
		     __kgem_busy(kgem, bo->handle)));

		VG_CLEAR(set_domain);
		set_domain.handle = bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;

		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
			kgem_bo_retire(kgem, bo);
			bo->domain = DOMAIN_CPU;
		}
	}
}
/* Clear the gpu_dirty flag on the bos queued for the next request.
 * NOTE(review): iteration stops at the first clean bo, which relies on
 * dirty bos being kept at the head of the buffer list — confirm that
 * __kgem_bo_mark_dirty maintains this ordering. */
void kgem_clear_dirty(struct kgem *kgem)
{
	struct list * const buffers = &kgem->next_request->buffers;
	struct kgem_bo *bo;

	list_for_each_entry(bo, buffers, request) {
		if (!bo->gpu_dirty)
			break;

		bo->gpu_dirty = false;
	}
}
/* Create a lightweight proxy bo aliasing [offset, offset+length) of
 * @target.  The proxy shares the target's GEM handle, inherits its
 * tiling/pitch/flush/snoop state, and holds a reference on the target.
 * If the target is on the current batch, the proxy is queued on the
 * next request too so the shared pages are tracked until submission.
 * Returns NULL on allocation failure. */
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
				  struct kgem_bo *target,
				  int offset, int length)
{
	struct kgem_bo *bo;

	DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
	     __FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
	     offset, length, target->io));

	bo = __kgem_bo_alloc(target->handle, length);
	if (bo == NULL)
		return NULL;

	bo->unique_id = kgem_get_unique_id(kgem);
	bo->reusable = false;
	bo->size.bytes = length;

	bo->io = target->io && target->proxy == NULL;
	bo->gpu_dirty = target->gpu_dirty;
	bo->tiling = target->tiling;
	bo->pitch = target->pitch;
	bo->flush = target->flush;
	bo->snoop = target->snoop;

	assert(!bo->scanout);
	bo->proxy = kgem_bo_reference(target);
	bo->delta = offset;

	if (target->exec) {
		list_move_tail(&bo->request, &kgem->next_request->buffers);
		bo->exec = &_kgem_dummy_exec;
	}
	bo->rq = target->rq;

	return bo;
}
#if 0 |
static struct kgem_buffer * |
buffer_alloc(void) |
{ |
struct kgem_buffer *bo; |
bo = malloc(sizeof(*bo)); |
if (bo == NULL) |
return NULL; |
bo->mem = NULL; |
bo->need_io = false; |
bo->mmapped = true; |
return bo; |
} |
static struct kgem_buffer * |
buffer_alloc_with_data(int num_pages) |
{ |
struct kgem_buffer *bo; |
bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE); |
if (bo == NULL) |
return NULL; |
bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT); |
bo->mmapped = false; |
return bo; |
} |
static inline bool |
use_snoopable_buffer(struct kgem *kgem, uint32_t flags) |
{ |
if ((flags & KGEM_BUFFER_WRITE) == 0) |
return kgem->gen >= 030; |
return true; |
} |
/* Recycle an existing linear bo as the base of a kgem_buffer: copy its
 * state into bo->base, transfer its list links, free the old struct,
 * and start the buffer with a single reference.  The old bo must be
 * untiled and not a proxy. */
static void
init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
{
	DBG(("%s: reusing handle=%d for buffer\n",
	     __FUNCTION__, old->handle));

	assert(old->proxy == NULL);

	memcpy(&bo->base, old, sizeof(*old));
	/* Keep the request link only if the old bo is still on one. */
	if (old->rq)
		list_replace(&old->request, &bo->base.request);
	else
		list_init(&bo->base.request);
	list_replace(&old->vma, &bo->base.vma);
	list_init(&bo->base.list);
	free(old);

	assert(bo->base.tiling == I915_TILING_NONE);

	bo->base.refcnt = 1;
}
/* Look for a reusable snoopable bo of at least @alloc pages in the
 * snoop cache and wrap it as a kgem_buffer with a fresh CPU mapping.
 * Returns NULL if the cache has nothing suitable or mapping fails. */
static struct kgem_buffer *
search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
	struct kgem_buffer *bo;
	struct kgem_bo *old;

	old = search_snoop_cache(kgem, alloc, 0);
	if (old) {
		if (!old->io) {
			/* plain bo: wrap it in a new buffer struct */
			bo = buffer_alloc();
			if (bo == NULL)
				return NULL;

			init_buffer_from_bo(bo, old);
		} else {
			/* already a kgem_buffer: just revive it */
			bo = (struct kgem_buffer *)old;
			bo->base.refcnt = 1;
		}

		DBG(("%s: created CPU handle=%d for buffer, size %d\n",
		     __FUNCTION__, bo->base.handle, num_pages(&bo->base)));

		assert(bo->base.snoop);
		assert(bo->base.tiling == I915_TILING_NONE);
		assert(num_pages(&bo->base) >= alloc);
		assert(bo->mmapped == true);
		assert(bo->need_io == false);

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem == NULL) {
			/* mapping failed: release the bo entirely */
			bo->base.refcnt = 0;
			kgem_bo_free(kgem, &bo->base);
			bo = NULL;
		}

		return bo;
	}

	return NULL;
}
/* Create a CPU-coherent (snoopable) upload buffer of @alloc pages,
 * trying in order: a CPU-mapped bo on LLC hardware, a bo flagged
 * snooped via set-caching, and finally a userptr bo wrapping anonymous
 * page-aligned memory.  Returns NULL if every path fails. */
static struct kgem_buffer *
create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
	struct kgem_buffer *bo;
	uint32_t handle;

	if (kgem->has_llc) {
		struct kgem_bo *old;

		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		/* Prefer recycling an inactive CPU-mappable bo. */
		old = search_linear_cache(kgem, alloc,
					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
		if (old) {
			init_buffer_from_bo(bo, old);
		} else {
			handle = gem_create(kgem->fd, alloc);
			if (handle == 0) {
				free(bo);
				return NULL;
			}

			debug_alloc(kgem, alloc);
			__kgem_bo_init(&bo->base, handle, alloc);
			DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n",
			     __FUNCTION__, bo->base.handle, alloc));
		}

		assert(bo->base.refcnt == 1);
		assert(bo->mmapped == true);
		assert(bo->need_io == false);

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem != NULL)
			return bo;

		bo->base.refcnt = 0; /* for valgrind */
		kgem_bo_free(kgem, &bo->base);
	}

	if (kgem->has_caching) {
		struct kgem_bo *old;

		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		old = search_linear_cache(kgem, alloc,
					 CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
		if (old) {
			init_buffer_from_bo(bo, old);
		} else {
			handle = gem_create(kgem->fd, alloc);
			if (handle == 0) {
				free(bo);
				return NULL;
			}

			debug_alloc(kgem, alloc);
			__kgem_bo_init(&bo->base, handle, alloc);
			DBG(("%s: created CPU handle=%d for buffer, size %d\n",
			     __FUNCTION__, bo->base.handle, alloc));
		}

		assert(bo->base.refcnt == 1);
		assert(bo->mmapped == true);
		assert(bo->need_io == false);

		/* Ask the kernel to keep these pages CPU-snooped. */
		if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
			goto free_caching;

		bo->base.snoop = true;

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem == NULL)
			goto free_caching;

		return bo;

free_caching:
		bo->base.refcnt = 0; /* for valgrind */
		kgem_bo_free(kgem, &bo->base);
	}

	if (kgem->has_userptr) {
		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
		if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) {
			free(bo);
			return NULL;
		}

		handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
		if (handle == 0) {
			free(bo->mem);
			free(bo);
			return NULL;
		}

		debug_alloc(kgem, alloc);
		__kgem_bo_init(&bo->base, handle, alloc);
		DBG(("%s: created snoop handle=%d for buffer\n",
		     __FUNCTION__, bo->base.handle));

		assert(bo->mmapped == true);
		assert(bo->need_io == false);

		bo->base.refcnt = 1;
		bo->base.snoop = true;
		bo->base.map = MAKE_USER_MAP(bo->mem);

		return bo;
	}

	return NULL;
}
struct kgem_bo *kgem_create_buffer(struct kgem *kgem, |
uint32_t size, uint32_t flags, |
void **ret) |
{ |
struct kgem_buffer *bo; |
unsigned offset, alloc; |
struct kgem_bo *old; |
DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n", |
__FUNCTION__, size, flags, |
!!(flags & KGEM_BUFFER_WRITE), |
!!(flags & KGEM_BUFFER_INPLACE), |
!!(flags & KGEM_BUFFER_LAST))); |
assert(size); |
/* we should never be asked to create anything TOO large */ |
assert(size <= kgem->max_object_size); |
#if !DBG_NO_UPLOAD_CACHE |
list_for_each_entry(bo, &kgem->batch_buffers, base.list) { |
assert(bo->base.io); |
assert(bo->base.refcnt >= 1); |
/* We can reuse any write buffer which we can fit */ |
if (flags == KGEM_BUFFER_LAST && |
bo->write == KGEM_BUFFER_WRITE && |
bo->base.refcnt == 1 && !bo->mmapped && |
size <= bytes(&bo->base)) { |
DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n", |
__FUNCTION__, size, bo->used, bytes(&bo->base))); |
gem_write(kgem->fd, bo->base.handle, |
0, bo->used, bo->mem); |
kgem_buffer_release(kgem, bo); |
bo->need_io = 0; |
bo->write = 0; |
offset = 0; |
bo->used = size; |
goto done; |
} |
if (flags & KGEM_BUFFER_WRITE) { |
if ((bo->write & KGEM_BUFFER_WRITE) == 0 || |
(((bo->write & ~flags) & KGEM_BUFFER_INPLACE) && |
!bo->base.snoop)) { |
DBG(("%s: skip write %x buffer, need %x\n", |
__FUNCTION__, bo->write, flags)); |
continue; |
} |
assert(bo->mmapped || bo->need_io); |
} else { |
if (bo->write & KGEM_BUFFER_WRITE) { |
DBG(("%s: skip write %x buffer, need %x\n", |
__FUNCTION__, bo->write, flags)); |
continue; |
} |
} |
if (bo->used + size <= bytes(&bo->base)) { |
DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n", |
__FUNCTION__, bo->used, size, bytes(&bo->base))); |
offset = bo->used; |
bo->used += size; |
goto done; |
} |
} |
if (flags & KGEM_BUFFER_WRITE) { |
list_for_each_entry(bo, &kgem->active_buffers, base.list) { |
assert(bo->base.io); |
assert(bo->base.refcnt >= 1); |
assert(bo->mmapped); |
assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop); |
if (!kgem->has_llc && (bo->write & ~flags) & KGEM_BUFFER_INPLACE) { |
DBG(("%s: skip write %x buffer, need %x\n", |
__FUNCTION__, bo->write, flags)); |
continue; |
} |
if (bo->used + size <= bytes(&bo->base)) { |
DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n", |
__FUNCTION__, bo->used, size, bytes(&bo->base))); |
offset = bo->used; |
bo->used += size; |
list_move(&bo->base.list, &kgem->batch_buffers); |
goto done; |
} |
} |
} |
#endif |
#if !DBG_NO_MAP_UPLOAD |
/* Be a little more generous and hope to hold fewer mmappings */ |
alloc = ALIGN(2*size, kgem->buffer_size); |
if (alloc > MAX_CACHE_SIZE) |
alloc = ALIGN(size, kgem->buffer_size); |
if (alloc > MAX_CACHE_SIZE) |
alloc = PAGE_ALIGN(size); |
assert(alloc); |
if (alloc > kgem->aperture_mappable / 4) |
flags &= ~KGEM_BUFFER_INPLACE; |
alloc /= PAGE_SIZE; |
if (kgem->has_llc && |
(flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) { |
bo = buffer_alloc(); |
if (bo == NULL) |
goto skip_llc; |
old = NULL; |
if ((flags & KGEM_BUFFER_WRITE) == 0) |
old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP); |
if (old == NULL) |
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP); |
if (old == NULL) |
old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP); |
if (old) { |
DBG(("%s: found LLC handle=%d for buffer\n", |
__FUNCTION__, old->handle)); |
init_buffer_from_bo(bo, old); |
} else { |
uint32_t handle = gem_create(kgem->fd, alloc); |
if (handle == 0) { |
free(bo); |
goto skip_llc; |
} |
__kgem_bo_init(&bo->base, handle, alloc); |
DBG(("%s: created LLC handle=%d for buffer\n", |
__FUNCTION__, bo->base.handle)); |
debug_alloc(kgem, alloc); |
} |
assert(bo->mmapped); |
assert(!bo->need_io); |
bo->mem = kgem_bo_map__cpu(kgem, &bo->base); |
if (bo->mem) { |
if (flags & KGEM_BUFFER_WRITE) |
kgem_bo_sync__cpu(kgem, &bo->base); |
flags &= ~KGEM_BUFFER_INPLACE; |
goto init; |
} else { |
bo->base.refcnt = 0; /* for valgrind */ |
kgem_bo_free(kgem, &bo->base); |
} |
} |
skip_llc: |
if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) { |
/* The issue with using a GTT upload buffer is that we may |
* cause eviction-stalls in order to free up some GTT space. |
* An is-mappable? ioctl could help us detect when we are |
* about to block, or some per-page magic in the kernel. |
* |
* XXX This is especially noticeable on memory constrained |
* devices like gen2 or with relatively slow gpu like i3. |
*/ |
DBG(("%s: searching for an inactive GTT map for upload\n", |
__FUNCTION__)); |
old = search_linear_cache(kgem, alloc, |
CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); |
#if HAVE_I915_GEM_BUFFER_INFO |
if (old) { |
struct drm_i915_gem_buffer_info info; |
/* An example of such a non-blocking ioctl might work */ |
VG_CLEAR(info); |
info.handle = handle; |
if (drmIoctl(kgem->fd, |
DRM_IOCTL_I915_GEM_BUFFER_INFO, |
&fino) == 0) { |
old->presumed_offset = info.addr; |
if ((info.flags & I915_GEM_MAPPABLE) == 0) { |
kgem_bo_move_to_inactive(kgem, old); |
old = NULL; |
} |
} |
} |
#endif |
if (old == NULL) |
old = search_linear_cache(kgem, NUM_PAGES(size), |
CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); |
if (old == NULL) { |
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); |
if (old && !__kgem_bo_is_mappable(kgem, old)) { |
_kgem_bo_destroy(kgem, old); |
old = NULL; |
} |
} |
if (old) { |
DBG(("%s: reusing handle=%d for buffer\n", |
__FUNCTION__, old->handle)); |
assert(__kgem_bo_is_mappable(kgem, old)); |
assert(!old->snoop); |
assert(old->rq == NULL); |
bo = buffer_alloc(); |
if (bo == NULL) |
return NULL; |
init_buffer_from_bo(bo, old); |
assert(num_pages(&bo->base) >= NUM_PAGES(size)); |
assert(bo->mmapped); |
assert(bo->base.refcnt == 1); |
bo->mem = kgem_bo_map(kgem, &bo->base); |
if (bo->mem) { |
if (IS_CPU_MAP(bo->base.map)) |
flags &= ~KGEM_BUFFER_INPLACE; |
goto init; |
} else { |
bo->base.refcnt = 0; |
kgem_bo_free(kgem, &bo->base); |
} |
} |
} |
#else |
flags &= ~KGEM_BUFFER_INPLACE; |
#endif |
/* Be more parsimonious with pwrite/pread/cacheable buffers */ |
if ((flags & KGEM_BUFFER_INPLACE) == 0) |
alloc = NUM_PAGES(size); |
if (use_snoopable_buffer(kgem, flags)) { |
bo = search_snoopable_buffer(kgem, alloc); |
if (bo) { |
if (flags & KGEM_BUFFER_WRITE) |
kgem_bo_sync__cpu(kgem, &bo->base); |
flags &= ~KGEM_BUFFER_INPLACE; |
goto init; |
} |
if ((flags & KGEM_BUFFER_INPLACE) == 0) { |
bo = create_snoopable_buffer(kgem, alloc); |
if (bo) |
goto init; |
} |
} |
flags &= ~KGEM_BUFFER_INPLACE; |
old = NULL; |
if ((flags & KGEM_BUFFER_WRITE) == 0) |
old = search_linear_cache(kgem, alloc, 0); |
if (old == NULL) |
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); |
if (old) { |
DBG(("%s: reusing ordinary handle %d for io\n", |
__FUNCTION__, old->handle)); |
bo = buffer_alloc_with_data(num_pages(old)); |
if (bo == NULL) |
return NULL; |
init_buffer_from_bo(bo, old); |
bo->need_io = flags & KGEM_BUFFER_WRITE; |
} else { |
unsigned hint; |
if (use_snoopable_buffer(kgem, flags)) { |
bo = create_snoopable_buffer(kgem, alloc); |
if (bo) |
goto init; |
} |
bo = buffer_alloc(); |
if (bo == NULL) |
return NULL; |
hint = CREATE_INACTIVE; |
if (flags & KGEM_BUFFER_WRITE) |
hint |= CREATE_CPU_MAP; |
old = search_linear_cache(kgem, alloc, hint); |
if (old) { |
DBG(("%s: reusing handle=%d for buffer\n", |
__FUNCTION__, old->handle)); |
init_buffer_from_bo(bo, old); |
} else { |
uint32_t handle = gem_create(kgem->fd, alloc); |
if (handle == 0) { |
free(bo); |
return NULL; |
} |
DBG(("%s: created handle=%d for buffer\n", |
__FUNCTION__, handle)); |
__kgem_bo_init(&bo->base, handle, alloc); |
debug_alloc(kgem, alloc * PAGE_SIZE); |
} |
assert(bo->mmapped); |
assert(!bo->need_io); |
assert(bo->base.refcnt == 1); |
if (flags & KGEM_BUFFER_WRITE) { |
bo->mem = kgem_bo_map__cpu(kgem, &bo->base); |
if (bo->mem != NULL) { |
kgem_bo_sync__cpu(kgem, &bo->base); |
goto init; |
} |
} |
DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__)); |
old = &bo->base; |
bo = buffer_alloc_with_data(num_pages(old)); |
if (bo == NULL) { |
old->refcnt= 0; |
kgem_bo_free(kgem, old); |
return NULL; |
} |
init_buffer_from_bo(bo, old); |
assert(bo->mem); |
assert(!bo->mmapped); |
assert(bo->base.refcnt == 1); |
bo->need_io = flags & KGEM_BUFFER_WRITE; |
} |
init: |
bo->base.io = true; |
assert(bo->base.refcnt == 1); |
assert(num_pages(&bo->base) >= NUM_PAGES(size)); |
assert(!bo->need_io || !bo->base.needs_flush); |
assert(!bo->need_io || bo->base.domain != DOMAIN_GPU); |
assert(bo->mem); |
assert(!bo->mmapped || bo->base.map != NULL); |
bo->used = size; |
bo->write = flags & KGEM_BUFFER_WRITE_INPLACE; |
offset = 0; |
assert(list_is_empty(&bo->base.list)); |
list_add(&bo->base.list, &kgem->batch_buffers); |
DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n", |
__FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write)); |
done: |
bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT); |
assert(bo->mem); |
*ret = (char *)bo->mem + offset; |
return kgem_create_proxy(kgem, &bo->base, offset, size); |
} |
bool kgem_buffer_is_inplace(struct kgem_bo *_bo) |
{ |
struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy; |
return bo->write & KGEM_BUFFER_WRITE_INPLACE; |
} |
/* Allocate an upload buffer shaped for a 2D image: rows are padded to
 * an even count and a 4-byte-aligned stride so blits stay in bounds,
 * then the padding row is trimmed back from the allocator afterwards.
 * On success *ret is the CPU address of the pixel data and the returned
 * proxy bo has its pitch set.  Returns NULL on allocation failure. */
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
				      int width, int height, int bpp,
				      uint32_t flags,
				      void **ret)
{
	struct kgem_bo *bo;
	int stride;

	assert(width > 0 && height > 0);
	assert(ret != NULL);
	stride = ALIGN(width, 2) * bpp >> 3;
	stride = ALIGN(stride, 4);

	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
	     __FUNCTION__, width, height, bpp, stride));

	bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret);
	if (bo == NULL) {
		DBG(("%s: allocation failure for upload buffer\n",
		     __FUNCTION__));
		return NULL;
	}
	assert(*ret != NULL);
	assert(bo->proxy != NULL);

	if (height & 1) {
		struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
		int min;

		assert(io->used);

		/* Having padded this surface to ensure that accesses to
		 * the last pair of rows is valid, remove the padding so
		 * that it can be allocated to other pixmaps.
		 */
		min = bo->delta + height * stride;
		min = ALIGN(min, UPLOAD_ALIGNMENT);
		if (io->used != min) {
			DBG(("%s: trimming buffer from %d to %d\n",
			     __FUNCTION__, io->used, min));
			io->used = min;
		}
		bo->size.bytes -= stride;
	}

	bo->map = MAKE_CPU_MAP(*ret);
	bo->pitch = stride;
	bo->unique_id = kgem_get_unique_id(kgem);
	return bo;
}
/* Copy the pixels of @box from @data into a freshly allocated upload
 * buffer, returning a proxy bo suitable for use as a source.  Returns
 * NULL if the image is too large to create or allocation fails. */
struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
					 const void *data,
					 const BoxRec *box,
					 int stride, int bpp)
{
	int width = box->x2 - box->x1;
	int height = box->y2 - box->y1;
	struct kgem_bo *bo;
	void *dst;

	if (!kgem_can_create_2d(kgem, width, height, bpp))
		return NULL;

	DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp));

	assert(data);
	assert(width > 0);
	assert(height > 0);
	assert(stride);
	assert(bpp);

	bo = kgem_create_buffer_2d(kgem,
				   width, height, bpp,
				   KGEM_BUFFER_WRITE_INPLACE, &dst);
	if (bo)
		/* blit from the caller's pixels into the staging buffer */
		memcpy_blt(data, dst, bpp,
			   stride, bo->pitch,
			   box->x1, box->y1,
			   0, 0,
			   width, height);

	return bo;
}
/* Attach a proxy bo to a caller-owned slot: *ptr takes a reference on
 * the bo and the bo is linked onto its target's vma list.
 * NOTE(review): bo->map is reused here to remember the attachment slot
 * (a struct kgem_bo **), not a mapping — presumably consumed by the
 * detach/free path elsewhere in this file; confirm before relying on
 * MAP(bo->map) for such bos. */
void kgem_proxy_bo_attach(struct kgem_bo *bo,
			  struct kgem_bo **ptr)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(bo->map == NULL || IS_CPU_MAP(bo->map));
	assert(bo->proxy);
	list_add(&bo->vma, &bo->proxy->vma);
	bo->map = ptr;
	*ptr = kgem_bo_reference(bo);
}
/* Make the data behind a read-back proxy visible to the CPU.  For an
 * mmapped buffer this is a SET_DOMAIN (blocking on the GPU) into the
 * domain matching the mapping type; for a pwrite-staged buffer the
 * proxy's region is read back with pread into bo->mem.  The caller must
 * already have submitted the batch referencing the proxy. */
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
{
	struct kgem_buffer *bo;
	uint32_t offset = _bo->delta, length = _bo->size.bytes;

	/* We expect the caller to have already submitted the batch */
	assert(_bo->io);
	assert(_bo->exec == NULL);
	assert(_bo->rq == NULL);
	assert(_bo->proxy);

	_bo = _bo->proxy;
	assert(_bo->proxy == NULL);
	assert(_bo->exec == NULL);

	bo = (struct kgem_buffer *)_bo;

	DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__,
	     offset, length, bo->base.snoop));

	if (bo->mmapped) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n",
		     __FUNCTION__,
		     bo->base.needs_flush,
		     bo->base.domain,
		     __kgem_busy(kgem, bo->base.handle)));

		assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc);

		VG_CLEAR(set_domain);
		set_domain.handle = bo->base.handle;
		set_domain.write_domain = 0;
		set_domain.read_domains =
			IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;

		if (drmIoctl(kgem->fd,
			     DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))
			return;
	} else {
		if (gem_read(kgem->fd,
			     bo->base.handle, (char *)bo->mem+offset,
			     offset, length))
			return;
	}
	kgem_bo_retire(kgem, &bo->base);
	bo->base.domain = DOMAIN_NONE;
}
#endif |
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) |
{ |
struct kgem_bo_binding *b; |
for (b = &bo->binding; b && b->offset; b = b->next) |
if (format == b->format) |
return b->offset; |
return 0; |
} |
/* Cache a surface-state binding offset for @format on this bo.  The
 * first free slot (offset == 0) in the chain is reused and the
 * following entry, if any, is invalidated by zeroing its offset (a zero
 * offset terminates lookups in kgem_bo_get_binding).  If the chain is
 * full, a new node is inserted after the embedded head; allocation
 * failure is silently ignored — the binding is simply not cached. */
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
{
	struct kgem_bo_binding *b;

	for (b = &bo->binding; b; b = b->next) {
		if (b->offset)
			continue;

		b->offset = offset;
		b->format = format;

		/* stale entries beyond this slot are no longer valid */
		if (b->next)
			b->next->offset = 0;

		return;
	}

	b = malloc(sizeof(*b));
	if (b) {
		b->next = bo->binding.next;
		b->format = format;
		b->offset = offset;
		bo->binding.next = b;
	}
}
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb) |
{ |
struct kgem_bo *bo; |
size_t size; |
int ret; |
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb); |
if( ret != 0 ) |
return 0; |
size = fb->pitch * fb->height / PAGE_SIZE; |
bo = __kgem_bo_alloc(-2, size); |
if (!bo) { |
return 0; |
} |
bo->domain = DOMAIN_GTT; |
bo->unique_id = kgem_get_unique_id(kgem); |
bo->pitch = fb->pitch; |
bo->tiling = I915_TILING_NONE; |
bo->scanout = 1; |
fb->fb_bo = bo; |
// printf("fb width %d height %d pitch %d bo %p\n", |
// fb->width, fb->height, fb->pitch, fb->fb_bo); |
return 1; |
}; |
int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb) |
{ |
struct kgem_bo *bo; |
size_t size; |
int ret; |
bo = fb->fb_bo; |
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb); |
if( ret != 0 ) |
return 0; |
fb->fb_bo = bo; |
size = fb->pitch * fb->height / PAGE_SIZE; |
if((size != bo->size.pages.count) || |
(fb->pitch != bo->pitch)) |
{ |
bo->size.pages.count = size; |
bo->pitch = fb->pitch; |
printf("fb width %d height %d pitch %d bo %p\n", |
fb->width, fb->height, fb->pitch, fb->fb_bo); |
return 1; |
} |
return 0; |
}; |
/* Drop the caller's reference and then free the bo outright.
 * NOTE(review): kgem_bo_destroy() may cache or free the bo itself;
 * calling kgem_bo_free() immediately afterwards presumes the bo is
 * still live and uncached at that point — confirm against the
 * kgem_bo_destroy implementation before reusing this helper. */
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
	kgem_bo_destroy(kgem, bo);
	kgem_bo_free(kgem, bo);
}
void kgem_close_batches(struct kgem *kgem) |
{ |
int n; |
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) { |
while (!list_is_empty(&kgem->pinned_batches[n])) { |
kgem_bo_destroy(kgem, |
list_first_entry(&kgem->pinned_batches[n], |
struct kgem_bo, list)); |
} |
} |
}; |
/* Wrap an existing GEM handle in a kgem_bo, sized from pitch*height and
 * marked X-tiled in the GTT domain.
 * NOTE(review): pitch*height is assumed to be a multiple of PAGE_SIZE;
 * the integer division truncates otherwise — confirm with callers. */
struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle,
                        int pitch, int height)
{
	struct kgem_bo *bo;
	int size;

	size = pitch * height / PAGE_SIZE;

	bo = __kgem_bo_alloc(handle, size);
	if(bo == NULL)
		return NULL;

	bo->domain    = DOMAIN_GTT;
	bo->unique_id = kgem_get_unique_id(kgem);
	bo->pitch     = pitch;
	bo->tiling    = I915_TILING_X;
	bo->scanout   = 0;

	return bo;
}
/drivers/video/Intel-2D/kgem.h |
---|
28,12 → 28,10 |
#ifndef KGEM_H |
#define KGEM_H |
#define HAS_DEBUG_FULL 1 |
#include <stdint.h> |
#include <stdio.h> |
#include <stdbool.h> |
#include <stdarg.h> |
#include <stdio.h> |
#include <i915_drm.h> |
40,12 → 38,24 |
#include "compiler.h" |
#include "intel_list.h" |
#undef DBG |
#include <limits.h> |
#if !defined(MAXSHORT) || !defined(MINSHORT) || \ |
!defined(MAXINT) || !defined(MININT) |
/* |
* Some implementations #define these through <math.h>, so preclude |
* #include'ing it later. |
*/ |
#if HAS_DEBUG_FULL |
#define DBG(x) printf x |
#else |
#define DBG(x) |
#include <math.h> |
#undef MAXSHORT |
#define MAXSHORT SHRT_MAX |
#undef MINSHORT |
#define MINSHORT SHRT_MIN |
#undef MAXINT |
#define MAXINT INT_MAX |
#undef MININT |
#define MININT INT_MIN |
#endif |
struct kgem_bo { |
64,6 → 74,7 |
void *map; |
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1) |
#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0) |
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3)) |
struct kgem_bo_binding { |
struct kgem_bo_binding *next; |
90,7 → 101,8 |
uint32_t pitch : 18; /* max 128k */ |
uint32_t tiling : 2; |
uint32_t reusable : 1; |
uint32_t dirty : 1; |
uint32_t gpu_dirty : 1; |
uint32_t gtt_dirty : 1; |
uint32_t domain : 2; |
uint32_t needs_flush : 1; |
uint32_t snoop : 1; |
173,6 → 185,7 |
uint32_t scanout_busy:1; |
uint32_t busy:1; |
uint32_t has_create2 :1; |
uint32_t has_userptr :1; |
uint32_t has_blt :1; |
uint32_t has_relaxed_fencing :1; |
180,8 → 193,9 |
uint32_t has_semaphores :1; |
uint32_t has_secure_batches :1; |
uint32_t has_pinned_batches :1; |
uint32_t has_cacheing :1; |
uint32_t has_caching :1; |
uint32_t has_llc :1; |
uint32_t has_wt :1; |
uint32_t has_no_reloc :1; |
uint32_t has_handle_lut :1; |
200,10 → 214,23 |
void (*retire)(struct kgem *kgem); |
void (*expire)(struct kgem *kgem); |
uint32_t batch[64*1024-8]; |
struct drm_i915_gem_exec_object2 exec[256]; |
struct drm_i915_gem_relocation_entry reloc[4096]; |
#if 0 |
void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp, |
int32_t src_stride, int32_t dst_stride, |
int16_t src_x, int16_t src_y, |
int16_t dst_x, int16_t dst_y, |
uint16_t width, uint16_t height); |
void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp, |
int32_t src_stride, int32_t dst_stride, |
int16_t src_x, int16_t src_y, |
int16_t dst_x, int16_t dst_y, |
uint16_t width, uint16_t height); |
#endif |
uint16_t reloc__self[256]; |
uint32_t batch[64*1024-8] page_aligned; |
struct drm_i915_gem_exec_object2 exec[384] page_aligned; |
struct drm_i915_gem_relocation_entry reloc[8192] page_aligned; |
#ifdef DEBUG_MEMORY |
struct { |
213,9 → 240,11 |
#endif |
}; |
#define KGEM_MAX_DEFERRED_VBO 16 |
#define KGEM_BATCH_RESERVED 1 |
#define KGEM_RELOC_RESERVED 4 |
#define KGEM_EXEC_RESERVED 1 |
#define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO) |
#define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO) |
#ifndef ARRAY_SIZE |
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) |
233,6 → 262,8 |
bool read_only); |
struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name); |
struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size); |
int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo); |
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags); |
struct kgem_bo *kgem_create_proxy(struct kgem *kgem, |
239,6 → 270,7 |
struct kgem_bo *target, |
int offset, int length); |
void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr); |
int kgem_choose_tiling(struct kgem *kgem, |
int tiling, int width, int height, int bpp); |
248,6 → 280,8 |
#define KGEM_CAN_CREATE_LARGE 0x4 |
#define KGEM_CAN_CREATE_GTT 0x8 |
uint32_t kgem_get_unique_id(struct kgem *kgem); |
struct kgem_bo * |
kgem_replace_bo(struct kgem *kgem, |
struct kgem_bo *src, |
281,7 → 315,6 |
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); |
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); |
int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo); |
bool kgem_retire(struct kgem *kgem); |
304,6 → 337,11 |
return kgem_ring_is_idle(kgem, kgem->ring); |
} |
/* Check whether the request list of the currently selected ring is empty,
 * i.e. no batches submitted on that ring are still outstanding.
 * requests[] holds two lists: index 1 is used when ring == KGEM_BLT,
 * index 0 otherwise.
 */
static inline bool __kgem_ring_empty(struct kgem *kgem)
{
	return list_is_empty(&kgem->requests[kgem->ring == KGEM_BLT]);
}
void _kgem_submit(struct kgem *kgem); |
static inline void kgem_submit(struct kgem *kgem) |
{ |
325,21 → 363,8 |
_kgem_submit(kgem); |
} |
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo); |
static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) |
{ |
kgem_bo_submit(kgem, bo); |
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo); |
if (!bo->needs_flush) |
return; |
/* If the kernel fails to emit the flush, then it will be forced when |
* we assume direct access. And as the useual failure is EIO, we do |
* not actualy care. |
*/ |
__kgem_flush(kgem, bo); |
} |
static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo) |
{ |
assert(bo->refcnt); |
367,6 → 392,9 |
kgem_submit(kgem); |
#endif |
if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) |
_kgem_submit(kgem); |
if (kgem->mode == mode) |
return; |
419,6 → 447,11 |
/* Return the current insertion point into the batch buffer.
 * If relocations are already pending, the batch is submitted first and the
 * previous mode restored, so the caller starts on a batch with no pending
 * relocations.
 */
static inline uint32_t *kgem_get_batch(struct kgem *kgem)
{
	if (kgem->nreloc) {
		unsigned mode = kgem->mode;	/* preserve across the flush */
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, mode);
	}
	return kgem->batch + kgem->nbatch;
}
512,6 → 545,12 |
bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1)) |
return false; |
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y) |
return false; |
if (kgem->has_llc && bo->tiling == I915_TILING_NONE) |
return true; |
if (!bo->presumed_offset) |
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; |
544,7 → 583,7 |
if (kgem_bo_mapped(kgem, bo)) |
return true; |
if (!bo->tiling && kgem->has_llc) |
if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU)) |
return true; |
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y) |
553,6 → 592,22 |
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; |
} |
/* Decide whether a bo may be mapped for direct CPU access.
 * Disallowed for purged bos and for writes to a scanout buffer;
 * always allowed on LLC hardware; otherwise the bo must already be in
 * the CPU domain, and a write additionally requires that the bo is not
 * queued in the batch currently being built (bo->exec == NULL).
 */
static inline bool kgem_bo_can_map__cpu(struct kgem *kgem,
					struct kgem_bo *bo,
					bool write)
{
	if (bo->purged || (bo->scanout && write))
		return false;
	if (kgem->has_llc)
		return true;
	if (bo->domain != DOMAIN_CPU)
		return false;
	return !write || bo->exec == NULL;
}
static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) |
{ |
assert(bo->refcnt); |
561,6 → 616,8 |
return bo->snoop; |
} |
void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo); |
bool __kgem_busy(struct kgem *kgem, int handle); |
static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring) |
570,10 → 627,12 |
inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) |
{ |
bo->needs_flush = false; |
bo->rq = NULL; |
list_del(&bo->request); |
bo->rq = NULL; |
bo->domain = DOMAIN_NONE; |
bo->needs_flush = false; |
bo->gtt_dirty = false; |
} |
static inline bool kgem_bo_is_busy(struct kgem_bo *bo) |
584,8 → 643,6 |
return bo->rq; |
} |
/* |
static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) |
{ |
DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, |
604,8 → 661,24 |
return kgem_bo_is_busy(bo); |
} |
*/ |
/* True when the bo has an outstanding request queued on the render ring
 * (as opposed to the BLT ring, or no request at all).
 */
static inline bool kgem_bo_is_render(struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, rq? %d [%d]\n", __FUNCTION__,
	     bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq)));
	assert(bo->refcnt);
	return bo->rq && RQ_RING(bo->rq) == I915_EXEC_RENDER;
}
/* Mark the bo and its whole proxy chain as requiring a flush, and take the
 * backing bo out of the reuse pool (reusable = false).
 */
static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo)
{
	while (bo->proxy) {
		/* every proxy wrapping the backing bo is flagged as well */
		bo->flush = true;
		bo = bo->proxy;
	}
	bo->flush = true;
	bo->reusable = false;
}
static inline bool kgem_bo_is_dirty(struct kgem_bo *bo) |
{ |
if (bo == NULL) |
612,7 → 685,7 |
return false; |
assert(bo->refcnt); |
return bo->dirty; |
return bo->gpu_dirty; |
} |
static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo) |
632,7 → 705,7 |
bo->handle, bo->proxy != NULL)); |
bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE; |
bo->needs_flush = bo->dirty = true; |
bo->needs_flush = bo->gpu_dirty = true; |
list_move(&bo->request, &RQ(bo->rq)->buffers); |
} |
643,7 → 716,7 |
assert(bo->exec); |
assert(bo->rq); |
if (bo->dirty) |
if (bo->gpu_dirty) |
return; |
__kgem_bo_mark_dirty(bo); |
672,6 → 745,9 |
void kgem_purge_cache(struct kgem *kgem); |
void kgem_cleanup_cache(struct kgem *kgem); |
void kgem_clean_scanout_cache(struct kgem *kgem); |
void kgem_clean_large_cache(struct kgem *kgem); |
#if HAS_DEBUG_FULL |
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch); |
#else |
/drivers/video/Intel-2D/kgem_debug.c |
---|
37,22 → 37,6 |
#include "kgem_debug.h" |
#include <kos32sys.h> |
/* |
void |
ErrorF(const char *f, ...) |
{ |
va_list args; |
va_start(args, f); |
VErrorF(f, args); |
va_end(args); |
} |
*/ |
#define ErrorF printf |
struct drm_i915_gem_relocation_entry * |
kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset) |
{ |
388,8 → 372,6 |
static int (*decode_3d(int gen))(struct kgem*, uint32_t) |
{ |
return kgem_gen6_decode_3d; |
/* |
if (gen >= 0100) { |
} else if (gen >= 070) { |
return kgem_gen7_decode_3d; |
401,18 → 383,12 |
return kgem_gen4_decode_3d; |
} else if (gen >= 030) { |
return kgem_gen3_decode_3d; |
} else if (gen >= 020) { |
return kgem_gen2_decode_3d; |
} |
assert(0); |
*/ |
} |
static void (*finish_state(int gen))(struct kgem*) |
{ |
return kgem_gen6_finish_state; |
/* |
if (gen >= 0100) { |
} else if (gen >= 070) { |
return kgem_gen7_finish_state; |
424,11 → 400,8 |
return kgem_gen4_finish_state; |
} else if (gen >= 030) { |
return kgem_gen3_finish_state; |
} else if (gen >= 020) { |
return kgem_gen2_finish_state; |
} |
assert(0); |
*/ |
} |
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) |
/drivers/video/Intel-2D/kgem_debug_gen3.c |
---|
0,0 → 1,1599 |
/* |
* Copyright © 2007-2011 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Eric Anholt <eric@anholt.net> |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include <assert.h> |
#include "sna.h" |
#include "sna_reg.h" |
#include "gen3_render.h" |
#include "kgem_debug.h" |
enum type { |
T_FLOAT32, |
T_FLOAT16, |
}; |
static struct state { |
struct vertex_buffer { |
int handle; |
void *base; |
const char *ptr; |
int pitch; |
struct kgem_bo *current; |
} vb; |
struct vertex_elements { |
int offset; |
bool valid; |
enum type type; |
int size; |
uint8_t swizzle[4]; |
} ve[33]; |
int num_ve; |
} state; |
/* Reinterpret the bit pattern of an int as an IEEE-754 float. */
static float int_as_float(int i)
{
	union {
		float f;
		int i;
	} u = { .i = i };

	return u.f;
}
/* Resolve the vertex buffer address programmed at batch dword `offset` and
 * cache the resulting CPU pointer in state.vb for the vertex decoders.
 * The dword must be covered by a relocation; a target handle of 0 refers to
 * the batch buffer itself, otherwise the handle is looked up among the
 * buffers of the request currently being built.
 */
static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
					   uint32_t offset)
{
	uint32_t handle;
	struct kgem_bo *bo = NULL;
	void *base, *ptr;
	int i;

	/* reloc offsets are byte-based; `offset` arrives in dwords */
	offset *= sizeof(uint32_t);

	for (i = 0; i < kgem->nreloc; i++)
		if (kgem->reloc[i].offset == offset)
			break;
	assert(i < kgem->nreloc);	/* the address must have a reloc */
	handle = kgem->reloc[i].target_handle;

	if (handle == 0) {
		base = kgem->batch;
	} else {
		list_for_each_entry(bo, &kgem->next_request->buffers, request)
			if (bo->handle == handle)
				break;
		/* bo must have been found before the list head was reached */
		assert(&bo->request != &kgem->next_request->buffers);
		base = kgem_bo_map__debug(kgem, bo);
	}
	ptr = (char *)base + kgem->reloc[i].delta;

	state.vb.current = bo;
	state.vb.base = base;
	state.vb.ptr = ptr;
}
static void gen3_update_vertex_buffer_pitch(struct kgem *kgem, |
uint32_t offset) |
{ |
state.vb.pitch = kgem->batch[offset] >> 16 & 0x3f; |
state.vb.pitch *= sizeof(uint32_t); |
} |
static void gen3_update_vertex_elements(struct kgem *kgem, uint32_t data) |
{ |
state.ve[1].valid = 1; |
switch ((data >> 6) & 7) { |
case 1: |
state.ve[1].type = T_FLOAT32; |
state.ve[1].size = 3; |
state.ve[1].swizzle[0] = 1; |
state.ve[1].swizzle[1] = 1; |
state.ve[1].swizzle[2] = 1; |
state.ve[1].swizzle[3] = 3; |
break; |
case 2: |
state.ve[1].type = T_FLOAT32; |
state.ve[1].size = 4; |
state.ve[1].swizzle[0] = 1; |
state.ve[1].swizzle[1] = 1; |
state.ve[1].swizzle[2] = 1; |
state.ve[1].swizzle[3] = 1; |
break; |
case 3: |
state.ve[1].type = T_FLOAT32; |
state.ve[1].size = 2; |
state.ve[1].swizzle[0] = 1; |
state.ve[1].swizzle[1] = 1; |
state.ve[1].swizzle[2] = 2; |
state.ve[1].swizzle[3] = 3; |
break; |
case 4: |
state.ve[1].type = T_FLOAT32; |
state.ve[1].size = 3; |
state.ve[1].swizzle[0] = 1; |
state.ve[1].swizzle[1] = 1; |
state.ve[1].swizzle[2] = 3; |
state.ve[1].swizzle[3] = 1; |
break; |
} |
state.ve[2].valid = 0; |
state.ve[3].valid = 0; |
} |
static void gen3_update_vertex_texcoords(struct kgem *kgem, uint32_t data) |
{ |
int id; |
for (id = 0; id < 8; id++) { |
uint32_t fmt = (data >> (id*4)) & 0xf; |
int width; |
state.ve[id+4].valid = fmt != 0xf; |
width = 0; |
switch (fmt) { |
case 0: |
state.ve[id+4].type = T_FLOAT32; |
width = state.ve[id+4].size = 2; |
break; |
case 1: |
state.ve[id+4].type = T_FLOAT32; |
width = state.ve[id+4].size = 3; |
break; |
case 2: |
state.ve[id+4].type = T_FLOAT32; |
width = state.ve[id+4].size = 4; |
break; |
case 3: |
state.ve[id+4].type = T_FLOAT32; |
width = state.ve[id+4].size = 1; |
break; |
case 4: |
state.ve[id+4].type = T_FLOAT16; |
width = state.ve[id+4].size = 2; |
break; |
case 5: |
state.ve[id+4].type = T_FLOAT16; |
width = state.ve[id+4].size = 4; |
break; |
} |
state.ve[id+4].swizzle[0] = width > 0 ? 1 : 2; |
state.ve[id+4].swizzle[1] = width > 1 ? 1 : 2; |
state.ve[id+4].swizzle[2] = width > 2 ? 1 : 2; |
state.ve[id+4].swizzle[3] = width > 3 ? 1 : 2; |
} |
} |
static void gen3_update_vertex_elements_offsets(struct kgem *kgem) |
{ |
int i, offset; |
for (i = offset = 0; i < ARRAY_SIZE(state.ve); i++) { |
if (!state.ve[i].valid) |
continue; |
state.ve[i].offset = offset; |
offset += 4 * state.ve[i].size; |
state.num_ve = i; |
} |
} |
static void vertices_float32_out(const struct vertex_elements *ve, const float *f, int max) |
{ |
int c; |
ErrorF("("); |
for (c = 0; c < max; c++) { |
switch (ve->swizzle[c]) { |
case 0: ErrorF("#"); break; |
case 1: ErrorF("%f", f[c]); break; |
case 2: ErrorF("0.0"); break; |
case 3: ErrorF("1.0"); break; |
case 4: ErrorF("0x1"); break; |
case 5: break; |
default: ErrorF("?"); |
} |
if (c < max-1) |
ErrorF(", "); |
} |
ErrorF(")"); |
} |
static void ve_out(const struct vertex_elements *ve, const void *ptr) |
{ |
switch (ve->type) { |
case T_FLOAT32: |
vertices_float32_out(ve, ptr, ve->size); |
break; |
case T_FLOAT16: |
//vertices_float16_out(ve, ptr, ve->size); |
break; |
} |
} |
static void indirect_vertex_out(struct kgem *kgem, uint32_t v) |
{ |
const struct vertex_buffer *vb = &state.vb; |
int i = 1; |
do { |
const struct vertex_elements *ve = &state.ve[i]; |
const void *ptr = vb->ptr + v * vb->pitch + ve->offset; |
if (!ve->valid) |
continue; |
ve_out(ve, ptr); |
while (++i <= state.num_ve && !state.ve[i].valid) |
; |
if (i <= state.num_ve) |
ErrorF(", "); |
} while (i <= state.num_ve); |
} |
static int inline_vertex_out(struct kgem *kgem, void *base) |
{ |
const struct vertex_buffer *vb = &state.vb; |
int i = 1; |
do { |
const struct vertex_elements *ve = &state.ve[i]; |
const void *ptr = (char *)base + ve->offset; |
if (!ve->valid) |
continue; |
ve_out(ve, ptr); |
while (++i <= state.num_ve && !state.ve[i].valid) |
; |
if (i <= state.num_ve) |
ErrorF(", "); |
} while (i <= state.num_ve); |
return vb->pitch; |
} |
static int |
gen3_decode_3d_1c(struct kgem *kgem, uint32_t offset) |
{ |
uint32_t *data = kgem->batch + offset; |
uint32_t opcode; |
opcode = (data[0] & 0x00f80000) >> 19; |
switch (opcode) { |
case 0x11: |
kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); |
return 1; |
case 0x10: |
kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n", |
data[0]&1?"enabled":"disabled"); |
return 1; |
case 0x01: |
kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n"); |
return 1; |
case 0x0a: |
kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n"); |
return 1; |
case 0x05: |
kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n"); |
return 1; |
} |
kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", |
opcode); |
assert(0); |
return 1; |
} |
/** Format the destination operand of a pixel-shader instruction into
 * dstname: register name, optional ".xyzw"-subset write mask (when
 * do_mask is set and the mask is not all channels), and optional ".sat".
 */
static void
gen3_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
{
	static const char chan[4] = { 'x', 'y', 'z', 'w' };
	uint32_t a0 = data[i];
	int dst_nr = (a0 >> 14) & 0xf;
	const char *sat = "";
	char dstmask[8];

	dstmask[0] = 0;
	if (do_mask) {
		uint32_t bits = (a0 >> 10) & 0xf;

		if (bits != 0xf) {	/* full mask prints nothing */
			char *p = dstmask;
			int c;

			*p++ = '.';
			for (c = 0; c < 4; c++)
				if (bits & (1 << c))
					*p++ = chan[c];
			*p = 0;
		}
		if (a0 & (1 << 22))
			sat = ".sat";
	}

	switch ((a0 >> 19) & 0x7) {
	case 0:
		assert(dst_nr <= 15);
		sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
		break;
	case 4:
		assert(dst_nr == 0);
		sprintf(dstname, "oC%s%s", dstmask, sat);
		break;
	case 5:
		assert(dst_nr == 0);
		sprintf(dstname, "oD%s%s", dstmask, sat);
		break;
	case 6:
		assert(dst_nr <= 3);
		sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
		break;
	default:
		sprintf(dstname, "RESERVED");
		break;
	}
}
/* Map a 4-bit channel select to its printable name; bit 3 negates. */
static const char *
gen3_get_channel_swizzle(uint32_t select)
{
	static const char *pos[8] = {
		"x", "y", "z", "w", "0", "1", "bad", "bad"
	};
	static const char *neg[8] = {
		"-x", "-y", "-z", "-w", "-0", "-1", "-bad", "-bad"
	};

	return (select & 8) ? neg[select & 0x7] : pos[select & 0x7];
}
/* Format a source operand register name (R/T/C/oC/oD/U plus the named
 * T-register specials) into `name`.  Invalid encodings assert and print
 * "RESERVED".
 */
static void
gen3_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
{
	static const char *t_special[3] = { "DIFFUSE", "SPECULAR", "FOG" };

	switch (src_type) {
	case 0:
		assert(src_nr <= 15);
		sprintf(name, "R%d", src_nr);
		return;
	case 1:
		if (src_nr < 8)
			sprintf(name, "T%d", src_nr);
		else if (src_nr <= 10)
			sprintf(name, "%s", t_special[src_nr - 8]);
		else {
			assert(0);
			sprintf(name, "RESERVED");
		}
		return;
	case 2:
		assert(src_nr <= 31);
		sprintf(name, "C%d", src_nr);
		return;
	case 4:
		assert(src_nr == 0);
		sprintf(name, "oC");
		return;
	case 5:
		assert(src_nr == 0);
		sprintf(name, "oD");
		return;
	case 6:
		assert(src_nr <= 3);
		sprintf(name, "U%d", src_nr);
		return;
	default:
		assert(0);
		sprintf(name, "RESERVED");
		return;
	}
}
/* Format source operand 0 (register name plus swizzle, the identity
 * ".xyzw" swizzle being elided) from instruction dwords a0/a1.
 */
static void
gen3_get_instruction_src0(uint32_t *data, int i, char *srcname)
{
	uint32_t a0 = data[i];
	uint32_t a1 = data[i + 1];
	char swizzle[100];

	gen3_get_instruction_src_name((a0 >> 7) & 0x7, (a0 >> 2) & 0x1f, srcname);

	sprintf(swizzle, ".%s%s%s%s",
		gen3_get_channel_swizzle((a1 >> 28) & 0xf),
		gen3_get_channel_swizzle((a1 >> 24) & 0xf),
		gen3_get_channel_swizzle((a1 >> 20) & 0xf),
		gen3_get_channel_swizzle((a1 >> 16) & 0xf));
	if (strcmp(swizzle, ".xyzw"))
		strcat(srcname, swizzle);
}
/* Format source operand 1, whose fields straddle dwords a1/a2; the
 * identity ".xyzw" swizzle is elided.
 */
static void
gen3_get_instruction_src1(uint32_t *data, int i, char *srcname)
{
	uint32_t a1 = data[i + 1];
	uint32_t a2 = data[i + 2];
	char swizzle[100];

	gen3_get_instruction_src_name((a1 >> 13) & 0x7, (a1 >> 8) & 0x1f, srcname);

	sprintf(swizzle, ".%s%s%s%s",
		gen3_get_channel_swizzle((a1 >> 4) & 0xf),
		gen3_get_channel_swizzle((a1 >> 0) & 0xf),
		gen3_get_channel_swizzle((a2 >> 28) & 0xf),
		gen3_get_channel_swizzle((a2 >> 24) & 0xf));
	if (strcmp(swizzle, ".xyzw"))
		strcat(srcname, swizzle);
}
/* Format source operand 2, fully encoded in dword a2; the identity
 * ".xyzw" swizzle is elided.
 */
static void
gen3_get_instruction_src2(uint32_t *data, int i, char *srcname)
{
	uint32_t a2 = data[i + 2];
	char swizzle[100];

	gen3_get_instruction_src_name((a2 >> 21) & 0x7, (a2 >> 16) & 0x1f, srcname);

	sprintf(swizzle, ".%s%s%s%s",
		gen3_get_channel_swizzle((a2 >> 12) & 0xf),
		gen3_get_channel_swizzle((a2 >> 8) & 0xf),
		gen3_get_channel_swizzle((a2 >> 4) & 0xf),
		gen3_get_channel_swizzle((a2 >> 0) & 0xf));
	if (strcmp(swizzle, ".xyzw"))
		strcat(srcname, swizzle);
}
/* Format the address operand of a texture instruction (same register
 * namespace as sources, minus constants and U registers).
 */
static void
gen3_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
{
	static const char *t_special[3] = { "DIFFUSE", "SPECULAR", "FOG" };

	switch (src_type) {
	case 0:
		assert(src_nr <= 15);
		sprintf(name, "R%d", src_nr);
		return;
	case 1:
		if (src_nr < 8)
			sprintf(name, "T%d", src_nr);
		else if (src_nr <= 10)
			sprintf(name, "%s", t_special[src_nr - 8]);
		else {
			assert(0);
			sprintf(name, "RESERVED");
		}
		return;
	case 4:
		assert(src_nr == 0);
		sprintf(name, "oC");
		return;
	case 5:
		assert(src_nr == 0);
		sprintf(name, "oD");
		return;
	default:
		assert(0);
		sprintf(name, "RESERVED");
		return;
	}
}
/* Print a one-source ALU instruction; each instruction spans three dwords,
 * the trailing two printed bare with the prefix only.
 */
static void
gen3_decode_alu1(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, const char *op_name)
{
	char dst[100], src0[100];

	gen3_get_instruction_dst(data, i, dst, 1);
	gen3_get_instruction_src0(data, i, src0);

	kgem_debug_print(data, offset, i, "%s: %s %s, %s\n", instr_prefix,
			 op_name, dst, src0);
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
/* Print a two-source ALU instruction (three dwords total). */
static void
gen3_decode_alu2(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, const char *op_name)
{
	char dst[100], src0[100], src1[100];

	gen3_get_instruction_dst(data, i, dst, 1);
	gen3_get_instruction_src0(data, i, src0);
	gen3_get_instruction_src1(data, i, src1);

	kgem_debug_print(data, offset, i, "%s: %s %s, %s, %s\n", instr_prefix,
			 op_name, dst, src0, src1);
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
/* Print a three-source ALU instruction (three dwords total). */
static void
gen3_decode_alu3(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, const char *op_name)
{
	char dst[100], src0[100], src1[100], src2[100];

	gen3_get_instruction_dst(data, i, dst, 1);
	gen3_get_instruction_src0(data, i, src0);
	gen3_get_instruction_src1(data, i, src1);
	gen3_get_instruction_src2(data, i, src2);

	kgem_debug_print(data, offset, i, "%s: %s %s, %s, %s, %s\n", instr_prefix,
			 op_name, dst, src0, src1, src2);
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
/* Print a texture sampling instruction: destination (no write mask),
 * sampler number from t0, and the coordinate/address register from t1.
 */
static void
gen3_decode_tex(uint32_t *data, uint32_t offset, int i, char *instr_prefix,
		const char *tex_name)
{
	uint32_t t0 = data[i];
	uint32_t t1 = data[i + 1];
	int sampler_nr = t0 & 0xf;
	char dst_name[100];
	char addr_name[100];

	gen3_get_instruction_dst(data, i, dst_name, 0);
	gen3_get_instruction_addr((t1 >> 24) & 0x7,
				  (t1 >> 17) & 0xf,
				  addr_name);

	kgem_debug_print(data, offset, i, "%s: %s %s, S%d, %s\n", instr_prefix,
			 tex_name, dst_name, sampler_nr, addr_name);
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
/* Decode a DCL (declaration) instruction: either an input register
 * declaration (T0-7/DIFFUSE/SPECULAR/FOG with a component mask) or a
 * sampler declaration with its sample type.  Each instruction occupies
 * three dwords; the trailing two are printed bare with only the prefix.
 */
static void
gen3_decode_dcl(uint32_t *data, uint32_t offset, int i, char *instr_prefix)
{
	uint32_t d0 = data[i];
	const char *sampletype;
	int dcl_nr = (d0 >> 14) & 0xf;
	const char *dcl_x = d0 & (1 << 10) ? "x" : "";
	const char *dcl_y = d0 & (1 << 11) ? "y" : "";
	const char *dcl_z = d0 & (1 << 12) ? "z" : "";
	const char *dcl_w = d0 & (1 << 13) ? "w" : "";
	char dcl_mask[10];

	switch ((d0 >> 19) & 0x3) {
	case 1:		/* input register declaration */
		sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
		assert (strcmp(dcl_mask, "."));	/* at least one component */

		assert(dcl_nr <= 10);
		if (dcl_nr < 8) {
			/* texture coordinates: only these masks are accepted */
			if (strcmp(dcl_mask, ".x") != 0 &&
			    strcmp(dcl_mask, ".xy") != 0 &&
			    strcmp(dcl_mask, ".xz") != 0 &&
			    strcmp(dcl_mask, ".w") != 0 &&
			    strcmp(dcl_mask, ".xyzw") != 0) {
				assert(0);
			}
			kgem_debug_print(data, offset, i++, "%s: DCL T%d%s\n", instr_prefix,
					 dcl_nr, dcl_mask);
		} else {
			/* DIFFUSE/SPECULAR/FOG: these masks are rejected */
			if (strcmp(dcl_mask, ".xz") == 0)
				assert(0);
			else if (strcmp(dcl_mask, ".xw") == 0)
				assert(0);
			else if (strcmp(dcl_mask, ".xzw") == 0)
				assert(0);

			if (dcl_nr == 8) {
				kgem_debug_print(data, offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
						 dcl_mask);
			} else if (dcl_nr == 9) {
				kgem_debug_print(data, offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
						 dcl_mask);
			} else if (dcl_nr == 10) {
				kgem_debug_print(data, offset, i++, "%s: DCL FOG%s\n", instr_prefix,
						 dcl_mask);
			}
		}
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		break;
	case 3:		/* sampler declaration */
		switch ((d0 >> 22) & 0x3) {
		case 0:
			sampletype = "2D";
			break;
		case 1:
			sampletype = "CUBE";
			break;
		case 2:
			sampletype = "3D";
			break;
		default:
			sampletype = "RESERVED";
			break;
		}
		assert(dcl_nr <= 15);
		kgem_debug_print(data, offset, i++, "%s: DCL S%d %s\n", instr_prefix,
				 dcl_nr, sampletype);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		break;
	default:
		kgem_debug_print(data, offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
	}
}
static void |
gen3_decode_instruction(uint32_t *data, uint32_t offset, |
int i, char *instr_prefix) |
{ |
switch ((data[i] >> 24) & 0x1f) { |
case 0x0: |
kgem_debug_print(data, offset, i++, "%s: NOP\n", instr_prefix); |
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); |
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); |
break; |
case 0x01: |
gen3_decode_alu2(data, offset, i, instr_prefix, "ADD"); |
break; |
case 0x02: |
gen3_decode_alu1(data, offset, i, instr_prefix, "MOV"); |
break; |
case 0x03: |
gen3_decode_alu2(data, offset, i, instr_prefix, "MUL"); |
break; |
case 0x04: |
gen3_decode_alu3(data, offset, i, instr_prefix, "MAD"); |
break; |
case 0x05: |
gen3_decode_alu3(data, offset, i, instr_prefix, "DP2ADD"); |
break; |
case 0x06: |
gen3_decode_alu2(data, offset, i, instr_prefix, "DP3"); |
break; |
case 0x07: |
gen3_decode_alu2(data, offset, i, instr_prefix, "DP4"); |
break; |
case 0x08: |
gen3_decode_alu1(data, offset, i, instr_prefix, "FRC"); |
break; |
case 0x09: |
gen3_decode_alu1(data, offset, i, instr_prefix, "RCP"); |
break; |
case 0x0a: |
gen3_decode_alu1(data, offset, i, instr_prefix, "RSQ"); |
break; |
case 0x0b: |
gen3_decode_alu1(data, offset, i, instr_prefix, "EXP"); |
break; |
case 0x0c: |
gen3_decode_alu1(data, offset, i, instr_prefix, "LOG"); |
break; |
case 0x0d: |
gen3_decode_alu2(data, offset, i, instr_prefix, "CMP"); |
break; |
case 0x0e: |
gen3_decode_alu2(data, offset, i, instr_prefix, "MIN"); |
break; |
case 0x0f: |
gen3_decode_alu2(data, offset, i, instr_prefix, "MAX"); |
break; |
case 0x10: |
gen3_decode_alu1(data, offset, i, instr_prefix, "FLR"); |
break; |
case 0x11: |
gen3_decode_alu1(data, offset, i, instr_prefix, "MOD"); |
break; |
case 0x12: |
gen3_decode_alu1(data, offset, i, instr_prefix, "TRC"); |
break; |
case 0x13: |
gen3_decode_alu2(data, offset, i, instr_prefix, "SGE"); |
break; |
case 0x14: |
gen3_decode_alu2(data, offset, i, instr_prefix, "SLT"); |
break; |
case 0x15: |
gen3_decode_tex(data, offset, i, instr_prefix, "TEXLD"); |
break; |
case 0x16: |
gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDP"); |
break; |
case 0x17: |
gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDB"); |
break; |
case 0x19: |
gen3_decode_dcl(data, offset, i, instr_prefix); |
break; |
default: |
kgem_debug_print(data, offset, i++, "%s: unknown\n", instr_prefix); |
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); |
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); |
break; |
} |
} |
/* Name the 3-bit depth/stencil/alpha compare function. */
static const char *
gen3_decode_compare_func(uint32_t op)
{
	static const char *names[8] = {
		"always", "never", "less", "equal",
		"lequal", "greater", "notequal", "gequal",
	};

	return names[op & 0x7];
}
/* Name the 3-bit stencil operation.
 *
 * Fix: op 5 previously printed "greater" — a copy-paste from the compare
 * function table; the i915 stencil operation encoding 5 is INVERT
 * (keep/zero/replace/incrsat/decrsat/invert/incr/decr).
 */
static const char *
gen3_decode_stencil_op(uint32_t op)
{
	switch (op&0x7) {
	case 0: return "keep";
	case 1: return "zero";
	case 2: return "replace";
	case 3: return "incr_sat";
	case 4: return "decr_sat";
	case 5: return "invert";
	case 6: return "incr";
	case 7: return "decr";
	}
	return "";
}
#if 0
/* part of MODES_4 */
/* Name the 4-bit 2D logic op field.  Currently compiled out: nothing in
 * this decoder prints MODES_4 yet.
 */
static const char *
gen3_decode_logic_op(uint32_t op)
{
	switch (op&0xf) {
	case 0: return "clear";
	case 1: return "nor";
	case 2: return "and_inv";
	case 3: return "copy_inv";
	case 4: return "and_rvrse";
	case 5: return "inv";
	case 6: return "xor";
	case 7: return "nand";
	case 8: return "and";
	case 9: return "equiv";
	case 10: return "noop";
	case 11: return "or_inv";
	case 12: return "copy";
	case 13: return "or_rvrse";
	case 14: return "or";
	case 15: return "set";
	}
	return "";
}
#endif
/* Name the 4-bit blend factor (encoding 0 is unused and yields "").
 *
 * Fix: case 15 printed "inv_const_alpha", inconsistent with the "cnst"
 * abbreviation used by its siblings (12 "cnst_colr", 13 "inv_cnst_colr",
 * 14 "cnst_alpha"); normalized to "inv_cnst_alpha".
 */
static const char *
gen3_decode_blend_fact(uint32_t op)
{
	switch (op&0xf) {
	case 1: return "zero";
	case 2: return "one";
	case 3: return "src_colr";
	case 4: return "inv_src_colr";
	case 5: return "src_alpha";
	case 6: return "inv_src_alpha";
	case 7: return "dst_alpha";
	case 8: return "inv_dst_alpha";
	case 9: return "dst_colr";
	case 10: return "inv_dst_colr";
	case 11: return "src_alpha_sat";
	case 12: return "cnst_colr";
	case 13: return "inv_cnst_colr";
	case 14: return "cnst_alpha";
	case 15: return "inv_cnst_alpha";
	}
	return "";
}
/* Name the 3-bit texture coordinate addressing mode (6 and 7 are
 * unnamed and yield "").
 */
static const char *
decode_tex_coord_mode(uint32_t mode)
{
	static const char *names[8] = {
		"wrap", "mirror", "clamp_edge", "cube",
		"clamp_border", "mirror_once", "", "",
	};

	return names[mode & 0x7];
}
/* Name the 3-bit sampler filter mode (7 is unnamed and yields ""). */
static const char *
gen3_decode_sample_filter(uint32_t mode)
{
	static const char *names[8] = {
		"nearest", "linear", "anisotropic", "4x4_1",
		"4x4_2", "4x4_flat", "6x5_mono", "",
	};

	return names[mode & 0x7];
}
/*
 * Decode a gen3 3DSTATE_LOAD_STATE_IMMEDIATE_1 packet at 'offset' in the
 * batch.  Dword 0 carries one enable bit per S register starting at bit 4;
 * each enabled register contributes one payload dword, printed with its
 * fields annotated.  S0/S1/S2/S4 also feed the decoder's vertex-buffer and
 * vertex-element trackers so a later 3DPRIMITIVE can dump vertex data.
 * Returns the packet length in dwords.
 */
static int
gen3_decode_load_state_immediate_1(struct kgem *kgem, uint32_t offset)
{
	const uint32_t *data = kgem->batch + offset;
	int len, i, word;

	kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
	len = (data[0] & 0x0000000f) + 2;
	i = 1;
	/* NOTE(review): word runs 0..8 but the switch below only handles
	 * S0..S7 (enable bits 4..11); if bit 12 were set, i would advance
	 * with nothing printed and the final assert would fire.  Confirm the
	 * intended loop bound against the i915 PRM. */
	for (word = 0; word <= 8; word++) {
		if (data[0] & (1 << (4 + word))) {
			switch (word) {
			case 0:
				/* S0: vertex buffer address. */
				kgem_debug_print(data, offset, i, "S0: vbo offset: 0x%08x%s\n",
					  data[i]&(~1),data[i]&1?", auto cache invalidate disabled":"");
				gen3_update_vertex_buffer_addr(kgem, offset + i);
				break;
			case 1:
				/* S1: vertex width/pitch (in dwords). */
				kgem_debug_print(data, offset, i, "S1: vertex width: %i, vertex pitch: %i\n",
					  (data[i]>>24)&0x3f,(data[i]>>16)&0x3f);
				gen3_update_vertex_buffer_pitch(kgem, offset + i);
				break;
			case 2:
			{
				/* S2: one 4-bit format nibble per texcoord set. */
				char buf[200];
				int len = 0;	/* NB: intentionally shadows the packet length above */
				int tex_num;
				for (tex_num = 0; tex_num < 8; tex_num++) {
					switch((data[i]>>tex_num*4)&0xf) {
					case 0: len += sprintf(buf + len, "%i=2D ", tex_num); break;
					case 1: len += sprintf(buf + len, "%i=3D ", tex_num); break;
					case 2: len += sprintf(buf + len, "%i=4D ", tex_num); break;
					case 3: len += sprintf(buf + len, "%i=1D ", tex_num); break;
					case 4: len += sprintf(buf + len, "%i=2D_16 ", tex_num); break;
					case 5: len += sprintf(buf + len, "%i=4D_16 ", tex_num); break;
					case 0xf: len += sprintf(buf + len, "%i=NP ", tex_num); break;
					}
				}
				kgem_debug_print(data, offset, i, "S2: texcoord formats: %s\n", buf);
				gen3_update_vertex_texcoords(kgem, data[i]);
			}
				break;
			case 3:
				kgem_debug_print(data, offset, i, "S3: not documented\n");
				break;
			case 4:
			{
				/* S4: rasterizer state and inline vertex format. */
				const char *cullmode = "";
				const char *vfmt_xyzw = "";
				switch((data[i]>>13)&0x3) {
				case 0: cullmode = "both"; break;
				case 1: cullmode = "none"; break;
				case 2: cullmode = "cw"; break;
				case 3: cullmode = "ccw"; break;
				}
				switch(data[i] & (7<<6 | 1<<2)) {
				case 1<<6: vfmt_xyzw = "XYZ,"; break;
				case 2<<6: vfmt_xyzw = "XYZW,"; break;
				case 3<<6: vfmt_xyzw = "XY,"; break;
				case 4<<6: vfmt_xyzw = "XYW,"; break;
				case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break;
				case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break;
				case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break;
				case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break;
				}
				/* NOTE(review): bit 9 is tested twice below (as both
				 * "DepthOfs" and "FogParam") — verify against the PRM. */
				kgem_debug_print(data, offset, i, "S4: point_width=%i, line_width=%.1f,"
					  "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s%s%s "
					  "%s%s%s\n",
					  (data[i]>>23)&0x1ff,
					  ((data[i]>>19)&0xf) / 2.0,
					  data[i]&(0xf<<15)?" flatshade=":"",
					  data[i]&(1<<18)?"Alpha,":"",
					  data[i]&(1<<17)?"Fog,":"",
					  data[i]&(1<<16)?"Specular,":"",
					  data[i]&(1<<15)?"Color,":"",
					  cullmode,
					  data[i]&(1<<12)?"PointWidth,":"",
					  data[i]&(1<<11)?"SpecFog,":"",
					  data[i]&(1<<10)?"Color,":"",
					  data[i]&(1<<9)?"DepthOfs,":"",
					  vfmt_xyzw,
					  data[i]&(1<<9)?"FogParam,":"",
					  data[i]&(1<<5)?"force default diffuse, ":"",
					  data[i]&(1<<4)?"force default specular, ":"",
					  data[i]&(1<<3)?"local depth ofs enable, ":"",
					  data[i]&(1<<1)?"point sprite enable, ":"",
					  data[i]&(1<<0)?"line AA enable, ":"");
				gen3_update_vertex_elements(kgem, data[i]);
				break;
			}
			case 5:
			{
				/* S5: write masks, stencil and misc enables. */
				kgem_debug_print(data, offset, i, "S5:%s%s%s%s%s"
					  "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, "
					  "stencil_fail=%s, stencil_pass_z_fail=%s, "
					  "stencil_pass_z_pass=%s, %s%s%s%s\n",
					  data[i]&(0xf<<28)?" write_disable=":"",
					  data[i]&(1<<31)?"Alpha,":"",
					  data[i]&(1<<30)?"Red,":"",
					  data[i]&(1<<29)?"Green,":"",
					  data[i]&(1<<28)?"Blue,":"",
					  data[i]&(1<<27)?" force default point size,":"",
					  data[i]&(1<<26)?" last pixel enable,":"",
					  data[i]&(1<<25)?" global depth ofs enable,":"",
					  data[i]&(1<<24)?" fog enable,":"",
					  (data[i]>>16)&0xff,
					  gen3_decode_compare_func(data[i]>>13),
					  gen3_decode_stencil_op(data[i]>>10),
					  gen3_decode_stencil_op(data[i]>>7),
					  gen3_decode_stencil_op(data[i]>>4),
					  data[i]&(1<<3)?"stencil write enable, ":"",
					  data[i]&(1<<2)?"stencil test enable, ":"",
					  data[i]&(1<<1)?"color dither enable, ":"",
					  data[i]&(1<<0)?"logicop enable, ":"");
			}
				break;
			case 6:
				/* S6: alpha/depth test and blend state.
				 * NOTE(review): alpha_ref prints the masked but
				 * unshifted field value — confirm intended. */
				kgem_debug_print(data, offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, "
					  "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, "
					  "%s%stristrip_provoking_vertex=%i\n",
					  data[i]&(1<<31)?"alpha test enable, ":"",
					  gen3_decode_compare_func(data[i]>>28),
					  data[i]&(0xff<<20),
					  gen3_decode_compare_func(data[i]>>16),
					  data[i]&(1<<15)?"cbuf blend enable, ":"",
					  gen3_decode_blend_fact(data[i]>>8),
					  gen3_decode_blend_fact(data[i]>>4),
					  data[i]&(1<<3)?"depth write enable, ":"",
					  data[i]&(1<<2)?"cbuf write enable, ":"",
					  data[i]&(0x3));
				break;
			case 7:
				kgem_debug_print(data, offset, i, "S7: depth offset constant: 0x%08x\n", data[i]);
				break;
			}
			i++;
		}
	}
	/* Every enabled S register must have consumed exactly one dword. */
	assert(len == i);
	return len;
}
/*
 * Decode a gen3 3D opcode-0x1d (pipelined state) packet at 'offset' in the
 * batch, printing one annotated line per dword via kgem_debug_print().
 * Known complex packets are decoded field-by-field; simpler ones fall
 * through to the generic opcodes_3d_1d table at the bottom.  Returns the
 * packet length in dwords so the caller can advance past it.
 */
static int
gen3_decode_3d_1d(struct kgem *kgem, uint32_t offset)
{
	uint32_t *data = kgem->batch + offset;
	unsigned int len, i, c, idx, word, map, sampler, instr;
	const char *format, *zformat, *type;
	uint32_t opcode;
	/* Fallback table: packets printed generically as "dword %d". */
	static const struct {
		uint32_t opcode;
		int min_len;
		int max_len;
		const char *name;
	} opcodes_3d_1d[] = {
		{ 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
		{ 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
		{ 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
		{ 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
		{ 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
		{ 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
		{ 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
		{ 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
		{ 0x89, 4, 4, "3DSTATE_FOG_MODE" },
		{ 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
		{ 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
	}, *opcode_3d_1d;

	opcode = (data[0] & 0x00ff0000) >> 16;

	switch (opcode) {
	case 0x07:
		/* This instruction is unusual.  A 0 length means just 1 DWORD
		 * instead of 2.  The 0 length is specified in one place to be
		 * unsupported, but stated to be required in another, and 0
		 * length LOAD_INDIRECTs appear to cause no harm at least.
		 */
		kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n");
		len = (data[0] & 0x000000ff) + 1;
		i = 1;
		/* One pair of dwords per enabled state block (DIS is a single
		 * dword). */
		if (data[0] & (0x01 << 8)) {
			kgem_debug_print(data, offset, i++, "SIS.0\n");
			kgem_debug_print(data, offset, i++, "SIS.1\n");
		}
		if (data[0] & (0x02 << 8)) {
			kgem_debug_print(data, offset, i++, "DIS.0\n");
		}
		if (data[0] & (0x04 << 8)) {
			kgem_debug_print(data, offset, i++, "SSB.0\n");
			kgem_debug_print(data, offset, i++, "SSB.1\n");
		}
		if (data[0] & (0x08 << 8)) {
			kgem_debug_print(data, offset, i++, "MSB.0\n");
			kgem_debug_print(data, offset, i++, "MSB.1\n");
		}
		if (data[0] & (0x10 << 8)) {
			kgem_debug_print(data, offset, i++, "PSP.0\n");
			kgem_debug_print(data, offset, i++, "PSP.1\n");
		}
		if (data[0] & (0x20 << 8)) {
			kgem_debug_print(data, offset, i++, "PSC.0\n");
			kgem_debug_print(data, offset, i++, "PSC.1\n");
		}
		assert(len == i);
		return len;
	case 0x04:
		/* Full field decode lives in its own helper. */
		return gen3_decode_load_state_immediate_1(kgem, offset);
	case 0x03:
		kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
		len = (data[0] & 0x0000000f) + 2;
		i = 1;
		/* Enable bits 6..14 select TBCF, TB[0-3]C/A pairs, or the
		 * five-dword TM[0-3]S blocks. */
		for (word = 6; word <= 14; word++) {
			if (data[0] & (1 << word)) {
				if (word == 6)
					kgem_debug_print(data, offset, i++, "TBCF\n");
				else if (word >= 7 && word <= 10) {
					kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7);
					kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7);
				} else if (word >= 11 && word <= 14) {
					kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n",
						  word - 11,
						  data[i]&0xfffffffe,
						  data[i]&1?"use fence":"");
					i++;
					kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n",
						  word - 11,
						  data[i]>>21, (data[i]>>10)&0x3ff,
						  data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):"");
					i++;
					kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n",
						  word - 11,
						  ((data[i]>>21) + 1)*4);
					i++;
					kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11);
					kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11);
				}
			}
		}
		assert(len == i);
		return len;
	case 0x00:
		kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n");
		len = (data[0] & 0x0000003f) + 2;
		kgem_debug_print(data, offset, 1, "mask\n");

		i = 2;
		/* Three dwords (MS2/MS3/MS4) per map enabled in the mask. */
		for (map = 0; map <= 15; map++) {
			if (data[1] & (1 << map)) {
				int width, height, pitch, dword;
				struct drm_i915_gem_relocation_entry *reloc;
				const char *tiling;

				/* MS2 holds the surface address: resolve the
				 * reloc to report the bo handle. */
				reloc = kgem_debug_get_reloc_entry(kgem, &data[i] - kgem->batch);
				assert(reloc->target_handle);

				dword = data[i];
				kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s, handle=%d\n", map,
					  dword&(1<<31)?"untrusted surface, ":"",
					  dword&(1<<1)?"vertical line stride enable, ":"",
					  dword&(1<<0)?"vertical ofs enable, ":"",
					  reloc->target_handle);

				dword = data[i];
				width = ((dword >> 10) & ((1 << 11) - 1))+1;
				height = ((dword >> 21) & ((1 << 11) - 1))+1;

				tiling = "none";
				if (dword & (1 << 2))
					tiling = "fenced";
				else if (dword & (1 << 1))
					tiling = dword & (1 << 0) ? "Y" : "X";
				type = " BAD";
				format = " (invalid)";
				/* MS3 bits 7..9 select the texel class; bits
				 * 3..6 the format within the class. */
				switch ((dword>>7) & 0x7) {
				case 1:
					type = "8";
					switch ((dword>>3) & 0xf) {
					case 0: format = "I"; break;
					case 1: format = "L"; break;
					case 4: format = "A"; break;
					case 5: format = " mono"; break;
					}
					break;
				case 2:
					type = "16";
					switch ((dword>>3) & 0xf) {
					case 0: format = " rgb565"; break;
					case 1: format = " argb1555"; break;
					case 2: format = " argb4444"; break;
					case 3: format = " ay88"; break;
					case 5: format = " 88dvdu"; break;
					case 6: format = " bump655"; break;
					case 7: format = "I"; break;
					case 8: format = "L"; break;
					case 9: format = "A"; break;
					}
					break;
				case 3:
					type = "32";
					switch ((dword>>3) & 0xf) {
					case 0: format = " argb8888"; break;
					case 1: format = " abgr8888"; break;
					case 2: format = " xrgb8888"; break;
					case 3: format = " xbgr8888"; break;
					case 4: format = " qwvu8888"; break;
					case 5: format = " axvu8888"; break;
					case 6: format = " lxvu8888"; break;
					case 7: format = " xlvu8888"; break;
					case 8: format = " argb2101010"; break;
					case 9: format = " abgr2101010"; break;
					case 10: format = " awvu2101010"; break;
					case 11: format = " gr1616"; break;
					case 12: format = " vu1616"; break;
					case 13: format = " xI824"; break;
					case 14: format = " xA824"; break;
					case 15: format = " xL824"; break;
					}
					break;
				case 5:
					type = "422";
					switch ((dword>>3) & 0xf) {
					case 0: format = " yuv_swapy"; break;
					case 1: format = " yuv"; break;
					case 2: format = " yuv_swapuv"; break;
					case 3: format = " yuv_swapuvy"; break;
					}
					break;
				case 6:
					type = "compressed";
					switch ((dword>>3) & 0x7) {
					case 0: format = " dxt1"; break;
					case 1: format = " dxt2_3"; break;
					case 2: format = " dxt4_5"; break;
					case 3: format = " fxt1"; break;
					case 4: format = " dxt1_rb"; break;
					}
					break;
				case 7:
					type = "4b indexed";
					switch ((dword>>3) & 0xf) {
					case 7: format = " argb8888"; break;
					}
					break;
				default:
					format = "BAD";
					break;
				}
				dword = data[i];
				kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n",
					  map, width, height, type, format, tiling,
					  dword&(1<<9)?" palette select":"");

				dword = data[i];
				/* NOTE(review): MS4 argument order
				 * ((dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f)
				 * vs. the max_lod/vol_depth/cube_face_ena field
				 * names looks suspicious — confirm with the PRM. */
				pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
				kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n",
					  map, pitch,
					  (dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f,
					  dword&(1<<8)?"miplayout legacy":"miplayout right");
			}
		}
		assert(len == i);
		return len;
	case 0x06:
		kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
		len = (data[0] & 0x000000ff) + 2;

		i = 2;
		/* Four floats (X/Y/Z/W) per constant enabled in dword 1. */
		for (c = 0; c <= 31; c++) {
			if (data[1] & (1 << c)) {
				kgem_debug_print(data, offset, i, "C%d.X = %f\n",
					  c, int_as_float(data[i]));
				i++;
				kgem_debug_print(data, offset, i, "C%d.Y = %f\n",
					  c, int_as_float(data[i]));
				i++;
				kgem_debug_print(data, offset, i, "C%d.Z = %f\n",
					  c, int_as_float(data[i]));
				i++;
				kgem_debug_print(data, offset, i, "C%d.W = %f\n",
					  c, int_as_float(data[i]));
				i++;
			}
		}
		assert(len == i);
		return len;
	case 0x05:
		kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
		len = (data[0] & 0x000000ff) + 2;
		/* Program is a whole number of 3-dword instructions, at most
		 * 123 of them. */
		assert(((len-1) % 3) == 0);
		assert(len <= 370);

		i = 1;
		for (instr = 0; instr < (len - 1) / 3; instr++) {
			char instr_prefix[10];

			sprintf(instr_prefix, "PS%03d", instr);
			gen3_decode_instruction(data, offset, i, instr_prefix);
			i += 3;
		}
		return len;
	case 0x01:
		kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE\n");
		kgem_debug_print(data, offset, 1, "mask\n");
		len = (data[0] & 0x0000003f) + 2;
		i = 2;
		/* Three dwords (SS2/SS3/SS4) per sampler enabled in the mask. */
		for (sampler = 0; sampler <= 15; sampler++) {
			if (data[1] & (1 << sampler)) {
				uint32_t dword;
				const char *mip_filter = "";

				dword = data[i];
				switch ((dword>>20)&0x3) {
				case 0: mip_filter = "none"; break;
				case 1: mip_filter = "nearest"; break;
				case 3: mip_filter = "linear"; break;
				}
				kgem_debug_print(data, offset, i++, "sampler %d SS2:%s%s%s "
					  "base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s "
					  "lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", sampler,
					  dword&(1<<31)?" reverse gamma,":"",
					  dword&(1<<30)?" packed2planar,":"",
					  dword&(1<<29)?" colorspace conversion,":"",
					  (dword>>22)&0x1f,
					  mip_filter,
					  gen3_decode_sample_filter(dword>>17),
					  gen3_decode_sample_filter(dword>>14),
					  ((dword>>5)&0x1ff)/(0x10*1.0),
					  dword&(1<<4)?" shadow,":"",
					  dword&(1<<3)?4:2,
					  gen3_decode_compare_func(dword));
				dword = data[i];
				kgem_debug_print(data, offset, i++, "sampler %d SS3: min_lod=%.2f,%s "
					  "tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n",
					  sampler, ((dword>>24)&0xff)/(0x10*1.0),
					  dword&(1<<17)?" kill pixel enable,":"",
					  decode_tex_coord_mode(dword>>12),
					  decode_tex_coord_mode(dword>>9),
					  decode_tex_coord_mode(dword>>6),
					  dword&(1<<5)?" normalized coords,":"",
					  (dword>>1)&0xf,
					  dword&(1<<0)?" deinterlacer,":"");
				kgem_debug_print(data, offset, i++, "sampler %d SS4: border color\n",
					  sampler);
			}
		}
		assert(len == i);
		return len;
	case 0x85:
		len = (data[0] & 0x0000000f) + 2;
		assert(len == 2);

		kgem_debug_print(data, offset, 0,
			  "3DSTATE_DEST_BUFFER_VARIABLES\n");

		switch ((data[1] >> 8) & 0xf) {
		case 0x0: format = "g8"; break;
		case 0x1: format = "x1r5g5b5"; break;
		case 0x2: format = "r5g6b5"; break;
		case 0x3: format = "a8r8g8b8"; break;
		case 0x4: format = "ycrcb_swapy"; break;
		case 0x5: format = "ycrcb_normal"; break;
		case 0x6: format = "ycrcb_swapuv"; break;
		case 0x7: format = "ycrcb_swapuvy"; break;
		case 0x8: format = "a4r4g4b4"; break;
		case 0x9: format = "a1r5g5b5"; break;
		case 0xa: format = "a2r10g10b10"; break;
		default: format = "BAD"; break;
		}
		switch ((data[1] >> 2) & 0x3) {
		case 0x0: zformat = "u16"; break;
		case 0x1: zformat = "f16"; break;
		case 0x2: zformat = "u24x8"; break;
		default: zformat = "BAD"; break;
		}
		kgem_debug_print(data, offset, 1, "%s format, %s depth format, early Z %sabled\n",
			  format, zformat,
			  (data[1] & (1 << 31)) ? "en" : "dis");
		return len;
	case 0x8e:
		{
			const char *name, *tiling;

			len = (data[0] & 0x0000000f) + 2;
			assert(len == 3);

			switch((data[1] >> 24) & 0x7) {
			case 0x3: name = "color"; break;
			case 0x7: name = "depth"; break;
			default: name = "unknown"; break;
			}

			tiling = "none";
			if (data[1] & (1 << 23))
				tiling = "fenced";
			else if (data[1] & (1 << 22))
				tiling = data[1] & (1 << 21) ? "Y" : "X";

			kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n");
			kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
			kgem_debug_print(data, offset, 2, "address\n");
			return len;
		}
	case 0x81:
		len = (data[0] & 0x0000000f) + 2;
		assert(len == 3);

		kgem_debug_print(data, offset, 0,
			  "3DSTATE_SCISSOR_RECTANGLE\n");
		kgem_debug_print(data, offset, 1, "(%d,%d)\n",
			  data[1] & 0xffff, data[1] >> 16);
		kgem_debug_print(data, offset, 2, "(%d,%d)\n",
			  data[2] & 0xffff, data[2] >> 16);
		return len;
	case 0x80:
		len = (data[0] & 0x0000000f) + 2;
		assert(len == 5);

		kgem_debug_print(data, offset, 0,
			  "3DSTATE_DRAWING_RECTANGLE\n");
		kgem_debug_print(data, offset, 1, "%s\n",
			  data[1]&(1<<30)?"depth ofs disabled ":"");
		kgem_debug_print(data, offset, 2, "(%d,%d)\n",
			  data[2] & 0xffff, data[2] >> 16);
		kgem_debug_print(data, offset, 3, "(%d,%d)\n",
			  data[3] & 0xffff, data[3] >> 16);
		/* Origin is signed. */
		kgem_debug_print(data, offset, 4, "(%d,%d)\n",
			  (int16_t)(data[4] & 0xffff),
			  (int16_t)(data[4] >> 16));
		return len;
	case 0x9c:
		len = (data[0] & 0x0000000f) + 2;
		assert(len == 7);

		kgem_debug_print(data, offset, 0,
			  "3DSTATE_CLEAR_PARAMETERS\n");
		kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n",
			  data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT",
			  data[1]&(1<<2)?"color,":"",
			  data[1]&(1<<1)?"depth,":"",
			  data[1]&(1<<0)?"stencil,":"");
		kgem_debug_print(data, offset, 2, "clear color\n");
		kgem_debug_print(data, offset, 3, "clear depth/stencil\n");
		kgem_debug_print(data, offset, 4, "color value (rgba8888)\n");
		kgem_debug_print(data, offset, 5, "depth value %f\n",
			  int_as_float(data[5]));
		kgem_debug_print(data, offset, 6, "clear stencil\n");
		return len;
	}

	/* Not specially decoded: look it up in the generic table. */
	for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) {
		opcode_3d_1d = &opcodes_3d_1d[idx];
		if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
			len = (data[0] & 0xf) + 2;
			kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name);
			for (i = 1; i < len; i++)
				kgem_debug_print(data, offset, i, "dword %d\n", i);

			return len;
		}
	}

	kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
	assert(0);
	return 1;
}
/* Print one component of the current inline vertex ("V<n>.<field>") and
 * advance the dword cursor 'i'; relies on 'data', 'offset', 'i' and
 * 'vertex' being in scope at the expansion site. */
#define VERTEX_OUT(fmt, ...) do {					\
	kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
	i++;								\
} while (0)
/*
 * Decode a gen3 3DPRIMITIVE packet, dumping each vertex either from the
 * inline payload or indirectly through the tracked vertex buffers.
 * Returns the number of dwords consumed from the batch.
 */
static int
gen3_decode_3d_primitive(struct kgem *kgem, uint32_t offset)
{
	uint32_t *data = kgem->batch + offset;
	/* Bit 23 clear selects inline (immediate) vertex data. */
	char immediate = (data[0] & (1 << 23)) == 0;
	unsigned int len, i, ret;
	const char *primtype;
	unsigned int vertex = 0;

	switch ((data[0] >> 18) & 0xf) {
	case 0x0: primtype = "TRILIST"; break;
	case 0x1: primtype = "TRISTRIP"; break;
	case 0x2: primtype = "TRISTRIP_REVERSE"; break;
	case 0x3: primtype = "TRIFAN"; break;
	case 0x4: primtype = "POLYGON"; break;
	case 0x5: primtype = "LINELIST"; break;
	case 0x6: primtype = "LINESTRIP"; break;
	case 0x7: primtype = "RECTLIST"; break;
	case 0x8: primtype = "POINTLIST"; break;
	case 0x9: primtype = "DIB"; break;
	case 0xa: primtype = "CLEAR_RECT"; assert(0); break;
	default: primtype = "unknown"; break;
	}

	gen3_update_vertex_elements_offsets(kgem);

	/* XXX: 3DPRIM_DIB not supported */
	if (immediate) {
		len = (data[0] & 0x0003ffff) + 2;
		kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype);
		/* inline_vertex_out() consumes one whole vertex per call and
		 * reports how many bytes it used. */
		for (i = 1; i < len; ) {
			ErrorF(" [%d]: ", vertex);
			i += inline_vertex_out(kgem, data + i) / sizeof(uint32_t);
			ErrorF("\n");
			vertex++;
		}
		ret = len;
	} else {
		/* indirect vertices */
		len = data[0] & 0x0000ffff; /* index count */
		if (data[0] & (1 << 17)) {
			/* random vertex access — not implemented, see assert */
			kgem_debug_print(data, offset, 0,
				  "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
			assert(0);
			if (len == 0) {
				/* vertex indices continue until 0xffff is found */
			} else {
				/* fixed size vertex index buffer */
			}
			/* Indices are packed two per dword. */
			ret = (len + 1) / 2 + 1;
			goto out;
		} else {
			/* sequential vertex access */
			vertex = data[1] & 0xffff;
			kgem_debug_print(data, offset, 0,
				  "3DPRIMITIVE sequential indirect %s, %d starting from "
				  "%d\n", primtype, len, vertex);
			kgem_debug_print(data, offset, 1, " start\n");
			for (i = 0; i < len; i++) {
				ErrorF(" [%d]: ", vertex);
				indirect_vertex_out(kgem, vertex++);
				ErrorF("\n");
			}
			ret = 2;
			goto out;
		}
	}

out:
	return ret;
}
int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset) |
{ |
static const struct { |
uint32_t opcode; |
int min_len; |
int max_len; |
const char *name; |
} opcodes[] = { |
{ 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" }, |
{ 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" }, |
{ 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" }, |
{ 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" }, |
{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, |
{ 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" }, |
{ 0x0d, 1, 1, "3DSTATE_MODES_4" }, |
{ 0x0c, 1, 1, "3DSTATE_MODES_5" }, |
{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, |
}; |
uint32_t *data = kgem->batch + offset; |
uint32_t opcode; |
unsigned int idx; |
opcode = (data[0] & 0x1f000000) >> 24; |
switch (opcode) { |
case 0x1f: |
return gen3_decode_3d_primitive(kgem, offset); |
case 0x1d: |
return gen3_decode_3d_1d(kgem, offset); |
case 0x1c: |
return gen3_decode_3d_1c(kgem, offset); |
} |
for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) { |
if (opcode == opcodes[idx].opcode) { |
unsigned int len = 1, i; |
kgem_debug_print(data, offset, 0, "%s\n", opcodes[idx].name); |
if (opcodes[idx].max_len > 1) { |
len = (data[0] & 0xff) + 2; |
assert(len >= opcodes[idx].min_len || |
len <= opcodes[idx].max_len); |
} |
for (i = 1; i < len; i++) |
kgem_debug_print(data, offset, i, "dword %d\n", i); |
return len; |
} |
} |
kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); |
return 1; |
} |
/* Reset the decoder's cached gen3 vertex state between batches. */
void kgem_gen3_finish_state(struct kgem *kgem)
{
	memset(&state, 0, sizeof(state));
}
/drivers/video/Intel-2D/kgem_debug_gen4.c |
---|
0,0 → 1,687 |
/* |
* Copyright © 2007-2011 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Eric Anholt <eric@anholt.net> |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include <assert.h> |
#include "sna.h" |
#include "sna_reg.h" |
#include "gen4_render.h" |
#include "kgem_debug.h" |
/* Decoder-side shadow of the gen4 vertex fetch state, filled in while
 * decoding state packets and consumed when dumping 3DPRIMITIVE vertices. */
static struct state {
	struct vertex_buffer {
		int handle;		/* bo handle backing the buffer */
		void *base;		/* CPU mapping of the bo (or batch) */
		const char *ptr;	/* base + reloc delta: first vertex */
		int pitch;		/* stride between vertices, in bytes */
		struct kgem_bo *current;
	} vb[33];
	struct vertex_elements {
		int buffer;		/* index into vb[] */
		int offset;		/* byte offset within a vertex */
		bool valid;
		uint32_t type;		/* GEN4_SURFACEFORMAT_* */
		uint8_t swizzle[4];	/* per-component control, X..W */
	} ve[33];
	int num_ve;
	struct dynamic_state {
		struct kgem_bo *current;
		void *base, *ptr;
	} dynamic_state;
} state;
/* Record a 3DSTATE_VERTEX_BUFFERS entry: resolve the address reloc in
 * data[1] to its backing bo, map it for CPU access and remember the mapped
 * pointer and pitch in state.vb[] for later vertex dumping. */
static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
{
	/* Byte offset of the address dword within the batch. */
	uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch);
	struct kgem_bo *bo = NULL;
	void *base, *ptr;
	int i;

	/* Find the relocation entry pointing at this dword. */
	for (i = 0; i < kgem->nreloc; i++)
		if (kgem->reloc[i].offset == reloc)
			break;
	assert(i < kgem->nreloc);
	reloc = kgem->reloc[i].target_handle;

	if (reloc == 0) {
		/* Handle 0: the vertices live in the batch itself. */
		base = kgem->batch;
	} else {
		/* Look up the bo by handle on the outstanding request. */
		list_for_each_entry(bo, &kgem->next_request->buffers, request)
			if (bo->handle == reloc)
				break;
		/* The iterator must not have run off the end of the list. */
		assert(&bo->request != &kgem->next_request->buffers);
		base = kgem_bo_map__debug(kgem, bo);
	}
	ptr = (char *)base + kgem->reloc[i].delta;

	/* Buffer index lives in the top bits of the first dword. */
	i = data[0] >> 27;

	state.vb[i].current = bo;
	state.vb[i].base = base;
	state.vb[i].ptr = ptr;
	state.vb[i].pitch = data[0] & 0x7ff;
}
/* Extract the 3-bit component control for X/Y/Z/W (component 0..3) from a
 * VERTEX_ELEMENT dword; X lives in the highest nibble (bits 28..30). */
static uint32_t
get_ve_component(uint32_t data, int component)
{
	const int shift = 28 - component * 4;

	return (data >> shift) & 0x7;
}
static void gen4_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) |
{ |
state.ve[id].buffer = data[0] >> 27; |
state.ve[id].valid = !!(data[0] & (1 << 26)); |
state.ve[id].type = (data[0] >> 16) & 0x1ff; |
state.ve[id].offset = data[0] & 0x7ff; |
state.ve[id].swizzle[0] = get_ve_component(data[1], 0); |
state.ve[id].swizzle[1] = get_ve_component(data[1], 1); |
state.ve[id].swizzle[2] = get_ve_component(data[1], 2); |
state.ve[id].swizzle[3] = get_ve_component(data[1], 3); |
} |
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) |
{ |
int c; |
ErrorF("("); |
for (c = 0; c < max; c++) { |
switch (ve->swizzle[c]) { |
case 0: ErrorF("#"); break; |
case 1: ErrorF("%d", v[c]); break; |
case 2: ErrorF("0.0"); break; |
case 3: ErrorF("1.0"); break; |
case 4: ErrorF("0x1"); break; |
case 5: break; |
default: ErrorF("?"); |
} |
if (c < 3) |
ErrorF(", "); |
} |
for (; c < 4; c++) { |
switch (ve->swizzle[c]) { |
case 0: ErrorF("#"); break; |
case 1: ErrorF("1.0"); break; |
case 2: ErrorF("0.0"); break; |
case 3: ErrorF("1.0"); break; |
case 4: ErrorF("0x1"); break; |
case 5: break; |
default: ErrorF("?"); |
} |
if (c < 3) |
ErrorF(", "); |
} |
ErrorF(")"); |
} |
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) |
{ |
int c, o; |
ErrorF("("); |
for (c = o = 0; c < 4 && o < max; c++) { |
switch (ve->swizzle[c]) { |
case 0: ErrorF("#"); break; |
case 1: ErrorF("%f", f[o++]); break; |
case 2: ErrorF("0.0"); break; |
case 3: ErrorF("1.0"); break; |
case 4: ErrorF("0x1"); break; |
case 5: break; |
default: ErrorF("?"); |
} |
if (c < 3) |
ErrorF(", "); |
} |
for (; c < 4; c++) { |
switch (ve->swizzle[c]) { |
case 0: ErrorF("#"); break; |
case 1: ErrorF("1.0"); break; |
case 2: ErrorF("0.0"); break; |
case 3: ErrorF("1.0"); break; |
case 4: ErrorF("0x1"); break; |
case 5: break; |
default: ErrorF("?"); |
} |
if (c < 3) |
ErrorF(", "); |
} |
ErrorF(")"); |
} |
static void ve_out(const struct vertex_elements *ve, const void *ptr) |
{ |
switch (ve->type) { |
case GEN4_SURFACEFORMAT_R32_FLOAT: |
vertices_float_out(ve, ptr, 1); |
break; |
case GEN4_SURFACEFORMAT_R32G32_FLOAT: |
vertices_float_out(ve, ptr, 2); |
break; |
case GEN4_SURFACEFORMAT_R32G32B32_FLOAT: |
vertices_float_out(ve, ptr, 3); |
break; |
case GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT: |
vertices_float_out(ve, ptr, 4); |
break; |
case GEN4_SURFACEFORMAT_R16_SINT: |
vertices_sint16_out(ve, ptr, 1); |
break; |
case GEN4_SURFACEFORMAT_R16G16_SINT: |
vertices_sint16_out(ve, ptr, 2); |
break; |
case GEN4_SURFACEFORMAT_R16G16B16A16_SINT: |
vertices_sint16_out(ve, ptr, 4); |
break; |
case GEN4_SURFACEFORMAT_R16_SSCALED: |
vertices_sint16_out(ve, ptr, 1); |
break; |
case GEN4_SURFACEFORMAT_R16G16_SSCALED: |
vertices_sint16_out(ve, ptr, 2); |
break; |
case GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED: |
vertices_sint16_out(ve, ptr, 4); |
break; |
} |
} |
static void indirect_vertex_out(struct kgem *kgem, uint32_t v) |
{ |
int i = 0; |
do { |
const struct vertex_elements *ve = &state.ve[i]; |
const struct vertex_buffer *vb = &state.vb[ve->buffer]; |
const void *ptr = vb->ptr + v * vb->pitch + ve->offset; |
if (!ve->valid) |
continue; |
ve_out(ve, ptr); |
while (++i <= state.num_ve && !state.ve[i].valid) |
; |
if (i <= state.num_ve) |
ErrorF(", "); |
} while (i <= state.num_ve); |
} |
/* Dump every vertex referenced by a sequential (non-indexed) 3DPRIMITIVE:
 * data[1] is the vertex count, data[2] the starting vertex index. */
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
	const uint32_t count = data[1];
	uint32_t idx;

	assert((data[0] & (1<<15)) == 0); /* XXX index buffers */

	for (idx = 0; idx < count; idx++) {
		int v = data[2] + idx;

		ErrorF(" [%d:%d] = ", idx, v);
		indirect_vertex_out(kgem, v);
		ErrorF("\n");
	}
}
/* Print one STATE_BASE_ADDRESS base dword: bit 0 is the modify-enable
 * flag, the remaining bits the (4KiB-aligned) address. */
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
	       const char *name)
{
	const uint32_t dword = data[index];

	if (dword & 1) {
		kgem_debug_print(data, offset, index,
				 "%s state base address 0x%08x\n",
				 name, dword & ~1);
	} else {
		kgem_debug_print(data, offset, index,
				 "%s state base not updated\n",
				 name);
	}
}
/* Print one STATE_BASE_ADDRESS upper-bound dword: the literal value 1
 * means "disabled", otherwise bit 0 is the modify-enable flag and the
 * remaining bits the bound address. */
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
	      const char *name)
{
	const uint32_t dword = data[index];

	if (dword == 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound disabled\n", name);
	} else if (dword & 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound 0x%08x\n",
				 name, dword & ~1);
	} else {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound not updated\n",
				 name);
	}
}
/* Map a gen4 SURFACE_STATE surface type field to its mnemonic. */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
	static const char * const names[] = {
		[0] = "1D",
		[1] = "2D",
		[2] = "3D",
		[3] = "CUBE",
		[4] = "BUFFER",
		[7] = "NULL",
	};

	if (surfacetype < sizeof(names)/sizeof(names[0]) && names[surfacetype])
		return names[surfacetype];
	return "unknown";
}
/* Map a gen4 depth buffer format field to its mnemonic. */
static const char *
get_965_depthformat(unsigned int depthformat)
{
	static const char * const names[] = {
		[0] = "s8_z24float",
		[1] = "z32float",
		[2] = "z24s8",
		[5] = "z16",
	};

	if (depthformat < sizeof(names)/sizeof(names[0]) && names[depthformat])
		return names[depthformat];
	return "unknown";
}
/* Describe the component control for X/Y/Z/W (component 0..3) of a gen4
 * VERTEX_ELEMENT dword; X occupies the highest of the four nibbles
 * starting at bit 16. */
static const char *
get_965_element_component(uint32_t data, int component)
{
	static const char * const stored[] = { "X", "Y", "Z", "W" };
	uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;

	switch (component_control) {
	case 0:
		return "nostore";
	case 1:
		/* Store the source component itself. */
		if (component >= 0 && component <= 3)
			return stored[component];
		return "fail";
	case 2:
		return "0.0";
	case 3:
		return "1.0";
	case 4:
		return "0x1";
	case 5:
		return "VID";
	default:
		return "fail";
	}
}
/* Map the 5-bit primitive topology field (bits 10..14 of a 3DPRIMITIVE
 * dword) to its name; unknown encodings decode to "fail". */
static const char *
get_965_prim_type(uint32_t data)
{
	static const char * const types[] = {
		[0x01] = "point list",
		[0x02] = "line list",
		[0x03] = "line strip",
		[0x04] = "tri list",
		[0x05] = "tri strip",
		[0x06] = "tri fan",
		[0x07] = "quad list",
		[0x08] = "quad strip",
		[0x09] = "line list adj",
		[0x0a] = "line strip adj",
		[0x0b] = "tri list adj",
		[0x0c] = "tri strip adj",
		[0x0d] = "tri strip reverse",
		[0x0e] = "polygon",
		[0x0f] = "rect list",
		[0x10] = "line loop",
		[0x11] = "point list bf",
		[0x12] = "line strip cont",
		[0x13] = "line strip bf",
		[0x14] = "line strip cont bf",
		[0x15] = "tri fan no stipple",
	};
	uint32_t primtype = (data >> 10) & 0x1f;

	if (primtype < sizeof(types)/sizeof(types[0]) && types[primtype])
		return types[primtype];
	return "fail";
}
/* Compiled-out helper: resolve a batch dword through the reloc table to a
 * CPU pointer inside the target bo (or the batch itself for handle 0),
 * reporting the bo and its mapping via 'r'.  Kept for reference. */
#if 0
struct reloc {
	struct kgem_bo *bo;
	void *base;
};

static void *
get_reloc(struct kgem *kgem,
	  void *base, const uint32_t *reloc,
	  struct reloc *r)
{
	uint32_t delta = *reloc;

	memset(r, 0, sizeof(*r));

	if (base == 0) {
		uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
		struct kgem_bo *bo = NULL;
		int i;

		for (i = 0; i < kgem->nreloc; i++)
			if (kgem->reloc[i].offset == handle)
				break;
		assert(i < kgem->nreloc);
		handle = kgem->reloc[i].target_handle;
		delta = kgem->reloc[i].delta;

		if (handle == 0) {
			base = kgem->batch;
		} else {
			list_for_each_entry(bo, &kgem->next_request->buffers, request)
				if (bo->handle == handle)
					break;
			assert(&bo->request != &kgem->next_request->buffers);
			base = kgem_bo_map__debug(kgem, bo);
			r->bo = bo;
			r->base = base;
		}
	}

	return (char *)base + delta;
}
#endif
/* Decode a single Gen4 3D pipeline command found at dword `offset` of the
 * batch, pretty-printing its fields via kgem_debug_print().  Commands with
 * a dedicated decoder are handled in the switch; anything else is matched
 * against the opcodes[] table and dumped as raw dwords.
 * Returns the command length in dwords so the caller can advance. */
int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset)
{
	static const struct {
		uint32_t opcode;
		int min_len;
		int max_len;
		const char *name;
	} opcodes[] = {
		{ 0x6000, 3, 3, "URB_FENCE" },
		{ 0x6001, 2, 2, "CS_URB_FENCE" },
		{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
		{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
		{ 0x6102, 2, 2 , "STATE_SIP" },
		{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
		{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
		{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
		{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
		{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
		{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
		{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
		{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
		{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
		{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
		{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
		{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
		{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
		{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
		{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
		{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
		/* NOTE(review): duplicate opcode 0x7909 — the linear search
		 * below always matches the entry above, so this row is dead. */
		{ 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
		{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
		{ 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
		{ 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
		{ 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
		{ 0x7b00, 6, 6, "3DPRIMITIVE" },
		{ 0x7805, 3, 3, "3DSTATE_URB" },
		{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
		{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
		{ 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
		{ 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
	};
	uint32_t *data = kgem->batch + offset;
	uint32_t op;
	unsigned int len;
	int i;
	const char *desc1 = NULL;
	/* Header dword: low byte encodes (length - 2), bits 31:16 the opcode. */
	len = (data[0] & 0xff) + 2;
	op = (data[0] & 0xffff0000) >> 16;
	switch (op) {
	case 0x6000:
		assert(len == 3);
		kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
				 (data[0] >> 13) & 1 ? "cs " : "",
				 (data[0] >> 12) & 1 ? "vfe " : "",
				 (data[0] >> 11) & 1 ? "sf " : "",
				 (data[0] >> 10) & 1 ? "clip " : "",
				 (data[0] >> 9) & 1 ? "gs " : "",
				 (data[0] >> 8) & 1 ? "vs " : "");
		kgem_debug_print(data, offset, 1,
				 "vs fence: %d, gs_fence: %d, clip_fence: %d\n",
				 data[1] & 0x3ff,
				 (data[1] >> 10) & 0x3ff,
				 (data[1] >> 20) & 0x3ff);
		kgem_debug_print(data, offset, 2,
				 "sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
				 data[2] & 0x3ff,
				 (data[2] >> 10) & 0x3ff,
				 (data[2] >> 20) & 0x7ff);
		return len;
	case 0x6001:
		kgem_debug_print(data, offset, 0, "CS_URB_STATE\n");
		kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
				 (data[1] >> 4) & 0x1f,
				 (((data[1] >> 4) & 0x1f) + 1) * 64,
				 data[1] & 0x7);
		return len;
	case 0x6002:
		kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n",
				 (data[0] >> 8) & 1 ? "valid" : "invalid");
		kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n",
				 data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
		return len;
	case 0x6101:
		/* Gen4: three base addresses and two upper bounds. */
		i = 0;
		kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
		assert(len == 6);
		state_base_out(data, offset, i++, "general");
		state_base_out(data, offset, i++, "surface");
		state_base_out(data, offset, i++, "media");
		state_max_out(data, offset, i++, "general");
		state_max_out(data, offset, i++, "media");
		return len;
	case 0x7801:
		assert(len == 6);
		kgem_debug_print(data, offset, 0,
				 "3DSTATE_BINDING_TABLE_POINTERS\n");
		kgem_debug_print(data, offset, 1, "VS binding table\n");
		kgem_debug_print(data, offset, 2, "GS binding table\n");
		kgem_debug_print(data, offset, 3, "CLIP binding table\n");
		kgem_debug_print(data, offset, 4, "SF binding table\n");
		kgem_debug_print(data, offset, 5, "WM binding table\n");
		return len;
	case 0x7808:
		/* Header dword plus one 4-dword group per vertex buffer. */
		assert((len - 1) % 4 == 0);
		kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
		for (i = 1; i < len;) {
			gen4_update_vertex_buffer(kgem, data + i);
			kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
					 data[i] >> 27,
					 data[i] & (1 << 20) ? "random" : "sequential",
					 data[i] & 0x07ff);
			i++;
			kgem_debug_print(data, offset, i++, "buffer address\n");
			kgem_debug_print(data, offset, i++, "max index\n");
			kgem_debug_print(data, offset, i++, "mbz\n");
		}
		return len;
	case 0x7809:
		/* Header dword plus two dwords per vertex element (len is odd). */
		assert((len + 1) % 2 == 0);
		kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
		memset(state.ve, 0, sizeof(state.ve)); /* XXX? */
		for (i = 1; i < len;) {
			gen4_update_vertex_elements(kgem, (i - 1)/2, data + i);
			kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
					 "src offset 0x%04x bytes\n",
					 data[i] >> 27,
					 data[i] & (1 << 26) ? "" : "in",
					 (data[i] >> 16) & 0x1ff,
					 data[i] & 0x07ff);
			i++;
			kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
					 "dst offset 0x%02x bytes\n",
					 get_965_element_component(data[i], 0),
					 get_965_element_component(data[i], 1),
					 get_965_element_component(data[i], 2),
					 get_965_element_component(data[i], 3),
					 (data[i] & 0xff) * 4);
			i++;
		}
		state.num_ve = (len - 1) / 2; /* XXX? */
		return len;
	case 0x780a:
		assert(len == 3);
		kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
		kgem_debug_print(data, offset, 1, "beginning buffer address\n");
		kgem_debug_print(data, offset, 2, "ending buffer address\n");
		return len;
	case 0x7900:
		assert(len == 4);
		kgem_debug_print(data, offset, 0,
				 "3DSTATE_DRAWING_RECTANGLE\n");
		kgem_debug_print(data, offset, 1, "top left: %d,%d\n",
				 data[1] & 0xffff,
				 (data[1] >> 16) & 0xffff);
		kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n",
				 data[2] & 0xffff,
				 (data[2] >> 16) & 0xffff);
		kgem_debug_print(data, offset, 3, "origin: %d,%d\n",
				 (int)data[3] & 0xffff,
				 ((int)data[3] >> 16) & 0xffff);
		return len;
	case 0x7905:
		assert(len == 7);
		kgem_debug_print(data, offset, 0,
				 "3DSTATE_DEPTH_BUFFER\n");
		kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n",
				 get_965_surfacetype(data[1] >> 29),
				 get_965_depthformat((data[1] >> 18) & 0x7),
				 (data[1] & 0x0001ffff) + 1,
				 data[1] & (1 << 27) ? "" : "not ",
				 (data[1] & (1 << 22)) != 0,
				 (data[1] & (1 << 21)) != 0);
		kgem_debug_print(data, offset, 2, "depth offset\n");
		kgem_debug_print(data, offset, 3, "%dx%d\n",
				 ((data[3] & 0x0007ffc0) >> 6) + 1,
				 ((data[3] & 0xfff80000) >> 19) + 1);
		kgem_debug_print(data, offset, 4, "volume depth\n");
		kgem_debug_print(data, offset, 5, "\n");
		kgem_debug_print(data, offset, 6, "\n");
		return len;
	case 0x7a00:
		/* PIPE_CONTROL: dword 1 bits 15:14 select the post-sync write. */
		assert(len == 4 || len == 5);
		switch ((data[1] >> 14) & 0x3) {
		case 0: desc1 = "no write"; break;
		case 1: desc1 = "qword write"; break;
		case 2: desc1 = "PS_DEPTH_COUNT write"; break;
		case 3: desc1 = "TIMESTAMP write"; break;
		}
		kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
		kgem_debug_print(data, offset, 1,
				 "%s, %scs stall, %stlb invalidate, "
				 "%ssync gfdt, %sdepth stall, %sRC write flush, "
				 "%sinst flush, %sTC flush\n",
				 desc1,
				 data[1] & (1 << 20) ? "" : "no ",
				 data[1] & (1 << 18) ? "" : "no ",
				 data[1] & (1 << 17) ? "" : "no ",
				 data[1] & (1 << 13) ? "" : "no ",
				 data[1] & (1 << 12) ? "" : "no ",
				 data[1] & (1 << 11) ? "" : "no ",
				 data[1] & (1 << 10) ? "" : "no ");
		if (len == 5) {
			kgem_debug_print(data, offset, 2, "destination address\n");
			kgem_debug_print(data, offset, 3, "immediate dword low\n");
			kgem_debug_print(data, offset, 4, "immediate dword high\n");
		} else {
			for (i = 2; i < len; i++) {
				kgem_debug_print(data, offset, i, "\n");
			}
		}
		return len;
	case 0x7b00:
		assert(len == 6);
		kgem_debug_print(data, offset, 0,
				 "3DPRIMITIVE: %s %s\n",
				 get_965_prim_type(data[0]),
				 (data[0] & (1 << 15)) ? "random" : "sequential");
		kgem_debug_print(data, offset, 1, "vertex count\n");
		kgem_debug_print(data, offset, 2, "start vertex\n");
		kgem_debug_print(data, offset, 3, "instance count\n");
		kgem_debug_print(data, offset, 4, "start instance\n");
		kgem_debug_print(data, offset, 5, "index bias\n");
		primitive_out(kgem, data);
		return len;
	}
	/* For the rest, just dump the bytes */
	for (i = 0; i < ARRAY_SIZE(opcodes); i++)
		if (op == opcodes[i].opcode)
			break;
	assert(i < ARRAY_SIZE(opcodes));
	len = 1;
	kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
	if (opcodes[i].max_len > 1) {
		len = (data[0] & 0xff) + 2;
		assert(len >= opcodes[i].min_len &&
		       len <= opcodes[i].max_len);
	}
	for (i = 1; i < len; i++)
		kgem_debug_print(data, offset, i, "dword %d\n", i);
	return len;
}
void kgem_gen4_finish_state(struct kgem *kgem) |
{ |
memset(&state, 0, sizeof(state)); |
} |
/drivers/video/Intel-2D/kgem_debug_gen5.c |
---|
0,0 → 1,662 |
/* |
* Copyright © 2007-2011 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Eric Anholt <eric@anholt.net> |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include <assert.h> |
#include "sna.h" |
#include "sna_reg.h" |
#include "gen5_render.h" |
#include "kgem_debug.h" |
/* Decoder-side shadow of the GPU state programmed so far in this batch;
 * rebuilt while decoding commands and cleared by kgem_gen5_finish_state(). */
static struct state {
	struct vertex_buffer {
		int handle;		/* reloc target handle (-1 => batch itself) */
		void *base;		/* CPU mapping of the buffer */
		int size;		/* buffer size in bytes */
		const char *ptr;	/* base + reloc delta */
		int pitch;		/* vertex stride in bytes */
		struct kgem_bo *current;
	} vb[17];
	struct vertex_elements {
		int buffer;		/* index into vb[] */
		int offset;		/* source offset within one vertex */
		bool valid;
		uint32_t type;		/* GEN5_SURFACEFORMAT_* code */
		uint8_t swizzle[4];	/* per-channel component control */
	} ve[17];
	int num_ve;
	struct dynamic_state {
		struct kgem_bo *current;
		void *base, *ptr;
	} dynamic_state;
} state;
static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) |
{ |
struct drm_i915_gem_relocation_entry *reloc; |
struct kgem_bo *bo = NULL; |
void *base, *ptr; |
int i, size; |
reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch); |
if (reloc->target_handle == -1) { |
base = kgem->batch; |
size = kgem->nbatch * sizeof(uint32_t); |
} else { |
bo = kgem_debug_get_bo_for_reloc_entry(kgem, reloc); |
base = kgem_bo_map__debug(kgem, bo); |
size = kgem_bo_size(bo); |
} |
ptr = (char *)base + reloc->delta; |
i = data[0] >> 27; |
state.vb[i].handle = reloc->target_handle; |
state.vb[i].current = bo; |
state.vb[i].base = base; |
state.vb[i].ptr = ptr; |
state.vb[i].pitch = data[0] & 0x7ff; |
state.vb[i].size = size; |
} |
static uint32_t
get_ve_component(uint32_t data, int component)
{
	/* The four 4-bit component controls live in bits 31:16, X in the
	 * topmost slot; only the low 3 bits of each slot are meaningful. */
	const unsigned shift = 28 - 4 * component;
	return (data >> shift) & 0x7;
}
static void gen5_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) |
{ |
state.ve[id].buffer = data[0] >> 27; |
state.ve[id].valid = !!(data[0] & (1 << 26)); |
state.ve[id].type = (data[0] >> 16) & 0x1ff; |
state.ve[id].offset = data[0] & 0x7ff; |
state.ve[id].swizzle[0] = get_ve_component(data[1], 0); |
state.ve[id].swizzle[1] = get_ve_component(data[1], 1); |
state.ve[id].swizzle[2] = get_ve_component(data[1], 2); |
state.ve[id].swizzle[3] = get_ve_component(data[1], 3); |
} |
/* Print up to `max` int16 source values of one vertex element as "(a, b)",
 * honouring the element's component controls: 0 stores nothing, 1 consumes
 * the next source value, 2/3/4 emit constants, 5 prints nothing here
 * (VID per get_965_element_component). */
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
	int c, o;
	ErrorF("(");
	for (c = o = 0; c < 4 && o < max; c++) {
		switch (ve->swizzle[c]) {
		case 0: ErrorF("#"); break;          /* component not stored */
		case 1: ErrorF("%d", v[o++]); break; /* next source value */
		case 2: ErrorF("0.0"); break;
		case 3: ErrorF("1.0"); break;
		case 4: ErrorF("0x1"); break;
		case 5: break;
		default: ErrorF("?");
		}
		if (o < max)
			ErrorF(", ");
	}
	ErrorF(")");
}
/* Float counterpart of vertices_sint16_out(): print up to `max` float
 * source values of one vertex element according to its component controls. */
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
	int c, o;
	ErrorF("(");
	for (c = o = 0; c < 4 && o < max; c++) {
		switch (ve->swizzle[c]) {
		case 0: ErrorF("#"); break;          /* component not stored */
		case 1: ErrorF("%f", f[o++]); break; /* next source value */
		case 2: ErrorF("0.0"); break;
		case 3: ErrorF("1.0"); break;
		case 4: ErrorF("0x1"); break;
		case 5: break;
		default: ErrorF("?");
		}
		if (o < max)
			ErrorF(", ");
	}
	ErrorF(")");
}
static void ve_out(const struct vertex_elements *ve, const void *ptr) |
{ |
switch (ve->type) { |
case GEN5_SURFACEFORMAT_R32_FLOAT: |
vertices_float_out(ve, ptr, 1); |
break; |
case GEN5_SURFACEFORMAT_R32G32_FLOAT: |
vertices_float_out(ve, ptr, 2); |
break; |
case GEN5_SURFACEFORMAT_R32G32B32_FLOAT: |
vertices_float_out(ve, ptr, 3); |
break; |
case GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT: |
vertices_float_out(ve, ptr, 4); |
break; |
case GEN5_SURFACEFORMAT_R16_SINT: |
vertices_sint16_out(ve, ptr, 1); |
break; |
case GEN5_SURFACEFORMAT_R16G16_SINT: |
vertices_sint16_out(ve, ptr, 2); |
break; |
case GEN5_SURFACEFORMAT_R16G16B16A16_SINT: |
vertices_sint16_out(ve, ptr, 4); |
break; |
case GEN5_SURFACEFORMAT_R16_SSCALED: |
vertices_sint16_out(ve, ptr, 1); |
break; |
case GEN5_SURFACEFORMAT_R16G16_SSCALED: |
vertices_sint16_out(ve, ptr, 2); |
break; |
case GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED: |
vertices_sint16_out(ve, ptr, 4); |
break; |
} |
} |
static void indirect_vertex_out(struct kgem *kgem, uint32_t v) |
{ |
int i = 1; |
do { |
const struct vertex_elements *ve = &state.ve[i]; |
const struct vertex_buffer *vb = &state.vb[ve->buffer]; |
const void *ptr = vb->ptr + v * vb->pitch + ve->offset; |
if (!ve->valid) |
continue; |
assert(vb->pitch); |
assert(ve->offset + v*vb->pitch < vb->size); |
ve_out(ve, ptr); |
while (++i <= state.num_ve && !state.ve[i].valid) |
; |
if (i <= state.num_ve) |
ErrorF(", "); |
} while (i <= state.num_ve); |
} |
/* Dump the vertices consumed by a sequential 3DPRIMITIVE:
 * data[1] is the vertex count, data[2] the start vertex. */
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
	int n;
	/* random (indexed) access not handled */
	assert((data[0] & (1<<15)) == 0); /* XXX index buffers */
	for (n = 0; n < data[1]; n++) {
		int v = data[2] + n;
		ErrorF("	[%d:%d] = ", n, v);
		indirect_vertex_out(kgem, v);
		ErrorF("\n");
	}
}
/* Print one STATE_BASE_ADDRESS base-address dword; bit 0 is the
 * "modify enable" flag, the rest the (aligned) address. */
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
	       const char *name)
{
	if ((data[index] & 1) == 0) {
		kgem_debug_print(data, offset, index,
				 "%s state base not updated\n",
				 name);
		return;
	}
	kgem_debug_print(data, offset, index,
			 "%s state base address 0x%08x\n",
			 name, data[index] & ~1);
}
/* Print one STATE_BASE_ADDRESS upper-bound dword: the value 1 means the
 * bound is disabled, bit 0 set means "modify enable" with the bound in
 * the remaining bits, otherwise the bound is left unchanged. */
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
	      const char *name)
{
	const uint32_t v = data[index];
	if (v == 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound disabled\n", name);
	} else if (v & 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound 0x%08x\n",
				 name, v & ~1);
	} else {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound not updated\n",
				 name);
	}
}
/* Map a 3-bit surface type field to its name; 5 and 6 are unnamed here
 * and fall through to "unknown". */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
	static const char * const names[8] = {
		"1D", "2D", "3D", "CUBE", "BUFFER", NULL, NULL, "NULL"
	};
	if (surfacetype < 8 && names[surfacetype])
		return names[surfacetype];
	return "unknown";
}
/* Map the 3-bit depth buffer format field to its name. */
static const char *
get_965_depthformat(unsigned int depthformat)
{
	static const char * const names[6] = {
		"s8_z24float", "z32float", "z24s8", NULL, NULL, "z16"
	};
	if (depthformat < 6 && names[depthformat])
		return names[depthformat];
	return "unknown";
}
/* Describe one component control of a vertex element: the 3-bit field for
 * channel `component` (X first) packed downwards from bit 28. */
static const char *
get_965_element_component(uint32_t data, int component)
{
	static const char * const channel[4] = { "X", "Y", "Z", "W" };
	const uint32_t control = (data >> (16 + (3 - component) * 4)) & 0x7;
	switch (control) {
	case 0:
		return "nostore";
	case 1:
		/* store the source channel itself */
		if (component >= 0 && component < 4)
			return channel[component];
		return "fail";
	case 2:
		return "0.0";
	case 3:
		return "1.0";
	case 4:
		return "0x1";
	case 5:
		return "VID";
	default:
		return "fail";
	}
}
/* Name the primitive topology from a 3DPRIMITIVE header dword
 * (bits 14:10); unknown encodings return "fail". */
static const char *
get_965_prim_type(uint32_t data)
{
	static const char * const names[] = {
		NULL,
		"point list", "line list", "line strip", "tri list",
		"tri strip", "tri fan", "quad list", "quad strip",
		"line list adj", "line strip adj", "tri list adj",
		"tri strip adj", "tri strip reverse", "polygon",
		"rect list", "line loop", "point list bf",
		"line strip cont", "line strip bf", "line strip cont bf",
		"tri fan no stipple",
	};
	const uint32_t primtype = (data >> 10) & 0x1f;
	if (primtype < sizeof(names) / sizeof(names[0]) && names[primtype])
		return names[primtype];
	return "fail";
}
#if 0
/* Disabled alternative relocation-lookup helper — kept for reference. */
struct reloc {
	struct kgem_bo *bo;
	void *base;
};
/* Resolve the relocation recorded at batch dword *reloc: returns the CPU
 * address of the relocated target plus its delta, filling *r with the
 * bo and its mapping when the target is not the batch itself. */
static void *
get_reloc(struct kgem *kgem,
	  void *base, const uint32_t *reloc,
	  struct reloc *r)
{
	uint32_t delta = *reloc;
	memset(r, 0, sizeof(*r));
	if (base == 0) {
		uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
		struct kgem_bo *bo = NULL;
		int i;
		/* find the reloc entry covering this batch offset */
		for (i = 0; i < kgem->nreloc; i++)
			if (kgem->reloc[i].offset == handle)
				break;
		assert(i < kgem->nreloc);
		handle = kgem->reloc[i].target_handle;
		delta = kgem->reloc[i].delta;
		if (handle == 0) {
			base = kgem->batch;
		} else {
			/* look the handle up among the pending request's buffers */
			list_for_each_entry(bo, &kgem->next_request->buffers, request)
				if (bo->handle == handle)
					break;
			assert(&bo->request != &kgem->next_request->buffers);
			base = kgem_bo_map(kgem, bo, PROT_READ);
			r->bo = bo;
			r->base = base;
		}
	}
	return (char *)base + delta;
}
#endif
/* Decode a single Gen5 (Ironlake) 3D pipeline command found at dword
 * `offset` of the batch, pretty-printing its fields via kgem_debug_print().
 * Commands with a dedicated decoder are handled in the switch; anything
 * else is matched against the opcodes[] table and dumped as raw dwords.
 * Returns the command length in dwords so the caller can advance. */
int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
{
	static const struct {
		uint32_t opcode;
		int min_len;
		int max_len;
		const char *name;
	} opcodes[] = {
		{ 0x6000, 3, 3, "URB_FENCE" },
		{ 0x6001, 2, 2, "CS_URB_FENCE" },
		{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
		{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
		{ 0x6102, 2, 2 , "STATE_SIP" },
		{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
		{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
		{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
		{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
		{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
		{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
		{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
		{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
		{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
		{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
		{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
		{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
		{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
		{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
		{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
		{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
		/* NOTE(review): duplicate opcode 0x7909 — the linear search
		 * below always matches the entry above, so this row is dead. */
		{ 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
		{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
		{ 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
		{ 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
		{ 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
		{ 0x7b00, 6, 6, "3DPRIMITIVE" },
		{ 0x7805, 3, 3, "3DSTATE_URB" },
		{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
		{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
		{ 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
		{ 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
	};
	uint32_t *data = kgem->batch + offset;
	uint32_t op;
	unsigned int len;
	int i;
	const char *desc1 = NULL;
	/* Header dword: low byte encodes (length - 2), bits 31:16 the opcode. */
	len = (data[0] & 0xff) + 2;
	op = (data[0] & 0xffff0000) >> 16;
	switch (op) {
	case 0x6000:
		assert(len == 3);
		kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
				 (data[0] >> 13) & 1 ? "cs " : "",
				 (data[0] >> 12) & 1 ? "vfe " : "",
				 (data[0] >> 11) & 1 ? "sf " : "",
				 (data[0] >> 10) & 1 ? "clip " : "",
				 (data[0] >> 9) & 1 ? "gs " : "",
				 (data[0] >> 8) & 1 ? "vs " : "");
		kgem_debug_print(data, offset, 1,
				 "vs fence: %d, gs_fence: %d, clip_fence: %d\n",
				 data[1] & 0x3ff,
				 (data[1] >> 10) & 0x3ff,
				 (data[1] >> 20) & 0x3ff);
		kgem_debug_print(data, offset, 2,
				 "sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
				 data[2] & 0x3ff,
				 (data[2] >> 10) & 0x3ff,
				 (data[2] >> 20) & 0x7ff);
		return len;
	case 0x6001:
		kgem_debug_print(data, offset, 0, "CS_URB_STATE\n");
		kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
				 (data[1] >> 4) & 0x1f,
				 (((data[1] >> 4) & 0x1f) + 1) * 64,
				 data[1] & 0x7);
		return len;
	case 0x6002:
		kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n",
				 (data[0] >> 8) & 1 ? "valid" : "invalid");
		kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n",
				 data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
		return len;
	case 0x6101:
		/* Gen5 adds the instruction base/bound: 8 dwords total. */
		i = 0;
		kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
		assert(len == 8);
		state_base_out(data, offset, i++, "general");
		state_base_out(data, offset, i++, "surface");
		state_base_out(data, offset, i++, "media");
		state_base_out(data, offset, i++, "instruction");
		state_max_out(data, offset, i++, "general");
		state_max_out(data, offset, i++, "media");
		state_max_out(data, offset, i++, "instruction");
		return len;
	case 0x7801:
		assert(len == 6);
		kgem_debug_print(data, offset, 0,
				 "3DSTATE_BINDING_TABLE_POINTERS\n");
		kgem_debug_print(data, offset, 1, "VS binding table\n");
		kgem_debug_print(data, offset, 2, "GS binding table\n");
		kgem_debug_print(data, offset, 3, "CLIP binding table\n");
		kgem_debug_print(data, offset, 4, "SF binding table\n");
		kgem_debug_print(data, offset, 5, "WM binding table\n");
		return len;
	case 0x7808:
		/* Header dword plus one 4-dword group per vertex buffer. */
		assert((len - 1) % 4 == 0);
		kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
		for (i = 1; i < len;) {
			gen5_update_vertex_buffer(kgem, data + i);
			kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
					 data[i] >> 27,
					 data[i] & (1 << 20) ? "random" : "sequential",
					 data[i] & 0x07ff);
			i++;
			kgem_debug_print(data, offset, i++, "buffer address\n");
			kgem_debug_print(data, offset, i++, "max index\n");
			kgem_debug_print(data, offset, i++, "mbz\n");
		}
		return len;
	case 0x7809:
		/* Header dword plus two dwords per vertex element (len is odd). */
		assert((len + 1) % 2 == 0);
		kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
		memset(state.ve, 0, sizeof(state.ve)); /* XXX? */
		for (i = 1; i < len;) {
			gen5_update_vertex_elements(kgem, (i - 1)/2, data + i);
			kgem_debug_print(data, offset, i,
					 "buffer %d: %svalid, type 0x%04x, "
					 "src offset 0x%04x bytes\n",
					 data[i] >> 27,
					 data[i] & (1 << 26) ? "" : "in",
					 (data[i] >> 16) & 0x1ff,
					 data[i] & 0x07ff);
			i++;
			kgem_debug_print(data, offset, i, "(%s, %s, %s, %s)\n",
					 get_965_element_component(data[i], 0),
					 get_965_element_component(data[i], 1),
					 get_965_element_component(data[i], 2),
					 get_965_element_component(data[i], 3));
			i++;
		}
		state.num_ve = (len - 1) / 2; /* XXX? */
		return len;
	case 0x780a:
		assert(len == 3);
		kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
		kgem_debug_print(data, offset, 1, "beginning buffer address\n");
		kgem_debug_print(data, offset, 2, "ending buffer address\n");
		return len;
	case 0x7900:
		assert(len == 4);
		kgem_debug_print(data, offset, 0,
				 "3DSTATE_DRAWING_RECTANGLE\n");
		kgem_debug_print(data, offset, 1, "top left: %d,%d\n",
				 data[1] & 0xffff,
				 (data[1] >> 16) & 0xffff);
		kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n",
				 data[2] & 0xffff,
				 (data[2] >> 16) & 0xffff);
		kgem_debug_print(data, offset, 3, "origin: %d,%d\n",
				 (int)data[3] & 0xffff,
				 ((int)data[3] >> 16) & 0xffff);
		return len;
	case 0x7905:
		assert(len == 7);
		kgem_debug_print(data, offset, 0,
				 "3DSTATE_DEPTH_BUFFER\n");
		kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n",
				 get_965_surfacetype(data[1] >> 29),
				 get_965_depthformat((data[1] >> 18) & 0x7),
				 (data[1] & 0x0001ffff) + 1,
				 data[1] & (1 << 27) ? "" : "not ",
				 (data[1] & (1 << 22)) != 0,
				 (data[1] & (1 << 21)) != 0);
		kgem_debug_print(data, offset, 2, "depth offset\n");
		kgem_debug_print(data, offset, 3, "%dx%d\n",
				 ((data[3] & 0x0007ffc0) >> 6) + 1,
				 ((data[3] & 0xfff80000) >> 19) + 1);
		kgem_debug_print(data, offset, 4, "volume depth\n");
		kgem_debug_print(data, offset, 5, "\n");
		kgem_debug_print(data, offset, 6, "\n");
		return len;
	case 0x7a00:
		/* PIPE_CONTROL: dword 1 bits 15:14 select the post-sync write. */
		assert(len == 4 || len == 5);
		switch ((data[1] >> 14) & 0x3) {
		case 0: desc1 = "no write"; break;
		case 1: desc1 = "qword write"; break;
		case 2: desc1 = "PS_DEPTH_COUNT write"; break;
		case 3: desc1 = "TIMESTAMP write"; break;
		}
		kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
		kgem_debug_print(data, offset, 1,
				 "%s, %scs stall, %stlb invalidate, "
				 "%ssync gfdt, %sdepth stall, %sRC write flush, "
				 "%sinst flush, %sTC flush\n",
				 desc1,
				 data[1] & (1 << 20) ? "" : "no ",
				 data[1] & (1 << 18) ? "" : "no ",
				 data[1] & (1 << 17) ? "" : "no ",
				 data[1] & (1 << 13) ? "" : "no ",
				 data[1] & (1 << 12) ? "" : "no ",
				 data[1] & (1 << 11) ? "" : "no ",
				 data[1] & (1 << 10) ? "" : "no ");
		if (len == 5) {
			kgem_debug_print(data, offset, 2, "destination address\n");
			kgem_debug_print(data, offset, 3, "immediate dword low\n");
			kgem_debug_print(data, offset, 4, "immediate dword high\n");
		} else {
			for (i = 2; i < len; i++) {
				kgem_debug_print(data, offset, i, "\n");
			}
		}
		return len;
	case 0x7b00:
		assert(len == 6);
		kgem_debug_print(data, offset, 0,
				 "3DPRIMITIVE: %s %s\n",
				 get_965_prim_type(data[0]),
				 (data[0] & (1 << 15)) ? "random" : "sequential");
		kgem_debug_print(data, offset, 1, "vertex count\n");
		kgem_debug_print(data, offset, 2, "start vertex\n");
		kgem_debug_print(data, offset, 3, "instance count\n");
		kgem_debug_print(data, offset, 4, "start instance\n");
		kgem_debug_print(data, offset, 5, "index bias\n");
		primitive_out(kgem, data);
		return len;
	}
	/* For the rest, just dump the bytes */
	for (i = 0; i < ARRAY_SIZE(opcodes); i++)
		if (op == opcodes[i].opcode)
			break;
	assert(i < ARRAY_SIZE(opcodes));
	len = 1;
	kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
	if (opcodes[i].max_len > 1) {
		len = (data[0] & 0xff) + 2;
		assert(len >= opcodes[i].min_len &&
		       len <= opcodes[i].max_len);
	}
	for (i = 1; i < len; i++)
		kgem_debug_print(data, offset, i, "dword %d\n", i);
	return len;
}
void kgem_gen5_finish_state(struct kgem *kgem) |
{ |
memset(&state, 0, sizeof(state)); |
} |
/drivers/video/Intel-2D/kgem_debug_gen6.c |
---|
39,8 → 39,6 |
#include "kgem_debug.h" |
#define ErrorF printf |
static struct state { |
struct vertex_buffer { |
int handle; |
/drivers/video/Intel-2D/kgem_debug_gen7.c |
---|
0,0 → 1,715 |
/* |
* Copyright © 2007-2011 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Eric Anholt <eric@anholt.net> |
 * Chris Wilson <chris@chris-wilson.co.uk>
* |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include <assert.h> |
#include "sna.h" |
#include "sna_reg.h" |
#include "gen7_render.h" |
#include "kgem_debug.h" |
/* Decoder-side shadow of the GPU state programmed so far in this batch;
 * rebuilt while decoding commands and cleared by finish_state(). */
static struct state {
	struct vertex_buffer {
		int handle;		/* reloc target handle */
		void *base;		/* CPU mapping of the buffer */
		const char *ptr;	/* base + reloc delta */
		int pitch;		/* vertex stride in bytes */
		struct kgem_bo *current;
	} vb[33];
	struct vertex_elements {
		int buffer;		/* index into vb[] */
		int offset;		/* source offset within one vertex */
		bool valid;
		uint32_t type;		/* GEN7_SURFACEFORMAT_* code */
		uint8_t swizzle[4];	/* per-channel component control */
	} ve[33];
	int num_ve;
	struct dynamic_state {
		struct kgem_bo *current;
		void *base, *ptr;
	} dynamic_state;
} state;
static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) |
{ |
uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); |
struct kgem_bo *bo = NULL; |
void *base, *ptr; |
int i; |
for (i = 0; i < kgem->nreloc; i++) |
if (kgem->reloc[i].offset == reloc) |
break; |
assert(i < kgem->nreloc); |
reloc = kgem->reloc[i].target_handle; |
if (reloc == 0) { |
base = kgem->batch; |
} else { |
list_for_each_entry(bo, &kgem->next_request->buffers, request) |
if (bo->handle == reloc) |
break; |
assert(&bo->request != &kgem->next_request->buffers); |
base = kgem_bo_map__debug(kgem, bo); |
} |
ptr = (char *)base + kgem->reloc[i].delta; |
i = data[0] >> 26; |
state.vb[i].current = bo; |
state.vb[i].base = base; |
state.vb[i].ptr = ptr; |
state.vb[i].pitch = data[0] & 0x7ff; |
} |
/* Track the dynamic-state buffer referenced at batch dword `offset`.
 * If no relocation covers the dword, the cached pointers are cleared. */
static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
{
	uint32_t reloc = sizeof(uint32_t) * offset;
	struct kgem_bo *bo = NULL;
	void *base, *ptr;
	int i;
	/* bit 0 is the "modify enable" flag; nothing to do when clear */
	if ((kgem->batch[offset] & 1) == 0)
		return;
	for (i = 0; i < kgem->nreloc; i++)
		if (kgem->reloc[i].offset == reloc)
			break;
	if(i < kgem->nreloc) {
		reloc = kgem->reloc[i].target_handle;
		if (reloc == 0) {
			/* relocation points back into the batch itself */
			base = kgem->batch;
		} else {
			/* look the handle up among the pending request's buffers */
			list_for_each_entry(bo, &kgem->next_request->buffers, request)
				if (bo->handle == reloc)
					break;
			assert(&bo->request != &kgem->next_request->buffers);
			base = kgem_bo_map__debug(kgem, bo);
		}
		/* mask off the modify-enable bit from the delta */
		ptr = (char *)base + (kgem->reloc[i].delta & ~1);
	} else {
		/* no matching reloc: location of the dynamic state unknown */
		bo = NULL;
		base = NULL;
		ptr = NULL;
	}
	state.dynamic_state.current = bo;
	state.dynamic_state.base = base;
	state.dynamic_state.ptr = ptr;
}
static uint32_t
get_ve_component(uint32_t data, int component)
{
	/* The four 4-bit component controls live in bits 31:16, X in the
	 * topmost slot; only the low 3 bits of each slot are meaningful. */
	const unsigned shift = 28 - 4 * component;
	return (data >> shift) & 0x7;
}
static void gen7_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) |
{ |
state.ve[id].buffer = data[0] >> 26; |
state.ve[id].valid = !!(data[0] & (1 << 25)); |
state.ve[id].type = (data[0] >> 16) & 0x1ff; |
state.ve[id].offset = data[0] & 0x7ff; |
state.ve[id].swizzle[0] = get_ve_component(data[1], 0); |
state.ve[id].swizzle[1] = get_ve_component(data[1], 1); |
state.ve[id].swizzle[2] = get_ve_component(data[1], 2); |
state.ve[id].swizzle[3] = get_ve_component(data[1], 3); |
} |
static void gen7_update_sf_state(struct kgem *kgem, uint32_t *data) |
{ |
state.num_ve = 1 + ((data[1] >> 22) & 0x3f); |
} |
/* Print the int16 components of one vertex element: the first loop emits
 * the `max` stored source values, the second pads the remaining channels
 * according to the component controls (a stored channel beyond `max`
 * prints "1.0" here). */
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
	int c;
	ErrorF("(");
	for (c = 0; c < max; c++) {
		switch (ve->swizzle[c]) {
		case 0: ErrorF("#"); break;		/* component not stored */
		case 1: ErrorF("%d", v[c]); break;	/* source value */
		case 2: ErrorF("0.0"); break;
		case 3: ErrorF("1.0"); break;
		case 4: ErrorF("0x1"); break;
		case 5: break;
		default: ErrorF("?");
		}
		if (c < 3)
			ErrorF(", ");
	}
	for (; c < 4; c++) {
		switch (ve->swizzle[c]) {
		case 0: ErrorF("#"); break;
		case 1: ErrorF("1.0"); break;	/* pad value for unread channels */
		case 2: ErrorF("0.0"); break;
		case 3: ErrorF("1.0"); break;
		case 4: ErrorF("0x1"); break;
		case 5: break;
		default: ErrorF("?");
		}
		if (c < 3)
			ErrorF(", ");
	}
	ErrorF(")");
}
/* Float counterpart of vertices_sint16_out(): print up to `max` float
 * source values, then pad the remaining channels per the controls. */
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
	int c, o;
	ErrorF("(");
	for (c = o = 0; c < 4 && o < max; c++) {
		switch (ve->swizzle[c]) {
		case 0: ErrorF("#"); break;		/* component not stored */
		case 1: ErrorF("%f", f[o++]); break;	/* next source value */
		case 2: ErrorF("0.0"); break;
		case 3: ErrorF("1.0"); break;
		case 4: ErrorF("0x1"); break;
		case 5: break;
		default: ErrorF("?");
		}
		if (c < 3)
			ErrorF(", ");
	}
	for (; c < 4; c++) {
		switch (ve->swizzle[c]) {
		case 0: ErrorF("#"); break;
		case 1: ErrorF("1.0"); break;	/* pad value for unread channels */
		case 2: ErrorF("0.0"); break;
		case 3: ErrorF("1.0"); break;
		case 4: ErrorF("0x1"); break;
		case 5: break;
		default: ErrorF("?");
		}
		if (c < 3)
			ErrorF(", ");
	}
	ErrorF(")");
}
static void ve_out(const struct vertex_elements *ve, const void *ptr) |
{ |
switch (ve->type) { |
case GEN7_SURFACEFORMAT_R32_FLOAT: |
vertices_float_out(ve, ptr, 1); |
break; |
case GEN7_SURFACEFORMAT_R32G32_FLOAT: |
vertices_float_out(ve, ptr, 2); |
break; |
case GEN7_SURFACEFORMAT_R32G32B32_FLOAT: |
vertices_float_out(ve, ptr, 3); |
break; |
case GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT: |
vertices_float_out(ve, ptr, 4); |
break; |
case GEN7_SURFACEFORMAT_R16_SINT: |
vertices_sint16_out(ve, ptr, 1); |
break; |
case GEN7_SURFACEFORMAT_R16G16_SINT: |
vertices_sint16_out(ve, ptr, 2); |
break; |
case GEN7_SURFACEFORMAT_R16G16B16A16_SINT: |
vertices_sint16_out(ve, ptr, 4); |
break; |
case GEN7_SURFACEFORMAT_R16_SSCALED: |
vertices_sint16_out(ve, ptr, 1); |
break; |
case GEN7_SURFACEFORMAT_R16G16_SSCALED: |
vertices_sint16_out(ve, ptr, 2); |
break; |
case GEN7_SURFACEFORMAT_R16G16B16A16_SSCALED: |
vertices_sint16_out(ve, ptr, 4); |
break; |
} |
} |
/* Print the decoded attributes of vertex `v` by walking every valid
 * vertex element (slots 1..num_ve) and fetching its data from the
 * associated vertex buffer.  Elements are separated by ", ".
 *
 * Bug fix: the original used a bare `continue` when the element at the
 * current index was invalid; since the do/while condition does not
 * advance `i`, an invalid element at the current slot looped forever.
 * The skip now increments `i` explicitly. */
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
	int i = 1;

	do {
		const struct vertex_elements *ve = &state.ve[i];
		const struct vertex_buffer *vb;
		const void *ptr;

		if (!ve->valid) {
			i++;	/* advance past the invalid slot */
			continue;
		}

		vb = &state.vb[ve->buffer];
		ptr = vb->ptr + v * vb->pitch + ve->offset;
		ve_out(ve, ptr);

		/* Advance to the next valid element, if any, and only
		 * then emit a separator. */
		while (++i <= state.num_ve && !state.ve[i].valid)
			;
		if (i <= state.num_ve)
			ErrorF(", ");
	} while (i <= state.num_ve);
}
/* Print every vertex referenced by a sequential 3DPRIMITIVE command:
 * data[2] holds the vertex count, data[3] the start vertex. */
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
	int idx;

	/* Bit 15 of dword 0 selects random (indexed) access, which this
	 * decoder does not handle. */
	assert((data[0] & (1<<15)) == 0); /* XXX index buffers */

	for (idx = 0; idx < data[2]; idx++) {
		int vertex = data[3] + idx;

		ErrorF(" [%d:%d] = ", idx, vertex);
		indirect_vertex_out(kgem, vertex);
		ErrorF("\n");
	}
}
/* Reset the decoder's cached state (vertex buffers/elements, dynamic
 * state) between batches.  The kgem argument is unused. */
static void finish_state(struct kgem *kgem)
{
	memset(&state, 0, sizeof(state));
}
static void |
state_base_out(uint32_t *data, uint32_t offset, unsigned int index, |
const char *name) |
{ |
if (data[index] & 1) |
kgem_debug_print(data, offset, index, |
"%s state base address 0x%08x\n", |
name, data[index] & ~1); |
else |
kgem_debug_print(data, offset, index, |
"%s state base not updated\n", |
name); |
} |
static void |
state_max_out(uint32_t *data, uint32_t offset, unsigned int index, |
const char *name) |
{ |
if (data[index] == 1) |
kgem_debug_print(data, offset, index, |
"%s state upper bound disabled\n", name); |
else if (data[index] & 1) |
kgem_debug_print(data, offset, index, |
"%s state upper bound 0x%08x\n", |
name, data[index] & ~1); |
else |
kgem_debug_print(data, offset, index, |
"%s state upper bound not updated\n", |
name); |
} |
/* Map a SURFACE_STATE surface-type field to its mnemonic. */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
	static const char *const names[] = {
		[0] = "1D",
		[1] = "2D",
		[2] = "3D",
		[3] = "CUBE",
		[4] = "BUFFER",
		[7] = "NULL",
	};

	if (surfacetype < sizeof(names) / sizeof(names[0]) &&
	    names[surfacetype] != NULL)
		return names[surfacetype];
	return "unknown";
}
/* Map a depth-buffer format field to its mnemonic. */
static const char *
get_965_depthformat(unsigned int depthformat)
{
	if (depthformat == 0)
		return "s8_z24float";
	if (depthformat == 1)
		return "z32float";
	if (depthformat == 2)
		return "z24s8";
	if (depthformat == 5)
		return "z16";
	return "unknown";
}
static const char * |
get_element_component(uint32_t data, int component) |
{ |
uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; |
switch (component_control) { |
case 0: |
return "nostore"; |
case 1: |
switch (component) { |
case 0: return "X"; |
case 1: return "Y"; |
case 2: return "Z"; |
case 3: return "W"; |
default: return "fail"; |
} |
case 2: |
return "0.0"; |
case 3: |
return "1.0"; |
case 4: |
return "0x1"; |
case 5: |
return "VID"; |
default: |
return "fail"; |
} |
} |
static const char * |
get_prim_type(uint32_t data) |
{ |
uint32_t primtype = data & 0x1f; |
switch (primtype) { |
case 0x01: return "point list"; |
case 0x02: return "line list"; |
case 0x03: return "line strip"; |
case 0x04: return "tri list"; |
case 0x05: return "tri strip"; |
case 0x06: return "tri fan"; |
case 0x07: return "quad list"; |
case 0x08: return "quad strip"; |
case 0x09: return "line list adj"; |
case 0x0a: return "line strip adj"; |
case 0x0b: return "tri list adj"; |
case 0x0c: return "tri strip adj"; |
case 0x0d: return "tri strip reverse"; |
case 0x0e: return "polygon"; |
case 0x0f: return "rect list"; |
case 0x10: return "line loop"; |
case 0x11: return "point list bf"; |
case 0x12: return "line strip cont"; |
case 0x13: return "line strip bf"; |
case 0x14: return "line strip cont bf"; |
case 0x15: return "tri fan no stipple"; |
default: return "fail"; |
} |
} |
/* Result of resolving a batch relocation: the target bo and its CPU
 * mapping.  Both stay NULL when the reloc resolves into the batch
 * itself (see get_reloc). */
struct reloc {
	struct kgem_bo *bo;	/* target buffer object, or NULL */
	void *base;		/* CPU mapping of the target, or NULL */
};
/* Resolve a relocation entry to a CPU-addressable pointer.
 *
 * If `base` is already known (non-NULL) the reloc value is used as a
 * plain offset from it.  Otherwise the reloc's byte position within
 * the batch is looked up in kgem->reloc[] to obtain the target handle
 * and delta; the matching bo is then found on the current request's
 * buffer list and mapped for debug access.  `r` is filled with the bo
 * and its mapping (left zeroed when the reloc targets the batch
 * itself).  The low two bits of the delta are masked off. */
static void *
get_reloc(struct kgem *kgem,
	  void *base, const uint32_t *reloc,
	  struct reloc *r)
{
	uint32_t delta = *reloc;

	memset(r, 0, sizeof(*r));

	if (base == 0) {
		/* Byte offset of this reloc dword within the batch;
		 * reused below to hold the target handle. */
		uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
		struct kgem_bo *bo = NULL;
		int i;

		for (i = 0; i < kgem->nreloc; i++)
			if (kgem->reloc[i].offset == handle)
				break;
		assert(i < kgem->nreloc);
		handle = kgem->reloc[i].target_handle;
		delta = kgem->reloc[i].delta;

		if (handle == 0) {
			/* Handle 0: the reloc points back into the batch. */
			base = kgem->batch;
		} else {
			/* Find the target bo among the request's buffers. */
			list_for_each_entry(bo, &kgem->next_request->buffers, request)
				if (bo->handle == handle)
					break;
			/* Walked off the list without a match? */
			assert(&bo->request != &kgem->next_request->buffers);
			base = kgem_bo_map__debug(kgem, bo);
			r->bo = bo;
			r->base = base;
		}
	}

	return (char *)base + (delta & ~3);
}
static const char * |
gen7_filter_to_string(uint32_t filter) |
{ |
switch (filter) { |
default: |
case GEN7_MAPFILTER_NEAREST: return "nearest"; |
case GEN7_MAPFILTER_LINEAR: return "linear"; |
} |
} |
static const char * |
gen7_repeat_to_string(uint32_t repeat) |
{ |
switch (repeat) { |
default: |
case GEN7_TEXCOORDMODE_CLAMP_BORDER: return "border"; |
case GEN7_TEXCOORDMODE_WRAP: return "wrap"; |
case GEN7_TEXCOORDMODE_CLAMP: return "clamp"; |
case GEN7_TEXCOORDMODE_MIRROR: return "mirror"; |
} |
} |
static void |
gen7_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc) |
{ |
const struct gen7_sampler_state *ss; |
struct reloc r; |
const char *min, *mag; |
const char *s_wrap, *t_wrap, *r_wrap; |
ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); |
min = gen7_filter_to_string(ss->ss0.min_filter); |
mag = gen7_filter_to_string(ss->ss0.mag_filter); |
s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode); |
t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode); |
r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode); |
ErrorF(" Sampler 0:\n"); |
ErrorF(" filter: min=%s, mag=%s\n", min, mag); |
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); |
ss++; |
min = gen7_filter_to_string(ss->ss0.min_filter); |
mag = gen7_filter_to_string(ss->ss0.mag_filter); |
s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode); |
t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode); |
r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode); |
ErrorF(" Sampler 1:\n"); |
ErrorF(" filter: min=%s, mag=%s\n", min, mag); |
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); |
} |
/* Map a GEN7_BLENDFACTOR_* value to its name for debug output.
 * The C() X-macro expands each entry to
 * "case GEN7_BLENDFACTOR_<x>: return "<x>";". */
static const char *
gen7_blend_factor_to_string(uint32_t v)
{
	switch (v) {
#define C(x) case GEN7_BLENDFACTOR_##x: return #x;
	C(ONE);
	C(SRC_COLOR);
	C(SRC_ALPHA);
	C(DST_ALPHA);
	C(DST_COLOR);
	C(SRC_ALPHA_SATURATE);
	C(CONST_COLOR);
	C(CONST_ALPHA);
	C(SRC1_COLOR);
	C(SRC1_ALPHA);
	C(ZERO);
	C(INV_SRC_COLOR);
	C(INV_SRC_ALPHA);
	C(INV_DST_ALPHA);
	C(INV_DST_COLOR);
	C(INV_CONST_COLOR);
	C(INV_CONST_ALPHA);
	C(INV_SRC1_COLOR);
	C(INV_SRC1_ALPHA);
#undef C
	default: return "???";
	}
}
/* Map a GEN7_BLENDFUNCTION_* value to its name for debug output,
 * using the same X-macro expansion as gen7_blend_factor_to_string. */
static const char *
gen7_blend_function_to_string(uint32_t v)
{
	switch (v) {
#define C(x) case GEN7_BLENDFUNCTION_##x: return #x;
	C(ADD);
	C(SUBTRACT);
	C(REVERSE_SUBTRACT);
	C(MIN);
	C(MAX);
#undef C
	default: return "???";
	}
}
static void |
gen7_decode_blend(struct kgem *kgem, const uint32_t *reloc) |
{ |
const struct gen7_blend_state *blend; |
struct reloc r; |
const char *dst, *src; |
const char *func; |
blend = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); |
dst = gen7_blend_factor_to_string(blend->blend0.dest_blend_factor); |
src = gen7_blend_factor_to_string(blend->blend0.source_blend_factor); |
func = gen7_blend_function_to_string(blend->blend0.blend_func); |
ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n", |
blend->blend0.blend_enable ? "enabled" : "disabled", |
func, src, dst); |
} |
/* Decode one gen7 3D-pipeline command at `offset` dwords into the
 * batch, printing a human-readable form via kgem_debug_print.
 * Commands the vertex decoder depends on (STATE_BASE_ADDRESS,
 * 3DSTATE_VERTEX_BUFFERS, 3DSTATE_VERTEX_ELEMENTS, 3DPRIMITIVE) are
 * decoded field by field and also update the cached decoder state;
 * anything else is matched against the opcode table and dumped dword
 * by dword.  Returns the command length in dwords so the caller can
 * advance to the next command. */
int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset)
{
	/* Commands handled only by the generic dump path below. */
	static const struct {
		uint32_t opcode;
		int min_len;
		int max_len;
		const char *name;
	} opcodes[] = {
		{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
		{ 0x6102, 2, 2 , "STATE_SIP" },
		{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
		{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
		{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
	};
	uint32_t *data = kgem->batch + offset;
	uint32_t op;
	unsigned int len;
	int i;
	const char *name;

	/* DWord 0: bits 7:0 hold (command length - 2), bits 31:16 the
	 * opcode. */
	len = (data[0] & 0xff) + 2;
	op = (data[0] & 0xffff0000) >> 16;
	switch (op) {
	case 0x6101: /* STATE_BASE_ADDRESS */
		i = 0;
		kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
		assert(len == 10);
		state_base_out(data, offset, i++, "general");
		state_base_out(data, offset, i++, "surface");
		state_base_out(data, offset, i++, "dynamic");
		state_base_out(data, offset, i++, "indirect");
		state_base_out(data, offset, i++, "instruction");
		state_max_out(data, offset, i++, "general");
		state_max_out(data, offset, i++, "dynamic");
		state_max_out(data, offset, i++, "indirect");
		state_max_out(data, offset, i++, "instruction");
		/* Cache the dynamic state base (dword 3) for later
		 * sampler/blend decoding. */
		gen7_update_dynamic_buffer(kgem, offset + 3);
		return len;
	case 0x7808: /* 3DSTATE_VERTEX_BUFFERS: 4 dwords per buffer */
		assert((len - 1) % 4 == 0);
		kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
		for (i = 1; i < len;) {
			gen7_update_vertex_buffer(kgem, data + i);
			/* dword 0: buffer index in bits 31:26, access
			 * mode in bit 20, pitch in bits 10:0. */
			kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
				  data[i] >> 26,
				  data[i] & (1 << 20) ? "random" : "sequential",
				  data[i] & 0x07ff);
			i++;
			kgem_debug_print(data, offset, i++, "buffer address\n");
			kgem_debug_print(data, offset, i++, "max index\n");
			kgem_debug_print(data, offset, i++, "mbz\n");
		}
		return len;
	case 0x7809: /* 3DSTATE_VERTEX_ELEMENTS: 2 dwords per element */
		assert((len + 1) % 2 == 0);
		kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
		for (i = 1; i < len;) {
			gen7_update_vertex_elements(kgem, (i - 1)/2, data + i);
			/* dword 0: buffer index 31:26, valid bit 25,
			 * source format 24:16, source offset 10:0. */
			kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
				  "src offset 0x%04x bytes\n",
				  data[i] >> 26,
				  data[i] & (1 << 25) ? "" : "in",
				  (data[i] >> 16) & 0x1ff,
				  data[i] & 0x07ff);
			i++;
			/* dword 1: four component controls plus the
			 * destination offset (in dwords, printed as
			 * bytes). */
			kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
				  "dst offset 0x%02x bytes\n",
				  get_element_component(data[i], 0),
				  get_element_component(data[i], 1),
				  get_element_component(data[i], 2),
				  get_element_component(data[i], 3),
				  (data[i] & 0xff) * 4);
			i++;
		}
		return len;
	case 0x780a: /* 3DSTATE_INDEX_BUFFER */
		assert(len == 3);
		kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
		kgem_debug_print(data, offset, 1, "beginning buffer address\n");
		kgem_debug_print(data, offset, 2, "ending buffer address\n");
		return len;
	case 0x7b00: /* 3DPRIMITIVE */
		assert(len == 7);
		kgem_debug_print(data, offset, 0, "3DPRIMITIVE\n");
		kgem_debug_print(data, offset, 1, "type %s, %s\n",
			  get_prim_type(data[1]),
			  (data[1] & (1 << 15)) ? "random" : "sequential");
		kgem_debug_print(data, offset, 2, "vertex count\n");
		kgem_debug_print(data, offset, 3, "start vertex\n");
		kgem_debug_print(data, offset, 4, "instance count\n");
		kgem_debug_print(data, offset, 5, "start instance\n");
		kgem_debug_print(data, offset, 6, "index bias\n");
		/* Walk the vertex data referenced by this primitive. */
		primitive_out(kgem, data);
		return len;
	}

	/* For the rest, just dump the bytes */
	name = NULL;
	for (i = 0; i < ARRAY_SIZE(opcodes); i++)
		if (op == opcodes[i].opcode) {
			name = opcodes[i].name;
			break;
		}

	len = (data[0] & 0xff) + 2;
	if (name == NULL) {
		kgem_debug_print(data, offset, 0, "unknown\n");
	} else {
		kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
		/* Single-dword commands carry no length field worth
		 * validating. */
		if (opcodes[i].max_len > 1) {
			assert(len >= opcodes[i].min_len &&
			       len <= opcodes[i].max_len);
		}
	}
	for (i = 1; i < len; i++)
		kgem_debug_print(data, offset, i, "dword %d\n", i);

	return len;
}
/* Public hook: reset the gen7 decoder's cached state at the end of a
 * batch. */
void kgem_gen7_finish_state(struct kgem *kgem)
{
	finish_state(kgem);
}
/drivers/video/Intel-2D/sna.c |
---|
1,11 → 1,51 |
/************************************************************************** |
Copyright 2001 VA Linux Systems Inc., Fremont, California. |
Copyright © 2002 by David Dawes |
All Rights Reserved. |
Permission is hereby granted, free of charge, to any person obtaining a |
copy of this software and associated documentation files (the "Software"), |
to deal in the Software without restriction, including without limitation |
on the rights to use, copy, modify, merge, publish, distribute, sub |
license, and/or sell copies of the Software, and to permit persons to whom |
the Software is furnished to do so, subject to the following conditions: |
The above copyright notice and this permission notice (including the next |
paragraph) shall be included in all copies or substantial portions of the |
Software. |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
USE OR OTHER DEALINGS IN THE SOFTWARE. |
**************************************************************************/ |
/* |
* Authors: Jeff Hartmann <jhartmann@valinux.com> |
* Abraham van der Merwe <abraham@2d3d.co.za> |
* David Dawes <dawes@xfree86.org> |
* Alan Hourihane <alanh@tungstengraphics.com> |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include <memory.h> |
#include <malloc.h> |
#include <kos32sys.h> |
#include <pixlib2.h> |
#include "i915_pciids.h" |
#include "compiler.h" |
#include "sna.h" |
#include "intel_driver.h" |
#define to_surface(x) (surface_t*)((x)->handle) |
46,6 → 86,8 |
uint32_t kgem_surface_size(struct kgem *kgem,bool relaxed_fencing, |
unsigned flags, uint32_t width, uint32_t height, |
uint32_t bpp, uint32_t tiling, uint32_t *pitch); |
struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle, |
int pitch, int height); |
void kgem_close_batches(struct kgem *kgem); |
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); |
53,8 → 95,6 |
const struct intel_device_info * |
intel_detect_chipset(struct pci_device *pci); |
//struct kgem_bo *create_bo(bitmap_t *bitmap); |
static bool sna_solid_cache_init(struct sna *sna); |
struct sna *sna_device; |
66,8 → 106,46 |
(void)sna; |
} |
void no_render_init(struct sna *sna) |
static void no_render_flush(struct sna *sna) |
{ |
(void)sna; |
} |
static void |
no_render_context_switch(struct kgem *kgem, |
int new_mode) |
{ |
if (!kgem->nbatch) |
return; |
if (kgem_ring_is_idle(kgem, kgem->ring)) { |
DBG(("%s: GPU idle, flushing\n", __FUNCTION__)); |
_kgem_submit(kgem); |
} |
(void)new_mode; |
} |
static void |
no_render_retire(struct kgem *kgem) |
{ |
(void)kgem; |
} |
static void |
no_render_expire(struct kgem *kgem) |
{ |
(void)kgem; |
} |
static void |
no_render_fini(struct sna *sna) |
{ |
(void)sna; |
} |
const char *no_render_init(struct sna *sna) |
{ |
struct sna_render *render = &sna->render; |
memset (render,0, sizeof (*render)); |
77,28 → 155,20 |
render->vertices = render->vertex_data; |
render->vertex_size = ARRAY_SIZE(render->vertex_data); |
// render->composite = no_render_composite; |
// render->copy_boxes = no_render_copy_boxes; |
// render->copy = no_render_copy; |
// render->fill_boxes = no_render_fill_boxes; |
// render->fill = no_render_fill; |
// render->fill_one = no_render_fill_one; |
// render->clear = no_render_clear; |
render->reset = no_render_reset; |
// render->flush = no_render_flush; |
// render->fini = no_render_fini; |
render->flush = no_render_flush; |
render->fini = no_render_fini; |
// sna->kgem.context_switch = no_render_context_switch; |
// sna->kgem.retire = no_render_retire; |
sna->kgem.context_switch = no_render_context_switch; |
sna->kgem.retire = no_render_retire; |
sna->kgem.expire = no_render_expire; |
if (sna->kgem.gen >= 60) |
sna->kgem.ring = KGEM_RENDER; |
sna->kgem.mode = KGEM_RENDER; |
sna->kgem.ring = KGEM_RENDER; |
sna_vertex_init(sna); |
} |
sna_vertex_init(sna); |
return "generic"; |
} |
void sna_vertex_init(struct sna *sna) |
{ |
111,47 → 181,27 |
{ |
const char *backend; |
// list_init(&sna->deferred_free); |
// list_init(&sna->dirty_pixmaps); |
// list_init(&sna->active_pixmaps); |
// list_init(&sna->inactive_clock[0]); |
// list_init(&sna->inactive_clock[1]); |
backend = no_render_init(sna); |
if (sna->info->gen >= 0100) |
(void)backend; |
else if (sna->info->gen >= 070) |
backend = gen7_render_init(sna, backend); |
else if (sna->info->gen >= 060) |
backend = gen6_render_init(sna, backend); |
else if (sna->info->gen >= 050) |
backend = gen5_render_init(sna, backend); |
else if (sna->info->gen >= 040) |
backend = gen4_render_init(sna, backend); |
else if (sna->info->gen >= 030) |
backend = gen3_render_init(sna, backend); |
// sna_accel_install_timers(sna); |
backend = "no"; |
no_render_init(sna); |
if (sna->info->gen >= 0100) { |
} else if (sna->info->gen >= 070) { |
if (gen7_render_init(sna)) |
backend = "IvyBridge"; |
} else if (sna->info->gen >= 060) { |
if (gen6_render_init(sna)) |
backend = "SandyBridge"; |
} else if (sna->info->gen >= 050) { |
if (gen5_render_init(sna)) |
backend = "Ironlake"; |
} else if (sna->info->gen >= 040) { |
if (gen4_render_init(sna)) |
backend = "Broadwater/Crestline"; |
} else if (sna->info->gen >= 030) { |
if (gen3_render_init(sna)) |
backend = "gen3"; |
} |
DBG(("%s(backend=%s, prefer_gpu=%x)\n", |
__FUNCTION__, backend, sna->render.prefer_gpu)); |
kgem_reset(&sna->kgem); |
kgem_reset(&sna->kgem); |
// if (!sna_solid_cache_init(sna)) |
// return false; |
sna_device = sna; |
return kgem_init_fb(&sna->kgem, &sna_fb); |
} |
169,7 → 219,7 |
if(sna_device) |
goto done; |
io.handle = service; |
io.io_code = SRV_GET_PCI_INFO; |
io.input = &device; |
179,40 → 229,24 |
if (call_service(&io)!=0) |
goto err1; |
sna = malloc(sizeof(*sna)); |
if (sna == NULL) |
goto err1; |
memset(sna, 0, sizeof(*sna)); |
sna->cpu_features = sna_cpu_detect(); |
sna->PciInfo = &device; |
sna->info = intel_detect_chipset(sna->PciInfo); |
sna->scrn = service; |
kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen); |
/* |
if (!xf86ReturnOptValBool(sna->Options, |
OPTION_RELAXED_FENCING, |
sna->kgem.has_relaxed_fencing)) { |
xf86DrvMsg(scrn->scrnIndex, |
sna->kgem.has_relaxed_fencing ? X_CONFIG : X_PROBED, |
"Disabling use of relaxed fencing\n"); |
sna->kgem.has_relaxed_fencing = 0; |
} |
if (!xf86ReturnOptValBool(sna->Options, |
OPTION_VMAP, |
sna->kgem.has_vmap)) { |
xf86DrvMsg(scrn->scrnIndex, |
sna->kgem.has_vmap ? X_CONFIG : X_PROBED, |
"Disabling use of vmap\n"); |
sna->kgem.has_vmap = 0; |
} |
*/ |
/* Disable tiling by default */ |
sna->tiling = SNA_TILING_DISABLE; |
sna->tiling = 0; |
/* Default fail-safe value of 75 Hz */ |
// sna->vblank_interval = 1000 * 1000 * 1000 / 75; |
222,16 → 256,16 |
sna_accel_init(sna); |
tls_mask = tls_alloc(); |
// printf("tls mask %x\n", tls_mask); |
done: |
caps = sna_device->render.caps; |
err1: |
__lock_release_recursive(__sna_lock); |
return caps; |
return caps; |
} |
void sna_fini() |
239,17 → 273,17 |
if( sna_device ) |
{ |
struct kgem_bo *mask; |
__lock_acquire_recursive(__sna_lock); |
mask = tls_get(tls_mask); |
sna_device->render.fini(sna_device); |
if(mask) |
kgem_bo_destroy(&sna_device->kgem, mask); |
kgem_close_batches(&sna_device->kgem); |
kgem_close_batches(&sna_device->kgem); |
kgem_cleanup_cache(&sna_device->kgem); |
sna_device = NULL; |
__lock_release_recursive(__sna_lock); |
}; |
402,7 → 436,7 |
winx = *(uint32_t*)(proc_info+34); |
winy = *(uint32_t*)(proc_info+38); |
memset(&src, 0, sizeof(src)); |
memset(&dst, 0, sizeof(dst)); |
413,36 → 447,36 |
dst.drawable.bitsPerPixel = 32; |
dst.drawable.width = sna_fb.width; |
dst.drawable.height = sna_fb.height; |
memset(©, 0, sizeof(copy)); |
src_bo = (struct kgem_bo*)src_bitmap->handle; |
if( sna_device->render.copy(sna_device, GXcopy, |
&src, src_bo, |
&dst, sna_fb.fb_bo, ©) ) |
{ |
{ |
copy.blt(sna_device, ©, src_x, src_y, w, h, winx+dst_x, winy+dst_y); |
copy.done(sna_device, ©); |
} |
kgem_submit(&sna_device->kgem); |
return 0; |
// __asm__ __volatile__("int3"); |
}; |
typedef struct |
typedef struct |
{ |
uint32_t width; |
uint32_t height; |
void *data; |
uint32_t pitch; |
struct kgem_bo *bo; |
uint32_t bo_size; |
uint32_t flags; |
struct kgem_bo *bo; |
uint32_t bo_size; |
uint32_t flags; |
}surface_t; |
451,23 → 485,23 |
{ |
surface_t *sf; |
struct kgem_bo *bo; |
sf = malloc(sizeof(*sf)); |
if(sf == NULL) |
goto err_1; |
__lock_acquire_recursive(__sna_lock); |
bo = kgem_create_2d(&sna_device->kgem, bitmap->width, bitmap->height, |
32,I915_TILING_NONE, CREATE_CPU_MAP); |
if(bo == NULL) |
goto err_2; |
void *map = kgem_bo_map(&sna_device->kgem, bo); |
if(map == NULL) |
goto err_3; |
sf->width = bitmap->width; |
sf->height = bitmap->height; |
sf->data = map; |
475,33 → 509,74 |
sf->bo = bo; |
sf->bo_size = PAGE_SIZE * bo->size.pages.count; |
sf->flags = bitmap->flags; |
bitmap->handle = (uint32_t)sf; |
__lock_release_recursive(__sna_lock); |
return 0; |
err_3: |
kgem_bo_destroy(&sna_device->kgem, bo); |
err_2: |
__lock_release_recursive(__sna_lock); |
free(sf); |
free(sf); |
err_1: |
return -1; |
return -1; |
}; |
int sna_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle) |
{ |
surface_t *sf; |
struct kgem_bo *bo; |
sf = malloc(sizeof(*sf)); |
if(sf == NULL) |
goto err_1; |
__lock_acquire_recursive(__sna_lock); |
bo = kgem_bo_from_handle(&sna_device->kgem, handle, bitmap->pitch, bitmap->height); |
__lock_release_recursive(__sna_lock); |
sf->width = bitmap->width; |
sf->height = bitmap->height; |
sf->data = NULL; |
sf->pitch = bo->pitch; |
sf->bo = bo; |
sf->bo_size = PAGE_SIZE * bo->size.pages.count; |
sf->flags = bitmap->flags; |
bitmap->handle = (uint32_t)sf; |
return 0; |
err_2: |
__lock_release_recursive(__sna_lock); |
free(sf); |
err_1: |
return -1; |
}; |
void sna_set_bo_handle(bitmap_t *bitmap, int handle) |
{ |
surface_t *sf = to_surface(bitmap); |
struct kgem_bo *bo = sf->bo; |
bo->handle = handle; |
} |
int sna_destroy_bitmap(bitmap_t *bitmap) |
{ |
surface_t *sf = to_surface(bitmap); |
__lock_acquire_recursive(__sna_lock); |
kgem_bo_destroy(&sna_device->kgem, sf->bo); |
__lock_release_recursive(__sna_lock); |
free(sf); |
bitmap->handle = -1; |
bitmap->data = (void*)-1; |
bitmap->pitch = -1; |
511,17 → 586,17 |
int sna_lock_bitmap(bitmap_t *bitmap) |
{ |
surface_t *sf = to_surface(bitmap); |
surface_t *sf = to_surface(bitmap); |
// printf("%s\n", __FUNCTION__); |
__lock_acquire_recursive(__sna_lock); |
kgem_bo_sync__cpu(&sna_device->kgem, sf->bo); |
__lock_release_recursive(__sna_lock); |
bitmap->data = sf->data; |
bitmap->pitch = sf->pitch; |
bitmap->pitch = sf->pitch; |
return 0; |
}; |
530,8 → 605,8 |
{ |
surface_t *sf = to_surface(bitmap); |
struct kgem *kgem = &sna_device->kgem; |
struct kgem_bo *bo = sf->bo; |
struct kgem_bo *bo = sf->bo; |
uint32_t size; |
uint32_t pitch; |
541,24 → 616,24 |
size = kgem_surface_size(kgem,kgem->has_relaxed_fencing, CREATE_CPU_MAP, |
bitmap->width, bitmap->height, 32, I915_TILING_NONE, &pitch); |
assert(size && size <= kgem->max_object_size); |
if(sf->bo_size >= size) |
{ |
sf->width = bitmap->width; |
sf->height = bitmap->height; |
sf->pitch = pitch; |
bo->pitch = pitch; |
bo->pitch = pitch; |
return 0; |
} |
else |
{ |
__lock_acquire_recursive(__sna_lock); |
sna_bo_destroy(kgem, bo); |
sf->bo = NULL; |
bo = kgem_create_2d(kgem, bitmap->width, bitmap->height, |
32, I915_TILING_NONE, CREATE_CPU_MAP); |
567,7 → 642,7 |
__lock_release_recursive(__sna_lock); |
return -1; |
}; |
void *map = kgem_bo_map(kgem, bo); |
if(map == NULL) |
{ |
575,9 → 650,9 |
__lock_release_recursive(__sna_lock); |
return -1; |
}; |
__lock_release_recursive(__sna_lock); |
sf->width = bitmap->width; |
sf->height = bitmap->height; |
sf->data = map; |
586,7 → 661,7 |
sf->bo_size = PAGE_SIZE * bo->size.pages.count; |
} |
return 0; |
return 0; |
}; |
596,32 → 671,32 |
struct kgem_bo *bo; |
// printf("%s width %d height %d\n", __FUNCTION__, sna_fb.width, sna_fb.height); |
__lock_acquire_recursive(__sna_lock); |
bo = kgem_create_2d(&sna_device->kgem, sna_fb.width, sna_fb.height, |
8,I915_TILING_NONE, CREATE_CPU_MAP); |
if(unlikely(bo == NULL)) |
goto err_1; |
int *map = kgem_bo_map(&sna_device->kgem, bo); |
if(map == NULL) |
goto err_2; |
__lock_release_recursive(__sna_lock); |
memset(map, 0, bo->pitch * sna_fb.height); |
tls_set(tls_mask, bo); |
return 0; |
err_2: |
kgem_bo_destroy(&sna_device->kgem, bo); |
err_1: |
__lock_release_recursive(__sna_lock); |
return -1; |
return -1; |
}; |
630,7 → 705,7 |
uint8_t op, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
644,10 → 719,10 |
int w, int h, int src_x, int src_y) |
{ |
surface_t *sf = to_surface(bitmap); |
surface_t *sf = to_surface(bitmap); |
struct drm_i915_mask_update update; |
struct sna_composite_op composite; |
struct _Pixmap src, dst, mask; |
struct kgem_bo *src_bo, *mask_bo; |
661,41 → 736,41 |
winy = *(uint32_t*)(proc_info+38); |
// winw = *(uint32_t*)(proc_info+42)+1; |
// winh = *(uint32_t*)(proc_info+46)+1; |
mask_bo = tls_get(tls_mask); |
if(unlikely(mask_bo == NULL)) |
{ |
sna_create_mask(); |
mask_bo = tls_get(tls_mask); |
if( mask_bo == NULL) |
return -1; |
return -1; |
}; |
if(kgem_update_fb(&sna_device->kgem, &sna_fb)) |
{ |
__lock_acquire_recursive(__sna_lock); |
kgem_bo_destroy(&sna_device->kgem, mask_bo); |
__lock_release_recursive(__sna_lock); |
sna_create_mask(); |
mask_bo = tls_get(tls_mask); |
if( mask_bo == NULL) |
return -1; |
return -1; |
} |
VG_CLEAR(update); |
update.handle = mask_bo->handle; |
update.bo_map = (__u32)MAP(mask_bo->map); |
update.bo_map = (int)kgem_bo_map__cpu(&sna_device->kgem, mask_bo); |
drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update); |
mask_bo->pitch = update.bo_pitch; |
memset(&src, 0, sizeof(src)); |
memset(&dst, 0, sizeof(dst)); |
memset(&mask, 0, sizeof(dst)); |
src.drawable.bitsPerPixel = 32; |
src.drawable.width = sf->width; |
src.drawable.height = sf->height; |
702,7 → 777,7 |
dst.drawable.bitsPerPixel = 32; |
dst.drawable.width = sna_fb.width; |
dst.drawable.height = sna_fb.height; |
mask.drawable.bitsPerPixel = 8; |
mask.drawable.width = update.width; |
mask.drawable.height = update.height; |
710,14 → 785,14 |
memset(&composite, 0, sizeof(composite)); |
src_bo = sf->bo; |
__lock_acquire_recursive(__sna_lock); |
if( sna_device->render.blit_tex(sna_device, PictOpSrc,scale, |
&src, src_bo, |
&mask, mask_bo, |
&dst, sna_fb.fb_bo, |
&dst, sna_fb.fb_bo, |
src_x, src_y, |
dst_x, dst_y, |
winx+dst_x, winy+dst_y, |
725,7 → 800,7 |
&composite) ) |
{ |
struct sna_composite_rectangles r; |
r.src.x = src_x; |
r.src.y = src_y; |
r.mask.x = dst_x; |
734,20 → 809,20 |
r.dst.y = winy+dst_y; |
r.width = w; |
r.height = h; |
composite.blt(sna_device, &composite, &r); |
composite.done(sna_device, &composite); |
}; |
kgem_submit(&sna_device->kgem); |
__lock_release_recursive(__sna_lock); |
bitmap->data = (void*)-1; |
bitmap->pitch = -1; |
return 0; |
return 0; |
} |
756,8 → 831,6 |
static const struct intel_device_info intel_generic_info = { |
.gen = -1, |
}; |
807,100 → 880,37 |
static const struct pci_id_match intel_device_match[] = { |
INTEL_I915G_IDS(&intel_i915_info), |
INTEL_I915GM_IDS(&intel_i915_info), |
INTEL_I945G_IDS(&intel_i945_info), |
INTEL_I945GM_IDS(&intel_i945_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_I915_G, &intel_i915_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_E7221_G, &intel_i915_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_I915_GM, &intel_i915_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_I945_G, &intel_i945_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GM, &intel_i945_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GME, &intel_i945_info ), |
INTEL_G33_IDS(&intel_g33_info), |
INTEL_PINEVIEW_IDS(&intel_g33_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_M, &intel_g33_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_G, &intel_g33_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_G33_G, &intel_g33_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_Q33_G, &intel_g33_info ), |
/* Another marketing win: Q35 is another g33 device not a gen4 part |
* like its G35 brethren. |
*/ |
INTEL_DEVICE_MATCH (PCI_CHIP_Q35_G, &intel_g33_info ), |
INTEL_I965G_IDS(&intel_i965_info), |
INTEL_I965GM_IDS(&intel_i965_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_I965_G, &intel_i965_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_G35_G, &intel_i965_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_I965_Q, &intel_i965_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_I946_GZ, &intel_i965_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GM, &intel_i965_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GME, &intel_i965_info ), |
INTEL_G45_IDS(&intel_g4x_info), |
INTEL_GM45_IDS(&intel_g4x_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_GM45_GM, &intel_g4x_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_G45_E_G, &intel_g4x_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_G45_G, &intel_g4x_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_Q45_G, &intel_g4x_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_G41_G, &intel_g4x_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G, &intel_g4x_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G1, &intel_g4x_info ), |
INTEL_IRONLAKE_D_IDS(&intel_ironlake_info), |
INTEL_IRONLAKE_M_IDS(&intel_ironlake_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_D_G, &intel_ironlake_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_M_G, &intel_ironlake_info ), |
INTEL_SNB_D_IDS(&intel_sandybridge_info), |
INTEL_SNB_M_IDS(&intel_sandybridge_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT1, &intel_sandybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2, &intel_sandybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2_PLUS, &intel_sandybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT1, &intel_sandybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2, &intel_sandybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS, &intel_sandybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_S_GT, &intel_sandybridge_info ), |
INTEL_IVB_D_IDS(&intel_ivybridge_info), |
INTEL_IVB_M_IDS(&intel_ivybridge_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT1, &intel_ivybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT2, &intel_ivybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT1, &intel_ivybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT2, &intel_ivybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT1, &intel_ivybridge_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT2, &intel_ivybridge_info ), |
INTEL_HSW_D_IDS(&intel_haswell_info), |
INTEL_HSW_M_IDS(&intel_haswell_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2_PLUS, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT1, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2, &intel_haswell_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, &intel_haswell_info ), |
INTEL_VLV_D_IDS(&intel_valleyview_info), |
INTEL_VLV_M_IDS(&intel_valleyview_info), |
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ), |
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ), |
INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info), |
INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ), |
{ 0, 0, 0 }, |
}; |
920,14 → 930,14 |
{ |
const struct pci_id_match *ent = NULL; |
ent = PciDevMatch(pci->device_id, intel_device_match); |
ent = PciDevMatch(pci->device_id, intel_device_match); |
if(ent != NULL) |
return (const struct intel_device_info*)ent->match_data; |
else |
else |
return &intel_generic_info; |
#if 0 |
#if 0 |
for (i = 0; intel_chipsets[i].name != NULL; i++) { |
if (DEVICE_ID(pci) == intel_chipsets[i].token) { |
name = intel_chipsets[i].name; |
945,10 → 955,24 |
scrn->chipset = name; |
#endif |
} |
int intel_get_device_id(int fd) |
{ |
struct drm_i915_getparam gp; |
int devid = 0; |
memset(&gp, 0, sizeof(gp)); |
gp.param = I915_PARAM_CHIPSET_ID; |
gp.value = &devid; |
if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) |
return 0; |
return devid; |
} |
int drmIoctl(int fd, unsigned long request, void *arg) |
{ |
ioctl_t io; |
/drivers/video/Intel-2D/sna.h |
---|
37,26 → 37,83 |
#ifndef _SNA_H_ |
#define _SNA_H_ |
#include <stdint.h> |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include <stdint.h> |
#include "compiler.h" |
#include <memory.h> |
#include <malloc.h> |
#include <errno.h> |
#include <kos32sys.h> |
#include "intel_driver.h" |
#include "pciaccess.h" |
#include "compiler.h" |
#include <drm.h> |
#include <i915_drm.h> |
//#define DBG(x) |
//#define DBG(x) ErrorF x |
#ifdef HAVE_DRI2_H |
#include <dri2.h> |
#endif |
#define assert(x) |
#if HAVE_UDEV |
#include <libudev.h> |
#endif |
#if 0 |
#include <xorg-server.h> |
#include <xf86Crtc.h> |
#if XF86_CRTC_VERSION >= 5 |
#define HAS_PIXMAP_SHARING 1 |
#endif |
#include <xf86str.h> |
#include <windowstr.h> |
#include <glyphstr.h> |
#include <picturestr.h> |
#include <gcstruct.h> |
#include <xvdix.h> |
#include <pciaccess.h> |
#include <xf86drmMode.h> |
#include "../compat-api.h" |
#endif |
#include <assert.h> |
#define ErrorF printf |
#if HAS_DEBUG_FULL |
#define DBG(x) ErrorF x |
#else |
#define DBG(x) |
#endif |
#define DEBUG_NO_BLT 0 |
#define DEBUG_FLUSH_BATCH 0 |
#define TEST_ALL 0 |
#define TEST_ACCEL (TEST_ALL || 0) |
#define TEST_BATCH (TEST_ALL || 0) |
#define TEST_BLT (TEST_ALL || 0) |
#define TEST_COMPOSITE (TEST_ALL || 0) |
#define TEST_DAMAGE (TEST_ALL || 0) |
#define TEST_GRADIENT (TEST_ALL || 0) |
#define TEST_GLYPHS (TEST_ALL || 0) |
#define TEST_IO (TEST_ALL || 0) |
#define TEST_KGEM (TEST_ALL || 0) |
#define TEST_RENDER (TEST_ALL || 0) |
int drmIoctl(int fd, unsigned long request, void *arg); |
94,12 → 151,69 |
PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10), |
PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10), |
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0) |
/* sRGB formats */ |
PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8), |
/* 24bpp formats */ |
PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8), |
PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8), |
/* 16bpp formats */ |
PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5), |
PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5), |
PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5), |
PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5), |
PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5), |
PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5), |
PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4), |
PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4), |
PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4), |
PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4), |
/* 8bpp formats */ |
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0), |
PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2), |
PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2), |
PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2), |
PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2), |
PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), |
PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), |
PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0), |
PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), |
PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), |
/* 4bpp formats */ |
PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0), |
PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1), |
PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1), |
PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1), |
PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1), |
PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0), |
PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0), |
/* 1bpp formats */ |
PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0), |
PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0), |
/* YUV formats */ |
PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0), |
PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0) |
} pixman_format_code_t; |
typedef enum _PictFormatShort { |
PICT_a2r10g10b10 = PIXMAN_a2r10g10b10, |
PICT_x2r10g10b10 = PIXMAN_x2r10g10b10, |
PICT_a2b10g10r10 = PIXMAN_a2b10g10r10, |
PICT_x2b10g10r10 = PIXMAN_x2b10g10r10, |
PICT_a8r8g8b8 = PIXMAN_a8r8g8b8, |
PICT_x8r8g8b8 = PIXMAN_x8r8g8b8, |
PICT_a8b8g8r8 = PIXMAN_a8b8g8r8, |
107,10 → 221,52 |
PICT_b8g8r8a8 = PIXMAN_b8g8r8a8, |
PICT_b8g8r8x8 = PIXMAN_b8g8r8x8, |
/* 24bpp formats */ |
PICT_r8g8b8 = PIXMAN_r8g8b8, |
PICT_b8g8r8 = PIXMAN_b8g8r8, |
/* 16bpp formats */ |
PICT_r5g6b5 = PIXMAN_r5g6b5, |
PICT_b5g6r5 = PIXMAN_b5g6r5, |
PICT_a1r5g5b5 = PIXMAN_a1r5g5b5, |
PICT_x1r5g5b5 = PIXMAN_x1r5g5b5, |
PICT_a1b5g5r5 = PIXMAN_a1b5g5r5, |
PICT_x1b5g5r5 = PIXMAN_x1b5g5r5, |
PICT_a4r4g4b4 = PIXMAN_a4r4g4b4, |
PICT_x4r4g4b4 = PIXMAN_x4r4g4b4, |
PICT_a4b4g4r4 = PIXMAN_a4b4g4r4, |
PICT_x4b4g4r4 = PIXMAN_x4b4g4r4, |
/* 8bpp formats */ |
PICT_a8 = PIXMAN_a8, |
PICT_r3g3b2 = PIXMAN_r3g3b2, |
PICT_b2g3r3 = PIXMAN_b2g3r3, |
PICT_a2r2g2b2 = PIXMAN_a2r2g2b2, |
PICT_a2b2g2r2 = PIXMAN_a2b2g2r2, |
PICT_c8 = PIXMAN_c8, |
PICT_g8 = PIXMAN_g8, |
PICT_x4a4 = PIXMAN_x4a4, |
PICT_x4c4 = PIXMAN_x4c4, |
PICT_x4g4 = PIXMAN_x4g4, |
/* 4bpp formats */ |
PICT_a4 = PIXMAN_a4, |
PICT_r1g2b1 = PIXMAN_r1g2b1, |
PICT_b1g2r1 = PIXMAN_b1g2r1, |
PICT_a1r1g1b1 = PIXMAN_a1r1g1b1, |
PICT_a1b1g1r1 = PIXMAN_a1b1g1r1, |
PICT_c4 = PIXMAN_c4, |
PICT_g4 = PIXMAN_g4, |
/* 1bpp formats */ |
PICT_a1 = PIXMAN_a1, |
PICT_g1 = PIXMAN_g1 |
} PictFormatShort; |
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f) |
230,7 → 386,7 |
uint32_t height; |
uint32_t pitch; |
uint32_t tiling; |
struct kgem_bo *fb_bo; |
}; |
266,8 → 422,22 |
#define PictOpMaximum 13 |
#define SNA_CURSOR_X 64 |
#define SNA_CURSOR_Y SNA_CURSOR_X |
struct sna_client { |
int is_compositor; /* only 4 bits used */ |
}; |
//#define assert(x) |
struct sna { |
struct kgem kgem; |
unsigned scrn; |
unsigned flags; |
#define SNA_NO_WAIT 0x1 |
#define SNA_NO_FLIP 0x2 |
274,21 → 444,31 |
#define SNA_TRIPLE_BUFFER 0x4 |
#define SNA_TEAR_FREE 0x10 |
#define SNA_FORCE_SHADOW 0x20 |
#define SNA_FLUSH_GTT 0x40 |
#define SNA_IS_HOSTED 0x80 |
#define SNA_PERFORMANCE 0x100 |
#define SNA_POWERSAVE 0x200 |
#define SNA_REPROBE 0x80000000 |
unsigned cpu_features; |
#define MMX 0x1 |
#define SSE 0x2 |
#define SSE2 0x4 |
#define SSE3 0x8 |
#define SSSE3 0x10 |
#define SSE4_1 0x20 |
#define SSE4_2 0x40 |
#define AVX 0x80 |
#define AVX2 0x100 |
struct list flush_pixmaps; |
struct list active_pixmaps; |
// int vblank_interval; |
// struct list deferred_free; |
// struct list dirty_pixmaps; |
// struct list active_pixmaps; |
// struct list inactive_clock[2]; |
unsigned int tiling; |
#define SNA_TILING_DISABLE 0x0 |
#define SNA_TILING_FB 0x1 |
#define SNA_TILING_2D 0x2 |
#define SNA_TILING_ALL (~0) |
303,7 → 483,6 |
uint32_t fill_alu; |
} blt_state; |
union { |
// struct gen2_render_state gen2; |
struct gen3_render_state gen3; |
struct gen4_render_state gen4; |
struct gen5_render_state gen5; |
318,12 → 497,11 |
/* Driver phase/state information */ |
// Bool suspended; |
struct kgem kgem; |
struct sna_render render; |
#if DEBUG_MEMORY |
struct { |
int shadow_pixels_allocs; |
int pixmap_allocs; |
int cpu_bo_allocs; |
size_t shadow_pixels_bytes; |
size_t cpu_bo_bytes; |
352,4 → 530,78 |
#ifndef MAX |
#define MAX(a,b) ((a) >= (b) ? (a) : (b)) |
#endif |
static inline bool |
_sna_transform_point(const PictTransform *transform, |
int64_t x, int64_t y, int64_t result[3]) |
{ |
int j; |
for (j = 0; j < 3; j++) |
result[j] = (transform->matrix[j][0] * x + |
transform->matrix[j][1] * y + |
transform->matrix[j][2]); |
return result[2] != 0; |
} |
static inline void |
_sna_get_transformed_coordinates(int x, int y, |
const PictTransform *transform, |
float *x_out, float *y_out) |
{ |
int64_t result[3]; |
_sna_transform_point(transform, x, y, result); |
*x_out = result[0] / (double)result[2]; |
*y_out = result[1] / (double)result[2]; |
} |
static inline void |
_sna_get_transformed_scaled(int x, int y, |
const PictTransform *transform, const float *sf, |
float *x_out, float *y_out) |
{ |
*x_out = sf[0] * (transform->matrix[0][0] * x + |
transform->matrix[0][1] * y + |
transform->matrix[0][2]); |
*y_out = sf[1] * (transform->matrix[1][0] * x + |
transform->matrix[1][1] * y + |
transform->matrix[1][2]); |
} |
void |
sna_get_transformed_coordinates(int x, int y, |
const PictTransform *transform, |
float *x_out, float *y_out); |
void |
sna_get_transformed_coordinates_3d(int x, int y, |
const PictTransform *transform, |
float *x_out, float *y_out, float *z_out); |
bool sna_transform_is_affine(const PictTransform *t); |
bool sna_transform_is_integer_translation(const PictTransform *t, |
int16_t *tx, int16_t *ty); |
bool sna_transform_is_translation(const PictTransform *t, |
pixman_fixed_t *tx, pixman_fixed_t *ty); |
static inline bool |
sna_affine_transform_is_rotation(const PictTransform *t) |
{ |
assert(sna_transform_is_affine(t)); |
return t->matrix[0][1] | t->matrix[1][0]; |
} |
static inline bool |
sna_transform_equal(const PictTransform *a, const PictTransform *b) |
{ |
if (a == b) |
return true; |
if (a == NULL || b == NULL) |
return false; |
return memcmp(a, b, sizeof(*a)) == 0; |
} |
#endif /* _SNA_H */ |
/drivers/video/Intel-2D/sna_cpu.c |
---|
0,0 → 1,116 |
/* |
* Copyright (c) 2013 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include "sna.h" |
#include "sna_cpuid.h" |
#define xgetbv(index,eax,edx) \ |
__asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index)) |
#define has_YMM 0x1 |
unsigned sna_cpu_detect(void) |
{ |
unsigned max = __get_cpuid_max(BASIC_CPUID, NULL); |
unsigned int eax, ebx, ecx, edx; |
unsigned features = 0; |
unsigned extra = 0; |
if (max >= 1) { |
__cpuid(1, eax, ebx, ecx, edx); |
if (ecx & bit_SSE3) |
features |= SSE3; |
if (ecx & bit_SSSE3) |
features |= SSSE3; |
if (ecx & bit_SSE4_1) |
features |= SSE4_1; |
if (ecx & bit_SSE4_2) |
features |= SSE4_2; |
if (ecx & bit_OSXSAVE) { |
unsigned int bv_eax, bv_ecx; |
xgetbv(0, bv_eax, bv_ecx); |
if ((bv_eax & 6) == 6) |
extra |= has_YMM; |
} |
if ((extra & has_YMM) && (ecx & bit_AVX)) |
features |= AVX; |
if (edx & bit_MMX) |
features |= MMX; |
if (edx & bit_SSE) |
features |= SSE; |
if (edx & bit_SSE2) |
features |= SSE2; |
} |
if (max >= 7) { |
__cpuid_count(7, 0, eax, ebx, ecx, edx); |
if ((extra & has_YMM) && (ebx & bit_AVX2)) |
features |= AVX2; |
} |
return features; |
} |
char *sna_cpu_features_to_string(unsigned features, char *line) |
{ |
char *ret = line; |
#ifdef __x86_64__ |
line += sprintf (line, "x86-64"); |
#else |
line += sprintf (line, "x86"); |
#endif |
if (features & SSE2) |
line += sprintf (line, ", sse2"); |
if (features & SSE3) |
line += sprintf (line, ", sse3"); |
if (features & SSSE3) |
line += sprintf (line, ", ssse3"); |
if (features & SSE4_1) |
line += sprintf (line, ", sse4.1"); |
if (features & SSE4_2) |
line += sprintf (line, ", sse4.2"); |
if (features & AVX) |
line += sprintf (line, ", avx"); |
if (features & AVX2) |
line += sprintf (line, ", avx2"); |
return ret; |
} |
/drivers/video/Intel-2D/sna_cpuid.h |
---|
0,0 → 1,86 |
/* |
* Copyright (c) 2013 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
* Chris Wilson <chris@chris-wilson.co.uk> |
* |
*/ |
/* Small wrapper around compiler specific implementation details of cpuid */ |
#ifndef SNA_CPUID_H |
#define SNA_CPUID_H |
#include "compiler.h" |
#if HAS_GCC(4, 4) /* for __cpuid_count() */ |
#include <cpuid.h> |
#else |
#define __get_cpuid_max(x, y) 0 |
#define __cpuid(level, a, b, c, d) |
#define __cpuid_count(level, count, a, b, c, d) |
#endif |
#define BASIC_CPUID 0x0 |
#define EXTENDED_CPUID 0x80000000 |
#ifndef bit_MMX |
#define bit_MMX (1 << 23) |
#endif |
#ifndef bit_SSE |
#define bit_SSE (1 << 25) |
#endif |
#ifndef bit_SSE2 |
#define bit_SSE2 (1 << 26) |
#endif |
#ifndef bit_SSE3 |
#define bit_SSE3 (1 << 0) |
#endif |
#ifndef bit_SSSE3 |
#define bit_SSSE3 (1 << 9) |
#endif |
#ifndef bit_SSE4_1 |
#define bit_SSE4_1 (1 << 19) |
#endif |
#ifndef bit_SSE4_2 |
#define bit_SSE4_2 (1 << 20) |
#endif |
#ifndef bit_OSXSAVE |
#define bit_OSXSAVE (1 << 27) |
#endif |
#ifndef bit_AVX |
#define bit_AVX (1 << 28) |
#endif |
#ifndef bit_AVX2 |
#define bit_AVX2 (1<<5) |
#endif |
#endif /* SNA_CPUID_H */ |
/drivers/video/Intel-2D/sna_render.h |
---|
1,729 → 1,786 |
#ifndef SNA_RENDER_H |
#define SNA_RENDER_H |
#include "compiler.h" |
#include <stdbool.h> |
#include <stdint.h> |
#define GRADIENT_CACHE_SIZE 16 |
#define GXinvalid 0xff |
#define HW_BIT_BLIT (1<<0) /* BGRX blitter */ |
#define HW_TEX_BLIT (1<<1) /* stretch blit */ |
#define HW_VID_BLIT (1<<2) /* planar and packed video */ |
struct sna; |
struct sna_glyph; |
struct sna_video; |
struct sna_video_frame; |
struct brw_compile; |
struct sna_composite_rectangles { |
struct sna_coordinate { |
int16_t x, y; |
} src, mask, dst; |
int16_t width, height; |
}; |
struct sna_composite_op { |
fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r); |
fastcall void (*box)(struct sna *sna, |
const struct sna_composite_op *op, |
const BoxRec *box); |
void (*boxes)(struct sna *sna, const struct sna_composite_op *op, |
const BoxRec *box, int nbox); |
void (*done)(struct sna *sna, const struct sna_composite_op *op); |
struct sna_damage **damage; |
uint32_t op; |
struct { |
PixmapPtr pixmap; |
CARD32 format; |
struct kgem_bo *bo; |
int16_t x, y; |
uint16_t width, height; |
} dst; |
struct sna_composite_channel { |
struct kgem_bo *bo; |
PictTransform *transform; |
uint16_t width; |
uint16_t height; |
uint32_t pict_format; |
uint32_t card_format; |
uint32_t filter; |
uint32_t repeat; |
uint32_t is_affine : 1; |
uint32_t is_solid : 1; |
uint32_t is_linear : 1; |
uint32_t is_opaque : 1; |
uint32_t alpha_fixup : 1; |
uint32_t rb_reversed : 1; |
int16_t offset[2]; |
float scale[2]; |
// pixman_transform_t embedded_transform; |
union { |
struct { |
float dx, dy, offset; |
} linear; |
struct { |
uint32_t pixel; |
} gen2; |
struct gen3_shader_channel { |
int type; |
uint32_t mode; |
uint32_t constants; |
} gen3; |
} u; |
} src, mask; |
uint32_t is_affine : 1; |
uint32_t has_component_alpha : 1; |
uint32_t need_magic_ca_pass : 1; |
uint32_t rb_reversed : 1; |
int16_t floats_per_vertex; |
int16_t floats_per_rect; |
fastcall void (*prim_emit)(struct sna *sna, |
const struct sna_composite_op *op, |
const struct sna_composite_rectangles *r); |
struct sna_composite_redirect { |
struct kgem_bo *real_bo; |
struct sna_damage **real_damage, *damage; |
BoxRec box; |
} redirect; |
union { |
struct sna_blt_state { |
PixmapPtr src_pixmap; |
int16_t sx, sy; |
uint32_t inplace :1; |
uint32_t overwrites:1; |
uint32_t bpp : 6; |
uint32_t cmd; |
uint32_t br13; |
uint32_t pitch[2]; |
uint32_t pixel; |
struct kgem_bo *bo[2]; |
} blt; |
struct { |
float constants[8]; |
uint32_t num_constants; |
} gen3; |
struct { |
int wm_kernel; |
int ve_id; |
} gen4; |
struct { |
int16_t wm_kernel; |
int16_t ve_id; |
} gen5; |
struct { |
uint32_t flags; |
} gen6; |
struct { |
uint32_t flags; |
} gen7; |
} u; |
void *priv; |
}; |
struct sna_copy_op { |
struct sna_composite_op base; |
void (*blt)(struct sna *sna, const struct sna_copy_op *op, |
int16_t sx, int16_t sy, |
int16_t w, int16_t h, |
int16_t dx, int16_t dy); |
void (*done)(struct sna *sna, const struct sna_copy_op *op); |
}; |
struct sna_render { |
int active; |
int caps; |
int max_3d_size; |
int max_3d_pitch; |
unsigned prefer_gpu; |
#define PREFER_GPU_BLT 0x1 |
#define PREFER_GPU_RENDER 0x2 |
#define PREFER_GPU_SPANS 0x4 |
bool (*composite)(struct sna *sna, uint8_t op, |
PicturePtr dst, PicturePtr src, PicturePtr mask, |
int16_t src_x, int16_t src_y, |
int16_t msk_x, int16_t msk_y, |
int16_t dst_x, int16_t dst_y, |
int16_t w, int16_t h, |
struct sna_composite_op *tmp); |
#if 0 |
bool (*check_composite_spans)(struct sna *sna, uint8_t op, |
PicturePtr dst, PicturePtr src, |
int16_t w, int16_t h, unsigned flags); |
bool (*composite_spans)(struct sna *sna, uint8_t op, |
PicturePtr dst, PicturePtr src, |
int16_t src_x, int16_t src_y, |
int16_t dst_x, int16_t dst_y, |
int16_t w, int16_t h, |
unsigned flags, |
struct sna_composite_spans_op *tmp); |
#define COMPOSITE_SPANS_RECTILINEAR 0x1 |
#define COMPOSITE_SPANS_INPLACE_HINT 0x2 |
bool (*video)(struct sna *sna, |
struct sna_video *video, |
struct sna_video_frame *frame, |
RegionPtr dstRegion, |
short src_w, short src_h, |
short drw_w, short drw_h, |
short dx, short dy, |
PixmapPtr pixmap); |
bool (*fill_boxes)(struct sna *sna, |
CARD8 op, |
PictFormat format, |
const xRenderColor *color, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
const BoxRec *box, int n); |
bool (*fill)(struct sna *sna, uint8_t alu, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
uint32_t color, |
struct sna_fill_op *tmp); |
bool (*fill_one)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo, |
uint32_t color, |
int16_t x1, int16_t y1, int16_t x2, int16_t y2, |
uint8_t alu); |
bool (*clear)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo); |
bool (*copy_boxes)(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int n, unsigned flags); |
#define COPY_LAST 0x1 |
#define COPY_SYNC 0x2 |
#endif |
bool (*blit_tex)(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp); |
bool (*copy)(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
struct sna_copy_op *op); |
void (*flush)(struct sna *sna); |
void (*reset)(struct sna *sna); |
void (*fini)(struct sna *sna); |
#if 0 |
struct sna_alpha_cache { |
struct kgem_bo *cache_bo; |
struct kgem_bo *bo[256+7]; |
} alpha_cache; |
struct sna_solid_cache { |
struct kgem_bo *cache_bo; |
struct kgem_bo *bo[1024]; |
uint32_t color[1025]; |
int last; |
int size; |
int dirty; |
} solid_cache; |
struct { |
struct sna_gradient_cache { |
struct kgem_bo *bo; |
int nstops; |
PictGradientStop *stops; |
} cache[GRADIENT_CACHE_SIZE]; |
int size; |
} gradient_cache; |
struct sna_glyph_cache{ |
PicturePtr picture; |
struct sna_glyph **glyphs; |
uint16_t count; |
uint16_t evict; |
} glyph[2]; |
pixman_image_t *white_image; |
PicturePtr white_picture; |
#if HAS_PIXMAN_GLYPHS |
pixman_glyph_cache_t *glyph_cache; |
#endif |
#endif |
uint16_t vb_id; |
uint16_t vertex_offset; |
uint16_t vertex_start; |
uint16_t vertex_index; |
uint16_t vertex_used; |
uint16_t vertex_size; |
uint16_t vertex_reloc[16]; |
int nvertex_reloc; |
struct kgem_bo *vbo; |
float *vertices; |
float vertex_data[1024]; |
}; |
struct gen2_render_state { |
uint32_t target; |
bool need_invariant; |
uint32_t logic_op_enabled; |
uint32_t ls1, ls2, vft; |
uint32_t diffuse; |
uint32_t specular; |
}; |
struct gen3_render_state { |
uint32_t current_dst; |
bool need_invariant; |
uint32_t tex_count; |
uint32_t last_drawrect_limit; |
uint32_t last_target; |
uint32_t last_blend; |
uint32_t last_constants; |
uint32_t last_sampler; |
uint32_t last_shader; |
uint32_t last_diffuse; |
uint32_t last_specular; |
uint16_t last_vertex_offset; |
uint16_t floats_per_vertex; |
uint16_t last_floats_per_vertex; |
uint32_t tex_map[4]; |
uint32_t tex_handle[2]; |
uint32_t tex_delta[2]; |
}; |
struct gen4_render_state { |
struct kgem_bo *general_bo; |
uint32_t vs; |
uint32_t sf; |
uint32_t wm; |
uint32_t cc; |
int ve_id; |
uint32_t drawrect_offset; |
uint32_t drawrect_limit; |
uint32_t last_pipelined_pointers; |
uint16_t last_primitive; |
int16_t floats_per_vertex; |
uint16_t surface_table; |
bool needs_invariant; |
bool needs_urb; |
}; |
struct gen5_render_state { |
struct kgem_bo *general_bo; |
uint32_t vs; |
uint32_t sf[2]; |
uint32_t wm; |
uint32_t cc; |
int ve_id; |
uint32_t drawrect_offset; |
uint32_t drawrect_limit; |
uint16_t last_primitive; |
int16_t floats_per_vertex; |
uint16_t surface_table; |
uint16_t last_pipelined_pointers; |
bool needs_invariant; |
}; |
enum { |
GEN6_WM_KERNEL_NOMASK = 0, |
GEN6_WM_KERNEL_NOMASK_P, |
GEN6_WM_KERNEL_MASK, |
GEN6_WM_KERNEL_MASK_P, |
GEN6_WM_KERNEL_MASKCA, |
GEN6_WM_KERNEL_MASKCA_P, |
GEN6_WM_KERNEL_MASKSA, |
GEN6_WM_KERNEL_MASKSA_P, |
GEN6_WM_KERNEL_OPACITY, |
GEN6_WM_KERNEL_OPACITY_P, |
GEN6_WM_KERNEL_VIDEO_PLANAR, |
GEN6_WM_KERNEL_VIDEO_PACKED, |
GEN6_KERNEL_COUNT |
}; |
struct gen6_render_state { |
const struct gt_info *info; |
struct kgem_bo *general_bo; |
uint32_t vs_state; |
uint32_t sf_state; |
uint32_t sf_mask_state; |
uint32_t wm_state; |
uint32_t wm_kernel[GEN6_KERNEL_COUNT][3]; |
uint32_t cc_blend; |
uint32_t drawrect_offset; |
uint32_t drawrect_limit; |
uint32_t blend; |
uint32_t samplers; |
uint32_t kernel; |
uint16_t num_sf_outputs; |
uint16_t ve_id; |
uint16_t last_primitive; |
int16_t floats_per_vertex; |
uint16_t surface_table; |
bool needs_invariant; |
bool first_state_packet; |
}; |
enum { |
GEN7_WM_KERNEL_NOMASK = 0, |
GEN7_WM_KERNEL_NOMASK_P, |
GEN7_WM_KERNEL_MASK, |
GEN7_WM_KERNEL_MASK_P, |
GEN7_WM_KERNEL_MASKCA, |
GEN7_WM_KERNEL_MASKCA_P, |
GEN7_WM_KERNEL_MASKSA, |
GEN7_WM_KERNEL_MASKSA_P, |
GEN7_WM_KERNEL_OPACITY, |
GEN7_WM_KERNEL_OPACITY_P, |
GEN7_WM_KERNEL_VIDEO_PLANAR, |
GEN7_WM_KERNEL_VIDEO_PACKED, |
GEN7_WM_KERNEL_COUNT |
}; |
struct gen7_render_state { |
const struct gt_info *info; |
struct kgem_bo *general_bo; |
uint32_t vs_state; |
uint32_t sf_state; |
uint32_t sf_mask_state; |
uint32_t wm_state; |
uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3]; |
uint32_t cc_blend; |
uint32_t drawrect_offset; |
uint32_t drawrect_limit; |
uint32_t blend; |
uint32_t samplers; |
uint32_t kernel; |
uint16_t num_sf_outputs; |
uint16_t ve_id; |
uint16_t last_primitive; |
int16_t floats_per_vertex; |
uint16_t surface_table; |
bool needs_invariant; |
bool emit_flush; |
}; |
struct sna_static_stream { |
uint32_t size, used; |
uint8_t *data; |
}; |
int sna_static_stream_init(struct sna_static_stream *stream); |
uint32_t sna_static_stream_add(struct sna_static_stream *stream, |
const void *data, uint32_t len, uint32_t align); |
void *sna_static_stream_map(struct sna_static_stream *stream, |
uint32_t len, uint32_t align); |
uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, |
void *ptr); |
unsigned sna_static_stream_compile_sf(struct sna *sna, |
struct sna_static_stream *stream, |
bool (*compile)(struct brw_compile *)); |
unsigned sna_static_stream_compile_wm(struct sna *sna, |
struct sna_static_stream *stream, |
bool (*compile)(struct brw_compile *, int), |
int width); |
struct kgem_bo *sna_static_stream_fini(struct sna *sna, |
struct sna_static_stream *stream); |
struct kgem_bo * |
sna_render_get_solid(struct sna *sna, |
uint32_t color); |
void |
sna_render_flush_solid(struct sna *sna); |
uint32_t sna_rgba_for_color(uint32_t color, int depth); |
uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); |
bool sna_get_rgba_from_pixel(uint32_t pixel, |
uint16_t *red, |
uint16_t *green, |
uint16_t *blue, |
uint16_t *alpha, |
uint32_t format); |
bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); |
void no_render_init(struct sna *sna); |
bool gen2_render_init(struct sna *sna); |
bool gen3_render_init(struct sna *sna); |
bool gen4_render_init(struct sna *sna); |
bool gen5_render_init(struct sna *sna); |
bool gen6_render_init(struct sna *sna); |
bool gen7_render_init(struct sna *sna); |
#if 0 |
bool sna_tiling_composite(uint32_t op, |
PicturePtr src, |
PicturePtr mask, |
PicturePtr dst, |
int16_t src_x, int16_t src_y, |
int16_t mask_x, int16_t mask_y, |
int16_t dst_x, int16_t dst_y, |
int16_t width, int16_t height, |
struct sna_composite_op *tmp); |
bool sna_tiling_fill_boxes(struct sna *sna, |
CARD8 op, |
PictFormat format, |
const xRenderColor *color, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
const BoxRec *box, int n); |
bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int n); |
bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, |
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
int bpp, const BoxRec *box, int nbox); |
bool sna_blt_composite(struct sna *sna, |
uint32_t op, |
PicturePtr src, |
PicturePtr dst, |
int16_t src_x, int16_t src_y, |
int16_t dst_x, int16_t dst_y, |
int16_t width, int16_t height, |
struct sna_composite_op *tmp, |
bool fallback); |
bool sna_blt_composite__convert(struct sna *sna, |
int x, int y, |
int width, int height, |
struct sna_composite_op *tmp); |
bool sna_blt_fill(struct sna *sna, uint8_t alu, |
struct kgem_bo *bo, |
int bpp, |
uint32_t pixel, |
struct sna_fill_op *fill); |
bool sna_blt_copy(struct sna *sna, uint8_t alu, |
struct kgem_bo *src, |
struct kgem_bo *dst, |
int bpp, |
struct sna_copy_op *copy); |
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, |
struct kgem_bo *bo, |
int bpp, |
uint32_t pixel, |
const BoxRec *box, int n); |
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, |
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
int bpp, |
const BoxRec *box, int n); |
bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int nbox); |
bool _sna_get_pixel_from_rgba(uint32_t *pixel, |
uint16_t red, |
uint16_t green, |
uint16_t blue, |
uint16_t alpha, |
uint32_t format); |
static inline bool |
sna_get_pixel_from_rgba(uint32_t * pixel, |
uint16_t red, |
uint16_t green, |
uint16_t blue, |
uint16_t alpha, |
uint32_t format) |
{ |
switch (format) { |
case PICT_x8r8g8b8: |
alpha = 0xffff; |
/* fall through to re-use a8r8g8b8 expansion */ |
case PICT_a8r8g8b8: |
*pixel = ((alpha >> 8 << 24) | |
(red >> 8 << 16) | |
(green & 0xff00) | |
(blue >> 8)); |
return TRUE; |
case PICT_a8: |
*pixel = alpha >> 8; |
return TRUE; |
} |
return _sna_get_pixel_from_rgba(pixel, red, green, blue, alpha, format); |
} |
struct kgem_bo * |
__sna_render_pixmap_bo(struct sna *sna, |
PixmapPtr pixmap, |
const BoxRec *box, |
bool blt); |
int |
sna_render_pixmap_bo(struct sna *sna, |
struct sna_composite_channel *channel, |
PixmapPtr pixmap, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y); |
bool |
sna_render_pixmap_partial(struct sna *sna, |
PixmapPtr pixmap, |
struct kgem_bo *bo, |
struct sna_composite_channel *channel, |
int16_t x, int16_t y, |
int16_t w, int16_t h); |
int |
sna_render_picture_extract(struct sna *sna, |
PicturePtr picture, |
struct sna_composite_channel *channel, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y); |
int |
sna_render_picture_approximate_gradient(struct sna *sna, |
PicturePtr picture, |
struct sna_composite_channel *channel, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y); |
int |
sna_render_picture_fixup(struct sna *sna, |
PicturePtr picture, |
struct sna_composite_channel *channel, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y); |
int |
sna_render_picture_convert(struct sna *sna, |
PicturePtr picture, |
struct sna_composite_channel *channel, |
PixmapPtr pixmap, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y, |
bool fixup_alpha); |
inline static void sna_render_composite_redirect_init(struct sna_composite_op *op) |
{ |
struct sna_composite_redirect *t = &op->redirect; |
t->real_bo = NULL; |
t->damage = NULL; |
} |
bool |
sna_render_composite_redirect(struct sna *sna, |
struct sna_composite_op *op, |
int x, int y, int width, int height); |
void |
sna_render_composite_redirect_done(struct sna *sna, |
const struct sna_composite_op *op); |
bool |
sna_composite_mask_is_opaque(PicturePtr mask); |
#endif |
void sna_vertex_init(struct sna *sna); |
/* Acquire the vertex-buffer lock. This build is single-threaded, so the
 * lock is a no-op; a threaded build would take r->lock here:
 *   pthread_mutex_lock(&r->lock); */
static inline void sna_vertex_lock(struct sna_render *r)
{
	(void)r;
}
static inline void sna_vertex_acquire__locked(struct sna_render *r) |
{ |
r->active++; |
} |
/* Release the vertex-buffer lock. No-op in this single-threaded build;
 * a threaded build would drop r->lock here:
 *   pthread_mutex_unlock(&r->lock); */
static inline void sna_vertex_unlock(struct sna_render *r)
{
	(void)r;
}
static inline void sna_vertex_release__locked(struct sna_render *r) |
{ |
assert(r->active > 0); |
--r->active; |
// if (--r->active == 0) |
// pthread_cond_signal(&r->wait); |
} |
static inline bool sna_vertex_wait__locked(struct sna_render *r) |
{ |
bool was_active = r->active; |
// while (r->active) |
// pthread_cond_wait(&r->wait, &r->lock); |
return was_active; |
} |
#endif /* SNA_RENDER_H */ |
#ifndef SNA_RENDER_H |
#define SNA_RENDER_H |
#include "compiler.h" |
#include <stdbool.h> |
#include <stdint.h> |
#define GRADIENT_CACHE_SIZE 16 |
#define GXinvalid 0xff |
#define HW_BIT_BLIT (1<<0) /* BGRX blitter */ |
#define HW_TEX_BLIT (1<<1) /* stretch blit */ |
#define HW_VID_BLIT (1<<2) /* planar and packed video */ |
struct sna; |
struct sna_glyph; |
struct sna_video; |
struct sna_video_frame; |
struct brw_compile; |
/* One composite rectangle: per-channel (src/mask/dst) origins plus the
 * common width/height of the region being composited. */
struct sna_composite_rectangles {
	struct sna_coordinate {
		int16_t x, y;
	} src, mask, dst;
	int16_t width, height;
};
/* A fully prepared composite operation. A backend fills in the blt/done
 * function pointers and its per-generation state in 'u'; rectangles are
 * then streamed through blt() and the batch finished with done().
 * (Inferred from the vtable shape — confirm against the gen* backends.) */
struct sna_composite_op {
	fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op,
			     const struct sna_composite_rectangles *r);
	void (*done)(struct sna *sna, const struct sna_composite_op *op);
	struct sna_damage **damage;
	uint32_t op;
	/* Destination surface. */
	struct {
		PixmapPtr pixmap;
		CARD32 format;
		struct kgem_bo *bo;
		int16_t x, y;
		uint16_t width, height;
	} dst;
	/* Source and mask channels share this layout. */
	struct sna_composite_channel {
		struct kgem_bo *bo;
		PictTransform *transform;
		uint16_t width;
		uint16_t height;
		uint32_t pict_format;
		uint32_t card_format;
		uint32_t filter;
		uint32_t repeat;
		uint32_t is_affine : 1;
		uint32_t is_solid : 1;
		uint32_t is_linear : 1;
		uint32_t is_opaque : 1;
		uint32_t alpha_fixup : 1;
		uint32_t rb_reversed : 1;
		int16_t offset[2];
		float scale[2];
		// pixman_transform_t embedded_transform;
		/* Per-generation channel state. */
		union {
			struct {
				float dx, dy, offset;
			} linear;
			struct {
				uint32_t pixel;
			} gen2;
			struct gen3_shader_channel {
				int type;
				uint32_t mode;
				uint32_t constants;
			} gen3;
		} u;
	} src, mask;
	uint32_t is_affine : 1;
	uint32_t has_component_alpha : 1;
	uint32_t need_magic_ca_pass : 1;
	uint32_t rb_reversed : 1;
	int16_t floats_per_vertex;
	int16_t floats_per_rect;
	fastcall void (*prim_emit)(struct sna *sna,
				   const struct sna_composite_op *op,
				   const struct sna_composite_rectangles *r);
	/* Temporary redirection onto a proxy target; see
	 * sna_render_composite_redirect()/..._done(). */
	struct sna_composite_redirect {
		struct kgem_bo *real_bo;
		struct sna_damage **real_damage, *damage;
		BoxRec box;
	} redirect;
	/* Per-generation operation state; which member is live depends on
	 * the backend that prepared this op. */
	union {
		struct sna_blt_state {
			PixmapPtr src_pixmap;
			int16_t sx, sy;
			uint32_t inplace :1;
			uint32_t overwrites:1;
			uint32_t bpp : 6;
			uint32_t cmd;
			uint32_t br13;
			uint32_t pitch[2];
			uint32_t pixel;
			struct kgem_bo *bo[2];
		} blt;
		struct {
			float constants[8];
			uint32_t num_constants;
		} gen3;
		struct {
			int wm_kernel;
			int ve_id;
		} gen4;
		struct {
			int16_t wm_kernel;
			int16_t ve_id;
		} gen5;
		struct {
			uint32_t flags;
		} gen6;
		struct {
			uint32_t flags;
		} gen7;
	} u;
	void *priv;
};
/* A prepared copy operation: blt() copies a w x h block from (sx, sy) to
 * (dx, dy); done() finalizes the operation. */
struct sna_copy_op {
	struct sna_composite_op base;
	void (*blt)(struct sna *sna, const struct sna_copy_op *op,
		    int16_t sx, int16_t sy,
		    int16_t w, int16_t h,
		    int16_t dx, int16_t dy);
	void (*done)(struct sna *sna, const struct sna_copy_op *op);
};
/* Per-device render backend: capability limits, the vtable of
 * acceleration entry points installed by genN_render_init(), and the
 * shared vertex-buffer bookkeeping. Entry points and caches inside
 * "#if 0" are disabled in this port. */
struct sna_render {
	int active;	/* in-flight vertex ops; see sna_vertex_acquire__locked() */
	int caps;
	int max_3d_size;
	int max_3d_pitch;
	unsigned prefer_gpu;	/* bitmask of the PREFER_GPU_* hints below */
#define PREFER_GPU_BLT 0x1
#define PREFER_GPU_RENDER 0x2
#define PREFER_GPU_SPANS 0x4
	bool (*composite)(struct sna *sna, uint8_t op,
			  PicturePtr dst, PicturePtr src, PicturePtr mask,
			  int16_t src_x, int16_t src_y,
			  int16_t msk_x, int16_t msk_y,
			  int16_t dst_x, int16_t dst_y,
			  int16_t w, int16_t h,
			  struct sna_composite_op *tmp);
#if 0
	bool (*check_composite_spans)(struct sna *sna, uint8_t op,
				      PicturePtr dst, PicturePtr src,
				      int16_t w, int16_t h, unsigned flags);
	bool (*composite_spans)(struct sna *sna, uint8_t op,
				PicturePtr dst, PicturePtr src,
				int16_t src_x, int16_t src_y,
				int16_t dst_x, int16_t dst_y,
				int16_t w, int16_t h,
				unsigned flags,
				struct sna_composite_spans_op *tmp);
#define COMPOSITE_SPANS_RECTILINEAR 0x1
#define COMPOSITE_SPANS_INPLACE_HINT 0x2
	bool (*video)(struct sna *sna,
		      struct sna_video *video,
		      struct sna_video_frame *frame,
		      RegionPtr dstRegion,
		      PixmapPtr pixmap);
	bool (*fill_boxes)(struct sna *sna,
			   CARD8 op,
			   PictFormat format,
			   const xRenderColor *color,
			   PixmapPtr dst, struct kgem_bo *dst_bo,
			   const BoxRec *box, int n);
	bool (*fill)(struct sna *sna, uint8_t alu,
		     PixmapPtr dst, struct kgem_bo *dst_bo,
		     uint32_t color,
		     struct sna_fill_op *tmp);
	bool (*fill_one)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo,
			 uint32_t color,
			 int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			 uint8_t alu);
	bool (*clear)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo);
	bool (*copy_boxes)(struct sna *sna, uint8_t alu,
			   PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
			   PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
			   const BoxRec *box, int n, unsigned flags);
#define COPY_LAST 0x1
#define COPY_SYNC 0x2
#endif
	bool (*blit_tex)(struct sna *sna,
			 uint8_t op, bool scale,
			 PixmapPtr src, struct kgem_bo *src_bo,
			 PixmapPtr mask,struct kgem_bo *mask_bo,
			 PixmapPtr dst, struct kgem_bo *dst_bo,
			 int32_t src_x, int32_t src_y,
			 int32_t msk_x, int32_t msk_y,
			 int32_t dst_x, int32_t dst_y,
			 int32_t width, int32_t height,
			 struct sna_composite_op *tmp);
	bool (*copy)(struct sna *sna, uint8_t alu,
		     PixmapPtr src, struct kgem_bo *src_bo,
		     PixmapPtr dst, struct kgem_bo *dst_bo,
		     struct sna_copy_op *op);
	void (*flush)(struct sna *sna);
	void (*reset)(struct sna *sna);
	void (*fini)(struct sna *sna);
#if 0
	struct sna_alpha_cache {
		struct kgem_bo *cache_bo;
		struct kgem_bo *bo[256+7];
	} alpha_cache;
	struct sna_solid_cache {
		struct kgem_bo *cache_bo;
		struct kgem_bo *bo[1024];
		uint32_t color[1025];
		int last;
		int size;
		int dirty;
	} solid_cache;
	struct {
		struct sna_gradient_cache {
			struct kgem_bo *bo;
			int nstops;
			PictGradientStop *stops;
		} cache[GRADIENT_CACHE_SIZE];
		int size;
	} gradient_cache;
	struct sna_glyph_cache{
		PicturePtr picture;
		struct sna_glyph **glyphs;
		uint16_t count;
		uint16_t evict;
	} glyph[2];
	pixman_image_t *white_image;
	PicturePtr white_picture;
#if HAS_PIXMAN_GLYPHS
	pixman_glyph_cache_t *glyph_cache;
#endif
#endif
	/* Shared vertex-buffer bookkeeping used by the gen* backends. */
	uint16_t vb_id;
	uint16_t vertex_offset;
	uint16_t vertex_start;
	uint16_t vertex_index;
	uint16_t vertex_used;
	uint16_t vertex_size;
	uint16_t vertex_reloc[16];
	int nvertex_reloc;
	struct kgem_bo *vbo;
	float *vertices;
	/* Inline vertex storage; presumably used until a vbo is allocated
	 * — TODO confirm against the vertex-buffer code. */
	float vertex_data[1024];
};
/* Cached gen2 pipeline state; the stored words appear to be the
 * last-emitted values used to skip redundant state emission — confirm
 * against the gen2 emitters. */
struct gen2_render_state {
	uint32_t target;
	bool need_invariant;
	uint32_t logic_op_enabled;
	uint32_t ls1, ls2, vft;
	uint32_t diffuse;
	uint32_t specular;
};
/* Cached gen3 pipeline state; the last_* fields hold previously emitted
 * values, presumably to elide redundant state emission — confirm against
 * the gen3 emitters. */
struct gen3_render_state {
	uint32_t current_dst;
	bool need_invariant;
	uint32_t tex_count;
	uint32_t last_drawrect_limit;
	uint32_t last_target;
	uint32_t last_blend;
	uint32_t last_constants;
	uint32_t last_sampler;
	uint32_t last_shader;
	uint32_t last_diffuse;
	uint32_t last_specular;
	uint16_t last_vertex_offset;
	uint16_t floats_per_vertex;
	uint16_t last_floats_per_vertex;
	uint32_t tex_map[4];
	uint32_t tex_handle[2];
	uint32_t tex_delta[2];
};
/* Cached gen4 pipeline state: offsets of pre-built state objects inside
 * general_bo plus last-emitted values for redundancy elision. */
struct gen4_render_state {
	struct kgem_bo *general_bo;
	uint32_t vs;
	uint32_t sf;
	uint32_t wm;
	uint32_t cc;
	int ve_id;
	uint32_t drawrect_offset;
	uint32_t drawrect_limit;
	uint32_t last_pipelined_pointers;
	uint16_t last_primitive;
	int16_t floats_per_vertex;
	uint16_t surface_table;
	bool needs_invariant;
	bool needs_urb;
};
/* Cached gen5 pipeline state; mirrors gen4_render_state but with two sf
 * entries and no URB flag. */
struct gen5_render_state {
	struct kgem_bo *general_bo;
	uint32_t vs;
	uint32_t sf[2];
	uint32_t wm;
	uint32_t cc;
	int ve_id;
	uint32_t drawrect_offset;
	uint32_t drawrect_limit;
	uint32_t last_pipelined_pointers;
	uint16_t last_primitive;
	int16_t floats_per_vertex;
	uint16_t surface_table;
	bool needs_invariant;
};
/* Indices into gen6_render_state.wm_kernel[][]; the _P suffix presumably
 * selects the projective variant of the preceding kernel — confirm
 * against the gen6 shader selection code. */
enum {
	GEN6_WM_KERNEL_NOMASK = 0,
	GEN6_WM_KERNEL_NOMASK_P,
	GEN6_WM_KERNEL_MASK,
	GEN6_WM_KERNEL_MASK_P,
	GEN6_WM_KERNEL_MASKCA,
	GEN6_WM_KERNEL_MASKCA_P,
	GEN6_WM_KERNEL_MASKSA,
	GEN6_WM_KERNEL_MASKSA_P,
	GEN6_WM_KERNEL_OPACITY,
	GEN6_WM_KERNEL_OPACITY_P,
	GEN6_WM_KERNEL_VIDEO_PLANAR,
	GEN6_WM_KERNEL_VIDEO_PACKED,
	GEN6_KERNEL_COUNT
};
/* Cached gen6 pipeline state: per-kernel offsets into general_bo plus
 * last-emitted values for redundancy elision. */
struct gen6_render_state {
	const struct gt_info *info;
	struct kgem_bo *general_bo;
	uint32_t vs_state;
	uint32_t sf_state;
	uint32_t sf_mask_state;
	uint32_t wm_state;
	uint32_t wm_kernel[GEN6_KERNEL_COUNT][3];
	uint32_t cc_blend;
	uint32_t drawrect_offset;
	uint32_t drawrect_limit;
	uint32_t blend;
	uint32_t samplers;
	uint32_t kernel;
	uint16_t num_sf_outputs;
	uint16_t ve_id;
	uint16_t last_primitive;
	int16_t floats_per_vertex;
	uint16_t surface_table;
	bool needs_invariant;
	bool first_state_packet;
};
/* Indices into gen7_render_state.wm_kernel[][]; the _P suffix presumably
 * selects the projective variant of the preceding kernel — confirm
 * against the gen7 shader selection code. */
enum {
	GEN7_WM_KERNEL_NOMASK = 0,
	GEN7_WM_KERNEL_NOMASK_P,
	GEN7_WM_KERNEL_MASK,
	GEN7_WM_KERNEL_MASK_P,
	GEN7_WM_KERNEL_MASKCA,
	GEN7_WM_KERNEL_MASKCA_P,
	GEN7_WM_KERNEL_MASKSA,
	GEN7_WM_KERNEL_MASKSA_P,
	GEN7_WM_KERNEL_OPACITY,
	GEN7_WM_KERNEL_OPACITY_P,
	GEN7_WM_KERNEL_VIDEO_PLANAR,
	GEN7_WM_KERNEL_VIDEO_PACKED,
	GEN7_WM_KERNEL_COUNT
};
/* Cached gen7 pipeline state; mirrors gen6_render_state but with an
 * emit_flush flag instead of first_state_packet. */
struct gen7_render_state {
	const struct gt_info *info;
	struct kgem_bo *general_bo;
	uint32_t vs_state;
	uint32_t sf_state;
	uint32_t sf_mask_state;
	uint32_t wm_state;
	uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3];
	uint32_t cc_blend;
	uint32_t drawrect_offset;
	uint32_t drawrect_limit;
	uint32_t blend;
	uint32_t samplers;
	uint32_t kernel;
	uint16_t num_sf_outputs;
	uint16_t ve_id;
	uint16_t last_primitive;
	int16_t floats_per_vertex;
	uint16_t surface_table;
	bool needs_invariant;
	bool emit_flush;
};
/* Growable byte buffer used to assemble static GPU state and kernels
 * before upload (see sna_static_stream_add/map/fini below). */
struct sna_static_stream {
	uint32_t size, used;
	uint8_t *data;
};
int sna_static_stream_init(struct sna_static_stream *stream); |
uint32_t sna_static_stream_add(struct sna_static_stream *stream, |
const void *data, uint32_t len, uint32_t align); |
void *sna_static_stream_map(struct sna_static_stream *stream, |
uint32_t len, uint32_t align); |
uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, |
void *ptr); |
unsigned sna_static_stream_compile_sf(struct sna *sna, |
struct sna_static_stream *stream, |
bool (*compile)(struct brw_compile *)); |
unsigned sna_static_stream_compile_wm(struct sna *sna, |
struct sna_static_stream *stream, |
bool (*compile)(struct brw_compile *, int), |
int width); |
struct kgem_bo *sna_static_stream_fini(struct sna *sna, |
struct sna_static_stream *stream); |
struct kgem_bo * |
sna_render_get_solid(struct sna *sna, |
uint32_t color); |
void |
sna_render_flush_solid(struct sna *sna); |
uint32_t sna_rgba_for_color(uint32_t color, int depth); |
uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); |
bool sna_get_rgba_from_pixel(uint32_t pixel, |
uint16_t *red, |
uint16_t *green, |
uint16_t *blue, |
uint16_t *alpha, |
uint32_t format); |
bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); |
const char *no_render_init(struct sna *sna); |
const char *gen2_render_init(struct sna *sna, const char *backend); |
const char *gen3_render_init(struct sna *sna, const char *backend); |
const char *gen4_render_init(struct sna *sna, const char *backend); |
const char *gen5_render_init(struct sna *sna, const char *backend); |
const char *gen6_render_init(struct sna *sna, const char *backend); |
const char *gen7_render_init(struct sna *sna, const char *backend); |
#if 0 |
bool sna_tiling_composite(uint32_t op, |
PicturePtr src, |
PicturePtr mask, |
PicturePtr dst, |
int16_t src_x, int16_t src_y, |
int16_t mask_x, int16_t mask_y, |
int16_t dst_x, int16_t dst_y, |
int16_t width, int16_t height, |
struct sna_composite_op *tmp); |
bool sna_tiling_fill_boxes(struct sna *sna, |
CARD8 op, |
PictFormat format, |
const xRenderColor *color, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
const BoxRec *box, int n); |
bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int n); |
bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, |
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
int bpp, const BoxRec *box, int nbox); |
bool sna_blt_composite(struct sna *sna, |
uint32_t op, |
PicturePtr src, |
PicturePtr dst, |
int16_t src_x, int16_t src_y, |
int16_t dst_x, int16_t dst_y, |
int16_t width, int16_t height, |
struct sna_composite_op *tmp, |
bool fallback); |
bool sna_blt_composite__convert(struct sna *sna, |
int x, int y, |
int width, int height, |
struct sna_composite_op *tmp); |
bool sna_blt_fill(struct sna *sna, uint8_t alu, |
struct kgem_bo *bo, |
int bpp, |
uint32_t pixel, |
struct sna_fill_op *fill); |
bool sna_blt_copy(struct sna *sna, uint8_t alu, |
struct kgem_bo *src, |
struct kgem_bo *dst, |
int bpp, |
struct sna_copy_op *copy); |
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, |
struct kgem_bo *bo, |
int bpp, |
uint32_t pixel, |
const BoxRec *box, int n); |
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, |
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
int bpp, |
const BoxRec *box, int n); |
bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, |
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, |
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, |
const BoxRec *box, int nbox); |
bool _sna_get_pixel_from_rgba(uint32_t *pixel, |
uint16_t red, |
uint16_t green, |
uint16_t blue, |
uint16_t alpha, |
uint32_t format); |
static inline bool |
sna_get_pixel_from_rgba(uint32_t * pixel, |
uint16_t red, |
uint16_t green, |
uint16_t blue, |
uint16_t alpha, |
uint32_t format) |
{ |
switch (format) { |
case PICT_x8r8g8b8: |
alpha = 0xffff; |
/* fall through to re-use a8r8g8b8 expansion */ |
case PICT_a8r8g8b8: |
*pixel = ((alpha >> 8 << 24) | |
(red >> 8 << 16) | |
(green & 0xff00) | |
(blue >> 8)); |
return TRUE; |
case PICT_a8: |
*pixel = alpha >> 8; |
return TRUE; |
} |
return _sna_get_pixel_from_rgba(pixel, red, green, blue, alpha, format); |
} |
struct kgem_bo * |
__sna_render_pixmap_bo(struct sna *sna, |
PixmapPtr pixmap, |
const BoxRec *box, |
bool blt); |
int |
sna_render_pixmap_bo(struct sna *sna, |
struct sna_composite_channel *channel, |
PixmapPtr pixmap, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y); |
bool |
sna_render_pixmap_partial(struct sna *sna, |
PixmapPtr pixmap, |
struct kgem_bo *bo, |
struct sna_composite_channel *channel, |
int16_t x, int16_t y, |
int16_t w, int16_t h); |
int |
sna_render_picture_extract(struct sna *sna, |
PicturePtr picture, |
struct sna_composite_channel *channel, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y); |
int |
sna_render_picture_approximate_gradient(struct sna *sna, |
PicturePtr picture, |
struct sna_composite_channel *channel, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y); |
int |
sna_render_picture_fixup(struct sna *sna, |
PicturePtr picture, |
struct sna_composite_channel *channel, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y); |
int |
sna_render_picture_convert(struct sna *sna, |
PicturePtr picture, |
struct sna_composite_channel *channel, |
PixmapPtr pixmap, |
int16_t x, int16_t y, |
int16_t w, int16_t h, |
int16_t dst_x, int16_t dst_y, |
bool fixup_alpha); |
inline static void sna_render_composite_redirect_init(struct sna_composite_op *op) |
{ |
struct sna_composite_redirect *t = &op->redirect; |
t->real_bo = NULL; |
t->damage = NULL; |
} |
bool |
sna_render_composite_redirect(struct sna *sna, |
struct sna_composite_op *op, |
int x, int y, int width, int height, |
bool partial); |
void |
sna_render_composite_redirect_done(struct sna *sna, |
const struct sna_composite_op *op); |
bool |
sna_composite_mask_is_opaque(PicturePtr mask); |
#endif |
void sna_vertex_init(struct sna *sna); |
/* Acquire the render vertex lock. Locking is compiled out in this
 * single-threaded port; the pthread call is kept for reference. */
static inline void sna_vertex_lock(struct sna_render *r)
{
	// pthread_mutex_lock(&r->lock);
}
static inline void sna_vertex_acquire__locked(struct sna_render *r) |
{ |
r->active++; |
} |
/* Release the render vertex lock. Locking is compiled out in this
 * single-threaded port; the pthread call is kept for reference. */
static inline void sna_vertex_unlock(struct sna_render *r)
{
	// pthread_mutex_unlock(&r->lock);
}
/* Drop one in-flight vertex operation; must not underflow.
 * The condvar wakeup is compiled out in this single-threaded port. */
static inline void sna_vertex_release__locked(struct sna_render *r)
{
	assert(r->active > 0);
	--r->active;
	// if (--r->active == 0)
	// pthread_cond_signal(&r->wait);
}
static inline bool sna_vertex_wait__locked(struct sna_render *r) |
{ |
bool was_active = r->active; |
// while (r->active) |
// pthread_cond_wait(&r->wait, &r->lock); |
return was_active; |
} |
/* Strip the alpha channel from a PICT format code, keeping the bpp,
 * type and RGB widths unchanged. */
#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \
				      PICT_FORMAT_TYPE(format), \
				      0, \
				      PICT_FORMAT_R(format), \
				      PICT_FORMAT_G(format), \
				      PICT_FORMAT_B(format))
static bool |
gen3_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp); |
static bool |
gen4_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp); |
static bool |
gen5_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp); |
static bool |
gen6_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp); |
static bool |
gen7_blit_tex(struct sna *sna, |
uint8_t op, bool scale, |
PixmapPtr src, struct kgem_bo *src_bo, |
PixmapPtr mask,struct kgem_bo *mask_bo, |
PixmapPtr dst, struct kgem_bo *dst_bo, |
int32_t src_x, int32_t src_y, |
int32_t msk_x, int32_t msk_y, |
int32_t dst_x, int32_t dst_y, |
int32_t width, int32_t height, |
struct sna_composite_op *tmp); |
#endif /* SNA_RENDER_H */ |
/drivers/video/Intel-2D/sna_render_inline.h |
---|
17,7 → 17,7 |
dst->drawable.height > sna->render.max_3d_size); |
} |
static inline float pack_2s(int16_t x, int16_t y) |
static force_inline float pack_2s(int16_t x, int16_t y) |
{ |
union { |
struct sna_coordinate p; |
28,21 → 28,21 |
return u.f; |
} |
static inline int vertex_space(struct sna *sna) |
static force_inline int vertex_space(struct sna *sna) |
{ |
return sna->render.vertex_size - sna->render.vertex_used; |
} |
static inline void vertex_emit(struct sna *sna, float v) |
static force_inline void vertex_emit(struct sna *sna, float v) |
{ |
assert(sna->render.vertex_used < sna->render.vertex_size); |
sna->render.vertices[sna->render.vertex_used++] = v; |
} |
static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y) |
static force_inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y) |
{ |
vertex_emit(sna, pack_2s(x, y)); |
} |
static inline int batch_space(struct sna *sna) |
static force_inline int batch_space(struct sna *sna) |
{ |
assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem)); |
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED <= sna->kgem.surface); |
49,7 → 49,7 |
return sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED; |
} |
static inline void batch_emit(struct sna *sna, uint32_t dword) |
static force_inline void batch_emit(struct sna *sna, uint32_t dword) |
{ |
assert(sna->kgem.mode != KGEM_NONE); |
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface); |
56,7 → 56,7 |
sna->kgem.batch[sna->kgem.nbatch++] = dword; |
} |
static inline void batch_emit_float(struct sna *sna, float f) |
static force_inline void batch_emit_float(struct sna *sna, float f) |
{ |
union { |
uint32_t dw; |
67,5 → 67,4 |
} |
#endif /* SNA_RENDER_INLINE_H */ |
/drivers/video/Intel-2D/sna_transform.c |
---|
0,0 → 1,147 |
/* |
* Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. All Rights Reserved. |
* Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org> |
* Copyright © 2010 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
* SOFTWARE. |
* |
* Authors: |
 * Jesse Barnes <jbarnes@virtuousgeek.org>
* Chris Wilson <chris@chris-wilson.co.uk> |
*/ |
#ifdef HAVE_CONFIG_H |
#include "config.h" |
#endif |
#include "sna.h" |
#define pixman_fixed_e ((pixman_fixed_t) 1) |
#define pixman_fixed_1 (pixman_int_to_fixed(1)) |
#define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e) |
#define pixman_fixed_to_int(f) ((int) ((f) >> 16)) |
#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((i) << 16)) |
#define pixman_fixed_fraction(f) ((f) & pixman_fixed_1_minus_e) |
#define IntToxFixed(i) pixman_int_to_fixed(i) |
/** |
* Returns whether the provided transform is affine. |
* |
* transform may be null. |
*/ |
bool sna_transform_is_affine(const PictTransform *t) |
{ |
if (t == NULL) |
return true; |
return t->matrix[2][0] == 0 && t->matrix[2][1] == 0; |
} |
bool |
sna_transform_is_translation(const PictTransform *t, |
pixman_fixed_t *tx, |
pixman_fixed_t *ty) |
{ |
if (t == NULL) { |
*tx = *ty = 0; |
return true; |
} |
if (t->matrix[0][0] != IntToxFixed(1) || |
t->matrix[0][1] != 0 || |
t->matrix[1][0] != 0 || |
t->matrix[1][1] != IntToxFixed(1) || |
t->matrix[2][0] != 0 || |
t->matrix[2][1] != 0 || |
t->matrix[2][2] != IntToxFixed(1)) |
return false; |
*tx = t->matrix[0][2]; |
*ty = t->matrix[1][2]; |
return true; |
} |
bool |
sna_transform_is_integer_translation(const PictTransform *t, int16_t *tx, int16_t *ty) |
{ |
if (t == NULL) { |
*tx = *ty = 0; |
return true; |
} |
if (t->matrix[0][0] != IntToxFixed(1) || |
t->matrix[0][1] != 0 || |
t->matrix[1][0] != 0 || |
t->matrix[1][1] != IntToxFixed(1) || |
t->matrix[2][0] != 0 || |
t->matrix[2][1] != 0 || |
t->matrix[2][2] != IntToxFixed(1)) |
return false; |
if (pixman_fixed_fraction(t->matrix[0][2]) || |
pixman_fixed_fraction(t->matrix[1][2])) |
return false; |
*tx = pixman_fixed_to_int(t->matrix[0][2]); |
*ty = pixman_fixed_to_int(t->matrix[1][2]); |
return true; |
} |
/** |
* Returns the floating-point coordinates transformed by the given transform. |
*/ |
void |
sna_get_transformed_coordinates(int x, int y, |
const PictTransform *transform, |
float *x_out, float *y_out) |
{ |
if (transform == NULL) { |
*x_out = x; |
*y_out = y; |
} else |
_sna_get_transformed_coordinates(x, y, transform, x_out, y_out); |
} |
/** |
* Returns the un-normalized floating-point coordinates transformed by the given transform. |
*/ |
void |
sna_get_transformed_coordinates_3d(int x, int y, |
const PictTransform *transform, |
float *x_out, float *y_out, float *w_out) |
{ |
if (transform == NULL) { |
*x_out = x; |
*y_out = y; |
*w_out = 1; |
} else { |
int64_t result[3]; |
if (_sna_transform_point(transform, x, y, result)) { |
*x_out = result[0] / 65536.; |
*y_out = result[1] / 65536.; |
*w_out = result[2] / 65536.; |
} else { |
*x_out = *y_out = 0; |
*w_out = 1.; |
} |
} |
} |